#! /usr/bin/env perl

# Copyright (c) 2003 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Steve Reinhardt

# Diff two streams.
#
# Unlike regular diff, this script does not read in the entire input
# before doing a diff, so it can be used on lengthy outputs piped from
# other programs (e.g., M5 traces).
# The best way to do this is to
# take advantage of the power of Perl's open function, which will
# automatically fork a subprocess if the last character in the
# "filename" is a pipe (|).  Thus to compare the instruction traces
# from two versions of m5 (m5a and m5b), you can do this:
#
# rundiff 'm5a --traceflags=InstExec |' 'm5b --traceflags=InstExec |'
#

use strict;
use FileHandle;
use Getopt::Std;

#
# Options:
#  -c <n> : print n lines of context before & after changes
#           (default 3; 0 is allowed and suppresses context)
#  -l <n> : use n lines of lookahead (default 200)
#  -x     : use "complex" diff from Algorithm::Diff (see below)
#
our ($opt_c, $opt_l, $opt_x);
getopts('c:l:x');

#
# For the highest-quality (minimal) diffs, we can use the
# Algorithm::Diff package.  By default, a built-in, simple, and
# generally quite adequate algorithm will be used.  If you have
# Algorithm::Diff installed on your system, and don't mind having the
# script go slower (like 3-4x slower, based on informal observation),
# then specify '-x' on the command line to use it.
my $use_complexdiff = defined($opt_x);

if ($use_complexdiff) {
    # Don't use 'use', as that's a compile-time option and will fail
    # on systems that don't have Algorithm::Diff installed even if
    # $use_complexdiff is false.  'require' is evaluated at runtime,
    # so it's OK.
    require Algorithm::Diff;
    # Explicit method call rather than indirect-object 'import Foo ...'.
    Algorithm::Diff->import(qw(traverse_sequences));
}

my $lookahead_lines = $opt_l || 200;

# in theory you could have different amounts of context before and
# after a diff, but until someone needs that there's only one arg to
# set both.
#
# Test definedness rather than truth so that an explicit '-c 0' is
# honoured instead of silently falling back to the default of 3.
my $precontext_lines = defined($opt_c) ? $opt_c : 3;

# A negative (or non-numeric) context size would make the main loop's
# "trim precontext while it is larger than $precontext_lines" loop
# never terminate, so reject it up front.
die "-c requires a non-negative integer, got '$precontext_lines'."
    if ($precontext_lines !~ /\A\d+\z/);

my $postcontext_lines = $precontext_lines;

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

die "Need two args." if (!(defined($file1) && defined($file2)));

# The two-argument form of open is deliberate: it is what lets an
# argument ending in '|' be run as a command whose output is read
# (see the usage example at the top of the file).
my ($fh1, $fh2);
open($fh1, $file1) or die "Can't open $file1: $!";
open($fh2, $file2) or die "Can't open $file2: $!";

# print files to output so we know which is which
print "-$file1\n";
print "+$file2\n";

# buffer of matching lines for pre-diff context
my @precontext = ();
# number of post-diff matching lines remaining to print
my $postcontext = 0;

# lookahead buffers for $file1 and $file2 respectively
my @lines1 = ();
my @lines2 = ();

# Next line number available to print from each file.  Generally this
# corresponds to the oldest line in @precontext, or the oldest line in
# @lines1 and @lines2 if @precontext is empty.
my $lineno1 = 1;
my $lineno2 = 1;

# Fill a lookahead buffer to $lookahead_lines lines (or until EOF).
#
#   $fh    - handle to read from
#   $array - reference to the lookahead buffer to append to
sub fill
{
    my ($fh, $array) = @_;

    while (@$array < $lookahead_lines) {
        my $line = <$fh>;
        last if (!defined($line));
        push @$array, $line;
    }
}

# Print and delete n lines from front of given array with given prefix.
# Stops early if the array runs out before n lines have been printed.
#
#   $array  - reference to the buffer to consume from
#   $n      - maximum number of lines to print
#   $prefix - string printed in front of every line
sub printlines
{
    my ($array, $n, $prefix) = @_;

    while ($n--) {
        my $line = shift @$array;
        last if (!defined($line));
        print $prefix, $line;
    }
}

# Print a difference region where n1 lines of file1 were replaced by
# n2 lines of file2 (where either n1 or n2 could be zero).
#
# Consumes the first $n1 lines of @lines1 and the first $n2 lines of
# @lines2, empties @precontext, advances both line counters, and arms
# $postcontext so the main loop prints the trailing context.
sub printdiff
{
    my ($n1, $n2) = @_;

    # If the precontext buffer is full or we're at the beginning of a
    # file, then this is a new diff region, so we should print a
    # header indicating the current line numbers.  If we're past the
    # beginning and the precontext buffer isn't full, then whatever
    # we're about to print is contiguous with the end of the last
    # region we printed, so we just concatenate them on the output.
    #
    # Both counters are initialised to 1, so "still at the beginning"
    # means both are 1 (comparing against 0 could never be true and
    # dropped the header for diffs near the start of the input).
    if (@precontext == $precontext_lines
        || ($lineno1 == 1 && $lineno2 == 1)) {
        print "@@ -$lineno1 +$lineno2 @@\n";
    }

    # Print and clear the precontext buffer.  Every buffered line
    # still carries its newline, so joining on ' ' gives each line
    # after the first its leading-space prefix.
    if (@precontext) {
        print ' ', join(' ', @precontext);
        $lineno1 += scalar(@precontext);
        $lineno2 += scalar(@precontext);
        @precontext = ();
    }

    # Print the differing lines.
    printlines(\@lines1, $n1, '-');
    printlines(\@lines2, $n2, '+');
    $lineno1 += $n1;
    $lineno2 += $n2;

    # Set $postcontext to print the next $postcontext_lines matching lines.
    $postcontext = $postcontext_lines;

    # Normally we flush after the postcontext lines are printed, but if
    # the user has decreed that there aren't any we need to flush now
    if ($postcontext == 0) {
        STDOUT->flush();
    }
}


########################
#
# Complex diff algorithm
#
########################

{
    # State shared between complex_diff and the callbacks it hands to
    # traverse_sequences: whether a match has been seen yet, and how
    # many lines of each buffer were discarded before that first match.
    my $match_found;
    my $discard_lines1;
    my $discard_lines2;

    sub match    { $match_found = 1; }
    sub discard1 { $discard_lines1++ unless $match_found; }
    sub discard2 { $discard_lines2++ unless $match_found; }

    # Resync the streams using Algorithm::Diff, print the resulting
    # diff region, and die with "Lost sync!" if the two lookahead
    # buffers have no line in common.
    sub complex_diff
    {
        $match_found = 0;
        $discard_lines1 = 0;
        $discard_lines2 = 0;

        # See Diff.pm.  Note that even though this call generates a
        # complete diff of both lookahead buffers, all we use it for
        # is to figure out how many lines to discard off the front of
        # each buffer to resync the streams.
        traverse_sequences( \@lines1, \@lines2,
                            { MATCH => \&match,
                              DISCARD_A => \&discard1,
                              DISCARD_B => \&discard2 });

        if (!$match_found) {
            printdiff(scalar(@lines1), scalar(@lines2));
            die "Lost sync!";
        }

        # Since we shouldn't get here unless the first lines of the
        # buffers are different, then we must discard some lines off
        # at least one of the buffers.
        die "complex_diff called but no lines need to be discarded"
            if ($discard_lines1 == 0 && $discard_lines2 == 0);

        printdiff($discard_lines1, $discard_lines2);
    }
}

#######################
#
# Simple diff algorithm
#
#######################

# Check for a pair of matching lines; if found, generate appropriate
# diff output.
#
#   $n1, $n2 - candidate resync offsets into @lines1 and @lines2
#
# Returns 1 (after calling printdiff) if the streams resync at those
# offsets, 0 otherwise.
sub checkmatch
{
    my ($n1, $n2) = @_;

    # Check if two adjacent lines match, to reduce false resyncs
    # (particularly on unrelated blank lines).  This generates
    # larger-than-necessary diffs when a single line really should be
    # treated as common; if that bugs you, use Algorithm::Diff.
    #
    # An index past the end of a buffer reads as undef, and two undefs
    # compare equal under 'eq', so running off the end of both buffers
    # together also counts as a match.
    if ($lines1[$n1] eq $lines2[$n2] && $lines1[$n1+1] eq $lines2[$n2+1]) {
        printdiff($n1, $n2);
        return 1;
    }

    return 0;
}

# Resync the streams with the built-in search, print the resulting
# diff region, and die with "Lost sync!" if no resync point is found
# within the lookahead window.
sub simple_diff
{
    # Look for differences of $cnt lines to resync,
    # increasing $cnt from 1 to $lookahead_lines until we find
    # something.
    for (my $cnt = 1; $cnt < $lookahead_lines-1; ++$cnt) {
        # Check for n lines in one file being replaced by
        # n lines in the other.
        return if checkmatch($cnt, $cnt);
        # Find differences where n lines in one file were
        # replaced by m lines in the other.  We let m = $cnt
        # and iterate for n = 0 to $cnt-1.
        for (my $n = 0; $n < $cnt; ++$n) {
            return if checkmatch($n, $cnt);
            return if checkmatch($cnt, $n);
        }
    }

    printdiff(scalar(@lines1), scalar(@lines2));
    die "Lost sync!";
}

# Set the pointer to the appropriate diff function.
#
# Note that in either case the function determines how many lines to
# discard from the front of each lookahead buffer to resync the
# streams, then prints the appropriate diff output and discards them.
# After the function returns, it should always be the case that
# $lines1[0] eq $lines2[0].
my $find_diff = $use_complexdiff ? \&complex_diff : \&simple_diff;

# The main loop.
# Compare the heads of the two lookahead buffers until both streams
# are exhausted.  Matching lines are either printed as post-context of
# the previous diff or held back as potential pre-context of the next
# one; a mismatch is handed to the selected diff routine.
while (1) {
    # keep lookahead buffers topped up
    fill($fh1, \@lines1);
    fill($fh2, \@lines2);

    # peek at first line in each buffer (undef once a stream is drained)
    my $head1 = $lines1[0];
    my $head2 = $lines2[0];

    if (!defined($head1) && !defined($head2)) {
        # reached EOF on both streams: exit
        # NOTE(review): the status is 1 whether or not any difference
        # was printed; left unchanged in case callers rely on it.
        exit(1);
    }

    # Only compare when both heads exist.  If exactly one stream has
    # hit EOF the heads cannot match, and checking definedness first
    # avoids a string comparison against undef.
    if (defined($head1) && defined($head2) && $head1 eq $head2) {
        # matching lines: delete from lookahead buffer
        shift @lines1;
        shift @lines2;
        # figure out what to do with this line
        if ($postcontext > 0) {
            # we're in the post-context of a diff: print it
            print ' ', $head1;
            $lineno1++;
            $lineno2++;
            # flush once the last post-context line has gone out
            if (--$postcontext == 0) {
                STDOUT->flush();
            }
        }
        else {
            # we're in the middle of a matching region... save this
            # line for precontext in case we run into a difference.
            push @precontext, $head1;
            # don't let precontext buffer get bigger than needed;
            # every line dropped here advances the "next printable
            # line" counters for both files.
            while (@precontext > $precontext_lines) {
                shift @precontext;
                $lineno1++;
                $lineno2++;
            }
        }
    }
    else {
        # Mismatch.  Deal with it.
        $find_diff->();
    }
}