rundiff revision 408
16145Snate@binkert.org#! /usr/bin/env perl
26145Snate@binkert.org
36145Snate@binkert.org# Copyright (c) 2003 The Regents of The University of Michigan
46145Snate@binkert.org# All rights reserved.
56145Snate@binkert.org#
66145Snate@binkert.org# Redistribution and use in source and binary forms, with or without
76145Snate@binkert.org# modification, are permitted provided that the following conditions are
86145Snate@binkert.org# met: redistributions of source code must retain the above copyright
96145Snate@binkert.org# notice, this list of conditions and the following disclaimer;
106145Snate@binkert.org# redistributions in binary form must reproduce the above copyright
116145Snate@binkert.org# notice, this list of conditions and the following disclaimer in the
126145Snate@binkert.org# documentation and/or other materials provided with the distribution;
136145Snate@binkert.org# neither the name of the copyright holders nor the names of its
146145Snate@binkert.org# contributors may be used to endorse or promote products derived from
156145Snate@binkert.org# this software without specific prior written permission.
166145Snate@binkert.org#
176145Snate@binkert.org# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
186145Snate@binkert.org# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
196145Snate@binkert.org# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
206145Snate@binkert.org# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
216145Snate@binkert.org# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
226145Snate@binkert.org# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
236145Snate@binkert.org# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
246145Snate@binkert.org# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
256145Snate@binkert.org# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
266145Snate@binkert.org# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
276145Snate@binkert.org# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
286145Snate@binkert.org
297039Snate@binkert.org# Diff two streams.
307039Snate@binkert.org#
316145Snate@binkert.org# Unlike regular diff, this script does not read in the entire input
327002Snate@binkert.org# before doing a diff, so it can be used on lengthy outputs piped from
337002Snate@binkert.org# other programs (e.g., M5 traces).  The best way to do this is to
347453Snate@binkert.org# take advantage of the power of Perl's open function, which will
359466Snilay@cs.wisc.edu# automatically fork a subprocess if the last character in the
366145Snate@binkert.org# "filename" is a pipe (|).  Thus to compare the instruction traces
376145Snate@binkert.org# from two versions of m5 (m5a and m5b), you can do this:
387453Snate@binkert.org#
396145Snate@binkert.org# rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |'
407453Snate@binkert.org#
417039Snate@binkert.org
427039Snate@binkert.orguse strict;
439508Snilay@cs.wisc.edu
449466Snilay@cs.wisc.eduuse Getopt::Std;
459466Snilay@cs.wisc.edu
469508Snilay@cs.wisc.edu#
477453Snate@binkert.org# Options:
487453Snate@binkert.org#  -c <n> : print n lines of context before & after changes
497453Snate@binkert.org#  -l <n> : use n lines of lookahead
507453Snate@binkert.org#  -x     : use "complex" diff from Algorithm::Diff (see below)
517453Snate@binkert.org#
529508Snilay@cs.wisc.eduour ($opt_c, $opt_l, $opt_x);
537453Snate@binkert.orggetopts('c:l:x');
546145Snate@binkert.org
557039Snate@binkert.org#
566145Snate@binkert.org# For the highest-quality (minimal) diffs, we can use the
577039Snate@binkert.org# Algorithm::Diff package.  By default, a built-in, simple, and
587039Snate@binkert.org# generally quite adequate algorithm will be used.  If you have
597973Snilay@cs.wisc.edu# Algorithm::Diff installed on your system, and don't mind having the
607973Snilay@cs.wisc.edu# script go slower (like 3-4x slower, based on informal observation),
616145Snate@binkert.org# then specify '-x' on the command line to use it.
629302Snilay@cs.wisc.edumy $use_complexdiff = defined($opt_x);
639302Snilay@cs.wisc.edu
649302Snilay@cs.wisc.eduif ($use_complexdiff) {
659302Snilay@cs.wisc.edu    # Don't use 'use', as that's a compile-time option and will fail
669302Snilay@cs.wisc.edu    # on systems that don't have Algorithm::Diff installed even if
679302Snilay@cs.wisc.edu    # $use_complexdiff is false.  'require' is evaluated at runtime,
689302Snilay@cs.wisc.edu    # so it's OK.
699302Snilay@cs.wisc.edu    require Algorithm::Diff;
709302Snilay@cs.wisc.edu    import Algorithm::Diff qw(traverse_sequences);
719302Snilay@cs.wisc.edu};
729302Snilay@cs.wisc.edu
739302Snilay@cs.wisc.edumy $lookahead_lines = $opt_l || 200;
7410074Snilay@cs.wisc.edu
7510074Snilay@cs.wisc.edu# in theory you could have different amounts of context before and
7610074Snilay@cs.wisc.edu# after a diff, but until someone needs that there's only one arg to
7710074Snilay@cs.wisc.edu# set both.
7810074Snilay@cs.wisc.edumy $precontext_lines = $opt_c || 3;
7910074Snilay@cs.wisc.edumy $postcontext_lines = $precontext_lines;
8010074Snilay@cs.wisc.edu
819508Snilay@cs.wisc.edumy $file1 = $ARGV[0];
829465Snilay@cs.wisc.edumy $file2 = $ARGV[1];
839508Snilay@cs.wisc.edu
849508Snilay@cs.wisc.edudie "Need two args." if (!(defined($file1) && defined($file2)));
856145Snate@binkert.org
869508Snilay@cs.wisc.edumy ($fh1, $fh2);
879508Snilay@cs.wisc.eduopen($fh1, $file1) or die "Can't open $file1";
886145Snate@binkert.orgopen($fh2, $file2) or die "Can't open $file2";
897039Snate@binkert.org
909508Snilay@cs.wisc.edu# print files to output so we know which is which
919508Snilay@cs.wisc.eduprint "-$file1\n";
929508Snilay@cs.wisc.eduprint "+$file2\n";
936145Snate@binkert.org
946145Snate@binkert.org# buffer of matching lines for pre-diff context
957039Snate@binkert.orgmy @precontext = ();
967039Snate@binkert.org# number of post-diff matching lines remaining to print
976145Snate@binkert.orgmy $postcontext = 0;
987039Snate@binkert.org
997039Snate@binkert.org# lookahead buffers for $file1 and $file2 respectively
1007039Snate@binkert.orgmy @lines1 = ();
1016145Snate@binkert.orgmy @lines2 = ();
1026145Snate@binkert.org
1037039Snate@binkert.org# Next line number available to print from each file.  Generally this
104# corresponds to the oldest line in @precontext, or the oldest line in
105# @lines1 and @lines2 if @precontext is empty.
106my $lineno1 = 1;
107my $lineno2 = 1;
108
109# Fill a lookahead buffer to $lookahead_lines lines (or until EOF).
110sub fill
111{
112    my ($fh, $array) = @_;
113
114    while (@$array < $lookahead_lines) {
115	my $line = <$fh>;
116	last if (!defined($line));
117	push @$array, $line;
118    }
119}
120
121# Print and delete n lines from front of given array with given prefix.
122sub printlines
123{
124    my ($array, $n, $prefix) = @_;
125
126    while ($n--) {
127	my $line = shift @$array;
128	last if (!defined($line));
129	print $prefix, $line;
130    }
131}
132
133# Print a difference region where n1 lines of file1 were replaced by
134# n2 lines of file2 (where either n1 or n2 could be zero).
135sub printdiff
136{
137    my ($n1, $n2)= @_;
138
139    # If the precontext buffer is full or we're at the beginning of a
140    # file, then this is a new diff region, so we should print a
141    # header indicating the current line numbers.  If we're past the
142    # beginning and the precontext buffer isn't full, then whatever
143    # we're about to print is contiguous with the end of the last
144    # region we printed, so we just concatenate them on the output.
145    if (@precontext == $precontext_lines || ($lineno1 == 0 && $lineno2 == 0)) {
146	print "@@ -$lineno1 +$lineno2 @@\n";
147    }
148
149    # Print and clear the precontext buffer.
150    if (@precontext) {
151	print ' ', join(' ', @precontext);
152	$lineno1 += scalar(@precontext);
153	$lineno2 += scalar(@precontext);
154	@precontext = ();
155    }
156
157    # Print the differing lines.
158    printlines(\@lines1, $n1, '-');
159    printlines(\@lines2, $n2, '+');
160    $lineno1 += $n1;
161    $lineno2 += $n2;
162
163    # Set $postcontext to print the next $postcontext_lines matching lines.
164    $postcontext = $postcontext_lines;
165}
166
167
168########################
169#
170# Complex diff algorithm
171#
172########################
173
174{
175    my $match_found;
176    my $discard_lines1;
177    my $discard_lines2;
178
179    sub match { $match_found = 1; }
180    sub discard1 { $discard_lines1++ unless $match_found; }
181    sub discard2 { $discard_lines2++ unless $match_found; }
182
183    sub complex_diff
184    {
185	$match_found = 0;
186	$discard_lines1 = 0;
187	$discard_lines2 = 0;
188
189	# See Diff.pm.  Note that even though this call generates a
190	# complete diff of both lookahead buffers, all we use it for
191	# is to figure out how many lines to discard off the front of
192	# each buffer to resync the streams.
193	traverse_sequences( \@lines1, \@lines2,
194			    { MATCH => \&match,
195			      DISCARD_A => \&discard1,
196			      DISCARD_B => \&discard2 });
197
198	if (!$match_found) {
199	    printdiff(scalar(@lines1), scalar(@lines2));
200	    die "Lost sync!";
201	}
202
203	# Since we shouldn't get here unless the first lines of the
204	# buffers are different, then we must discard some lines off
205	# at least one of the buffers.
206	die if ($discard_lines1 == 0 && $discard_lines2 == 0);
207
208	printdiff($discard_lines1, $discard_lines2);
209    }
210}
211
212#######################
213#
214# Simple diff algorithm
215#
216#######################
217
218# Check for a pair of matching lines; if found, generate appropriate
219# diff output.
220sub checkmatch
221{
222    my ($n1, $n2) = @_;
223
224    # Check if two adjacent lines match, to reduce false resyncs
225    # (particularly on unrelated blank lines).  This generates
226    # larger-than-necessary diffs when a single line really should be
227    # treated as common; if that bugs you, use Algorithm::Diff.
228    if ($lines1[$n1] eq $lines2[$n2] && $lines1[$n1+1] eq $lines2[$n2+1]) {
229	printdiff($n1, $n2);
230	return 1;
231    }
232
233    return 0;
234}
235
236sub simple_diff
237{
238    # Look for differences of $cnt lines to resync,
239    # increasing $cnt from 1 to $lookahead_lines until we find
240    # something.
241    for (my $cnt = 1; $cnt < $lookahead_lines-1; ++$cnt) {
242	# Check for n lines in one file being replaced by
243	# n lines in the other.
244	return if checkmatch($cnt, $cnt);
245	# Find differences where n lines in one file were
246	# replaced by m lines in the other.  We let m = $cnt
247	# and iterate for n = 0 to $cnt-1.
248	for (my $n = 0; $n < $cnt; ++$n) {
249	    return if checkmatch($n, $cnt);
250	    return if checkmatch($cnt, $n);
251	}
252    }
253
254    printdiff(scalar(@lines1), scalar(@lines2));
255    die "Lost sync!";
256}
257
258# Set the pointer to the appropriate diff function.
259#
260# Note that in either case the function determines how many lines to
261# discard from the front of each lookahead buffer to resync the
262# streams, then prints the appropriate diff output and discards them.
263# After the function returns, it should always be the case that
264# $lines1[0] eq $lines2[0].
265my $find_diff = $use_complexdiff ? \&complex_diff : \&simple_diff;
266
267# The main loop.
268while (1) {
269    # keep lookahead buffers topped up
270    fill($fh1, \@lines1);
271    fill($fh2, \@lines2);
272
273    # peek at first line in each buffer
274    my $l1 = $lines1[0];
275    my $l2 = $lines2[0];
276
277    if (!defined($l1) && !defined($l2)) {
278	# reached EOF on both streams: exit
279	exit(1);
280    }
281
282    if ($l1 eq $l2) {
283	# matching lines: delete from lookahead buffer
284	shift @lines1;
285	shift @lines2;
286	# figure out what to do with this line
287	if ($postcontext > 0) {
288	    # we're in the post-context of a diff: print it
289	    $postcontext--;
290	    print ' ', $l1;
291	    $lineno1++;
292	    $lineno2++;
293	}
294	else {
295	    # we're in the middle of a matching region... save this
296	    # line for precontext in case we run into a difference.
297	    push @precontext, $l1;
298	    # don't let precontext buffer get bigger than needed
299	    while (@precontext > $precontext_lines) {
300		shift @precontext;
301		$lineno1++;
302		$lineno2++;
303	    }
304	}
305    }
306    else {
307	# Mismatch.  Deal with it.
308	&$find_diff();
309    }
310}
311