#! /usr/bin/env perl
# rundiff, revision 6115

# Copyright (c) 2003 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Steve Reinhardt

# Diff two streams.
#
# Unlike regular diff, this script does not read in the entire input
# before doing a diff, so it can be used on lengthy outputs piped from
# other programs (e.g., M5 traces).  The best way to do this is to
# take advantage of the power of Perl's open function, which will
# automatically fork a subprocess if the last character in the
# "filename" is a pipe (|).  Thus to compare the instruction traces
# from two versions of m5 (m5a and m5b), you can do this:
#
# rundiff 'm5a --traceflags=InstExec |' 'm5b --traceflags=InstExec |'
#

use strict;
use FileHandle;
use Getopt::Std;

#
# Options:
#  -c <n> : print n lines of context before & after changes
#  -l <n> : use n lines of lookahead
#  -x     : use "complex" diff from Algorithm::Diff (see below)
#
our ($opt_c, $opt_l, $opt_x);

# getopts() stores each recognised switch in the matching $opt_*
# variable and returns false when it meets a switch it does not know.
# Stop in that case instead of silently running with the defaults.
getopts('c:l:x')
    or die "Usage: $0 [-c <n>] [-l <n>] [-x] <file1> <file2>\n";

#
# For the highest-quality (minimal) diffs, we can use the
# Algorithm::Diff package.  By default, a built-in, simple, and
# generally quite adequate algorithm will be used.  If you have
# Algorithm::Diff installed on your system, and don't mind having the
# script go slower (like 3-4x slower, based on informal observation),
# then specify '-x' on the command line to use it.
my $use_complexdiff = defined($opt_x);

if ($use_complexdiff) {
    # Don't use 'use', as that's a compile-time option and will fail
    # on systems that don't have Algorithm::Diff installed even if
    # $use_complexdiff is false.  'require' is evaluated at runtime,
    # so it's OK.
    require Algorithm::Diff;
    # Explicit class-method call rather than the indirect-object form
    # ("import Algorithm::Diff ..."), which is ambiguous to parse and
    # is disabled by newer feature bundles.
    Algorithm::Diff->import(qw(traverse_sequences));
}

# Reject values that are not plain non-negative integers up front.  A
# negative context size would make the precontext trimming loop in the
# main loop spin forever, and a non-numeric lookahead would compare as
# zero so nothing would ever be read.
if (defined($opt_l) && $opt_l !~ /^\d+$/) {
    die "-l requires a non-negative integer (got '$opt_l')\n";
}
if (defined($opt_c) && $opt_c !~ /^\d+$/) {
    die "-c requires a non-negative integer (got '$opt_c')\n";
}

# Number of lines kept in each lookahead buffer.  A lookahead of zero
# is meaningless, so both "not given" and "0" select the default.
my $lookahead_lines = $opt_l || 200;

# in theory you could have different amounts of context before and
# after a diff, but until someone needs that there's only one arg to
# set both.
#
# Test for definedness rather than truth so that an explicit "-c 0"
# (no context lines at all) is honoured instead of becoming 3.
my $precontext_lines = defined($opt_c) ? $opt_c : 3;
my $postcontext_lines = $precontext_lines;

# The two streams to compare are the first two remaining arguments.
my ($file1, $file2) = @ARGV;

die "Need two args." if (!(defined($file1) && defined($file2)));

# NOTE: the two-argument form of open() is deliberate.  It is what
# lets an argument ending in '|' be run as a command whose output is
# read as the stream (see the usage example at the top of the file).
# The arguments are therefore interpreted by open(), not treated as
# literal file names.
my ($fh1, $fh2);
open($fh1, $file1) or die "Can't open $file1: $!";
open($fh2, $file2) or die "Can't open $file2: $!";

# print files to output so we know which is which
print "-$file1\n";
print "+$file2\n";

# Matching lines held back so they can be printed as context in front
# of the next difference.
my @precontext;
# How many matching lines still have to be printed as context after
# the most recently printed difference.
my $postcontext = 0;

# Lookahead buffers: lines already read from $file1 and $file2
# respectively but not yet consumed.
my (@lines1, @lines2);

# Next line number available to print from each file.  Generally this
# is the oldest line in @precontext, or the oldest line in @lines1 and
# @lines2 when @precontext is empty.
my ($lineno1, $lineno2) = (1, 1);

# Top up a lookahead buffer.
#
#   $fh    - handle to read lines from
#   $array - reference to the buffer to append to
#
# Reads one line at a time until the buffer holds $lookahead_lines
# entries or the handle has nothing more to give.
sub fill
{
    my ($fh, $array) = @_;

    until (@$array >= $lookahead_lines) {
        my $next = <$fh>;
        # undef means EOF: leave the buffer short
        defined($next) or last;
        push @$array, $next;
    }
}

# Remove up to $n lines from the front of the array referenced by
# $array and print each one preceded by $prefix.  Stops early if the
# array runs out before $n lines have been printed.
sub printlines
{
    my ($array, $n, $prefix) = @_;

    for (my $left = $n; $left; --$left) {
        my $text = shift @$array;
        # nothing left in the buffer
        last unless defined($text);
        print $prefix, $text;
    }
}

# Print a difference region where n1 lines of file1 were replaced by
# n2 lines of file2 (where either n1 or n2 could be zero).
#
#   $n1 - number of lines to take from the front of @lines1 ('-' lines)
#   $n2 - number of lines to take from the front of @lines2 ('+' lines)
#
# Also prints and empties @precontext, advances $lineno1/$lineno2 past
# everything printed, sets $postcontext so that the main loop prints
# the following matching lines as context, and flushes STDOUT.
sub printdiff
{
    my ($n1, $n2) = @_;

    # If the precontext buffer is full or we're at the beginning of a
    # file, then this is a new diff region, so we should print a
    # header indicating the current line numbers.  If we're past the
    # beginning and the precontext buffer isn't full, then whatever
    # we're about to print is contiguous with the end of the last
    # region we printed, so we just concatenate them on the output.
    #
    # Line numbers start at 1 and only move forward once something has
    # been printed or dropped from @precontext, so "both still 1" is
    # the beginning-of-file test.  (Comparing against 0 could never
    # match, which left differences in the first few lines without a
    # header.)
    if (@precontext == $precontext_lines || ($lineno1 == 1 && $lineno2 == 1)) {
        print "@@ -$lineno1 +$lineno2 @@\n";
    }

    # Print and clear the precontext buffer.  Each buffered line still
    # carries its own newline, so joining on ' ' gives every line after
    # the first its leading context marker.
    if (@precontext) {
        print ' ', join(' ', @precontext);
        $lineno1 += scalar(@precontext);
        $lineno2 += scalar(@precontext);
        @precontext = ();
    }

    # Print the differing lines.
    printlines(\@lines1, $n1, '-');
    printlines(\@lines2, $n2, '+');
    $lineno1 += $n1;
    $lineno2 += $n2;

    # Set $postcontext to print the next $postcontext_lines matching lines.
    $postcontext = $postcontext_lines;

    # Push the region out immediately rather than waiting for the
    # output buffer to fill.
    STDOUT->flush();
}


########################
#
# Complex diff algorithm
#
########################

{
    # State shared between complex_diff and the callbacks it hands to
    # traverse_sequences: whether a matching pair has been seen yet,
    # and how many lines of each buffer were discarded before it.
    my ($synced, $skip1, $skip2);

    # MATCH callback: from now on discards are no longer counted.
    sub match { $synced = 1; }

    # DISCARD_A / DISCARD_B callbacks: count only the lines dropped
    # ahead of the first match.
    sub discard1 { $skip1++ if !$synced; }
    sub discard2 { $skip2++ if !$synced; }

    # Resync the streams using Algorithm::Diff and print the
    # resulting difference region.  Dies with "Lost sync!" (after
    # printing both buffers as one region) if the buffers share no
    # line at all.
    sub complex_diff
    {
        ($synced, $skip1, $skip2) = (0, 0, 0);

        # See Diff.pm.  Although this call diffs both lookahead
        # buffers completely, the only thing taken from it is how
        # many lines to drop from the front of each buffer to get
        # the streams lined up again.
        my %callbacks = (
            MATCH     => \&match,
            DISCARD_A => \&discard1,
            DISCARD_B => \&discard2,
        );
        traverse_sequences(\@lines1, \@lines2, \%callbacks);

        if (!$synced) {
            printdiff(scalar(@lines1), scalar(@lines2));
            die "Lost sync!";
        }

        # We should only be called when the first lines of the
        # buffers differ, so at least one buffer must have had lines
        # discarded from it.
        if ($skip1 == 0 && $skip2 == 0) {
            die;
        }

        printdiff($skip1, $skip2);
    }
}

#######################
#
# Simple diff algorithm
#
#######################

# Test whether the streams line up again at offset $n1 in @lines1 and
# offset $n2 in @lines2.  On success, print the $n1/$n2 lines in front
# of those offsets as a difference region and return 1; otherwise
# return 0 without printing anything.
sub checkmatch
{
    my ($n1, $n2) = @_;

    # Require two consecutive matching lines, to reduce false resyncs
    # (particularly on unrelated blank lines).  This produces
    # larger-than-necessary diffs when a single line really should be
    # treated as common; if that bugs you, use Algorithm::Diff.
    #
    # Entries past the end of a buffer are undef, and two undefs
    # compare equal here, so positions where both buffers have run out
    # count as matching.
    return 0 if $lines1[$n1] ne $lines2[$n2];
    return 0 if $lines1[$n1+1] ne $lines2[$n2+1];

    printdiff($n1, $n2);
    return 1;
}

# Resync the streams with the built-in algorithm: try ever larger
# offsets into the two lookahead buffers until checkmatch finds a
# place where they line up (it prints the region itself).  If no
# offset below the lookahead limit works, print both buffers as one
# region and die with "Lost sync!".
sub simple_diff
{
    my $cnt = 1;
    while ($cnt < $lookahead_lines-1) {
        # $cnt lines in one file replaced by $cnt lines in the other.
        return if checkmatch($cnt, $cnt);

        # $n lines in one file replaced by $cnt lines in the other,
        # tried in both directions for every $n below $cnt.
        for my $n (0 .. $cnt - 1) {
            return if checkmatch($n, $cnt) || checkmatch($cnt, $n);
        }

        ++$cnt;
    }

    printdiff(scalar(@lines1), scalar(@lines2));
    die "Lost sync!";
}

# Pick the diff function to call on a mismatch.
#
# Whichever one is chosen, it works out how many lines to drop from
# the front of each lookahead buffer to resync the streams, prints
# the corresponding diff output, and removes those lines.  Once it
# returns, $lines1[0] eq $lines2[0] is expected to hold.
my $find_diff;
if ($use_complexdiff) {
    $find_diff = \&complex_diff;
}
else {
    $find_diff = \&simple_diff;
}

# The main loop: compare the streams one line at a time.
while (1) {
    # top up both lookahead buffers
    fill($fh1, \@lines1);
    fill($fh2, \@lines2);

    # look at (but do not yet remove) the head of each buffer
    my ($head1, $head2) = ($lines1[0], $lines2[0]);

    # Both buffers empty means EOF on both streams: we are done.
    # NOTE(review): the exit status is 1 whether or not any difference
    # was printed -- confirm that callers expect this.
    if (!(defined($head1) || defined($head2))) {
        exit(1);
    }

    if ($head1 ne $head2) {
        # Mismatch: let the selected diff function resync and print.
        $find_diff->();
        next;
    }

    # The heads match, so consume them from both buffers.
    shift @lines1;
    shift @lines2;

    if ($postcontext > 0) {
        # Still owe context after the last difference: print the line.
        --$postcontext;
        print ' ', $head1;
        ++$lineno1;
        ++$lineno2;
        next;
    }

    # In the middle of a matching region: remember the line in case a
    # difference follows, keeping only the newest $precontext_lines
    # entries.  Each line dropped from the buffer moves the "next
    # printable line" numbers forward.
    push @precontext, $head1;
    while (@precontext > $precontext_lines) {
        shift @precontext;
        ++$lineno1;
        ++$lineno2;
    }
}