rundiff revision 2783:381a5413b55a
110428Sandreas.hansson@arm.com#! /usr/bin/env perl
210428Sandreas.hansson@arm.com
310428Sandreas.hansson@arm.com# Copyright (c) 2003 The Regents of The University of Michigan
410428Sandreas.hansson@arm.com# All rights reserved.
510428Sandreas.hansson@arm.com#
610428Sandreas.hansson@arm.com# Redistribution and use in source and binary forms, with or without
710428Sandreas.hansson@arm.com# modification, are permitted provided that the following conditions are
810428Sandreas.hansson@arm.com# met: redistributions of source code must retain the above copyright
910428Sandreas.hansson@arm.com# notice, this list of conditions and the following disclaimer;
1010428Sandreas.hansson@arm.com# redistributions in binary form must reproduce the above copyright
1110428Sandreas.hansson@arm.com# notice, this list of conditions and the following disclaimer in the
1210428Sandreas.hansson@arm.com# documentation and/or other materials provided with the distribution;
1310428Sandreas.hansson@arm.com# neither the name of the copyright holders nor the names of its
1410428Sandreas.hansson@arm.com# contributors may be used to endorse or promote products derived from
1510428Sandreas.hansson@arm.com# this software without specific prior written permission.
1610428Sandreas.hansson@arm.com#
1710428Sandreas.hansson@arm.com# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1810428Sandreas.hansson@arm.com# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1910428Sandreas.hansson@arm.com# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2010428Sandreas.hansson@arm.com# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2110428Sandreas.hansson@arm.com# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2210428Sandreas.hansson@arm.com# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2310428Sandreas.hansson@arm.com# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2410428Sandreas.hansson@arm.com# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2510428Sandreas.hansson@arm.com# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2610428Sandreas.hansson@arm.com# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2710428Sandreas.hansson@arm.com# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2810428Sandreas.hansson@arm.com#
2910428Sandreas.hansson@arm.com# Authors: Nathan Binkert
3010428Sandreas.hansson@arm.com#          Steve Reinhardt
3110428Sandreas.hansson@arm.com
3210428Sandreas.hansson@arm.com# Diff two streams.
3310428Sandreas.hansson@arm.com#
3410428Sandreas.hansson@arm.com# Unlike regular diff, this script does not read in the entire input
3510428Sandreas.hansson@arm.com# before doing a diff, so it can be used on lengthy outputs piped from
3610428Sandreas.hansson@arm.com# other programs (e.g., M5 traces).  The best way to do this is to
3710428Sandreas.hansson@arm.com# take advantage of the power of Perl's open function, which will
3810428Sandreas.hansson@arm.com# automatically fork a subprocess if the last character in the
3910428Sandreas.hansson@arm.com# "filename" is a pipe (|).  Thus to compare the instruction traces
4010428Sandreas.hansson@arm.com# from two versions of m5 (m5a and m5b), you can do this:
4110428Sandreas.hansson@arm.com#
4210428Sandreas.hansson@arm.com# rundiff 'm5a --traceflags=InstExec |' 'm5b --traceflags=InstExec |'
4310428Sandreas.hansson@arm.com#
4410428Sandreas.hansson@arm.com
4510428Sandreas.hansson@arm.comuse strict;
4610428Sandreas.hansson@arm.com
4710428Sandreas.hansson@arm.comuse Getopt::Std;
4810428Sandreas.hansson@arm.com
#
# Options:
#  -c <n> : print n lines of context before & after changes
#           (default 3; an explicit 0 is honoured and disables context)
#  -l <n> : use n lines of lookahead (default 200)
#  -x     : use "complex" diff from Algorithm::Diff (see below)
#
our ($opt_c, $opt_l, $opt_x);
getopts('c:l:x');

#
# For the highest-quality (minimal) diffs, we can use the
# Algorithm::Diff package.  By default, a built-in, simple, and
# generally quite adequate algorithm will be used.  If you have
# Algorithm::Diff installed on your system, and don't mind having the
# script go slower (like 3-4x slower, based on informal observation),
# then specify '-x' on the command line to use it.
my $use_complexdiff = defined($opt_x);

if ($use_complexdiff) {
    # Don't use 'use', as that's a compile-time option and will fail
    # on systems that don't have Algorithm::Diff installed even if
    # $use_complexdiff is false.  'require' is evaluated at runtime,
    # so it's OK.
    require Algorithm::Diff;
    Algorithm::Diff->import(qw(traverse_sequences));
}

# A lookahead of zero makes no sense, so a false value falls back to
# the default.
my $lookahead_lines = $opt_l || 200;

# in theory you could have different amounts of context before and
# after a diff, but until someone needs that there's only one arg to
# set both.
#
# Test for definedness rather than truth so that '-c 0' really means
# "no context" instead of silently falling back to the default.
my $precontext_lines = defined($opt_c) ? $opt_c : 3;
my $postcontext_lines = $precontext_lines;

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

die "Need two args." if (!(defined($file1) && defined($file2)));

# The two-argument form of open is deliberate: it is what lets an
# argument ending in '|' be run as a command whose output is diffed.
my ($fh1, $fh2);
open($fh1, $file1) or die "Can't open $file1: $!";
open($fh2, $file2) or die "Can't open $file2: $!";

# print files to output so we know which is which
print "-$file1\n";
print "+$file2\n";

# buffer of matching lines for pre-diff context
my @precontext = ();
# number of post-diff matching lines remaining to print
my $postcontext = 0;

# lookahead buffers for $file1 and $file2 respectively
my @lines1 = ();
my @lines2 = ();

# Next line number available to print from each file.  Generally this
# corresponds to the oldest line in @precontext, or the oldest line in
# @lines1 and @lines2 if @precontext is empty.
my $lineno1 = 1;
my $lineno2 = 1;
11110428Sandreas.hansson@arm.com
# Top up a lookahead buffer.
#
# Arguments:
#   $fh    - filehandle to read lines from
#   $array - reference to the buffer the lines are appended to
#
# Lines are appended one at a time until the buffer holds
# $lookahead_lines entries or the handle reports end of input,
# whichever comes first.  A buffer that is already full is left alone.
sub fill
{
    my ($fh, $array) = @_;

    until (@$array >= $lookahead_lines) {
        my $next = readline($fh);
        # undef means EOF: nothing more to buffer
        return unless defined($next);
        push @$array, $next;
    }

    return;
}
12310428Sandreas.hansson@arm.com
# Remove up to n lines from the front of an array and print each one
# preceded by the given prefix.
#
# Arguments:
#   $array  - reference to the array to consume from (modified in place)
#   $n      - maximum number of lines to remove and print
#   $prefix - string printed immediately before every line
#
# Stops early if the array runs out before n lines have been printed.
sub printlines
{
    my ($array, $n, $prefix) = @_;

    while ($n) {
        $n--;
        my $text = shift @$array;
        # ran off the end of the buffer: nothing left to print
        return unless defined($text);
        print $prefix, $text;
    }

    return;
}
13511555Sjungma@eit.uni-kl.de
# Print a difference region where n1 lines of file1 were replaced by
# n2 lines of file2 (where either n1 or n2 could be zero).
#
# Arguments:
#   $n1 - number of lines to take from the front of @lines1 ('-' lines)
#   $n2 - number of lines to take from the front of @lines2 ('+' lines)
#
# Besides printing, this updates the file-level state: @precontext is
# flushed to the output and emptied, the differing lines are removed
# from @lines1 / @lines2, $lineno1 / $lineno2 are advanced past
# everything that was printed, and $postcontext is re-armed so the
# main loop prints the following matching lines as trailing context.
sub printdiff
{
    my ($n1, $n2) = @_;

    # If the precontext buffer is full or we're at the beginning of a
    # file, then this is a new diff region, so we should print a
    # header indicating the current line numbers.  If we're past the
    # beginning and the precontext buffer isn't full, then whatever
    # we're about to print is contiguous with the end of the last
    # region we printed, so we just concatenate them on the output.
    #
    # The line counters are 1-based and only ever grow, so "nothing
    # has been consumed from either file yet" is exactly the state in
    # which both still hold 1.
    if (@precontext == $precontext_lines
        || ($lineno1 == 1 && $lineno2 == 1)) {
        print "@@ -$lineno1 +$lineno2 @@\n";
    }

    # Print and clear the precontext buffer.  Every buffered line
    # still carries its own newline, so joining on ' ' gives each
    # line after the first its leading context marker.
    if (@precontext) {
        print ' ', join(' ', @precontext);
        $lineno1 += scalar(@precontext);
        $lineno2 += scalar(@precontext);
        @precontext = ();
    }

    # Print the differing lines.
    printlines(\@lines1, $n1, '-');
    printlines(\@lines2, $n2, '+');
    $lineno1 += $n1;
    $lineno2 += $n2;

    # Set $postcontext to print the next $postcontext_lines matching lines.
    $postcontext = $postcontext_lines;
}
169
170
171########################
172#
173# Complex diff algorithm
174#
175########################
176
{
    # State shared between complex_diff and the callbacks it hands to
    # traverse_sequences.  Kept in an enclosing block so that only
    # these subs can see it.
    my $resynced;    # set once the first matching pair has been seen
    my $skip1;       # lines of @lines1 discarded before that match
    my $skip2;       # lines of @lines2 discarded before that match

    # Callbacks: only discards that happen before the first match are
    # counted.
    sub match    { $resynced = 1; }
    sub discard1 { $skip1++ if !$resynced; }
    sub discard2 { $skip2++ if !$resynced; }

    # Resynchronise the two lookahead buffers using Algorithm::Diff
    # and print the resulting difference region.  Dies with
    # "Lost sync!" (after dumping both buffers as one big difference)
    # when the buffers have no line in common.
    sub complex_diff
    {
        ($resynced, $skip1, $skip2) = (0, 0, 0);

        # See Diff.pm.  Note that even though this call generates a
        # complete diff of both lookahead buffers, all we use it for
        # is to figure out how many lines to discard off the front of
        # each buffer to resync the streams.
        my %callbacks = (
            MATCH     => \&match,
            DISCARD_A => \&discard1,
            DISCARD_B => \&discard2,
        );
        traverse_sequences(\@lines1, \@lines2, \%callbacks);

        unless ($resynced) {
            printdiff(scalar(@lines1), scalar(@lines2));
            die "Lost sync!";
        }

        # Since we shouldn't get here unless the first lines of the
        # buffers are different, then we must discard some lines off
        # at least one of the buffers.
        die unless ($skip1 != 0 || $skip2 != 0);

        printdiff($skip1, $skip2);
    }
}
214
215#######################
216#
217# Simple diff algorithm
218#
219#######################
220
# Test whether the streams line up again at the given buffer offsets
# and, if they do, emit the difference region that precedes them.
#
# Arguments:
#   $n1 - candidate resync index into @lines1
#   $n2 - candidate resync index into @lines2
#
# Returns 1 after calling printdiff($n1, $n2) when a resync point was
# found, 0 otherwise.
sub checkmatch
{
    my ($n1, $n2) = @_;

    # Require two adjacent lines to match, to reduce false resyncs
    # (particularly on unrelated blank lines).  This generates
    # larger-than-necessary diffs when a single line really should be
    # treated as common; if that bugs you, use Algorithm::Diff.
    return 0 if ($lines1[$n1] ne $lines2[$n2]);
    return 0 if ($lines1[$n1 + 1] ne $lines2[$n2 + 1]);

    printdiff($n1, $n2);
    return 1;
}
238
# Built-in resync search.  Tries progressively larger difference
# sizes until checkmatch finds a point where the two lookahead
# buffers line up again; checkmatch prints the difference region as a
# side effect.  If nothing lines up within the lookahead window, both
# buffers are dumped as one difference and the script dies with
# "Lost sync!".
sub simple_diff
{
    # Candidate sizes run from 1 up to (but not including)
    # $lookahead_lines - 1.
    my $limit = $lookahead_lines - 1;
    my $span = 1;

    while ($span < $limit) {
        # $span lines in one file replaced by $span lines in the
        # other.
        return if checkmatch($span, $span);

        # Unequal replacements: $span lines on one side against
        # 0 .. $span-1 lines on the other, tried in both directions.
        foreach my $fewer (0 .. $span - 1) {
            return if checkmatch($fewer, $span);
            return if checkmatch($span, $fewer);
        }

        $span++;
    }

    printdiff(scalar(@lines1), scalar(@lines2));
    die "Lost sync!";
}
260
# Pick the resync routine according to the -x option.
#
# Either routine works out how many lines have to be dropped from the
# front of each lookahead buffer to bring the streams back in step,
# prints the corresponding diff output, and drops those lines.  Once
# it returns, $lines1[0] eq $lines2[0] should always hold.
my $find_diff = $use_complexdiff ? \&complex_diff : \&simple_diff;

# The main loop: runs until both inputs are exhausted.
while (1) {
    # keep lookahead buffers topped up
    fill($fh1, \@lines1);
    fill($fh2, \@lines2);

    # peek at first line in each buffer
    my ($head1, $head2) = ($lines1[0], $lines2[0]);

    # reached EOF on both streams: exit
    # NOTE(review): the status is 1 whether or not any difference was
    # printed -- confirm that callers expect this.
    exit(1) unless (defined($head1) || defined($head2));

    if ($head1 ne $head2) {
        # Mismatch.  Let the selected routine resync and report it.
        $find_diff->();
        next;
    }

    # matching lines: delete from lookahead buffer
    shift @lines1;
    shift @lines2;

    if ($postcontext > 0) {
        # still inside the trailing context of the previous diff, so
        # the line goes straight to the output
        print ' ', $head1;
        $postcontext--;
        $lineno1++;
        $lineno2++;
        next;
    }

    # In the middle of a matching region: remember the line as
    # potential leading context for the next difference.
    push @precontext, $head1;

    # Trim the buffer back to the configured size; each line dropped
    # here has been passed without being printed, so the line
    # counters advance.
    while (@precontext > $precontext_lines) {
        shift @precontext;
        $lineno1++;
        $lineno2++;
    }
}
314