rundiff (338:6cf264d111b4) rundiff (354:fbfbff4f09c3)
1#! /usr/bin/env perl
2
3# Copyright (c) 2003 The Regents of The University of Michigan
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29# Diff two streams.
30#
31# Unlike regular diff, this script does not read in the entire input
32# before doing a diff, so it can be used on lengthy outputs piped from
33# other programs (e.g., M5 traces). The best way to do this is to
34# take advantage of the power of Perl's open function, which will
35# automatically fork a subprocess if the last character in the
36# "filename" is a pipe (|). Thus to compare the instruction traces
37# from two versions of m5 (m5a and m5b), you can do this:
38#
39# rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |'
40#
41
42use strict;
43
1#! /usr/bin/env perl
2
3# Copyright (c) 2003 The Regents of The University of Michigan
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29# Diff two streams.
30#
31# Unlike regular diff, this script does not read in the entire input
32# before doing a diff, so it can be used on lengthy outputs piped from
33# other programs (e.g., M5 traces). The best way to do this is to
34# take advantage of the power of Perl's open function, which will
35# automatically fork a subprocess if the last character in the
36# "filename" is a pipe (|). Thus to compare the instruction traces
37# from two versions of m5 (m5a and m5b), you can do this:
38#
39# rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |'
40#
41
42use strict;
43
44use Getopt::Std;
45
44#
46#
47# Options:
48# -c <n> : print n lines of context before & after changes
49# -l <n> : use n lines of lookahead
50# -x : use "complex" diff from Algorithm::Diff (see below)
51#
# Command-line switches, filled in by Getopt::Std:
#   $opt_c - value of -c (lines of context)
#   $opt_l - value of -l (lines of lookahead)
#   $opt_x - set when -x (Algorithm::Diff mode) is given
our ($opt_c, $opt_l, $opt_x);

# getopts() returns false on an unknown switch or a missing switch
# argument; stop with a usage message instead of running with options
# that were silently ignored.
getopts('c:l:x')
    or die "Usage: $0 [-c <n>] [-l <n>] [-x] <file1> <file2>\n";
54
55#
45# For the highest-quality (minimal) diffs, we can use the
56# For the highest-quality (minimal) diffs, we can use the
46# Algorithm::Diff package. If you don't have this installed, or want
47# the script to run faster (like 3-4x faster, based on informal
48# observation), set $use_complexdiff to 0; then a built-in, simple,
49# and generally quite adequate algorithm will be used instead.
# Algorithm selector: false picks the built-in simple resync algorithm,
# true picks the Algorithm::Diff based one.
my $use_complexdiff = 0;
57# Algorithm::Diff package. By default, a built-in, simple, and
58# generally quite adequate algorithm will be used. If you have
59# Algorithm::Diff installed on your system, and don't mind having the
60# script go slower (like 3-4x slower, based on informal observation),
61# then specify '-x' on the command line to use it.
# Algorithm selector: true only when -x was given on the command line.
my $use_complexdiff = defined $opt_x;
51
63
52#if ($use_complexdiff) {
53# use Algorithm::Diff qw(traverse_sequences);
54#};
if ($use_complexdiff) {
    # Load Algorithm::Diff only on request.  'use' is processed at
    # compile time and would fail on systems without the module even
    # when $use_complexdiff is false; 'require' runs at runtime, so
    # the module is needed only when -x is actually given.
    require Algorithm::Diff;

    # Explicit class-method call rather than the indirect-object form
    # ("import Algorithm::Diff ..."), whose parse depends on context.
    Algorithm::Diff->import(qw(traverse_sequences));
}
55
72
# Tuning knobs: size of each lookahead buffer, and the number of
# matching lines printed before and after every difference region.
my ($lookahead_lines, $precontext_lines, $postcontext_lines) = (200, 3, 3);
# Size of each lookahead buffer: the -l value, or 200 lines when -l is
# absent.
# NOTE(review): a value of 0 is false, so "-l 0" also yields 200.
my $lookahead_lines = $opt_l ? $opt_l : 200;

# One switch (-c) sets the context printed both before and after a
# difference; separate amounts could be supported, but nobody has
# needed that yet.
# NOTE(review): "-c 0" is false as well and therefore yields 3.
my $precontext_lines = $opt_c ? $opt_c : 3;
my $postcontext_lines = $precontext_lines;
80
# The two streams to compare, taken from the command line.
my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

die "Need two args." if (!(defined($file1) && defined($file2)));

# The two-argument form of open() is deliberate: it lets an argument
# ending in '|' be run as a command whose output is read as the stream
# (see the usage example at the top of the file).  Include $! so a
# failure reports why the stream could not be opened.
my ($fh1, $fh2);
open($fh1, $file1) or die "Can't open $file1: $!";
open($fh2, $file2) or die "Can't open $file2: $!";

# buffer of matching lines for pre-diff context
my @precontext = ();
# number of post-diff matching lines remaining to print
my $postcontext = 0;

# lookahead buffers for $file1 and $file2 respectively
my @lines1 = ();
my @lines2 = ();

# Next line number available to print from each file.  Generally this
# corresponds to the oldest line in @precontext, or the oldest line in
# @lines1 and @lines2 if @precontext is empty.  Numbering is 1-based.
my $lineno1 = 1;
my $lineno2 = 1;
83
# Top up a lookahead buffer.
#
#   $fh    - filehandle to read from
#   $array - reference to the buffer array to append to
#
# Lines are appended until the buffer holds $lookahead_lines entries or
# the handle is exhausted, whichever comes first.
sub fill
{
    my ($fh, $array) = @_;

    until (@$array >= $lookahead_lines) {
        my $line = <$fh>;
        last unless defined($line);
        push @$array, $line;
    }
}
95
# Remove up to $n lines from the front of a buffer and print each one
# preceded by $prefix.
#
#   $array  - reference to the buffer array (modified in place)
#   $n      - number of lines to consume
#   $prefix - string printed before every line
#
# Stops early if the buffer runs out of lines.
sub printlines
{
    my ($array, $n, $prefix) = @_;

    for (my $remaining = $n; $remaining; $remaining--) {
        my $line = shift @$array;
        last unless defined($line);
        print $prefix, $line;
    }
}
107
# Print a difference region where n1 lines of file1 were replaced by
# n2 lines of file2 (where either n1 or n2 could be zero).
#
#   $n1 - number of lines to consume from the front of @lines1 ('-')
#   $n2 - number of lines to consume from the front of @lines2 ('+')
#
# Flushes @precontext, advances $lineno1/$lineno2 past everything
# printed, and arms $postcontext.  Always returns 1: checkmatch's
# callers test for a true value after a region has been printed.
sub printdiff
{
    my ($n1, $n2) = @_;

    # If the precontext buffer is full or we're at the beginning of a
    # file, then this is a new diff region, so we should print a
    # header indicating the current line numbers.  If we're past the
    # beginning and the precontext buffer isn't full, then whatever
    # we're about to print is contiguous with the end of the last
    # region we printed, so we just concatenate them on the output.
    #
    # Line numbers are 1-based, so "beginning of a file" means both
    # counters still hold 1; testing against 0 could never match and
    # left a difference in the first few lines without a header.
    if (@precontext == $precontext_lines || ($lineno1 == 1 && $lineno2 == 1)) {
        print "@@ -$lineno1 +$lineno2 @@\n";
    }

    # Print and clear the precontext buffer.  Every buffered line keeps
    # its newline, so joining with ' ' gives each line a ' ' prefix.
    if (@precontext) {
        print ' ', join(' ', @precontext);
        $lineno1 += scalar(@precontext);
        $lineno2 += scalar(@precontext);
        @precontext = ();
    }

    # Print the differing lines.
    printlines(\@lines1, $n1, '-');
    printlines(\@lines2, $n2, '+');
    $lineno1 += $n1;
    $lineno2 += $n2;

    # Set $postcontext to print the next $postcontext_lines matching lines.
    $postcontext = $postcontext_lines;

    return 1;
}
141
142
143########################
144#
145# Complex diff algorithm
146#
147########################
148
{
    # State shared by the traversal callbacks and complex_diff: whether
    # a common line has been seen yet, and how many lines of each
    # buffer were reported as discarded before that point.
    my ($match_found, $discard_lines1, $discard_lines2);

    # Callbacks handed to traverse_sequences.  Discards are counted
    # only until the first match is seen.
    sub match    { $match_found = 1; }
    sub discard1 { $discard_lines1++ if !$match_found; }
    sub discard2 { $discard_lines2++ if !$match_found; }

    # Resync the streams using Algorithm::Diff and print the resulting
    # difference region.
    sub complex_diff
    {
        ($match_found, $discard_lines1, $discard_lines2) = (0, 0, 0);

        # See Diff.pm.  The traversal covers both lookahead buffers
        # completely, but the only thing taken from it is how many
        # lines to drop from the front of each buffer to get the
        # streams back in step.
        my %callbacks = (
            MATCH     => \&match,
            DISCARD_A => \&discard1,
            DISCARD_B => \&discard2,
        );
        traverse_sequences(\@lines1, \@lines2, \%callbacks);

        die "Lost sync!" if (!$match_found);

        # We are only called when the first lines of the buffers
        # differ, so at least one buffer must lose some lines.
        die unless ($discard_lines1 != 0 || $discard_lines2 != 0);

        printdiff($discard_lines1, $discard_lines2);
    }
}
183
184#######################
185#
186# Simple diff algorithm
187#
188#######################
189
# Check for a pair of matching lines; if found, generate appropriate
# diff output.
#
#   $n1 - candidate resync index into @lines1
#   $n2 - candidate resync index into @lines2
#
# Returns 1 if the streams resync at ($n1, $n2) and the difference
# region was printed, 0 otherwise.  The result is explicit so callers
# do not depend on whatever printdiff's last statement evaluates to.
sub checkmatch
{
    my ($n1, $n2) = @_;

    # Check if two adjacent lines match, to reduce false resyncs
    # (particularly on unrelated blank lines).  This generates
    # larger-than-necessary diffs when a single line really should be
    # treated as common; if that bugs you, use Algorithm::Diff.
    if ($lines1[$n1] eq $lines2[$n2] && $lines1[$n1+1] eq $lines2[$n2+1]) {
        printdiff($n1, $n2);
        return 1;
    }

    return 0;
}
204
# Built-in resync search: try ever larger replacement sizes until
# checkmatch reports that the streams line up again.  Dies with
# "Lost sync!" when nothing within the lookahead window matches.
sub simple_diff
{
    my $cnt = 1;

    while ($cnt < $lookahead_lines - 1) {
        # Same number of lines replaced on both sides.
        return if checkmatch($cnt, $cnt);

        # Unequal replacement: $cnt lines on one side against
        # 0 .. $cnt-1 lines on the other, tried in both directions.
        foreach my $n (0 .. $cnt - 1) {
            return if checkmatch($n, $cnt);
            return if checkmatch($cnt, $n);
        }

        $cnt++;
    }

    die "Lost sync!";
}
224
# Pick the resync routine once, up front.
#
# Either routine works out how many lines to drop from the front of
# each lookahead buffer to bring the streams back in step, prints the
# corresponding diff output and drops those lines.  On return the
# heads of the two buffers are expected to be equal again.
my $find_diff = $use_complexdiff ? \&complex_diff : \&simple_diff;

# The main loop.
while (1) {
    # Refill both lookahead buffers before looking at them.
    fill($fh1, \@lines1);
    fill($fh2, \@lines2);

    # Oldest unprocessed line of each stream (undef once exhausted).
    my ($head1, $head2) = ($lines1[0], $lines2[0]);

    # Both streams are at EOF: done.
    # NOTE(review): the status is 1 whether or not any difference was
    # printed -- confirm that callers expect this.
    exit(1) unless (defined($head1) || defined($head2));

    if ($head1 ne $head2) {
        # Mismatch: let the selected algorithm resync and report it.
        $find_diff->();
        next;
    }

    # The heads match: consume them from both buffers.
    shift @lines1;
    shift @lines2;

    if ($postcontext > 0) {
        # Still owing trailing context for the previous difference.
        $postcontext--;
        print ' ', $head1;
        $lineno1++;
        $lineno2++;
        next;
    }

    # Inside a matching region: keep the line as possible leading
    # context for the next difference, holding on to no more than
    # $precontext_lines of them.
    push @precontext, $head1;
    while (@precontext > $precontext_lines) {
        shift @precontext;
        $lineno1++;
        $lineno2++;
    }
}
# The two streams to compare, taken from the command line.
my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

die "Need two args." if (!(defined($file1) && defined($file2)));

# The two-argument form of open() is deliberate: it lets an argument
# ending in '|' be run as a command whose output is read as the stream
# (see the usage example at the top of the file).  Include $! so a
# failure reports why the stream could not be opened.
my ($fh1, $fh2);
open($fh1, $file1) or die "Can't open $file1: $!";
open($fh2, $file2) or die "Can't open $file2: $!";

# buffer of matching lines for pre-diff context
my @precontext = ();
# number of post-diff matching lines remaining to print
my $postcontext = 0;

# lookahead buffers for $file1 and $file2 respectively
my @lines1 = ();
my @lines2 = ();

# Next line number available to print from each file.  Generally this
# corresponds to the oldest line in @precontext, or the oldest line in
# @lines1 and @lines2 if @precontext is empty.  Numbering is 1-based.
my $lineno1 = 1;
my $lineno2 = 1;
104
# Top up a lookahead buffer.
#
#   $fh    - filehandle to read from
#   $array - reference to the buffer array to append to
#
# Lines are appended until the buffer holds $lookahead_lines entries or
# the handle is exhausted, whichever comes first.
sub fill
{
    my ($fh, $array) = @_;

    until (@$array >= $lookahead_lines) {
        my $line = <$fh>;
        last unless defined($line);
        push @$array, $line;
    }
}
116
# Remove up to $n lines from the front of a buffer and print each one
# preceded by $prefix.
#
#   $array  - reference to the buffer array (modified in place)
#   $n      - number of lines to consume
#   $prefix - string printed before every line
#
# Stops early if the buffer runs out of lines.
sub printlines
{
    my ($array, $n, $prefix) = @_;

    for (my $remaining = $n; $remaining; $remaining--) {
        my $line = shift @$array;
        last unless defined($line);
        print $prefix, $line;
    }
}
128
# Print a difference region where n1 lines of file1 were replaced by
# n2 lines of file2 (where either n1 or n2 could be zero).
#
#   $n1 - number of lines to consume from the front of @lines1 ('-')
#   $n2 - number of lines to consume from the front of @lines2 ('+')
#
# Flushes @precontext, advances $lineno1/$lineno2 past everything
# printed, and arms $postcontext.  Always returns 1: checkmatch's
# callers test for a true value after a region has been printed.
sub printdiff
{
    my ($n1, $n2) = @_;

    # If the precontext buffer is full or we're at the beginning of a
    # file, then this is a new diff region, so we should print a
    # header indicating the current line numbers.  If we're past the
    # beginning and the precontext buffer isn't full, then whatever
    # we're about to print is contiguous with the end of the last
    # region we printed, so we just concatenate them on the output.
    #
    # Line numbers are 1-based, so "beginning of a file" means both
    # counters still hold 1; testing against 0 could never match and
    # left a difference in the first few lines without a header.
    if (@precontext == $precontext_lines || ($lineno1 == 1 && $lineno2 == 1)) {
        print "@@ -$lineno1 +$lineno2 @@\n";
    }

    # Print and clear the precontext buffer.  Every buffered line keeps
    # its newline, so joining with ' ' gives each line a ' ' prefix.
    if (@precontext) {
        print ' ', join(' ', @precontext);
        $lineno1 += scalar(@precontext);
        $lineno2 += scalar(@precontext);
        @precontext = ();
    }

    # Print the differing lines.
    printlines(\@lines1, $n1, '-');
    printlines(\@lines2, $n2, '+');
    $lineno1 += $n1;
    $lineno2 += $n2;

    # Set $postcontext to print the next $postcontext_lines matching lines.
    $postcontext = $postcontext_lines;

    return 1;
}
162
163
164########################
165#
166# Complex diff algorithm
167#
168########################
169
{
    # State shared by the traversal callbacks and complex_diff: whether
    # a common line has been seen yet, and how many lines of each
    # buffer were reported as discarded before that point.
    my ($match_found, $discard_lines1, $discard_lines2);

    # Callbacks handed to traverse_sequences.  Discards are counted
    # only until the first match is seen.
    sub match    { $match_found = 1; }
    sub discard1 { $discard_lines1++ if !$match_found; }
    sub discard2 { $discard_lines2++ if !$match_found; }

    # Resync the streams using Algorithm::Diff and print the resulting
    # difference region.
    sub complex_diff
    {
        ($match_found, $discard_lines1, $discard_lines2) = (0, 0, 0);

        # See Diff.pm.  The traversal covers both lookahead buffers
        # completely, but the only thing taken from it is how many
        # lines to drop from the front of each buffer to get the
        # streams back in step.
        my %callbacks = (
            MATCH     => \&match,
            DISCARD_A => \&discard1,
            DISCARD_B => \&discard2,
        );
        traverse_sequences(\@lines1, \@lines2, \%callbacks);

        die "Lost sync!" if (!$match_found);

        # We are only called when the first lines of the buffers
        # differ, so at least one buffer must lose some lines.
        die unless ($discard_lines1 != 0 || $discard_lines2 != 0);

        printdiff($discard_lines1, $discard_lines2);
    }
}
204
205#######################
206#
207# Simple diff algorithm
208#
209#######################
210
# Check for a pair of matching lines; if found, generate appropriate
# diff output.
#
#   $n1 - candidate resync index into @lines1
#   $n2 - candidate resync index into @lines2
#
# Returns 1 if the streams resync at ($n1, $n2) and the difference
# region was printed, 0 otherwise.  The result is explicit so callers
# do not depend on whatever printdiff's last statement evaluates to.
sub checkmatch
{
    my ($n1, $n2) = @_;

    # Check if two adjacent lines match, to reduce false resyncs
    # (particularly on unrelated blank lines).  This generates
    # larger-than-necessary diffs when a single line really should be
    # treated as common; if that bugs you, use Algorithm::Diff.
    if ($lines1[$n1] eq $lines2[$n2] && $lines1[$n1+1] eq $lines2[$n2+1]) {
        printdiff($n1, $n2);
        return 1;
    }

    return 0;
}
225
# Built-in resync search: try ever larger replacement sizes until
# checkmatch reports that the streams line up again.  Dies with
# "Lost sync!" when nothing within the lookahead window matches.
sub simple_diff
{
    my $cnt = 1;

    while ($cnt < $lookahead_lines - 1) {
        # Same number of lines replaced on both sides.
        return if checkmatch($cnt, $cnt);

        # Unequal replacement: $cnt lines on one side against
        # 0 .. $cnt-1 lines on the other, tried in both directions.
        foreach my $n (0 .. $cnt - 1) {
            return if checkmatch($n, $cnt);
            return if checkmatch($cnt, $n);
        }

        $cnt++;
    }

    die "Lost sync!";
}
245
# Pick the resync routine once, up front.
#
# Either routine works out how many lines to drop from the front of
# each lookahead buffer to bring the streams back in step, prints the
# corresponding diff output and drops those lines.  On return the
# heads of the two buffers are expected to be equal again.
my $find_diff = $use_complexdiff ? \&complex_diff : \&simple_diff;

# The main loop.
while (1) {
    # Refill both lookahead buffers before looking at them.
    fill($fh1, \@lines1);
    fill($fh2, \@lines2);

    # Oldest unprocessed line of each stream (undef once exhausted).
    my ($head1, $head2) = ($lines1[0], $lines2[0]);

    # Both streams are at EOF: done.
    # NOTE(review): the status is 1 whether or not any difference was
    # printed -- confirm that callers expect this.
    exit(1) unless (defined($head1) || defined($head2));

    if ($head1 ne $head2) {
        # Mismatch: let the selected algorithm resync and report it.
        $find_diff->();
        next;
    }

    # The heads match: consume them from both buffers.
    shift @lines1;
    shift @lines2;

    if ($postcontext > 0) {
        # Still owing trailing context for the previous difference.
        $postcontext--;
        print ' ', $head1;
        $lineno1++;
        $lineno2++;
        next;
    }

    # Inside a matching region: keep the line as possible leading
    # context for the next difference, holding on to no more than
    # $precontext_lines of them.
    push @precontext, $head1;
    while (@precontext > $precontext_lines) {
        shift @precontext;
        $lineno1++;
        $lineno2++;
    }
}