rundiff revision 408
16145Snate@binkert.org#! /usr/bin/env perl 26145Snate@binkert.org 36145Snate@binkert.org# Copyright (c) 2003 The Regents of The University of Michigan 46145Snate@binkert.org# All rights reserved. 56145Snate@binkert.org# 66145Snate@binkert.org# Redistribution and use in source and binary forms, with or without 76145Snate@binkert.org# modification, are permitted provided that the following conditions are 86145Snate@binkert.org# met: redistributions of source code must retain the above copyright 96145Snate@binkert.org# notice, this list of conditions and the following disclaimer; 106145Snate@binkert.org# redistributions in binary form must reproduce the above copyright 116145Snate@binkert.org# notice, this list of conditions and the following disclaimer in the 126145Snate@binkert.org# documentation and/or other materials provided with the distribution; 136145Snate@binkert.org# neither the name of the copyright holders nor the names of its 146145Snate@binkert.org# contributors may be used to endorse or promote products derived from 156145Snate@binkert.org# this software without specific prior written permission. 166145Snate@binkert.org# 176145Snate@binkert.org# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 186145Snate@binkert.org# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 196145Snate@binkert.org# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 206145Snate@binkert.org# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 216145Snate@binkert.org# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 226145Snate@binkert.org# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 236145Snate@binkert.org# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 246145Snate@binkert.org# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 256145Snate@binkert.org# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 266145Snate@binkert.org# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 276145Snate@binkert.org# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 286145Snate@binkert.org 297039Snate@binkert.org# Diff two streams. 307039Snate@binkert.org# 316145Snate@binkert.org# Unlike regular diff, this script does not read in the entire input 327002Snate@binkert.org# before doing a diff, so it can be used on lengthy outputs piped from 337002Snate@binkert.org# other programs (e.g., M5 traces). The best way to do this is to 347453Snate@binkert.org# take advantage of the power of Perl's open function, which will 359466Snilay@cs.wisc.edu# automatically fork a subprocess if the last character in the 366145Snate@binkert.org# "filename" is a pipe (|). Thus to compare the instruction traces 376145Snate@binkert.org# from two versions of m5 (m5a and m5b), you can do this: 387453Snate@binkert.org# 396145Snate@binkert.org# rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |' 407453Snate@binkert.org# 417039Snate@binkert.org 427039Snate@binkert.orguse strict; 439508Snilay@cs.wisc.edu 449466Snilay@cs.wisc.eduuse Getopt::Std; 459466Snilay@cs.wisc.edu 469508Snilay@cs.wisc.edu# 477453Snate@binkert.org# Options: 487453Snate@binkert.org# -c <n> : print n lines of context before & after changes 497453Snate@binkert.org# -l <n> : use n lines of lookahead 507453Snate@binkert.org# -x : use "complex" diff from Algorithm::Diff (see below) 517453Snate@binkert.org# 529508Snilay@cs.wisc.eduour ($opt_c, $opt_l, $opt_x); 537453Snate@binkert.orggetopts('c:l:x'); 546145Snate@binkert.org 557039Snate@binkert.org# 566145Snate@binkert.org# For the highest-quality (minimal) diffs, we can use the 577039Snate@binkert.org# Algorithm::Diff package. By default, a built-in, simple, and 587039Snate@binkert.org# generally quite adequate algorithm will be used. If you have 597973Snilay@cs.wisc.edu# Algorithm::Diff installed on your system, and don't mind having the 607973Snilay@cs.wisc.edu# script go slower (like 3-4x slower, based on informal observation), 616145Snate@binkert.org# then specify '-x' on the command line to use it. 629302Snilay@cs.wisc.edumy $use_complexdiff = defined($opt_x); 639302Snilay@cs.wisc.edu 649302Snilay@cs.wisc.eduif ($use_complexdiff) { 659302Snilay@cs.wisc.edu # Don't use 'use', as that's a compile-time option and will fail 669302Snilay@cs.wisc.edu # on systems that don't have Algorithm::Diff installed even if 679302Snilay@cs.wisc.edu # $use_complexdiff is false. 'require' is evaluated at runtime, 689302Snilay@cs.wisc.edu # so it's OK. 699302Snilay@cs.wisc.edu require Algorithm::Diff; 709302Snilay@cs.wisc.edu import Algorithm::Diff qw(traverse_sequences); 719302Snilay@cs.wisc.edu}; 729302Snilay@cs.wisc.edu 739302Snilay@cs.wisc.edumy $lookahead_lines = $opt_l || 200; 7410074Snilay@cs.wisc.edu 7510074Snilay@cs.wisc.edu# in theory you could have different amounts of context before and 7610074Snilay@cs.wisc.edu# after a diff, but until someone needs that there's only one arg to 7710074Snilay@cs.wisc.edu# set both. 7810074Snilay@cs.wisc.edumy $precontext_lines = $opt_c || 3; 7910074Snilay@cs.wisc.edumy $postcontext_lines = $precontext_lines; 8010074Snilay@cs.wisc.edu 819508Snilay@cs.wisc.edumy $file1 = $ARGV[0]; 829465Snilay@cs.wisc.edumy $file2 = $ARGV[1]; 839508Snilay@cs.wisc.edu 849508Snilay@cs.wisc.edudie "Need two args." if (!(defined($file1) && defined($file2))); 856145Snate@binkert.org 869508Snilay@cs.wisc.edumy ($fh1, $fh2); 879508Snilay@cs.wisc.eduopen($fh1, $file1) or die "Can't open $file1"; 886145Snate@binkert.orgopen($fh2, $file2) or die "Can't open $file2"; 897039Snate@binkert.org 909508Snilay@cs.wisc.edu# print files to output so we know which is which 919508Snilay@cs.wisc.eduprint "-$file1\n"; 929508Snilay@cs.wisc.eduprint "+$file2\n"; 936145Snate@binkert.org 946145Snate@binkert.org# buffer of matching lines for pre-diff context 957039Snate@binkert.orgmy @precontext = (); 967039Snate@binkert.org# number of post-diff matching lines remaining to print 976145Snate@binkert.orgmy $postcontext = 0; 987039Snate@binkert.org 997039Snate@binkert.org# lookahead buffers for $file1 and $file2 respectively 1007039Snate@binkert.orgmy @lines1 = (); 1016145Snate@binkert.orgmy @lines2 = (); 1026145Snate@binkert.org 1037039Snate@binkert.org# Next line number available to print from each file. Generally this 104# corresponds to the oldest line in @precontext, or the oldest line in 105# @lines1 and @lines2 if @precontext is empty. 106my $lineno1 = 1; 107my $lineno2 = 1; 108 109# Fill a lookahead buffer to $lookahead_lines lines (or until EOF). 110sub fill 111{ 112 my ($fh, $array) = @_; 113 114 while (@$array < $lookahead_lines) { 115 my $line = <$fh>; 116 last if (!defined($line)); 117 push @$array, $line; 118 } 119} 120 121# Print and delete n lines from front of given array with given prefix. 122sub printlines 123{ 124 my ($array, $n, $prefix) = @_; 125 126 while ($n--) { 127 my $line = shift @$array; 128 last if (!defined($line)); 129 print $prefix, $line; 130 } 131} 132 133# Print a difference region where n1 lines of file1 were replaced by 134# n2 lines of file2 (where either n1 or n2 could be zero). 135sub printdiff 136{ 137 my ($n1, $n2)= @_; 138 139 # If the precontext buffer is full or we're at the beginning of a 140 # file, then this is a new diff region, so we should print a 141 # header indicating the current line numbers. If we're past the 142 # beginning and the precontext buffer isn't full, then whatever 143 # we're about to print is contiguous with the end of the last 144 # region we printed, so we just concatenate them on the output. 145 if (@precontext == $precontext_lines || ($lineno1 == 0 && $lineno2 == 0)) { 146 print "@@ -$lineno1 +$lineno2 @@\n"; 147 } 148 149 # Print and clear the precontext buffer. 150 if (@precontext) { 151 print ' ', join(' ', @precontext); 152 $lineno1 += scalar(@precontext); 153 $lineno2 += scalar(@precontext); 154 @precontext = (); 155 } 156 157 # Print the differing lines. 158 printlines(\@lines1, $n1, '-'); 159 printlines(\@lines2, $n2, '+'); 160 $lineno1 += $n1; 161 $lineno2 += $n2; 162 163 # Set $postcontext to print the next $postcontext_lines matching lines. 164 $postcontext = $postcontext_lines; 165} 166 167 168######################## 169# 170# Complex diff algorithm 171# 172######################## 173 174{ 175 my $match_found; 176 my $discard_lines1; 177 my $discard_lines2; 178 179 sub match { $match_found = 1; } 180 sub discard1 { $discard_lines1++ unless $match_found; } 181 sub discard2 { $discard_lines2++ unless $match_found; } 182 183 sub complex_diff 184 { 185 $match_found = 0; 186 $discard_lines1 = 0; 187 $discard_lines2 = 0; 188 189 # See Diff.pm. Note that even though this call generates a 190 # complete diff of both lookahead buffers, all we use it for 191 # is to figure out how many lines to discard off the front of 192 # each buffer to resync the streams. 193 traverse_sequences( \@lines1, \@lines2, 194 { MATCH => \&match, 195 DISCARD_A => \&discard1, 196 DISCARD_B => \&discard2 }); 197 198 if (!$match_found) { 199 printdiff(scalar(@lines1), scalar(@lines2)); 200 die "Lost sync!"; 201 } 202 203 # Since we shouldn't get here unless the first lines of the 204 # buffers are different, then we must discard some lines off 205 # at least one of the buffers. 206 die if ($discard_lines1 == 0 && $discard_lines2 == 0); 207 208 printdiff($discard_lines1, $discard_lines2); 209 } 210} 211 212####################### 213# 214# Simple diff algorithm 215# 216####################### 217 218# Check for a pair of matching lines; if found, generate appropriate 219# diff output. 220sub checkmatch 221{ 222 my ($n1, $n2) = @_; 223 224 # Check if two adjacent lines match, to reduce false resyncs 225 # (particularly on unrelated blank lines). This generates 226 # larger-than-necessary diffs when a single line really should be 227 # treated as common; if that bugs you, use Algorithm::Diff. 228 if ($lines1[$n1] eq $lines2[$n2] && $lines1[$n1+1] eq $lines2[$n2+1]) { 229 printdiff($n1, $n2); 230 return 1; 231 } 232 233 return 0; 234} 235 236sub simple_diff 237{ 238 # Look for differences of $cnt lines to resync, 239 # increasing $cnt from 1 to $lookahead_lines until we find 240 # something. 241 for (my $cnt = 1; $cnt < $lookahead_lines-1; ++$cnt) { 242 # Check for n lines in one file being replaced by 243 # n lines in the other. 244 return if checkmatch($cnt, $cnt); 245 # Find differences where n lines in one file were 246 # replaced by m lines in the other. We let m = $cnt 247 # and iterate for n = 0 to $cnt-1. 248 for (my $n = 0; $n < $cnt; ++$n) { 249 return if checkmatch($n, $cnt); 250 return if checkmatch($cnt, $n); 251 } 252 } 253 254 printdiff(scalar(@lines1), scalar(@lines2)); 255 die "Lost sync!"; 256} 257 258# Set the pointer to the appropriate diff function. 259# 260# Note that in either case the function determines how many lines to 261# discard from the front of each lookahead buffer to resync the 262# streams, then prints the appropriate diff output and discards them. 263# After the function returns, it should always be the case that 264# $lines1[0] eq $lines2[0]. 265my $find_diff = $use_complexdiff ? \&complex_diff : \&simple_diff; 266 267# The main loop. 268while (1) { 269 # keep lookahead buffers topped up 270 fill($fh1, \@lines1); 271 fill($fh2, \@lines2); 272 273 # peek at first line in each buffer 274 my $l1 = $lines1[0]; 275 my $l2 = $lines2[0]; 276 277 if (!defined($l1) && !defined($l2)) { 278 # reached EOF on both streams: exit 279 exit(1); 280 } 281 282 if ($l1 eq $l2) { 283 # matching lines: delete from lookahead buffer 284 shift @lines1; 285 shift @lines2; 286 # figure out what to do with this line 287 if ($postcontext > 0) { 288 # we're in the post-context of a diff: print it 289 $postcontext--; 290 print ' ', $l1; 291 $lineno1++; 292 $lineno2++; 293 } 294 else { 295 # we're in the middle of a matching region... save this 296 # line for precontext in case we run into a difference. 297 push @precontext, $l1; 298 # don't let precontext buffer get bigger than needed 299 while (@precontext > $precontext_lines) { 300 shift @precontext; 301 $lineno1++; 302 $lineno2++; 303 } 304 } 305 } 306 else { 307 # Mismatch. Deal with it. 308 &$find_diff(); 309 } 310} 311