#! /usr/bin/env perl

# Copyright (c) 2003 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Steve Reinhardt

# Diff two streams.
#
# Unlike regular diff, this script does not read in the entire input
# before doing a diff, so it can be used on lengthy outputs piped from
# other programs (e.g., M5 traces).  The best way to do this is to
# take advantage of the power of Perl's open function, which will
# automatically fork a subprocess if the last character in the
# "filename" is a pipe (|).  Thus to compare the instruction traces
# from two versions of m5 (m5a and m5b), you can do this:
#
#   rundiff 'm5a --traceflags=InstExec |' 'm5b --traceflags=InstExec |'
#

use strict;
use FileHandle;
use Getopt::Std;

#
# Options:
#  -c <n> : print n lines of context before & after changes
#  -l <n> : use n lines of lookahead
#  -x     : use "complex" diff from Algorithm::Diff (see below)
#
our ($opt_c, $opt_l, $opt_x);
getopts('c:l:x');

#
# For the highest-quality (minimal) diffs, we can use the
# Algorithm::Diff package.  By default, a built-in, simple, and
# generally quite adequate algorithm will be used.  If you have
# Algorithm::Diff installed on your system, and don't mind having the
# script go slower (like 3-4x slower, based on informal observation),
# then specify '-x' on the command line to use it.
my $use_complexdiff = defined($opt_x);

if ($use_complexdiff) {
    # Don't use 'use', as that's a compile-time option and will fail
    # on systems that don't have Algorithm::Diff installed even if
    # $use_complexdiff is false.  'require' is evaluated at runtime,
    # so it's OK.
    require Algorithm::Diff;
    # Explicit class-method call instead of indirect-object syntax
    # ("import Algorithm::Diff ..."), which is ambiguous to the parser.
    Algorithm::Diff->import(qw(traverse_sequences));
}

# A lookahead of zero is meaningless, so '||' (which also replaces an
# explicit 0) is the right default operator here.
my $lookahead_lines = $opt_l || 200;

# in theory you could have different amounts of context before and
# after a diff, but until someone needs that there's only one arg to
# set both.
#
# Test with defined() rather than '||' so that an explicit '-c 0'
# (no context lines at all) is honored instead of silently falling
# back to the default of 3.
my $precontext_lines = defined($opt_c) ? $opt_c : 3;
my $postcontext_lines = $precontext_lines;

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

die "Need two args." if (!(defined($file1) && defined($file2)));

# The two-argument form of open is deliberate: it is what lets an
# argument ending in '|' be run as a command whose output is read
# (see the usage example above).
my ($fh1, $fh2);
open($fh1, $file1) or die "Can't open $file1: $!";
open($fh2, $file2) or die "Can't open $file2: $!";

# print files to output so we know which is which
print "-$file1\n";
print "+$file2\n";

# buffer of matching lines for pre-diff context
my @precontext = ();
# number of post-diff matching lines remaining to print
my $postcontext = 0;

# lookahead buffers for $file1 and $file2 respectively
my @lines1 = ();
my @lines2 = ();

# Next line number available to print from each file.
# Generally this (the next printable line number, tracked in $lineno1
# and $lineno2) corresponds to the oldest line in @precontext, or the
# oldest line in @lines1 and @lines2 if @precontext is empty.
my $lineno1 = 1;
my $lineno2 = 1;

# Fill a lookahead buffer to $lookahead_lines lines (or until EOF).
#
#   $fh    : filehandle to read from
#   $array : reference to the lookahead buffer to append to
sub fill
{
    my ($fh, $array) = @_;

    while (@$array < $lookahead_lines) {
        my $line = <$fh>;
        last if (!defined($line));
        push @$array, $line;
    }
}

# Print and delete n lines from front of given array with given prefix.
# Stops early if the array runs out of lines before n have been printed.
#
#   $array  : reference to the buffer to consume
#   $n      : maximum number of lines to print
#   $prefix : string printed in front of each line
sub printlines
{
    my ($array, $n, $prefix) = @_;

    while ($n--) {
        my $line = shift @$array;
        last if (!defined($line));
        print $prefix, $line;
    }
}

# Print a difference region where n1 lines of file1 were replaced by
# n2 lines of file2 (where either n1 or n2 could be zero).  The lines
# are consumed from the front of @lines1 and @lines2, any buffered
# precontext is printed first, and $postcontext is armed so the main
# loop prints the matching lines that follow.
sub printdiff
{
    my ($n1, $n2) = @_;

    # If the precontext buffer is full or we're at the beginning of a
    # file, then this is a new diff region, so we should print a
    # header indicating the current line numbers.  If we're past the
    # beginning and the precontext buffer isn't full, then whatever
    # we're about to print is contiguous with the end of the last
    # region we printed, so we just concatenate them on the output.
    #
    # Line numbers are 1-based ($lineno1 and $lineno2 start at 1), so
    # "at the beginning" means both are still 1; comparing against 0
    # could never be true and dropped the header for a difference
    # within the first $precontext_lines lines.
    if (@precontext == $precontext_lines ||
        ($lineno1 == 1 && $lineno2 == 1)) {
        print "@@ -$lineno1 +$lineno2 @@\n";
    }

    # Print and clear the precontext buffer.  Each buffered line still
    # carries its own newline, so joining with ' ' puts the one-space
    # context prefix in front of every line after the first.
    if (@precontext) {
        print ' ', join(' ', @precontext);
        $lineno1 += scalar(@precontext);
        $lineno2 += scalar(@precontext);
        @precontext = ();
    }

    # Print the differing lines.
    printlines(\@lines1, $n1, '-');
    printlines(\@lines2, $n2, '+');
    $lineno1 += $n1;
    $lineno2 += $n2;

    # Set $postcontext to print the next $postcontext_lines matching lines.
    $postcontext = $postcontext_lines;

    # Normally we flush after the postcontext lines are printed, but if
    # the user has decreed that there aren't any we need to flush now
    if ($postcontext == 0) {
        STDOUT->flush();
    }
}


########################
#
# Complex diff algorithm
#
########################

{
    # State shared between complex_diff and the callbacks it hands to
    # traverse_sequences.
    my $match_found;
    my $discard_lines1;
    my $discard_lines2;

    # Callbacks: only the discards seen before the first match are
    # counted, since that is all that is needed to resync.
    sub match { $match_found = 1; }
    sub discard1 { $discard_lines1++ unless $match_found; }
    sub discard2 { $discard_lines2++ unless $match_found; }

    # Resync the streams using Algorithm::Diff: count how many lines
    # must be dropped from the front of each lookahead buffer to reach
    # the first common line, then print that as a diff region.  Dies
    # with "Lost sync!" if the two buffers have no line in common.
    sub complex_diff
    {
        $match_found = 0;
        $discard_lines1 = 0;
        $discard_lines2 = 0;

        # See Diff.pm.  Note that even though this call generates a
        # complete diff of both lookahead buffers, all we use it for
        # is to figure out how many lines to discard off the front of
        # each buffer to resync the streams.
        traverse_sequences( \@lines1, \@lines2,
                            { MATCH => \&match,
                              DISCARD_A => \&discard1,
                              DISCARD_B => \&discard2 });

        if (!$match_found) {
            printdiff(scalar(@lines1), scalar(@lines2));
            die "Lost sync!";
        }

        # Since we shouldn't get here unless the first lines of the
        # buffers are different, then we must discard some lines off
        # at least one of the buffers.
        die if ($discard_lines1 == 0 && $discard_lines2 == 0);

        printdiff($discard_lines1, $discard_lines2);
    }
}

#######################
#
# Simple diff algorithm
#
#######################

# Check for a pair of matching lines; if found, generate appropriate
# diff output.
#
#   $n1, $n2 : candidate resync offsets into @lines1 and @lines2
#
# Returns 1 if the streams resync at those offsets (and the diff has
# been printed), 0 otherwise.
sub checkmatch
{
    my ($n1, $n2) = @_;

    # Check if two adjacent lines match, to reduce false resyncs
    # (particularly on unrelated blank lines).  This generates
    # larger-than-necessary diffs when a single line really should be
    # treated as common; if that bugs you, use Algorithm::Diff.
    #
    # Offsets past the end of a buffer yield undef, and undef compares
    # equal to undef under 'eq', so two streams that have both run out
    # at these offsets also count as resynced.
    if ($lines1[$n1] eq $lines2[$n2] && $lines1[$n1+1] eq $lines2[$n2+1]) {
        printdiff($n1, $n2);
        return 1;
    }

    return 0;
}

# Resync the streams with the built-in search: try progressively
# larger replacement sizes until checkmatch finds a resync point.
# Dies with "Lost sync!" if none is found within the lookahead window.
sub simple_diff
{
    # Look for differences of $cnt lines to resync,
    # increasing $cnt from 1 to $lookahead_lines until we find
    # something.
    for (my $cnt = 1; $cnt < $lookahead_lines-1; ++$cnt) {
        # Check for n lines in one file being replaced by
        # n lines in the other.
        return if checkmatch($cnt, $cnt);
        # Find differences where n lines in one file were
        # replaced by m lines in the other.  We let m = $cnt
        # and iterate for n = 0 to $cnt-1.
        for (my $n = 0; $n < $cnt; ++$n) {
            return if checkmatch($n, $cnt);
            return if checkmatch($cnt, $n);
        }
    }

    printdiff(scalar(@lines1), scalar(@lines2));
    die "Lost sync!";
}

# Set the pointer to the appropriate diff function.
#
# Note that in either case the function determines how many lines to
# discard from the front of each lookahead buffer to resync the
# streams, then prints the appropriate diff output and discards them.
# After the selected diff function returns, it should always be the
# case that $lines1[0] eq $lines2[0].
my $find_diff = \&simple_diff;
$find_diff = \&complex_diff if $use_complexdiff;

# Main loop: compare the two streams one line at a time, handing every
# mismatch to the selected resync function.
for (;;) {
    # top up both lookahead buffers before looking at them
    fill($fh1, \@lines1);
    fill($fh2, \@lines2);

    # peek at the head of each buffer without consuming it
    my ($head1, $head2) = ($lines1[0], $lines2[0]);

    # Both streams exhausted: we're done.
    # NOTE(review): the exit status is 1 unconditionally, whether or
    # not any differences were printed -- confirm that callers expect
    # this before changing it.
    exit(1) unless (defined($head1) || defined($head2));

    if ($head1 ne $head2) {
        # Mismatch: let the diff function print the region and resync.
        $find_diff->();
        next;
    }

    # The heads match, so consume them from both buffers.
    shift @lines1;
    shift @lines2;

    if ($postcontext > 0) {
        # Still owe trailing context for the previous diff: print the
        # line, and flush once the last context line has gone out.
        print ' ', $head1;
        ++$lineno1;
        ++$lineno2;
        --$postcontext;
        STDOUT->flush() if ($postcontext == 0);
        next;
    }

    # Inside a matching region: remember the line as potential leading
    # context for the next diff, dropping the oldest entries so the
    # buffer never holds more than $precontext_lines lines.  Each
    # dropped line advances the next-printable line numbers.
    push @precontext, $head1;
    while (@precontext > $precontext_lines) {
        shift @precontext;
        ++$lineno1;
        ++$lineno2;
    }
}