rundiff revision 2:7ab458527c41
1#!/usr/bin/perl 2 3# Copyright (c) 2001 Nathan L. Binkert 4# All rights reserved. 5# 6# Permission to redistribute, use, copy, and modify this software 7# without fee is hereby granted, provided that the following 8# conditions are met: 9# 10# 1. This entire notice is included in all source code copies of any 11# software which is or includes a copy or modification of this 12# software. 13# 2. The name of the author may not be used to endorse or promote 14# products derived from this software without specific prior 15# written permission. 16# 17# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 18# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 21# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 23# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 25# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 26# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28# 29 30use Algorithm::Diff qw(diff); 31use vars qw ($opt_C $opt_c $opt_u $opt_U); 32 33$opt_u = ""; 34$opt_c = undef; 35 36$diffsize = 2000; 37# After we've read up to a certain point in each file, the number of items 38# we've read from each file will differ by $FLD (could be 0) 39my $File_Length_Difference = 0; 40my $Context_Lines = 9; 41 42$progname = $0; 43if (scalar(@ARGV) != 2) { 44 usage(); 45} 46 47my ($filename1, $filename2); 48($filename1, $start1) = parse_filearg($ARGV[0]); 49($filename2, $start2) = parse_filearg($ARGV[1]); 50 51if ($filename1 eq "-" && $filename2 eq "-") { 52 die "Only one of the inputs may be standard in\n"; 53} 54 55my ($file1, $file2); 56if ($filename1 eq "-") { 57 $file1 = STDIN; 58} else { 59 open(FILE1, $filename1) || die "can't open $file1: $!\n"; 60 $file1 = FILE1; 61} 62 63if ($filename2 eq "-") { 64 $file2 = STDIN; 65} else { 66 open(FILE2, $filename2) || die "can't open $file2: $!\n"; 67 $file2 = FILE2; 68} 69 70my $file_offset1 = ffw($file1, $start1); 71my $file_offset2 = ffw($file2, $start2); 72 73$skip_first = 0; 74my (@buf1, @buf2, @printbuf1, @printbuf2); 75 76$Compare_Ahead = 0; 77 78while (!eof($file1) && !eof($file2)) { 79 my $line1 = <$file1>; chomp $line1; 80 my $line2 = <$file2>; chomp $line2; 81 my $printline1 = $line1; 82 my $printline2 = $line2; 83 84 push @buf1, $line1; 85 push @buf2, $line2; 86 push @printbuf1, $printline1; 87 push @printbuf2, $printline2; 88 89# while ($Compare_Ahead < $Context_Lines) { 90# $line1 = @buf1[$Compare_Ahead]; 91# $line2 = @buf2[$Compare_Ahead]; 92# $line2 =~ s/ *--.*$//; 93# if ($line1 ne $line2) { last; } 94# ++$Compare_Ahead; 95# } 96 97 $line1 = @buf1[$Compare_Ahead]; 98 $line2 = @buf2[$Compare_Ahead]; 99 $line2 =~ s/ *--.*$//; 100 101 if ($line1 ne $line2) { 102 while (!eof($file1) && scalar(@buf1) < $diffsize) { 103 $line = <$file1>; chomp $line; 104 my $printline = $line; 105 106 push @printbuf1, $printline; 107 push @buf1, $line; 108 } 109 110 while (!eof($file2) && scalar(@buf2) < $diffsize) { 111 $line = <$file2>; chomp $line; 112 my $printline = $line; 113# $line =~ s/ *--.*$//; 114 115 push @printbuf2, $printline; 116 push @buf2, $line; 117 } 118 119 my $diffs = diff(\@buf1, \@buf2); 120 121 next unless @$diffs; 122 123 my @hunklist; 124 my ($hunk,$oldhunk); 125 # Loop over hunks. If a hunk overlaps with the last hunk, join them. 126 # Otherwise, print out the old one. 127 foreach my $piece (@$diffs) { 128 $hunk = new Hunk ($piece, $Context_Lines, scalar(@buf1)); 129 next unless $oldhunk; 130 131 if ($hunk->does_overlap($oldhunk)) { 132 $hunk->prepend_hunk($oldhunk); 133 } else { 134 push @hunklist, $oldhunk; 135 } 136 } continue { 137 $oldhunk = $hunk; 138 } 139 140 my $change = 0; 141 while (scalar(@hunklist) && !$change) { 142 $hunk = pop @hunklist; 143 $change = $hunk->{"change"}; 144 } 145 push @hunklist, $hunk; 146 $last_start1 = $hunk->{"start1"}; 147 $last_start2 = $hunk->{"start2"}; 148 $last_end1 = $hunk->{"end1"}; 149 $last_end2 = $hunk->{"end2"}; 150 151 while (scalar(@hunklist)) { 152 $hunk = shift @hunklist; 153# $hunk->output_diff(\@buf1, \@buf2); 154 $hunk->output_diff(\@printbuf1, \@printbuf2); 155 } 156 157 $last_end1 -= $Context_Lines - 1; 158 $last_end2 -= $Context_Lines - 1; 159 $file_offset1 += $last_end1; 160 $file_offset2 += $last_end2; 161 @printbuf1 = @printbuf1[$last_end1..$#printbuf1]; 162 @printbuf2 = @printbuf2[$last_end2..$#printbuf2]; 163 @buf1 = @buf1[$last_end1..$#buf1]; 164 @buf2 = @buf2[$last_end2..$#buf2]; 165 while (scalar(@buf1) > $Context_Lines && 166 scalar(@buf2) > $Context_Lines) { 167 $foo1 = @buf1[$Context_Lines]; 168 $foo2 = @buf2[$Context_Lines]; 169 if (scalar($foo1) != scalar($foo2) || $foo1 ne $foo2) { last; } 170 $foo1 = shift @printbuf1; 171 $foo2 = shift @printbuf2; 172 $foo1 = shift @buf1; 173 $foo2 = shift @buf2; 174 ++$file_offset1; 175 ++$file_offset2; 176 } 177 } else { 178 ++$file_offset1; 179 ++$file_offset2; 180 $foo1 = shift @printbuf1; 181 $foo2 = shift @printbuf2; 182 $foo1 = shift @buf1; 183 $foo2 = shift @buf2; 184 } 185} 186 187close $file1; 188close $file2; 189 190sub ffw() { 191 if (scalar(@_) != 2) { die "improper usage of ffw\n"; } 192 193 my $FILE = $_[0]; 194 my $start = $_[1]; 195 my $count = 0; 196 197 while ($start-- > 0 && !eof($FILE)) { 198 <$FILE>; 199 $count++; 200 } 201 202 if ($start > 0) {die "File too short for ffw amount\n"; } 203 return $count; 204} 205 206sub parse_filearg() { 207 $start = 0; 208 split /:/, @_[0]; 209 if (scalar(@_) > 2) { usage(); } 210 211 $file = $_[0]; 212 if (scalar(@_) > 1) { $start = $_[1]; } 213 214 return ($file, $start); 215} 216 217sub usage() { 218 printf "usage: $progname <file1>[:start] <file2>[:start]\n"; 219 exit 1; 220} 221 222 223# Package Hunk. A Hunk is a group of Blocks which overlap because of the 224# context surrounding each block. (So if we're not using context, every 225# hunk will contain one block.) 226{ 227package Hunk; 228 229sub new { 230# Arg1 is output from &LCS::diff (which corresponds to one Block) 231# Arg2 is the number of items (lines, e.g.,) of context around each block 232# 233# This subroutine changes $File_Length_Difference 234# 235# Fields in a Hunk: 236# blocks - a list of Block objects 237# start - index in file 1 where first block of the hunk starts 238# end - index in file 1 where last block of the hunk ends 239# 240# Variables: 241# before_diff - how much longer file 2 is than file 1 due to all hunks 242# until but NOT including this one 243# after_diff - difference due to all hunks including this one 244 my ($class, $piece, $context_items, $maxlen) = @_; 245 246 my $block = new Block ($piece); # this modifies $FLD! 247 248 my $before_diff = $File_Length_Difference; # BEFORE this hunk 249 my $after_diff = $before_diff + $block->{"length_diff"}; 250 $File_Length_Difference += $block->{"length_diff"}; 251 252 # @remove_array and @insert_array hold the items to insert and remove 253 # Save the start & beginning of each array. If the array doesn't exist 254 # though (e.g., we're only adding items in this block), then figure 255 # out the line number based on the line number of the other file and 256 # the current difference in file lenghts 257 my @remove_array = $block->remove; 258 my @insert_array = $block->insert; 259 my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2, $change); 260 $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1; 261 $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1; 262 $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1; 263 $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1; 264 265 $start1 = $a1 == -1 ? $b1 - $before_diff : $a1; 266 $end1 = $a2 == -1 ? $b2 - $after_diff : $a2; 267 $start2 = $b1 == -1 ? $a1 + $before_diff : $b1; 268 $end2 = $b2 == -1 ? $a2 + $after_diff : $b2; 269 $change = scalar(@remove_array) && scalar(@insert_array); 270 271 # At first, a hunk will have just one Block in it 272 my $hunk = { 273 "start1" => $start1, 274 "start2" => $start2, 275 "end1" => $end1, 276 "end2" => $end2, 277 "maxlen" => $maxlen, 278 "change" => $change, 279 "blocks" => [$block], 280 }; 281 bless $hunk, $class; 282 283 $hunk->flag_context($context_items); 284 285 return $hunk; 286} 287 288# Change the "start" and "end" fields to note that context should be added 289# to this hunk 290sub flag_context { 291 my ($hunk, $context_items) = @_; 292 return unless $context_items; # no context 293 294 # add context before 295 my $start1 = $hunk->{"start1"}; 296 my $num_added = $context_items > $start1 ? $start1 : $context_items; 297 $hunk->{"start1"} -= $num_added; 298 $hunk->{"start2"} -= $num_added; 299 300 # context after 301 my $end1 = $hunk->{"end1"}; 302 $num_added = ($end1+$context_items > $hunk->{"maxlen"}) ? 303 $hunk->{"maxlen"} - $end1 : 304 $context_items; 305 $hunk->{"end1"} += $num_added; 306 $hunk->{"end2"} += $num_added; 307} 308 309# Is there an overlap between hunk arg0 and old hunk arg1? 310# Note: if end of old hunk is one less than beginning of second, they overlap 311sub does_overlap { 312 my ($hunk, $oldhunk) = @_; 313 return "" unless $oldhunk; # first time through, $oldhunk is empty 314 315 # Do I actually need to test both? 316 return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 || 317 $hunk->{"start2"} - $oldhunk->{"end2"} <= 1); 318} 319 320# Prepend hunk arg1 to hunk arg0 321# Note that arg1 isn't updated! Only arg0 is. 322sub prepend_hunk { 323 my ($hunk, $oldhunk) = @_; 324 325 $hunk->{"start1"} = $oldhunk->{"start1"}; 326 $hunk->{"start2"} = $oldhunk->{"start2"}; 327 328 unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}}); 329} 330 331 332# DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO... 333sub output_diff { 334 if (defined $main::opt_u) {&output_unified_diff(@_)} 335 elsif (defined $main::opt_c) {&output_context_diff(@_)} 336 else {die "unknown diff"} 337} 338 339sub output_unified_diff { 340 my ($hunk, $fileref1, $fileref2) = @_; 341 my @blocklist; 342 343 # Calculate item number range. 344 my $range1 = $hunk->unified_range(1, $file_offset1); 345 my $range2 = $hunk->unified_range(2, $file_offset2); 346 print "@@ -$range1 +$range2 @@\n"; 347 348 # Outlist starts containing the hunk of file 1. 349 # Removing an item just means putting a '-' in front of it. 350 # Inserting an item requires getting it from file2 and splicing it in. 351 # We splice in $num_added items. Remove blocks use $num_added because 352 # splicing changed the length of outlist. 353 # We remove $num_removed items. Insert blocks use $num_removed because 354 # their item numbers---corresponding to positions in file *2*--- don't take 355 # removed items into account. 356 my $low = $hunk->{"start1"}; 357 my $hi = $hunk->{"end1"}; 358 my ($num_added, $num_removed) = (0,0); 359 my @outlist = @$fileref1[$low..$hi]; 360 map {s/^/ /} @outlist; # assume it's just context 361 362 foreach my $block (@{$hunk->{"blocks"}}) { 363 foreach my $item ($block->remove) { 364 my $op = $item->{"sign"}; # - 365 my $offset = $item->{"item_no"} - $low + $num_added; 366 $outlist[$offset] =~ s/^ /$op/; 367 $num_removed++; 368 } 369 foreach my $item ($block->insert) { 370 my $op = $item->{"sign"}; # + 371 my $i = $item->{"item_no"}; 372 my $offset = $i - $hunk->{"start2"} + $num_removed; 373 splice(@outlist,$offset,0,"$op$$fileref2[$i]"); 374 $num_added++; 375 } 376 } 377 378 map {s/$/\n/} @outlist; # add \n's 379 print @outlist; 380 381} 382 383sub output_context_diff { 384 my ($hunk, $fileref1, $fileref2) = @_; 385 my @blocklist; 386 387 print "***************\n"; 388 # Calculate item number range. 389 my $range1 = $hunk->context_range(1, $file_offset1); 390 my $range2 = $hunk->context_range(2, $file_offset2); 391 392 # Print out file 1 part for each block in context diff format if there are 393 # any blocks that remove items 394 print "*** $range1 ****\n"; 395 my $low = $hunk->{"start1"}; 396 my $hi = $hunk->{"end1"}; 397 if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) { 398 my @outlist = @$fileref1[$low..$hi]; 399 map {s/^/ /} @outlist; # assume it's just context 400 foreach my $block (@blocklist) { 401 my $op = $block->op; # - or ! 402 foreach my $item ($block->remove) { 403 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/; 404 } 405 } 406 map {s/$/\n/} @outlist; # add \n's 407 print @outlist; 408 } 409 410 print "--- $range2 ----\n"; 411 $low = $hunk->{"start2"}; 412 $hi = $hunk->{"end2"}; 413 if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) { 414 my @outlist = @$fileref2[$low..$hi]; 415 map {s/^/ /} @outlist; # assume it's just context 416 foreach my $block (@blocklist) { 417 my $op = $block->op; # + or ! 418 foreach my $item ($block->insert) { 419 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/; 420 } 421 } 422 map {s/$/\n/} @outlist; # add \n's 423 print @outlist; 424 } 425} 426 427sub context_range { 428# Generate a range of item numbers to print. Only print 1 number if the range 429# has only one item in it. Otherwise, it's 'start,end' 430 my ($hunk, $flag, $offset) = @_; 431 my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"}); 432 433 # index from 1, not zero 434 $start += $offset + 1; 435 $end += $offset + 1; 436 my $range = ($start < $end) ? "$start,$end" : $end; 437 return $range; 438} 439 440sub unified_range { 441# Generate a range of item numbers to print for unified diff 442# Print number where block starts, followed by number of lines in the block 443# (don't print number of lines if it's 1) 444 my ($hunk, $flag, $offset) = @_; 445 my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"}); 446 447 # index from 1, not zero 448 $start += $offset + 1; 449 $end += $offset + 1; 450 my $length = $end - $start + 1; 451 my $first = $length < 2 ? $end : $start; # strange, but correct... 452 my $range = $length== 1 ? $first : "$first,$length"; 453 return $range; 454} 455} # end Package Hunk 456 457# Package Block. A block is an operation removing, adding, or changing 458# a group of items. Basically, this is just a list of changes, where each 459# change adds or deletes a single item. 460# (Change could be a separate class, but it didn't seem worth it) 461{ 462package Block; 463sub new { 464# Input is a chunk from &Algorithm::LCS::diff 465# Fields in a block: 466# length_diff - how much longer file 2 is than file 1 due to this block 467# Each change has: 468# sign - '+' for insert, '-' for remove 469# item_no - number of the item in the file (e.g., line number) 470# We don't bother storing the text of the item 471# 472 my ($class,$chunk) = @_; 473 my @changes = (); 474 475# This just turns each change into a hash. 476 foreach my $item (@$chunk) { 477 my ($sign, $item_no, $text) = @$item; 478 my $hashref = {"sign" => $sign, "item_no" => $item_no}; 479 push @changes, $hashref; 480 } 481 482 my $block = { "changes" => \@changes }; 483 bless $block, $class; 484 485 $block->{"length_diff"} = $block->insert - $block->remove; 486 return $block; 487} 488 489 490# LOW LEVEL FUNCTIONS 491sub op { 492# what kind of block is this? 493 my $block = shift; 494 my $insert = $block->insert; 495 my $remove = $block->remove; 496 497 $remove && $insert and return '!'; 498 $remove and return '-'; 499 $insert and return '+'; 500 warn "unknown block type"; 501 return '^'; # context block 502} 503 504# Returns a list of the changes in this block that remove items 505# (or the number of removals if called in scalar context) 506sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; } 507 508# Returns a list of the changes in this block that insert items 509sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; } 510 511} # end of package Block 512