diff-out revision 2929
1#!/usr/bin/perl
2# Copyright (c) 2001-2005 The Regents of The University of Michigan
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met: redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer;
9# redistributions in binary form must reproduce the above copyright
10# notice, this list of conditions and the following disclaimer in the
11# documentation and/or other materials provided with the distribution;
12# neither the name of the copyright holders nor the names of its
13# contributors may be used to endorse or promote products derived from
14# this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27#
28# Authors: Steve Reinhardt
29
30#
31# This script diffs two SimpleScalar statistics output files.
32#
33
34use Getopt::Std;
35
#
# -t thresh sets threshold for ignoring differences (in %)
# -n num sets the number of differences printed (default 20)
# -p sorts differences by % chg (default is alphabetic)
# -f ignores fetch-loss statistics
# -d ignores all distributions
# -h diffs header info separately from stats
#
42
# Parse the command-line switches.  Getopt::Std stores each switch in
# the package variable $opt_<letter>; -n and -t take an argument.
getopts('dfn:pt:h');

if ($#ARGV < 1)
{
    print "\nError: need two file arguments (<reference> <new>).\n";
    print "   Options: -d  =  Ignore distributions\n";
    print "            -f  =  Ignore fetch-loss stats\n";
    print "            -p  =  Sort errors by percentage\n";
    print "            -h  =  Diff header info separately from stats\n";
    print "            -n <num>  =  Print top <num> errors (default 20)\n";
    # The default shown here matches the value $err_thresh falls back to below.
    print "            -t <num>  =  Error threshold in percent (default 0)\n\n";
    # Usage error.  A plain exit replaces "die -1", which printed a
    # meaningless "-1 at <script> line N." and left the exit status
    # dependent on whatever errno happened to hold.  Status 2 keeps
    # this case distinct from the 1 used for "statistics differ".
    exit 2;
}

# Three-argument open: the mode is never taken from the file name, so
# names with leading/trailing whitespace or characters such as '>' or
# '|' are opened literally.  REF and NEW remain package-level handles
# because the rest of the script passes them around as \*REF and \*NEW.
open(REF, '<', $ARGV[0]) or die "Error: can't open $ARGV[0]: $!\n";
open(NEW, '<', $ARGV[1]) or die "Error: can't open $ARGV[1]: $!\n";


#
# Tunables, adjustable from the command line
#

# Ignorable error (in percent); -t overrides the default of 0
$err_thresh = ($opt_t) ? $opt_t : 0;

# Number of stats to print before omitting; -n overrides the default of 20
$omit_count = ($opt_n) ? $opt_n : 20;
70
71
72#
73# First copy everything up to the simulation statistics to a pair of
74# temporary files, stripping out date-related items, and do a plain
75# diff.  Any differences in the arguments are not necessarily an issue;
76# any differences in the program output should be caught by the EIO
77# mechanism if an EIO file is used.
78# 
79
# copy_header($inhandle, $outname)
#
# Copies lines from the already-open input filehandle $inhandle into the
# file named $outname (created or truncated), stopping at the first line
# containing "Begin Simulation Statistics".  That marker line is read
# from $inhandle but not written, so the handle is left positioned on
# the line after it.  Lines starting with one of the run-specific
# prefixes matched below are dropped.
#
# Dies if $outname cannot be opened or if closing it fails.

sub copy_header
{
    my ($inhandle, $outname) = @_;

    # Three-argument open with a lexical handle: the output name is
    # used literally (two-argument open strips surrounding whitespace
    # and interprets mode characters embedded in the name).
    open(my $output, '>', $outname)
        or die "Error: can't open $outname: $!\n";

    # A named lexical is used for the line so the caller's $_ is untouched.
    while (my $line = <$inhandle>)
    {
        # strip out lines that can vary
        next if $line =~ /^(command line:|M5 compiled on |M5 simulation started |M5 executing on )/;
        last if $line =~ /Begin Simulation Statistics/;
        print $output $line;
    }

    # Buffered write errors only surface when the handle is closed.
    close($output) or die "Error: can't write $outname: $!\n";
}
if ($opt_h) {

    # Diff header separately from stats

    # Let File::Temp pick unique names and create the files itself,
    # instead of using predictable /tmp/<name>.<pid> paths that another
    # user could pre-create or symlink.  The names are kept in the
    # package variables $refheader/$newheader because cleanup() unlinks
    # whatever those hold.
    require File::Temp;
    my ($reffh, $newfh);
    ($reffh, $refheader) =
        File::Temp::tempfile('smt-test.refheader.XXXXXX', TMPDIR => 1);
    ($newfh, $newheader) =
        File::Temp::tempfile('smt-test.newheader.XXXXXX', TMPDIR => 1);
    # copy_header reopens the files by name, so these handles are not needed.
    close($reffh);
    close($newfh);

    copy_header(\*REF, $refheader);
    copy_header(\*NEW, $newheader);

    print "\n===== Header and program output differences =====\n\n";

    # List-form pipe open runs diff without a shell, so the temporary
    # paths are passed through verbatim whatever characters they contain.
    if (open(my $diff, '-|', 'diff', $refheader, $newheader)) {
        while (my $line = <$diff>) {
            print $line;
        }
        # diff exits non-zero when the files differ, so the close
        # status is deliberately not treated as an error.
        close($diff);
    }
    else {
        warn "Error: can't run diff: $!\n";
    }

    print "\n===== Statistics differences =====\n\n";
}
114
115#
116# Now parse statistics
117#
118
#
# parse_file($stathandle)
#
# Reads statistics lines from the open filehandle $stathandle until a
# line containing "End Simulation Statistics" (or end of file), closes
# the handle, and returns a reference to a hash mapping each statistic
# name to its value.
#
#   - "Memory usage: N KBytes" is stored under the key 'memory usage'.
#   - A "<name>.start_dist" or "fetch_loss_counters_<n>.start" line opens
#     a distribution: <name> itself is stored with value 0 and the
#     following lines are read as its members until the matching
#     ".end_dist" / ".end" line.
#   - Distribution buckets are stored as "<dist>::<label>"; the
#     .min_value/.max_value lines inside a distribution are stored like
#     ordinary statistics.
#   - $opt_d skips the contents of ordinary distributions and $opt_f
#     skips the contents of fetch-loss counters.
#
# Lines that match none of the expected layouts are skipped rather than
# being recorded under an empty name, which previously surfaced as a
# blank entry in the missing/added statistics reports.
#
sub parse_file
{
    my $stathandle = shift;

    my $in_dist;            # name of the distribution being read, if any
    my $hashref = { };      # initialize hash for values

    local $_;               # keep the caller's $_ intact

    while (<$stathandle>)
    {
        next if /^\s*$/;        # skip blank lines
        next if /^\*\*Ignore/;  # temporary, to make totaling scripts easy for ISCA 03
        last if /End Simulation Statistics/;

        s/ *#.*//;              # strip comments

        my ($stat, $value);

        if (/^Memory usage: (\d+) KBytes/) {
            ($stat, $value) = ('memory usage', $1);
        }
        elsif ($in_dist) {
            if ($in_dist =~ /^fetch_loss_counters/) {
                if (/^fetch_loss_counters_\d+\.end/) {
                    # end line of distribution: clear $in_dist flag
                    $in_dist = undef;
                    next;
                }
                next if $opt_f;

                ($stat, $value) = /^(\S+)\s+(.*)/;
            }
            else {
                if (/(.*)\.end_dist/) {
                    # end line of distribution: clear $in_dist flag
                    $in_dist = undef;
                    next;
                }
                next if $opt_d;     # bail out if we are ignoring dists...

                if (/(.*)\.(min|max)_value/) {
                    # treat these like normal stats
                    ($stat, $value) = /^(\S+)\s+(.*)/;
                }
                else {
                    # this is ugly because labels in the distribution
                    # buckets don't start in column 0 and may include
                    # embedded spaces
                    my ($label, $count) =
                      /^\s*(\S+(?:.*\S)?)\s+(\d+)\s+\d+\.\d+%/;
                    # only build the qualified name for a real bucket line
                    ($stat, $value) = ($in_dist . '::' . $label, $count)
                        if defined $label;
                }
            }
        }
        elsif (/(.*)\.start_dist/) {
            # start line of distribution: set $in_dist flag
            # and save distribution name for future reference
            ($in_dist, $stat, $value) = ($1, $1, 0);
        }
        elsif (/^(fetch_loss_counters_\d+)\.start/) {
            # treat fetch loss counters like distribution, sort of
            ($in_dist, $stat, $value) = ($1, $1, 0);
        }
        else {
            ($stat, $value) = /^(\S+)\s+(.*)/;
        }

        # a failed pattern match leaves $stat undefined: nothing to record
        next unless defined $stat;

        $$hashref{$stat} = $value;
    }

    close($stathandle);
    return $hashref;
}
203
204
#
# pct_diff($old, $new)
#
# Returns the change from $old to $new expressed as a percentage of
# $old.  A zero $old cannot be used as a divisor, so that case returns
# 0 when $new is also zero and the sentinel 9999 otherwise.
#
sub pct_diff
{
    my ($old, $new) = @_;

    # ordinary case: relative change scaled to percent
    return 100 * ($new - $old) / $old unless $old == 0;

    # zero baseline
    return 0 if $new == 0;
    return 9999;
}
213
214
#
# Statistics to ignore: these relate to simulator performance, not
# correctness, so don't fail on changes here.  Stored as a lookup hash
# (name => 1).
#
%ignore = map { $_ => 1 } qw(
    host_seconds
    host_tick_rate
    host_inst_rate
    host_mem_usage
);

#
# List of key statistics (always displayed)
#  ==> list stats here WITHOUT trailing thread ID
#
@key_stat_list = qw(
    COM:IPC
    ISSUE:MSIPC
    COM:count
    host_inst_rate
    sim_insts
    sim_ticks
    host_mem_usage
);

# Single alternation pattern built from the key statistic names
$key_stat_pattern = join('|', @key_stat_list);
241
# Parse the statistics sections of both files, then walk the reference
# statistics in sorted name order and classify each one:
#   - absent from the new file       -> @missing_stats
#   - name matches a key statistic   -> @key_stats (always recorded)
#   - changed by more than the limit -> @errs
# Every statistic found in both files is removed from the new-file hash,
# so whatever remains there afterwards exists only in the new file.

$max_err_mag = 0;

$refhash = parse_file(\*REF);
$newhash = parse_file(\*NEW);

# The string sim-smt prints on a divide by zero
$divbyzero = '<err: divide by zero>';

foreach $stat (sort keys %$refhash)
{
    $refvalue = $$refhash{$stat};
    $newvalue = $$newhash{$stat};

    unless (defined($newvalue)) {
        # stat missing from new file
        push @missing_stats, $stat;
        next;
    }

    # key statistics: always record & display changes in these
    push @key_stats, [$stat, $refvalue, $newvalue]
        if ($stat =~ /($key_stat_pattern)/o);

    # only stats that are not on the "ignore" list and have changed
    # need any further attention
    if (!$ignore{$stat} and $refvalue ne $newvalue) {
        if ($refvalue eq $divbyzero || $newvalue eq $divbyzero) {
            # one or the other was a divide by zero:
            # no point in trying to quantify error
            print "$stat: $refvalue --> $newvalue\n";
        }
        else {
            $reldiff = pct_diff($refvalue, $newvalue);
            $diffmag = abs($reldiff);

            # report only differences above the threshold ...
            push @errs, [$stat, $refvalue, $newvalue, $reldiff]
                if ($diffmag > $err_thresh);

            # ... but track the largest magnitude seen regardless
            $max_err_mag = $diffmag
                if ($diffmag > $max_err_mag);
        }
    }

    # remove from new hash so we can detect added stats
    delete $$newhash{$stat};
}
295
296
#
# All done.  Print comparison summary: the maximum error magnitude, the
# key statistics, and then up to $omit_count of the recorded errors.
#

# _decimal_digits(@values)
#
# Returns the largest number of characters that follow the last '.' in
# any of @values, or 0 when none of them contains a '.'.  Used to pick
# a printf precision that does not round away digits present in the
# values being shown.
sub _decimal_digits
{
    my $digits = 0;
    foreach my $value (@_) {
        my $pointpos = rindex($value, '.');
        next if $pointpos < 0;
        my $count = length($value) - $pointpos - 1;
        $digits = $count if $count > $digits;
    }
    return $digits;
}

printf("Maximum error magnitude: %+f%%\n\n", $max_err_mag);

printf("  %-30s %10s %10s %10s   %7s\n", ' ', 'Reference', 'New Value', 'Abs Diff', 'Pct Chg');

printf("Key statistics:\n\n");

foreach my $key_stat (@key_stats)
{
    my ($statname, $refvalue, $newvalue) = @$key_stat;

    # deduce format from both values, as the error listing below does,
    # so a new value with more decimals than the reference is not
    # rounded to look unchanged
    my $digits = _decimal_digits($refvalue, $newvalue);
    my $fmt = "%10.${digits}f";

    # print values with absolute and relative error
    printf("  %-30s $fmt $fmt $fmt  %+7.2f%%\n",
           $statname, $refvalue, $newvalue,
           $newvalue - $refvalue, pct_diff($refvalue, $newvalue));
}

# %g keeps a fractional threshold such as 0.5 visible (%d showed it as
# 0); the count is passed as an argument instead of being interpolated
# into the format string.
printf("\nLargest %s relative errors (> %g%%):\n\n", $omit_count, $err_thresh);

$num_errs = 0;

if ($opt_p)
{
    # sort differences by percent change
    @errs = sort { abs($$b[3]) <=> abs($$a[3]) } @errs;
}

foreach my $err (@errs)
{
    my ($statname, $refvalue, $newvalue, $reldiff) = @$err;

    # deduce format from the more precise of the two values
    my $digits = _decimal_digits($refvalue, $newvalue);
    my $fmt = "%10.${digits}f";

    # print differing values with absolute and relative error
    printf("  %-30s $fmt $fmt $fmt  %+7.2f%%\n",
           $statname, $refvalue, $newvalue, $newvalue - $refvalue, $reldiff);

    # only print top N errors
    if (++$num_errs >= $omit_count)
    {
        # announce an omission only when entries really were left out
        print "[... additional errors omitted ...]\n"
            if $num_errs < scalar(@errs);
        last;
    }
}
355
#
# Report statistics present in the reference file but absent from the
# new one.  Names ending in "::", "::<digits>" or "::overflows" are
# distribution buckets and are dropped first: these are mostly noise.
#
@missing_stats = grep { $_ !~ /::(\d+|overflows)?$/ } @missing_stats;

# get count
$missing_stats = @missing_stats;

if ($missing_stats > 0)
{
    print "\nMissing $missing_stats reference statistics:\n\n";
    for my $name (@missing_stats)
    {
        # name padded to a fixed column, followed by the reference value
        print sprintf("  %-50s    ", $name), "$$refhash{$name}\n";
    }
}

#
# Any stats left in newhash are added since the reference file; the
# same distribution-bucket filter is applied to them.
#
@added_stats = grep { $_ !~ /::(\d+|overflows)?$/ } keys %$newhash;

# get count
$added_stats = @added_stats;

if ($added_stats > 0)
{
    print "\nFound $added_stats new statistics:\n\n";
    for my $name (sort @added_stats)
    {
        # name padded to a fixed column, followed by the new value
        print sprintf("  %-50s    ", $name), "$$newhash{$name}\n";
    }
}
399
# Remove any temporary header files, then report the outcome through
# the exit status.
cleanup();

# Exit code is 0 if no stats error, 1 otherwise
$status = 1;
$status = 0 if ($max_err_mag == 0.0);
exit $status;
404
# cleanup()
#
# Deletes the temporary header files named by the package variables
# $refheader and $newheader.  A variable that is unset (or otherwise
# false) is skipped.
sub cleanup
{
    foreach my $tmpfile ($refheader, $newheader)
    {
        next unless ($tmpfile);
        unlink($tmpfile);
    }
}
410