Cross Reference: /gem5/tests/diff-out

#!/usr/bin/perl
# Copyright (c) 2001-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt

#
# This script diffs two SimpleScalar statistics output files.
#

use Getopt::Std;

#
# Command-line options (getopts stores each one in $opt_<letter>):
#
# -t thresh sets threshold for ignoring differences (in %)
# -p sorts differences by % chg (default is alphabetic)
# -f ignores fetch-loss statistics
# -d ignores all distributions
# -h diffs header info separately from stats
# -n num sets how many errors are printed before the rest are omitted
#

getopts('dfn:pt:h');

# Two positional arguments are required: the reference file and the new file.
if ($#ARGV < 1)
{
    print "\nError: need two file arguments (<reference> <new>).\n";
    print "   Options: -d  =  Ignore distributions\n";
    print "            -f  =  Ignore fetch-loss stats\n";
    print "            -p  =  Sort errors by percentage\n";
    print "            -h  =  Diff header info separately from stats\n";
    print "            -n <num>  =  Print top <num> errors (default 20)\n";
    # The threshold really defaults to 0 (see $err_thresh below).
    print "            -t <num>  =  Error threshold in percent (default 0)\n\n";
    die -1;
}

# Three-argument open: the file names come straight from the command line,
# so they must never be interpreted as part of the open mode.
open(REF, '<', $ARGV[0]) or die "Error: can't open $ARGV[0].\n";
open(NEW, '<', $ARGV[1]) or die "Error: can't open $ARGV[1].\n";


#
# Things that really should be adjustable via the command line
#

# Ignorable error (in percent); 0 when -t is absent (or given as 0)
$err_thresh = ($opt_t) ? $opt_t : 0;

# Number of stats to print before omitting; 20 when -n is absent (or 0)
$omit_count = ($opt_n) ? $opt_n : 20;


#
# First copy everything up to the simulation statistics to a pair of
# temporary files, stripping out date-related items, and do a plain
# diff.  Any differences in the arguments are not necessarily an issue;
# any differences in the program output should be caught by the EIO
# mechanism if an EIO file is used.
#

#
# copy_header($inhandle, $outname)
#
#   $inhandle  open filehandle to read from
#   $outname   name of the file to create (truncated if it exists)
#
# Copies lines from $inhandle to $outname until a line containing
# "Begin Simulation Statistics" is read; that marker line is consumed
# but not copied.  Lines starting with "command line:", "M5 compiled on ",
# "M5 simulation started " or "M5 executing on " are dropped.
# Dies if $outname cannot be opened for writing.
#
sub copy_header
{
    my ($inhandle, $outname) = @_;

    # Three-argument open with a lexical handle: the name is never parsed
    # for mode characters and no global OUTPUT handle is left behind.
    open(my $out, '>', $outname) or die "Error: can't open $outname.\n";

    while (my $line = <$inhandle>)
    {
        # strip out lines that can vary
        next if $line =~ /^(command line:|M5 compiled on |M5 simulation started |M5 executing on )/;
        last if $line =~ /Begin Simulation Statistics/;
        print {$out} $line;
    }
    close($out);
}

if ($opt_h) {

    # Diff header separately from stats

    # Temporary file names are made unique per process with the PID ($$);
    # cleanup() removes both files at the end of the run.
    $refheader = "/tmp/smt-test.refheader.$$";
    $newheader = "/tmp/smt-test.newheader.$$";

    # Each call consumes its handle up to and including the
    # "Begin Simulation Statistics" line.
    copy_header(\*REF, $refheader);
    copy_header(\*NEW, $newheader);

    print "\n===== Header and program output differences =====\n\n";

    print `diff $refheader $newheader`;

    print "\n===== Statistics differences =====\n\n";
}

#
# Now parse statistics
#

#
# parse_file($stathandle)
#
# Takes an open filehandle and returns a reference to a hash containing
# all the statistics variables and their values.  Reading stops at a line
# containing "End Simulation Statistics" (or at end of file), and the
# handle is closed before returning.
#
# Keys are formed as follows:
#   - "Memory usage: N KBytes" is stored as 'memory usage' => N;
#   - an ordinary line "name value..." is stored as name => rest of line;
#   - "<dist>.start_dist" / "fetch_loss_counters_<n>.start" store
#     <dist> => 0 and open a distribution, closed again by the matching
#     ".end_dist" / ".end" line;
#   - inside a distribution, bucket lines are stored as
#     "<dist>::<label>" => count, while min_value/max_value lines are
#     treated like ordinary stats.
#
# The global option flags $opt_f (skip fetch-loss entries) and $opt_d
# (skip distribution contents) are honoured.
#
# Lines from which no statistic name can be extracted (for example a line
# holding only a comment) are skipped rather than stored under an empty key.
#
sub parse_file
{
    my ($stathandle) = @_;

    my $in_dist;        # name of the distribution being read, if any
    my %stats;          # statistic name => value

    while (my $line = <$stathandle>)
    {
        next if $line =~ /^\s*$/;           # skip blank lines
        next if $line =~ /^\*\*Ignore/;     # temporary, to make totaling scripts easy for ISCA 03
        last if $line =~ /End Simulation Statistics/;

        $line =~ s/ *#.*//;                 # strip comments

        my ($stat, $value);

        if ($line =~ /^Memory usage: (\d+) KBytes/) {
            $stat = 'memory usage';
            $value = $1;
        }
        elsif ($in_dist) {
            if ($in_dist =~ /^fetch_loss_counters/) {
                if ($line =~ /^fetch_loss_counters_\d+\.end/) {
                    # end line of distribution: clear $in_dist flag
                    $in_dist = undef;
                    next;
                }
                next if $opt_f;

                ($stat, $value) = $line =~ /^(\S+)\s+(.*)/;
            }
            else {
                if ($line =~ /(.*)\.end_dist/) {
                    # end line of distribution: clear $in_dist flag
                    $in_dist = undef;
                    next;
                }
                next if $opt_d;     # bail out if we are ignoring dists...

                if ($line =~ /(.*)\.(min|max)_value/) {
                    # treat these like normal stats
                    ($stat, $value) = $line =~ /^(\S+)\s+(.*)/;
                }
                else {
                    # this is ugly because labels in the distribution
                    # buckets don't start in column 0 and may include
                    # embedded spaces
                    my $label;
                    ($label, $value) =
                      $line =~ /^\s*(\S+(?:.*\S)?)\s+(\d+)\s+\d+\.\d+%/;
                    # A non-bucket line inside a distribution keeps the
                    # historical "<dist>::" key; the reporting code filters
                    # keys of that form out as noise.
                    $stat = $in_dist . '::' . (defined($label) ? $label : '');
                }
            }
        }
        else {
            if ($line =~ /(.*)\.start_dist/) {
                # start line of distribution: set $in_dist flag
                # and save distribution name for future reference
                $in_dist = $1;
                $stat = $1;
                $value = 0;
            }
            elsif ($line =~ /^(fetch_loss_counters_\d+)\.start/) {
                # treat fetch loss counters like distribution, sort of
                $in_dist = $1;
                $stat = $1;
                $value = 0;
            }
            else {
                ($stat, $value) = $line =~ /^(\S+)\s+(.*)/;
            }
        }

        # Nothing recognisable on this line (e.g. it held only a comment).
        next unless defined($stat);

        $stats{$stat} = $value;
    }

    close($stathandle);
    return \%stats;
}


#
# pct_diff($old, $new)
#
# Returns the change from $old to $new as a percentage of $old.
# When $old is zero no percentage exists: the result is 0 if $new is
# zero as well, and the sentinel 9999 otherwise.
#
sub pct_diff
{
    my ($old, $new) = @_;

    if ($old != 0) {
        return 100 * ($new - $old) / $old;
    }

    # zero reference value
    return 9999 if $new != 0;
    return 0;
}


#
# Statistics to ignore: these relate to simulator performance, not
# correctness, so changes in them are never recorded as errors.
#
%ignore = map { $_ => 1 } qw(
  host_seconds
  host_tick_rate
  host_inst_rate
  host_mem_usage
);

#
# List of key statistics (always displayed)
#  ==> list stats here WITHOUT trailing thread ID
#
@key_stat_list = qw(
  COM:IPC
  ISSUE:MSIPC
  COM:count
  host_inst_rate
  sim_insts
  sim_ticks
  host_mem_usage
);

# One alternation matching any key statistic name anywhere in a stat name
$key_stat_pattern = join('|', @key_stat_list);

# Largest percent difference magnitude seen so far
$max_err_mag = 0;

# Read the statistics of both files
$refhash = parse_file(\*REF);
$newhash = parse_file(\*NEW);

# The string sim-smt prints on a divide by zero
$divbyzero = '<err: divide by zero>';

#
# Walk the reference statistics in alphabetical order and compare each
# one with its counterpart in the new file.
#
for my $name (sort keys %{$refhash})
{
    my $ref_val = $refhash->{$name};
    my $new_val = $newhash->{$name};

    # stat missing from new file
    unless (defined($new_val)) {
        push @missing_stats, $name;
        next;
    }

    # key statistics: always record & display changes in these
    push @key_stats, [$name, $ref_val, $new_val]
        if $name =~ /($key_stat_pattern)/o;

    # only stats that changed and are not in the "ignore" list matter
    if (!$ignore{$name} and $ref_val ne $new_val) {
        if ($ref_val eq $divbyzero || $new_val eq $divbyzero) {
            # one or the other was a divide by zero:
            # no point in trying to quantify error
            print "$name: $ref_val --> $new_val\n";
        }
        else {
            my $rel = pct_diff($ref_val, $new_val);
            my $mag = abs($rel);

            push @errs, [$name, $ref_val, $new_val, $rel]
                if $mag > $err_thresh;

            $max_err_mag = $mag if $mag > $max_err_mag;
        }
    }

    # remove from new hash so we can detect added stats
    delete $newhash->{$name};
}


#
# All done.  Print comparison summary.
#

# _frac_digits($value): number of characters after the last '.' in the
# text of $value, or 0 when it contains no '.'.  Used to print numbers
# with the same precision they had in the statistics file.
sub _frac_digits
{
    my ($value) = @_;
    my $pointpos = rindex($value, '.');
    return ($pointpos < 0) ? 0 : (length($value) - $pointpos - 1);
}

printf("Maximum error magnitude: %+f%%\n\n", $max_err_mag);

printf("  %-30s %10s %10s %10s   %7s\n", ' ', 'Reference', 'New Value', 'Abs Diff', 'Pct Chg');

printf("Key statistics:\n\n");

foreach my $key_stat (@key_stats)
{
    # entries are [name, reference value, new value]
    my ($statname, $refvalue, $newvalue) = @$key_stat;

    # deduce format from reference value
    my $fmt = '%10.' . _frac_digits($refvalue) . 'f';

    # print differing values with absolute and relative error
    printf("  %-30s $fmt $fmt $fmt  %+7.2f%%\n",
           $statname, $refvalue, $newvalue,
           $newvalue - $refvalue, pct_diff($refvalue, $newvalue));
}

# The threshold is printed as given (%s) so that a fractional value such
# as 0.5 is not truncated to 0.
printf("\nLargest %s relative errors (> %s%%):\n\n", $omit_count, $err_thresh);

my $num_errs = 0;

if ($opt_p)
{
    # sort differences by percent change
    @errs = sort { abs($$b[3]) <=> abs($$a[3]) } @errs;
}

foreach my $err (@errs)
{
    # entries are [name, reference value, new value, percent difference]
    my ($statname, $refvalue, $newvalue, $reldiff) = @$err;

    # deduce format from whichever value has more fractional digits
    my $digits1 = _frac_digits($refvalue);
    my $digits2 = _frac_digits($newvalue);
    my $digits = ($digits1 > $digits2) ? $digits1 : $digits2;
    my $fmt = "%10.${digits}f";

    # print differing values with absolute and relative error
    printf("  %-30s $fmt $fmt $fmt  %+7.2f%%\n",
           $statname, $refvalue, $newvalue, $newvalue - $refvalue, $reldiff);

    # only print top N errors
    if (++$num_errs >= $omit_count)
    {
        # say so only when something really was left out
        print "[... additional errors omitted ...]\n"
            if $num_errs < scalar(@errs);
        last;
    }
}

#
# Report statistics that exist in the reference file but not in the new
# one.  Distribution buckets (names ending in "::", "::<digits>" or
# "::overflows") are filtered out first: these are mostly noise.
#
@missing_stats = grep { $_ !~ /::(\d+|overflows)?$/ } @missing_stats;

if (@missing_stats)
{
    my $missing_count = scalar(@missing_stats);
    print "\nMissing $missing_count reference statistics:\n\n";

    for my $name (@missing_stats)
    {
        printf "  %-50s    ", $name;
        print  "$$refhash{$name}\n";
    }
}

#
# Any stats left in newhash are added since the reference file.
# Distribution buckets are filtered out here as well.
#
@added_stats = grep { $_ !~ /::(\d+|overflows)?$/ } keys %$newhash;

if (@added_stats)
{
    my $added_count = scalar(@added_stats);
    print "\nFound $added_count new statistics:\n\n";

    for my $name (sort @added_stats)
    {
        printf "  %-50s    ", $name;
        print  "$$newhash{$name}\n";
    }
}

# Remove the temporary header files, if any were created
cleanup();

# Exit code is 0 if no stats error, 1 otherwise
exit(($max_err_mag == 0.0) ? 0 : 1);

# cleanup(): delete the temporary header files named by the globals
# $refheader and $newheader.  A name that is unset (or otherwise false)
# is skipped, so calling this when no header files exist is harmless.
sub cleanup
{
    foreach my $tmpfile ($refheader, $newheader)
    {
        next unless $tmpfile;
        unlink($tmpfile);
    }
}