trace_cpu.hh revision 11252:18bb597fc40c
13096SN/A/* 23096SN/A * Copyright (c) 2013 - 2015 ARM Limited 35520SN/A * All rights reserved 48844SAli.Saidi@ARM.com * 58844SAli.Saidi@ARM.com * The license below extends only to copyright in the software and shall 68428SN/A * not be construed as granting a license to any other intellectual 78983Snate@binkert.org * property including but not limited to intellectual property relating 88983Snate@binkert.org * to a hardware implementation of the functionality of the software 98983Snate@binkert.org * licensed hereunder. You may use the software subject to the license 108983Snate@binkert.org * terms below provided that you ensure that this notice is replicated 118983Snate@binkert.org * unmodified and in its entirety in all distributions of the software, 128428SN/A * modified or unmodified, in source code or in binary form. 138835SAli.Saidi@ARM.com * 148844SAli.Saidi@ARM.com * Redistribution and use in source and binary forms, with or without 158844SAli.Saidi@ARM.com * modification, are permitted provided that the following conditions are 168721SN/A * met: redistributions of source code must retain the above copyright 178844SAli.Saidi@ARM.com * notice, this list of conditions and the following disclaimer; 188721SN/A * redistributions in binary form must reproduce the above copyright 198721SN/A * notice, this list of conditions and the following disclaimer in the 208844SAli.Saidi@ARM.com * documentation and/or other materials provided with the distribution; 218844SAli.Saidi@ARM.com * neither the name of the copyright holders nor the names of its 228844SAli.Saidi@ARM.com * contributors may be used to endorse or promote products derived from 238428SN/A * this software without specific prior written permission. 
248428SN/A * 258428SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 268428SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 278844SAli.Saidi@ARM.com * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 288844SAli.Saidi@ARM.com * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 298428SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 308844SAli.Saidi@ARM.com * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 318844SAli.Saidi@ARM.com * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 328844SAli.Saidi@ARM.com * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 338428SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 348844SAli.Saidi@ARM.com * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 358844SAli.Saidi@ARM.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
368844SAli.Saidi@ARM.com * 378428SN/A * Authors: Radhika Jagtap 388844SAli.Saidi@ARM.com * Andreas Hansson 398844SAli.Saidi@ARM.com * Thomas Grass 408844SAli.Saidi@ARM.com */ 418428SN/A 428844SAli.Saidi@ARM.com#ifndef __CPU_TRACE_TRACE_CPU_HH__ 438428SN/A#define __CPU_TRACE_TRACE_CPU_HH__ 448428SN/A 458428SN/A#include <array> 468428SN/A#include <cstdint> 478428SN/A#include <queue> 488428SN/A#include <set> 498428SN/A#include <unordered_map> 508428SN/A 518428SN/A#include "arch/registers.hh" 528428SN/A#include "base/statistics.hh" 538428SN/A#include "cpu/base.hh" 548428SN/A#include "debug/TraceCPUData.hh" 558428SN/A#include "debug/TraceCPUInst.hh" 568844SAli.Saidi@ARM.com#include "params/TraceCPU.hh" 578428SN/A#include "proto/inst_dep_record.pb.h" 588428SN/A#include "proto/packet.pb.h" 598844SAli.Saidi@ARM.com#include "proto/protoio.hh" 608844SAli.Saidi@ARM.com#include "sim/sim_events.hh" 618844SAli.Saidi@ARM.com 628844SAli.Saidi@ARM.com/** 638844SAli.Saidi@ARM.com * The trace cpu replays traces generated using the elastic trace probe 648428SN/A * attached to the O3 CPU model. The elastic trace is an execution trace with 658844SAli.Saidi@ARM.com * register data dependencies and ordering dependencies annotated to it. The 668844SAli.Saidi@ARM.com * trace cpu also replays a fixed timestamp fetch trace that is also generated 678844SAli.Saidi@ARM.com * by the elastic trace probe. This trace cpu model aims at achieving faster 688844SAli.Saidi@ARM.com * simulation compared to the detailed cpu model and good correlation when the 698844SAli.Saidi@ARM.com * same trace is used for playback on different memory sub-systems. 708844SAli.Saidi@ARM.com * 718844SAli.Saidi@ARM.com * The TraceCPU inherits from BaseCPU so some virtual methods need to be 728844SAli.Saidi@ARM.com * defined. It has two port subclasses inherited from MasterPort for 738844SAli.Saidi@ARM.com * instruction and data ports. 
 * It issues the memory requests deducing the
 * timing from the trace and without performing real execution of micro-ops. As
 * soon as the last dependency for an instruction is complete, its
 * computational delay, also provided in the input trace, is added. The
 * dependency-free nodes are maintained in a list, called 'ReadyList', ordered
 * by ready time. Instructions which depend on a load stall until the responses
 * for read requests are received, thus achieving elastic replay. If the
 * dependency is not found when adding a new node, it is assumed complete.
 * Thus, if this node is found to be completely dependency-free its issue time
 * is calculated and it is added to the ready list immediately. This is
 * encapsulated in the subclass ElasticDataGen.
 *
 * If ready nodes are issued in an unconstrained way there can be more nodes
 * outstanding which results in divergence in timing compared to the O3CPU.
 * Therefore, the Trace CPU also models hardware resources. A sub-class to
 * model hardware resources contains the maximum sizes of load buffer, store
 * buffer and ROB. If resources are not available, the node is not issued. Such
 * nodes that are pending issue are held in the 'depFreeQueue' structure.
 *
 * Modeling the ROB size in the Trace CPU as a resource limitation is arguably
 * the most important parameter of all resources. The ROB occupancy is
 * estimated using the newly added field 'robNum'. We need to use ROB number as
 * sequence number is at times much higher due to squashing and trace replay is
 * focused on correct path modeling.
 *
 * A map called 'inFlightNodes' is added to track nodes that are not only in
 * the readyList but also load nodes that are executed (and thus removed from
 * readyList) but are not complete. ReadyList handles what node to execute
 * next and when, while the inFlightNodes is used for resource modelling. The
 * oldest ROB number is updated when any node occupies the ROB or when an entry
 * in the ROB is released. The ROB occupancy is equal to the difference in the
 * ROB number of the newly dependency-free node and the oldest ROB number in
 * flight.
 *
 * If no node depends on a non load/store node then there is no reason to
 * track it in the dependency graph. We filter out such nodes but count them
 * and add a weight field to the subsequent node that we do include in the
 * trace. The weight field is used to model ROB occupancy during replay.
 *
 * The depFreeQueue is chosen to be FIFO so that child nodes which are in
 * program order get pushed into it in that order and thus issued in program
 * order, like in the O3CPU. This is also why 'dependents' is a sequential
 * container (changed from std::set to std::vector). We only check head of the
 * depFreeQueue as nodes are issued in order and blocking on head models that
 * better than looping the entire queue. An alternative choice would be to
 * inspect top N pending nodes where N is the issue-width. This is left for
 * future as the timing correlation looks good as it is.
 *
 * At the start of an execution event, first we attempt to issue such pending
 * nodes by checking if appropriate resources have become available. If yes, we
 * compute the execute tick with respect to the time then. Then we proceed to
 * complete nodes from the readyList.
 *
 * When a read response is received, sometimes a dependency on it that was
 * supposed to be released when it was issued is still not released. This
 * occurs because the dependent gets added to the graph after the read was
 * sent. So the check is made less strict and the dependency is marked complete
 * on read response instead of insisting that it should have been removed on
 * read sent.
 *
 * There is a check for requests spanning two cache lines as this condition
 * triggers an assert fail in the L1 cache. If it does then truncate the size
 * to access only until the end of that line and ignore the remainder.
 * Strictly-ordered requests are skipped and the dependencies on such requests
 * are handled by simply marking them complete immediately.
 *
 * The simulated seconds can be calculated as the difference between the
 * final_tick stat and the tickOffset stat.
A CountedExitEvent that contains a 1418844SAli.Saidi@ARM.com * static int belonging to the Trace CPU class as a down counter is used to 1428844SAli.Saidi@ARM.com * implement multi Trace CPU simulation exit. 1438844SAli.Saidi@ARM.com */ 1448844SAli.Saidi@ARM.com 1458844SAli.Saidi@ARM.comclass TraceCPU : public BaseCPU 1468844SAli.Saidi@ARM.com{ 1478844SAli.Saidi@ARM.com 1488428SN/A public: 1498428SN/A TraceCPU(TraceCPUParams *params); 1508428SN/A ~TraceCPU(); 1518844SAli.Saidi@ARM.com 1528428SN/A void init(); 1538844SAli.Saidi@ARM.com 1548844SAli.Saidi@ARM.com /** 1558844SAli.Saidi@ARM.com * This is a pure virtual function in BaseCPU. As we don't know how many 1568844SAli.Saidi@ARM.com * insts are in the trace but only know how how many micro-ops are we 1578844SAli.Saidi@ARM.com * cannot count this stat. 1588844SAli.Saidi@ARM.com * 1598844SAli.Saidi@ARM.com * @return 0 1608844SAli.Saidi@ARM.com */ 1618844SAli.Saidi@ARM.com Counter totalInsts() const 1628844SAli.Saidi@ARM.com { 1638844SAli.Saidi@ARM.com return 0; 1648844SAli.Saidi@ARM.com } 1658844SAli.Saidi@ARM.com 1668844SAli.Saidi@ARM.com /** 1678844SAli.Saidi@ARM.com * Return totalOps as the number of committed micro-ops plus the 1688844SAli.Saidi@ARM.com * speculatively issued loads that are modelled in the TraceCPU replay. 1698844SAli.Saidi@ARM.com * 1708844SAli.Saidi@ARM.com * @return number of micro-ops i.e. nodes in the elastic data generator 1718844SAli.Saidi@ARM.com */ 1728844SAli.Saidi@ARM.com Counter totalOps() const 1738844SAli.Saidi@ARM.com { 1748844SAli.Saidi@ARM.com return dcacheGen.getMicroOpCount(); 1758844SAli.Saidi@ARM.com } 1768844SAli.Saidi@ARM.com 1778844SAli.Saidi@ARM.com /* Pure virtual function in BaseCPU. Do nothing. 
*/ 1788844SAli.Saidi@ARM.com void wakeup(ThreadID tid = 0) 1798844SAli.Saidi@ARM.com { 1808844SAli.Saidi@ARM.com return; 1818844SAli.Saidi@ARM.com } 1828844SAli.Saidi@ARM.com 1838844SAli.Saidi@ARM.com /* 1848428SN/A * When resuming from checkpoint in FS mode, the TraceCPU takes over from 1858428SN/A * the old cpu. This function overrides the takeOverFrom() function in the 1868241SN/A * BaseCPU. It unbinds the ports of the old CPU and binds the ports of the 1878844SAli.Saidi@ARM.com * TraceCPU. 1888844SAli.Saidi@ARM.com */ 1898844SAli.Saidi@ARM.com void takeOverFrom(BaseCPU *oldCPU); 1908844SAli.Saidi@ARM.com 1918844SAli.Saidi@ARM.com /** 1928844SAli.Saidi@ARM.com * When instruction cache port receives a retry, schedule event 1938844SAli.Saidi@ARM.com * icacheNextEvent. 1948844SAli.Saidi@ARM.com */ 1958844SAli.Saidi@ARM.com void icacheRetryRecvd(); 1968844SAli.Saidi@ARM.com 1978844SAli.Saidi@ARM.com /** 1988844SAli.Saidi@ARM.com * When data cache port receives a retry, schedule event 1998844SAli.Saidi@ARM.com * dcacheNextEvent. 2008844SAli.Saidi@ARM.com */ 2018844SAli.Saidi@ARM.com void dcacheRetryRecvd(); 2028844SAli.Saidi@ARM.com 2038844SAli.Saidi@ARM.com /** 2048844SAli.Saidi@ARM.com * When data cache port receives a response, this calls the dcache 2058844SAli.Saidi@ARM.com * generator method handle to complete the load writeback. 
2068844SAli.Saidi@ARM.com * 2078844SAli.Saidi@ARM.com * @param pkt Pointer to packet received 2088844SAli.Saidi@ARM.com */ 2098844SAli.Saidi@ARM.com void dcacheRecvTimingResp(PacketPtr pkt); 2108844SAli.Saidi@ARM.com 2118844SAli.Saidi@ARM.com /** 2128844SAli.Saidi@ARM.com * Schedule event dcacheNextEvent at the given tick 2138844SAli.Saidi@ARM.com * 2148844SAli.Saidi@ARM.com * @param when Tick at which to schedule event 2158844SAli.Saidi@ARM.com */ 2168844SAli.Saidi@ARM.com void schedDcacheNextEvent(Tick when); 2178844SAli.Saidi@ARM.com 2188241SN/A protected: 2198241SN/A 2208844SAli.Saidi@ARM.com /** 2218844SAli.Saidi@ARM.com * IcachePort class that interfaces with L1 Instruction Cache. 2228844SAli.Saidi@ARM.com */ 2238844SAli.Saidi@ARM.com class IcachePort : public MasterPort 2248844SAli.Saidi@ARM.com { 2258844SAli.Saidi@ARM.com public: 2268844SAli.Saidi@ARM.com /** Default constructor. */ 2278428SN/A IcachePort(TraceCPU* _cpu) 2288428SN/A : MasterPort(_cpu->name() + ".icache_port", _cpu), 2298428SN/A owner(_cpu) 2308844SAli.Saidi@ARM.com { } 2318428SN/A 2328844SAli.Saidi@ARM.com public: 2338428SN/A /** 2348844SAli.Saidi@ARM.com * Receive the timing reponse and simply delete the packet since 2358464SN/A * instruction fetch requests are issued as per the timing in the trace 2368844SAli.Saidi@ARM.com * and responses are ignored. 2378844SAli.Saidi@ARM.com * 2388428SN/A * @param pkt Pointer to packet received 2398428SN/A * @return true 2408428SN/A */ 2418428SN/A bool recvTimingResp(PacketPtr pkt); 2428428SN/A 2438844SAli.Saidi@ARM.com /** 2448844SAli.Saidi@ARM.com * Required functionally but do nothing. 
2458844SAli.Saidi@ARM.com * 2468844SAli.Saidi@ARM.com * @param pkt Pointer to packet received 2478844SAli.Saidi@ARM.com */ 2488844SAli.Saidi@ARM.com void recvTimingSnoopReq(PacketPtr pkt) { } 2498844SAli.Saidi@ARM.com 2508844SAli.Saidi@ARM.com /** 2518844SAli.Saidi@ARM.com * Handle a retry signalled by the cache if instruction read failed in 2528428SN/A * the first attempt. 2538844SAli.Saidi@ARM.com */ 2548844SAli.Saidi@ARM.com void recvReqRetry(); 2558844SAli.Saidi@ARM.com 2568844SAli.Saidi@ARM.com private: 2578844SAli.Saidi@ARM.com TraceCPU* owner; 2588844SAli.Saidi@ARM.com }; 2598844SAli.Saidi@ARM.com 2608428SN/A /** 2618844SAli.Saidi@ARM.com * DcachePort class that interfaces with L1 Data Cache. 2628844SAli.Saidi@ARM.com */ 2638844SAli.Saidi@ARM.com class DcachePort : public MasterPort 2648844SAli.Saidi@ARM.com { 2658844SAli.Saidi@ARM.com 2668844SAli.Saidi@ARM.com public: 2678844SAli.Saidi@ARM.com /** Default constructor. */ 2688844SAli.Saidi@ARM.com DcachePort(TraceCPU* _cpu) 2698844SAli.Saidi@ARM.com : MasterPort(_cpu->name() + ".dcache_port", _cpu), 2708428SN/A owner(_cpu) 2718844SAli.Saidi@ARM.com { } 2728844SAli.Saidi@ARM.com 2738428SN/A public: 2748428SN/A 2758835SAli.Saidi@ARM.com /** 2768844SAli.Saidi@ARM.com * Receive the timing reponse and call dcacheRecvTimingResp() method 2778428SN/A * of the dcacheGen to handle completing the load 2788844SAli.Saidi@ARM.com * 2798844SAli.Saidi@ARM.com * @param pkt Pointer to packet received 2808844SAli.Saidi@ARM.com * @return true 2818844SAli.Saidi@ARM.com */ 2828428SN/A bool recvTimingResp(PacketPtr pkt); 2838844SAli.Saidi@ARM.com 2848844SAli.Saidi@ARM.com /** 2858844SAli.Saidi@ARM.com * Required functionally but do nothing. 
2868844SAli.Saidi@ARM.com * 2878844SAli.Saidi@ARM.com * @param pkt Pointer to packet received 2888844SAli.Saidi@ARM.com */ 2898844SAli.Saidi@ARM.com void recvTimingSnoopReq(PacketPtr pkt) 2908844SAli.Saidi@ARM.com { } 2918844SAli.Saidi@ARM.com 2928428SN/A /** 2938428SN/A * Required functionally but do nothing. 2948428SN/A * 2958844SAli.Saidi@ARM.com * @param pkt Pointer to packet received 2968835SAli.Saidi@ARM.com */ 2978835SAli.Saidi@ARM.com void recvFunctionalSnoop(PacketPtr pkt) 2988428SN/A { } 2998428SN/A 3008428SN/A /** 3018428SN/A * Handle a retry signalled by the cache if data access failed in the 3028428SN/A * first attempt. 3038428SN/A */ 3048428SN/A void recvReqRetry(); 3058428SN/A 3068844SAli.Saidi@ARM.com /** 3078428SN/A * Required functionally. 3088844SAli.Saidi@ARM.com * 3098844SAli.Saidi@ARM.com * @return true since we have to snoop 3108844SAli.Saidi@ARM.com */ 3118844SAli.Saidi@ARM.com bool isSnooping() const { return true; } 3128428SN/A 3138835SAli.Saidi@ARM.com private: 3148428SN/A TraceCPU* owner; 3158844SAli.Saidi@ARM.com }; 3168844SAli.Saidi@ARM.com 3178844SAli.Saidi@ARM.com /** Port to connect to L1 instruction cache. */ 3188844SAli.Saidi@ARM.com IcachePort icachePort; 3198844SAli.Saidi@ARM.com 3208844SAli.Saidi@ARM.com /** Port to connect to L1 data cache. */ 3218428SN/A DcachePort dcachePort; 3228428SN/A 3238428SN/A /** Master id for instruction read requests. */ 3248428SN/A const MasterID instMasterID; 3258428SN/A 3268844SAli.Saidi@ARM.com /** Master id for data read and write requests. */ 3278844SAli.Saidi@ARM.com const MasterID dataMasterID; 3288844SAli.Saidi@ARM.com 3298844SAli.Saidi@ARM.com /** File names for input instruction and data traces. */ 3308428SN/A std::string instTraceFile, dataTraceFile; 3318844SAli.Saidi@ARM.com 3328844SAli.Saidi@ARM.com /** 3338844SAli.Saidi@ARM.com * Generator to read protobuf trace containing memory requests at fixed 3348844SAli.Saidi@ARM.com * timestamps, perform flow control and issue memory requests. 
If L1 cache 3358844SAli.Saidi@ARM.com * port sends packet succesfully, determine the tick to send the next 3368844SAli.Saidi@ARM.com * packet else wait for retry from cache. 3378844SAli.Saidi@ARM.com */ 3388844SAli.Saidi@ARM.com class FixedRetryGen 3398844SAli.Saidi@ARM.com { 3408844SAli.Saidi@ARM.com 3418844SAli.Saidi@ARM.com private: 3428844SAli.Saidi@ARM.com 3438844SAli.Saidi@ARM.com /** 3448844SAli.Saidi@ARM.com * This struct stores a line in the trace file. 3458844SAli.Saidi@ARM.com */ 3468844SAli.Saidi@ARM.com struct TraceElement { 3478844SAli.Saidi@ARM.com 3488844SAli.Saidi@ARM.com /** Specifies if the request is to be a read or a write */ 3498844SAli.Saidi@ARM.com MemCmd cmd; 3508844SAli.Saidi@ARM.com 3518844SAli.Saidi@ARM.com /** The address for the request */ 3528844SAli.Saidi@ARM.com Addr addr; 3538844SAli.Saidi@ARM.com 3548844SAli.Saidi@ARM.com /** The size of the access for the request */ 3558844SAli.Saidi@ARM.com Addr blocksize; 3568844SAli.Saidi@ARM.com 3578844SAli.Saidi@ARM.com /** The time at which the request should be sent */ 3588844SAli.Saidi@ARM.com Tick tick; 3598844SAli.Saidi@ARM.com 3608844SAli.Saidi@ARM.com /** Potential request flags to use */ 3618844SAli.Saidi@ARM.com Request::FlagsType flags; 3628844SAli.Saidi@ARM.com 3638844SAli.Saidi@ARM.com /** Instruction PC */ 3648428SN/A Addr pc; 3658428SN/A 3668428SN/A /** 3678428SN/A * Check validity of this element. 3688983Snate@binkert.org * 3698983Snate@binkert.org * @return if this element is valid 3708428SN/A */ 3718428SN/A bool isValid() const { 3728844SAli.Saidi@ARM.com return cmd != MemCmd::InvalidCmd; 3738844SAli.Saidi@ARM.com } 3748844SAli.Saidi@ARM.com 3758844SAli.Saidi@ARM.com /** 3768844SAli.Saidi@ARM.com * Make this element invalid. 
3778844SAli.Saidi@ARM.com */ 3788844SAli.Saidi@ARM.com void clear() { 3798844SAli.Saidi@ARM.com cmd = MemCmd::InvalidCmd; 3808844SAli.Saidi@ARM.com } 3818844SAli.Saidi@ARM.com }; 3828844SAli.Saidi@ARM.com 3838844SAli.Saidi@ARM.com /** 3848844SAli.Saidi@ARM.com * The InputStream encapsulates a trace file and the 3858844SAli.Saidi@ARM.com * internal buffers and populates TraceElements based on 3868844SAli.Saidi@ARM.com * the input. 3878844SAli.Saidi@ARM.com */ 3888844SAli.Saidi@ARM.com class InputStream 3898844SAli.Saidi@ARM.com { 3908844SAli.Saidi@ARM.com 3918844SAli.Saidi@ARM.com private: 3928844SAli.Saidi@ARM.com 3938844SAli.Saidi@ARM.com // Input file stream for the protobuf trace 3948844SAli.Saidi@ARM.com ProtoInputStream trace; 3958844SAli.Saidi@ARM.com 3968428SN/A public: 3978428SN/A 3988844SAli.Saidi@ARM.com /** 3998844SAli.Saidi@ARM.com * Create a trace input stream for a given file name. 4008844SAli.Saidi@ARM.com * 4018844SAli.Saidi@ARM.com * @param filename Path to the file to read from 4028428SN/A */ 4038844SAli.Saidi@ARM.com InputStream(const std::string& filename); 4048844SAli.Saidi@ARM.com 4058844SAli.Saidi@ARM.com /** 4068844SAli.Saidi@ARM.com * Reset the stream such that it can be played once 4078844SAli.Saidi@ARM.com * again. 4088835SAli.Saidi@ARM.com */ 4098835SAli.Saidi@ARM.com void reset(); 4108844SAli.Saidi@ARM.com 4118844SAli.Saidi@ARM.com /** 4128844SAli.Saidi@ARM.com * Attempt to read a trace element from the stream, 4138844SAli.Saidi@ARM.com * and also notify the caller if the end of the file 4148844SAli.Saidi@ARM.com * was reached. 
4158844SAli.Saidi@ARM.com * 4168835SAli.Saidi@ARM.com * @param element Trace element to populate 4178835SAli.Saidi@ARM.com * @return True if an element could be read successfully 4188844SAli.Saidi@ARM.com */ 4198844SAli.Saidi@ARM.com bool read(TraceElement* element); 4208844SAli.Saidi@ARM.com }; 4218844SAli.Saidi@ARM.com 4228844SAli.Saidi@ARM.com public: 4238844SAli.Saidi@ARM.com /* Constructor */ 4248844SAli.Saidi@ARM.com FixedRetryGen(TraceCPU& _owner, const std::string& _name, 4258844SAli.Saidi@ARM.com MasterPort& _port, MasterID master_id, 4268844SAli.Saidi@ARM.com const std::string& trace_file) 4278844SAli.Saidi@ARM.com : owner(_owner), 4288844SAli.Saidi@ARM.com port(_port), 4298844SAli.Saidi@ARM.com masterID(master_id), 4308844SAli.Saidi@ARM.com trace(trace_file), 4318844SAli.Saidi@ARM.com genName(owner.name() + ".fixedretry" + _name), 4328835SAli.Saidi@ARM.com retryPkt(nullptr), 4338835SAli.Saidi@ARM.com delta(0), 4348844SAli.Saidi@ARM.com traceComplete(false) 4358844SAli.Saidi@ARM.com { 4368844SAli.Saidi@ARM.com } 4378844SAli.Saidi@ARM.com 4388844SAli.Saidi@ARM.com /** 4398835SAli.Saidi@ARM.com * Called from TraceCPU init(). Reads the first message from the 4408844SAli.Saidi@ARM.com * input trace file and returns the send tick. 4418844SAli.Saidi@ARM.com * 4428844SAli.Saidi@ARM.com * @return Tick when first packet must be sent 4438844SAli.Saidi@ARM.com */ 4448844SAli.Saidi@ARM.com Tick init(); 4458844SAli.Saidi@ARM.com 4468428SN/A /** 4478428SN/A * This tries to send current or retry packet and returns true if 4488428SN/A * successfull. It calls nextExecute() to read next message. 4498428SN/A * 4508983Snate@binkert.org * @return bool true if packet is sent successfully 4518983Snate@binkert.org */ 4528428SN/A bool tryNext(); 4538428SN/A 4548844SAli.Saidi@ARM.com /** Returns name of the FixedRetryGen instance. 
*/ 4558844SAli.Saidi@ARM.com const std::string& name() const { return genName; } 4568844SAli.Saidi@ARM.com 4578844SAli.Saidi@ARM.com /** 4588844SAli.Saidi@ARM.com * Creates a new request assigning the request parameters passed by the 4598844SAli.Saidi@ARM.com * arguments. Calls the port's sendTimingReq() and returns true if 4608844SAli.Saidi@ARM.com * the packet was sent succesfully. It is called by tryNext() 4618844SAli.Saidi@ARM.com * 4628844SAli.Saidi@ARM.com * @param addr address of request 4638844SAli.Saidi@ARM.com * @param size size of request 4648844SAli.Saidi@ARM.com * @param cmd if it is a read or write request 4658844SAli.Saidi@ARM.com * @param flags associated request flags 4668844SAli.Saidi@ARM.com * @param pc instruction PC that generated the request 4678844SAli.Saidi@ARM.com * 4688844SAli.Saidi@ARM.com * @return true if packet was sent successfully 4698844SAli.Saidi@ARM.com */ 4708844SAli.Saidi@ARM.com bool send(Addr addr, unsigned size, const MemCmd& cmd, 4718844SAli.Saidi@ARM.com Request::FlagsType flags, Addr pc); 4728844SAli.Saidi@ARM.com 4738844SAli.Saidi@ARM.com /** Exit the FixedRetryGen. */ 4748844SAli.Saidi@ARM.com void exit(); 4758844SAli.Saidi@ARM.com 4768844SAli.Saidi@ARM.com /** 4778844SAli.Saidi@ARM.com * Reads a line of the trace file. Returns the tick 4788844SAli.Saidi@ARM.com * when the next request should be generated. If the end 4798844SAli.Saidi@ARM.com * of the file has been reached, it returns false. 4808844SAli.Saidi@ARM.com * 4818844SAli.Saidi@ARM.com * @return bool false id end of file has been reached 4828844SAli.Saidi@ARM.com */ 4838844SAli.Saidi@ARM.com bool nextExecute(); 4848844SAli.Saidi@ARM.com 4858844SAli.Saidi@ARM.com /** 4868428SN/A * Returns the traceComplete variable which is set when end of the 4878428SN/A * input trace file is reached. 4888844SAli.Saidi@ARM.com * 4898428SN/A * @return bool true if traceComplete is set, false otherwise. 
4908844SAli.Saidi@ARM.com */ 4918844SAli.Saidi@ARM.com bool isTraceComplete() { return traceComplete; } 4928428SN/A 4938844SAli.Saidi@ARM.com int64_t tickDelta() { return delta; } 4948844SAli.Saidi@ARM.com 4958844SAli.Saidi@ARM.com void regStats(); 4968844SAli.Saidi@ARM.com 4978844SAli.Saidi@ARM.com private: 4988835SAli.Saidi@ARM.com 4998835SAli.Saidi@ARM.com /** Reference of the TraceCPU. */ 5008835SAli.Saidi@ARM.com TraceCPU& owner; 5018835SAli.Saidi@ARM.com 5028835SAli.Saidi@ARM.com /** Reference of the port to be used to issue memory requests. */ 5038835SAli.Saidi@ARM.com MasterPort& port; 5048844SAli.Saidi@ARM.com 5058844SAli.Saidi@ARM.com /** MasterID used for the requests being sent. */ 5068844SAli.Saidi@ARM.com const MasterID masterID; 5078844SAli.Saidi@ARM.com 5088844SAli.Saidi@ARM.com /** Input stream used for reading the input trace file. */ 5098844SAli.Saidi@ARM.com InputStream trace; 5108844SAli.Saidi@ARM.com 5118844SAli.Saidi@ARM.com /** String to store the name of the FixedRetryGen. */ 5128844SAli.Saidi@ARM.com std::string genName; 5138844SAli.Saidi@ARM.com 5148844SAli.Saidi@ARM.com /** PacketPtr used to store the packet to retry. */ 5158844SAli.Saidi@ARM.com PacketPtr retryPkt; 5168844SAli.Saidi@ARM.com 5178844SAli.Saidi@ARM.com /** 5188844SAli.Saidi@ARM.com * Stores the difference in the send ticks of the current and last 5198844SAli.Saidi@ARM.com * packets. Keeping this signed to check overflow to a negative value 5208844SAli.Saidi@ARM.com * which will be caught by assert(delta > 0) 5218844SAli.Saidi@ARM.com */ 5228844SAli.Saidi@ARM.com int64_t delta; 5238844SAli.Saidi@ARM.com 5248844SAli.Saidi@ARM.com /** 5258844SAli.Saidi@ARM.com * Set to true when end of trace is reached. 5268844SAli.Saidi@ARM.com */ 5278844SAli.Saidi@ARM.com bool traceComplete; 5288844SAli.Saidi@ARM.com 5298844SAli.Saidi@ARM.com /** Store an element read from the trace to send as the next packet. 
*/ 5308844SAli.Saidi@ARM.com TraceElement currElement; 5318844SAli.Saidi@ARM.com 5328844SAli.Saidi@ARM.com /** Stats for instruction accesses replayed. */ 5338844SAli.Saidi@ARM.com Stats::Scalar numSendAttempted; 5348844SAli.Saidi@ARM.com Stats::Scalar numSendSucceeded; 5358844SAli.Saidi@ARM.com Stats::Scalar numSendFailed; 5368844SAli.Saidi@ARM.com Stats::Scalar numRetrySucceeded; 5378844SAli.Saidi@ARM.com /** Last simulated tick by the FixedRetryGen */ 5388835SAli.Saidi@ARM.com Stats::Scalar instLastTick; 5398835SAli.Saidi@ARM.com 5408844SAli.Saidi@ARM.com }; 5418835SAli.Saidi@ARM.com 5428844SAli.Saidi@ARM.com /** 5438835SAli.Saidi@ARM.com * The elastic data memory request generator to read protobuf trace 5448844SAli.Saidi@ARM.com * containing execution trace annotated with data and ordering 5458844SAli.Saidi@ARM.com * dependencies. It deduces the time at which to send a load/store request 5468844SAli.Saidi@ARM.com * by tracking the dependencies. It attempts to send a memory request for a 5478844SAli.Saidi@ARM.com * load/store without performing real execution of micro-ops. If L1 cache 5488844SAli.Saidi@ARM.com * port sends packet succesfully, the generator checks which instructions 5498844SAli.Saidi@ARM.com * became dependency free as a result of this and schedules an event 5508844SAli.Saidi@ARM.com * accordingly. If it fails to send the packet, it waits for a retry from 5518428SN/A * the cache. 5528428SN/A */ 5538428SN/A class ElasticDataGen 5548428SN/A { 5558983Snate@binkert.org 5568983Snate@binkert.org private: 5578428SN/A 5588428SN/A /** Node sequence number type. */ 5598844SAli.Saidi@ARM.com typedef uint64_t NodeSeqNum; 5608844SAli.Saidi@ARM.com 5618844SAli.Saidi@ARM.com /** Node ROB number type. 
*/ 5628844SAli.Saidi@ARM.com typedef uint64_t NodeRobNum; 5638844SAli.Saidi@ARM.com 5648844SAli.Saidi@ARM.com typedef ProtoMessage::InstDepRecord::RecordType RecordType; 5658844SAli.Saidi@ARM.com typedef ProtoMessage::InstDepRecord Record; 5668844SAli.Saidi@ARM.com 5678844SAli.Saidi@ARM.com /** 5688844SAli.Saidi@ARM.com * The struct GraphNode stores an instruction in the trace file. The 5698844SAli.Saidi@ARM.com * format of the trace file favours constructing a dependency graph of 5708844SAli.Saidi@ARM.com * the execution and this struct is used to encapsulate the request 5718844SAli.Saidi@ARM.com * data as well as pointers to its dependent GraphNodes. 5728844SAli.Saidi@ARM.com */ 5738844SAli.Saidi@ARM.com class GraphNode { 5748844SAli.Saidi@ARM.com 5758844SAli.Saidi@ARM.com public: 5768844SAli.Saidi@ARM.com /** 5778844SAli.Saidi@ARM.com * The maximum no. of ROB dependencies. There can be at most 2 5788844SAli.Saidi@ARM.com * order dependencies which could exist for a store. For a load 5798844SAli.Saidi@ARM.com * and comp node there can be at most one order dependency. 
5808844SAli.Saidi@ARM.com */ 5818844SAli.Saidi@ARM.com static const uint8_t maxRobDep = 2; 5828835SAli.Saidi@ARM.com 5838835SAli.Saidi@ARM.com /** Typedef for the array containing the ROB dependencies */ 5848844SAli.Saidi@ARM.com typedef std::array<NodeSeqNum, maxRobDep> RobDepArray; 5858835SAli.Saidi@ARM.com 5868844SAli.Saidi@ARM.com /** Typedef for the array containing the register dependencies */ 5878835SAli.Saidi@ARM.com typedef std::array<NodeSeqNum, TheISA::MaxInstSrcRegs> RegDepArray; 5888844SAli.Saidi@ARM.com 5898844SAli.Saidi@ARM.com /** Instruction sequence number */ 5908844SAli.Saidi@ARM.com NodeSeqNum seqNum; 5918844SAli.Saidi@ARM.com 5928844SAli.Saidi@ARM.com /** ROB occupancy number */ 5938844SAli.Saidi@ARM.com NodeRobNum robNum; 5948844SAli.Saidi@ARM.com 5958428SN/A /** Type of the node corresponding to the instruction modelled by it */ 5963096SN/A RecordType type; 5973096SN/A 598 /** The address for the request if any */ 599 Addr addr; 600 601 /** Size of request if any */ 602 uint32_t size; 603 604 /** Request flags if any */ 605 Request::Flags flags; 606 607 /** Instruction PC */ 608 Addr pc; 609 610 /** Array of order dependencies. */ 611 RobDepArray robDep; 612 613 /** Number of order dependencies */ 614 uint8_t numRobDep; 615 616 /** Computational delay */ 617 uint64_t compDelay; 618 619 /** 620 * Array of register dependencies (incoming) if any. Maximum number 621 * of source registers used to set maximum size of the array 622 */ 623 RegDepArray regDep; 624 625 /** Number of register dependencies */ 626 uint8_t numRegDep; 627 628 /** 629 * A vector of nodes dependent (outgoing) on this node. A 630 * sequential container is chosen because when dependents become 631 * free, they attempt to issue in program order. 
632 */ 633 std::vector<GraphNode *> dependents; 634 635 /** Is the node a load */ 636 bool isLoad() const { return (type == Record::LOAD); } 637 638 /** Is the node a store */ 639 bool isStore() const { return (type == Record::STORE); } 640 641 /** Is the node a compute (non load/store) node */ 642 bool isComp() const { return (type == Record::COMP); } 643 644 /** Initialize register dependency array to all zeroes */ 645 void clearRegDep(); 646 647 /** Initialize register dependency array to all zeroes */ 648 void clearRobDep(); 649 650 /** Remove completed instruction from register dependency array */ 651 bool removeRegDep(NodeSeqNum reg_dep); 652 653 /** Remove completed instruction from order dependency array */ 654 bool removeRobDep(NodeSeqNum rob_dep); 655 656 /** Check for all dependencies on completed inst */ 657 bool removeDepOnInst(NodeSeqNum done_seq_num); 658 659 /** Return true if node has a request which is strictly ordered */ 660 bool isStrictlyOrdered() const { 661 return (flags.isSet(Request::STRICT_ORDER)); 662 } 663 /** 664 * Write out element in trace-compatible format using debug flag 665 * TraceCPUData. 666 */ 667 void writeElementAsTrace() const; 668 669 /** Return string specifying the type of the node */ 670 std::string typeToStr() const; 671 }; 672 673 /** Struct to store a ready-to-execute node and its execution tick. */ 674 struct ReadyNode 675 { 676 /** The sequence number of the ready node */ 677 NodeSeqNum seqNum; 678 679 /** The tick at which the ready node must be executed */ 680 Tick execTick; 681 }; 682 683 /** 684 * The HardwareResource class models structures that hold the in-flight 685 * nodes. When a node becomes dependency free, first check if resources 686 * are available to issue it. 687 */ 688 class HardwareResource 689 { 690 public: 691 /** 692 * Constructor that initializes the sizes of the structures. 
693 * 694 * @param max_rob size of the Reorder Buffer 695 * @param max_stores size of Store Buffer 696 * @param max_loads size of Load Buffer 697 */ 698 HardwareResource(uint16_t max_rob, uint16_t max_stores, 699 uint16_t max_loads); 700 701 /** 702 * Occupy appropriate structures for an issued node. 703 * 704 * @param node_ptr pointer to the issued node 705 */ 706 void occupy(const GraphNode* new_node); 707 708 /** 709 * Release appropriate structures for a completed node. 710 * 711 * @param node_ptr pointer to the completed node 712 */ 713 void release(const GraphNode* done_node); 714 715 /** Release store buffer entry for a completed store */ 716 void releaseStoreBuffer(); 717 718 /** 719 * Check if structures required to issue a node are free. 720 * 721 * @param node_ptr pointer to the node ready to issue 722 * @return true if resources are available 723 */ 724 bool isAvailable(const GraphNode* new_node) const; 725 726 /** 727 * Check if there are any outstanding requests, i.e. requests for 728 * which we are yet to receive a response. 729 * 730 * @return true if there is at least one read or write request 731 * outstanding 732 */ 733 bool awaitingResponse() const; 734 735 /** Print resource occupancy for debugging */ 736 void printOccupancy(); 737 738 private: 739 /** 740 * The size of the ROB used to throttle the max. number of in-flight 741 * nodes. 742 */ 743 const uint16_t sizeROB; 744 745 /** 746 * The size of store buffer. This is used to throttle the max. number 747 * of in-flight stores. 748 */ 749 const uint16_t sizeStoreBuffer; 750 751 /** 752 * The size of load buffer. This is used to throttle the max. number 753 * of in-flight loads. 754 */ 755 const uint16_t sizeLoadBuffer; 756 757 /** 758 * A map from the sequence number to the ROB number of the in- 759 * flight nodes. This includes all nodes that are in the readyList 760 * plus the loads for which a request has been sent which are not 761 * present in the readyList. 
But such loads are not yet complete 762 * and thus occupy resources. We need to query the oldest in-flight 763 * node and since a map container keeps all its keys sorted using 764 * the less than criterion, the first element is the in-flight node 765 * with the least sequence number, i.e. the oldest in-flight node. 766 */ 767 std::map<NodeSeqNum, NodeRobNum> inFlightNodes; 768 769 /** The ROB number of the oldest in-flight node */ 770 NodeRobNum oldestInFlightRobNum; 771 772 /** Number of ready loads for which request may or may not be sent */ 773 uint16_t numInFlightLoads; 774 775 /** Number of ready stores for which request may or may not be sent */ 776 uint16_t numInFlightStores; 777 }; 778 779 /** 780 * The InputStream encapsulates a trace file and the 781 * internal buffers and populates GraphNodes based on 782 * the input. 783 */ 784 class InputStream 785 { 786 787 private: 788 789 /** Input file stream for the protobuf trace */ 790 ProtoInputStream trace; 791 792 /** Count of committed ops read from trace plus the filtered ops */ 793 uint64_t microOpCount; 794 795 /** 796 * The window size that is read from the header of the protobuf 797 * trace and used to process the dependency trace 798 */ 799 uint32_t windowSize; 800 public: 801 802 /** 803 * Create a trace input stream for a given file name. 804 * 805 * @param filename Path to the file to read from 806 */ 807 InputStream(const std::string& filename); 808 809 /** 810 * Reset the stream such that it can be played once 811 * again. 812 */ 813 void reset(); 814 815 /** 816 * Attempt to read a trace element from the stream, 817 * and also notify the caller if the end of the file 818 * was reached. 
819 * 820 * @param element Trace element to populate 821 * @param size of register dependency array stored in the element 822 * @return True if an element could be read successfully 823 */ 824 bool read(GraphNode* element); 825 826 /** Get window size from trace */ 827 uint32_t getWindowSize() const { return windowSize; } 828 829 /** Get number of micro-ops modelled in the TraceCPU replay */ 830 uint64_t getMicroOpCount() const { return microOpCount; } 831 }; 832 833 public: 834 /* Constructor */ 835 ElasticDataGen(TraceCPU& _owner, const std::string& _name, 836 MasterPort& _port, MasterID master_id, 837 const std::string& trace_file, uint16_t max_rob, 838 uint16_t max_stores, uint16_t max_loads) 839 : owner(_owner), 840 port(_port), 841 masterID(master_id), 842 trace(trace_file), 843 genName(owner.name() + ".elastic" + _name), 844 retryPkt(nullptr), 845 traceComplete(false), 846 nextRead(false), 847 execComplete(false), 848 windowSize(trace.getWindowSize()), 849 hwResource(max_rob, max_stores, max_loads) 850 { 851 DPRINTF(TraceCPUData, "Window size in the trace is %d.\n", 852 windowSize); 853 } 854 855 /** 856 * Called from TraceCPU init(). Reads the first message from the 857 * input trace file and returns the send tick. 858 * 859 * @return Tick when first packet must be sent 860 */ 861 Tick init(); 862 863 /** Returns name of the ElasticDataGen instance. */ 864 const std::string& name() const { return genName; } 865 866 /** Exit the ElasticDataGen. */ 867 void exit(); 868 869 /** 870 * Reads a line of the trace file. Returns the tick when the next 871 * request should be generated. If the end of the file has been 872 * reached, it returns false. 873 * 874 * @return bool false if end of file has been reached else true 875 */ 876 bool readNextWindow(); 877 878 /** 879 * Iterate over the dependencies of a new node and add the new node 880 * to the list of dependents of the parent node. 
881 * 882 * @param new_node new node to add to the graph 883 * @tparam dep_array the dependency array of type rob or register, 884 * that is to be iterated, and may get modified 885 * @param num_dep the number of dependencies set in the array 886 * which may get modified during iteration 887 */ 888 template<typename T> void addDepsOnParent(GraphNode *new_node, 889 T& dep_array, 890 uint8_t& num_dep); 891 892 /** 893 * This is the main execute function which consumes nodes from the 894 * sorted readyList. First attempt to issue the pending dependency-free 895 * nodes held in the depFreeQueue. Insert the ready-to-issue nodes into 896 * the readyList. Then iterate through the readyList and when a node 897 * has its execute tick equal to curTick(), execute it. If the node is 898 * a load or a store call executeMemReq() and if it is neither, simply 899 * mark it complete. 900 */ 901 void execute(); 902 903 /** 904 * Creates a new request for a load or store assigning the request 905 * parameters. Calls the port's sendTimingReq() and returns a packet 906 * if the send failed so that it can be saved for a retry. 907 * 908 * @param node_ptr pointer to the load or store node to be executed 909 * 910 * @return packet pointer if the request failed and nullptr if it was 911 * sent successfully 912 */ 913 PacketPtr executeMemReq(GraphNode* node_ptr); 914 915 /** 916 * Add a ready node to the readyList. When inserting, ensure the nodes 917 * are sorted in ascending order of their execute ticks. 918 * 919 * @param seq_num seq. num of ready node 920 * @param exec_tick the execute tick of the ready node 921 */ 922 void addToSortedReadyList(NodeSeqNum seq_num, Tick exec_tick); 923 924 /** Print readyList for debugging using debug flag TraceCPUData. */ 925 void printReadyList(); 926 927 /** 928 * When a load writeback is received, that is when the load completes, 929 * release the dependents on it. This is called from the dcache port 930 * recvTimingResp(). 
931 */ 932 void completeMemAccess(PacketPtr pkt); 933 934 /** 935 * Returns the execComplete variable which is set when the last 936 * node is executed. 937 * 938 * @return bool true if execComplete is set, false otherwise. 939 */ 940 bool isExecComplete() const { return execComplete; } 941 942 /** 943 * Attempts to issue a node once the node's source dependencies are 944 * complete. If resources are available then add it to the readyList, 945 * otherwise the node is not issued and is stored in depFreeQueue 946 * until resources become available. 947 * 948 * @param node_ptr pointer to node to be issued 949 * @param first true if this is the first attempt to issue this node 950 * @return true if node was added to readyList 951 */ 952 bool checkAndIssue(const GraphNode* node_ptr, bool first = true); 953 954 /** Get number of micro-ops modelled in the TraceCPU replay */ 955 uint64_t getMicroOpCount() const { return trace.getMicroOpCount(); } 956 957 void regStats(); 958 959 private: 960 961 /** Reference of the TraceCPU. */ 962 TraceCPU& owner; 963 964 /** Reference of the port to be used to issue memory requests. */ 965 MasterPort& port; 966 967 /** MasterID used for the requests being sent. */ 968 const MasterID masterID; 969 970 /** Input stream used for reading the input trace file. */ 971 InputStream trace; 972 973 /** String to store the name of the FixedRetryGen. */ 974 std::string genName; 975 976 /** PacketPtr used to store the packet to retry. */ 977 PacketPtr retryPkt; 978 979 /** Set to true when end of trace is reached. */ 980 bool traceComplete; 981 982 /** Set to true when the next window of instructions need to be read */ 983 bool nextRead; 984 985 /** Set true when execution of trace is complete */ 986 bool execComplete; 987 988 /** 989 * Window size within which to check for dependencies. Its value is 990 * made equal to the window size used to generate the trace which is 991 * recorded in the trace header. 
The dependency graph must be 992 * populated enough such that when a node completes, its potential 993 * child node must be found and the dependency removed before the 994 * completed node itself is removed. Thus as soon as the graph shrinks 995 * to become smaller than this window, we read in the next window. 996 */ 997 const uint32_t windowSize; 998 999 /** 1000 * Hardware resources required to contain in-flight nodes and to 1001 * throttle issuing of new nodes when resources are not available. 1002 */ 1003 HardwareResource hwResource; 1004 1005 /** Store the depGraph of GraphNodes */ 1006 std::unordered_map<NodeSeqNum, GraphNode*> depGraph; 1007 1008 /** 1009 * Queue of dependency-free nodes that are pending issue because 1010 * resources are not available. This is chosen to be FIFO so that 1011 * dependent nodes which become free in program order get pushed 1012 * into the queue in that order. Thus nodes are more likely to 1013 * issue in program order. 1014 */ 1015 std::queue<const GraphNode*> depFreeQueue; 1016 1017 /** List of nodes that are ready to execute */ 1018 std::list<ReadyNode> readyList; 1019 1020 /** Stats for data memory accesses replayed. */ 1021 Stats::Scalar maxDependents; 1022 Stats::Scalar maxReadyListSize; 1023 Stats::Scalar numSendAttempted; 1024 Stats::Scalar numSendSucceeded; 1025 Stats::Scalar numSendFailed; 1026 Stats::Scalar numRetrySucceeded; 1027 Stats::Scalar numSplitReqs; 1028 Stats::Scalar numSOLoads; 1029 Stats::Scalar numSOStores; 1030 /** Tick when ElasticDataGen completes execution */ 1031 Stats::Scalar dataLastTick; 1032 }; 1033 1034 /** Instance of FixedRetryGen to replay instruction read requests. */ 1035 FixedRetryGen icacheGen; 1036 1037 /** Instance of ElasticDataGen to replay data read and write requests. */ 1038 ElasticDataGen dcacheGen; 1039 1040 /** 1041 * This is the control flow that uses the functionality of the icacheGen to 1042 * replay the trace. It calls tryNext(). 
If it returns true then next event 1043 * is scheduled at curTick() plus delta. If it returns false then delta is 1044 * ignored and control is brought back via recvRetry(). 1045 */ 1046 void schedIcacheNext(); 1047 1048 /** 1049 * This is the control flow that uses the functionality of the dcacheGen to 1050 * replay the trace. It calls execute(). It checks if execution is complete 1051 * and schedules an event to exit simulation accordingly. 1052 */ 1053 void schedDcacheNext(); 1054 1055 /** Event for the control flow method schedIcacheNext() */ 1056 EventWrapper<TraceCPU, &TraceCPU::schedIcacheNext> icacheNextEvent; 1057 1058 /** Event for the control flow method schedDcacheNext() */ 1059 EventWrapper<TraceCPU, &TraceCPU::schedDcacheNext> dcacheNextEvent; 1060 1061 /** This is called when either generator finishes executing from the trace */ 1062 void checkAndSchedExitEvent(); 1063 1064 /** Set to true when one of the generators finishes replaying its trace. */ 1065 bool oneTraceComplete; 1066 1067 /** 1068 * This is stores the tick of the first instruction fetch request 1069 * which is later used for dumping the tickOffset stat. 1070 */ 1071 Tick firstFetchTick; 1072 1073 /** 1074 * Number of Trace CPUs in the system used as a shared variable and passed 1075 * to the CountedExitEvent event used for counting down exit events. It is 1076 * incremented in the constructor call so that the total is arrived at 1077 * automatically. 1078 */ 1079 static int numTraceCPUs; 1080 1081 /** 1082 * A CountedExitEvent which when serviced decrements the counter. A sim 1083 * exit event is scheduled when the counter equals zero, that is all 1084 * instances of Trace CPU have had their execCompleteEvent serviced. 1085 */ 1086 CountedExitEvent *execCompleteEvent; 1087 1088 Stats::Scalar numSchedDcacheEvent; 1089 Stats::Scalar numSchedIcacheEvent; 1090 1091 /** Stat for number of simulated micro-ops. */ 1092 Stats::Scalar numOps; 1093 /** Stat for the CPI. 
This is really cycles per micro-op and not inst. */ 1094 Stats::Formula cpi; 1095 1096 /** 1097 * The first execution tick is dumped as a stat so that the simulated 1098 * seconds for a trace replay can be calculated as a difference between the 1099 * final_tick stat and the tickOffset stat 1100 */ 1101 Stats::Scalar tickOffset; 1102 1103 public: 1104 1105 /** Used to get a reference to the icache port. */ 1106 MasterPort &getInstPort() { return icachePort; } 1107 1108 /** Used to get a reference to the dcache port. */ 1109 MasterPort &getDataPort() { return dcachePort; } 1110 1111 void regStats(); 1112}; 1113#endif // __CPU_TRACE_TRACE_CPU_HH__ 1114