trace_cpu.hh revision 11632:a96d6787b385
18840Sandreas.hansson@arm.com/* 28840Sandreas.hansson@arm.com * Copyright (c) 2013 - 2016 ARM Limited 38840Sandreas.hansson@arm.com * All rights reserved 48840Sandreas.hansson@arm.com * 58840Sandreas.hansson@arm.com * The license below extends only to copyright in the software and shall 68840Sandreas.hansson@arm.com * not be construed as granting a license to any other intellectual 78840Sandreas.hansson@arm.com * property including but not limited to intellectual property relating 88840Sandreas.hansson@arm.com * to a hardware implementation of the functionality of the software 98840Sandreas.hansson@arm.com * licensed hereunder. You may use the software subject to the license 108840Sandreas.hansson@arm.com * terms below provided that you ensure that this notice is replicated 118840Sandreas.hansson@arm.com * unmodified and in its entirety in all distributions of the software, 128840Sandreas.hansson@arm.com * modified or unmodified, in source code or in binary form. 132740SN/A * 147534Ssteve.reinhardt@amd.com * Redistribution and use in source and binary forms, with or without 151046SN/A * modification, are permitted provided that the following conditions are 161046SN/A * met: redistributions of source code must retain the above copyright 171046SN/A * notice, this list of conditions and the following disclaimer; 181046SN/A * redistributions in binary form must reproduce the above copyright 191046SN/A * notice, this list of conditions and the following disclaimer in the 201046SN/A * documentation and/or other materials provided with the distribution; 211046SN/A * neither the name of the copyright holders nor the names of its 221046SN/A * contributors may be used to endorse or promote products derived from 231046SN/A * this software without specific prior written permission. 
241046SN/A * 251046SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 261046SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 271046SN/A * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 281046SN/A * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 291046SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 301046SN/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 311046SN/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 321046SN/A * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 331046SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 341046SN/A * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 351046SN/A * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 361046SN/A * 371046SN/A * Authors: Radhika Jagtap 381046SN/A * Andreas Hansson 392665SN/A * Thomas Grass 402665SN/A */ 412665SN/A 428840Sandreas.hansson@arm.com#ifndef __CPU_TRACE_TRACE_CPU_HH__ 431046SN/A#define __CPU_TRACE_TRACE_CPU_HH__ 445766Snate@binkert.org 458331Ssteve.reinhardt@amd.com#include <array> 461438SN/A#include <cstdint> 474762Snate@binkert.org#include <queue> 486654Snate@binkert.org#include <set> 493102Sstever@eecs.umich.edu#include <unordered_map> 503102Sstever@eecs.umich.edu 513102Sstever@eecs.umich.edu#include "arch/registers.hh" 523102Sstever@eecs.umich.edu#include "base/statistics.hh" 536654Snate@binkert.org#include "cpu/base.hh" 543102Sstever@eecs.umich.edu#include "debug/TraceCPUData.hh" 553102Sstever@eecs.umich.edu#include "debug/TraceCPUInst.hh" 567528Ssteve.reinhardt@amd.com#include "params/TraceCPU.hh" 578839Sandreas.hansson@arm.com#include "proto/inst_dep_record.pb.h" 583102Sstever@eecs.umich.edu#include "proto/packet.pb.h" 596654Snate@binkert.org#include "proto/protoio.hh" 606654Snate@binkert.org#include "sim/sim_events.hh" 61679SN/A 
/**
 * The trace cpu replays traces generated using the elastic trace probe
 * attached to the O3 CPU model. The elastic trace is an execution trace with
 * register data dependencies and ordering dependencies annotated to it. The
 * trace cpu also replays a fixed timestamp fetch trace that is also generated
 * by the elastic trace probe. This trace cpu model aims at achieving faster
 * simulation compared to the detailed cpu model and good correlation when the
 * same trace is used for playback on different memory sub-systems.
 *
 * The TraceCPU inherits from BaseCPU so some virtual methods need to be
 * defined. It has two port subclasses inherited from MasterPort for
 * instruction and data ports. It issues the memory requests deducing the
 * timing from the trace and without performing real execution of micro-ops.
 * As soon as the last dependency for an instruction is complete, its
 * computational delay, also provided in the input trace, is added. The
 * dependency-free nodes are maintained in a list, called 'ReadyList', ordered
 * by ready time. Instructions which depend on a load stall until the
 * responses for read requests are received, thus achieving elastic replay. If
 * the dependency is not found when adding a new node, it is assumed complete.
 * Thus, if this node is found to be completely dependency-free, its issue
 * time is calculated and it is added to the ready list immediately. This is
 * encapsulated in the subclass ElasticDataGen.
 *
 * If ready nodes are issued in an unconstrained way there can be more nodes
 * outstanding, which results in divergence in timing compared to the O3CPU.
 * Therefore, the Trace CPU also models hardware resources.
* A sub-class to model hardware resources contains the maximum sizes of the
 * load buffer, store buffer and ROB. If resources are not available, the node
 * is not issued. Such nodes that are pending issue are held in the
 * 'depFreeQueue' structure.
 *
 * Modeling the ROB size in the Trace CPU as a resource limitation is arguably
 * the most important parameter of all resources. The ROB occupancy is
 * estimated using the newly added field 'robNum'. We need to use the ROB
 * number because the sequence number is at times much higher due to squashing
 * and trace replay is focused on correct path modeling.
 *
 * A map called 'inFlightNodes' is added to track nodes that are not only in
 * the readyList but also load nodes that are executed (and thus removed from
 * the readyList) but are not complete. The readyList determines which node to
 * execute next and when, while inFlightNodes is used for resource modelling.
 * The oldest ROB number is updated when any node occupies the ROB or when an
 * entry in the ROB is released. The ROB occupancy is equal to the difference
 * between the ROB number of the newly dependency-free node and the oldest ROB
 * number in flight.
 *
 * If no node depends on a non load/store node then there is no reason to
 * track it in the dependency graph. We filter out such nodes but count them
 * and add a weight field to the subsequent node that we do include in the
 * trace. The weight field is used to model ROB occupancy during replay.
*
 * The depFreeQueue is chosen to be FIFO so that child nodes which are in
 * program order get pushed into it in that order and thus issued in program
 * order, like in the O3CPU. This is also why the dependents container was
 * changed from std::set to a sequential container, std::vector. We only check
 * the head of the depFreeQueue as nodes are issued in order and blocking on
 * the head models that better than looping over the entire queue. An
 * alternative choice would be to inspect the top N pending nodes where N is
 * the issue-width. This is left for the future as the timing correlation
 * looks good as it is.
 *
 * At the start of an execution event, first we attempt to issue such pending
 * nodes by checking if appropriate resources have become available. If yes,
 * we compute the execute tick with respect to the time then. Then we proceed
 * to complete nodes from the readyList.
 *
 * When a read response is received, sometimes a dependency on it that was
 * supposed to be released when it was issued is still not released. This
 * occurs because the dependent gets added to the graph after the read was
 * sent. So the check is made less strict and the dependency is marked
 * complete on read response instead of insisting that it should have been
 * removed on read sent.
 *
 * There is a check for requests spanning two cache lines as this condition
 * triggers an assert fail in the L1 cache. If it does, the size is truncated
 * to access only until the end of that line and the remainder is ignored.
1362740SN/A * Strictly-ordered requests are skipped and the dependencies on such requests 1372740SN/A * are handled by simply marking them complete immediately. 1382740SN/A * 1392740SN/A * A CountedExitEvent that contains a static int belonging to the Trace CPU 1407673Snate@binkert.org * class as a down counter is used to implement multi Trace CPU simulation 1412740SN/A * exit. 1422740SN/A */ 1432740SN/A 1442740SN/Aclass TraceCPU : public BaseCPU 1454762Snate@binkert.org{ 1464762Snate@binkert.org 1472740SN/A public: 1484762Snate@binkert.org TraceCPU(TraceCPUParams *params); 1494762Snate@binkert.org ~TraceCPU(); 1504762Snate@binkert.org 1514762Snate@binkert.org void init(); 152679SN/A 1532711SN/A /** 154679SN/A * This is a pure virtual function in BaseCPU. As we don't know how many 1552711SN/A * insts are in the trace but only know how how many micro-ops are we 1562711SN/A * cannot count this stat. 1571692SN/A * 1581310SN/A * @return 0 1591427SN/A */ 1602740SN/A Counter totalInsts() const 1612740SN/A { 1622740SN/A return 0; 1632740SN/A } 1642740SN/A 1652740SN/A /** 1662740SN/A * Return totalOps as the number of committed micro-ops plus the 1677528Ssteve.reinhardt@amd.com * speculatively issued loads that are modelled in the TraceCPU replay. 1683105Sstever@eecs.umich.edu * 1692740SN/A * @return number of micro-ops i.e. nodes in the elastic data generator 1701310SN/A */ 1711692SN/A Counter totalOps() const 1721585SN/A { 1731692SN/A return numOps.value(); 1741692SN/A } 1751692SN/A 1761692SN/A /* 1771692SN/A * Set the no. of ops when elastic data generator completes executing a 1782740SN/A * node. 1792740SN/A */ 1802740SN/A void updateNumOps(uint64_t rob_num) { numOps = rob_num; } 1812740SN/A 1821692SN/A /* Pure virtual function in BaseCPU. Do nothing. 
*/ 1835610Snate@binkert.org void wakeup(ThreadID tid = 0) 1841692SN/A { 1852740SN/A return; 1861692SN/A } 1877528Ssteve.reinhardt@amd.com 1883105Sstever@eecs.umich.edu /* 1892740SN/A * When resuming from checkpoint in FS mode, the TraceCPU takes over from 1902712SN/A * the old cpu. This function overrides the takeOverFrom() function in the 1915610Snate@binkert.org * BaseCPU. It unbinds the ports of the old CPU and binds the ports of the 1925610Snate@binkert.org * TraceCPU. 1931692SN/A */ 1944762Snate@binkert.org void takeOverFrom(BaseCPU *oldCPU); 1954762Snate@binkert.org 1964762Snate@binkert.org /** 1975610Snate@binkert.org * When instruction cache port receives a retry, schedule event 1984762Snate@binkert.org * icacheNextEvent. 1995610Snate@binkert.org */ 2004859Snate@binkert.org void icacheRetryRecvd(); 2018597Ssteve.reinhardt@amd.com 2028597Ssteve.reinhardt@amd.com /** 2038597Ssteve.reinhardt@amd.com * When data cache port receives a retry, schedule event 2048597Ssteve.reinhardt@amd.com * dcacheNextEvent. 2058597Ssteve.reinhardt@amd.com */ 2068597Ssteve.reinhardt@amd.com void dcacheRetryRecvd(); 2078597Ssteve.reinhardt@amd.com 2088597Ssteve.reinhardt@amd.com /** 2098597Ssteve.reinhardt@amd.com * When data cache port receives a response, this calls the dcache 2108597Ssteve.reinhardt@amd.com * generator method handle to complete the load writeback. 2118597Ssteve.reinhardt@amd.com * 2128597Ssteve.reinhardt@amd.com * @param pkt Pointer to packet received 2138597Ssteve.reinhardt@amd.com */ 2148597Ssteve.reinhardt@amd.com void dcacheRecvTimingResp(PacketPtr pkt); 2152740SN/A 2162740SN/A /** 2172740SN/A * Schedule event dcacheNextEvent at the given tick 2182740SN/A * 2192740SN/A * @param when Tick at which to schedule event 2202740SN/A */ 2212740SN/A void schedDcacheNextEvent(Tick when); 2222740SN/A 2231527SN/A protected: 2242740SN/A 2251585SN/A /** 2261427SN/A * IcachePort class that interfaces with L1 Instruction Cache. 
2272738SN/A */ 2282738SN/A class IcachePort : public MasterPort 2293105Sstever@eecs.umich.edu { 2302738SN/A public: 2311427SN/A /** Default constructor. */ 2321427SN/A IcachePort(TraceCPU* _cpu) 2331427SN/A : MasterPort(_cpu->name() + ".icache_port", _cpu), 2341427SN/A owner(_cpu) 2351427SN/A { } 2361427SN/A 2371427SN/A public: 2381427SN/A /** 2391427SN/A * Receive the timing reponse and simply delete the packet since 2401427SN/A * instruction fetch requests are issued as per the timing in the trace 2411427SN/A * and responses are ignored. 2421427SN/A * 2437493Ssteve.reinhardt@amd.com * @param pkt Pointer to packet received 2441427SN/A * @return true 2451427SN/A */ 2461427SN/A bool recvTimingResp(PacketPtr pkt); 2473100SN/A 2483100SN/A /** 2493100SN/A * Required functionally but do nothing. 2503100SN/A * 2513100SN/A * @param pkt Pointer to packet received 2523100SN/A */ 2533105Sstever@eecs.umich.edu void recvTimingSnoopReq(PacketPtr pkt) { } 2543105Sstever@eecs.umich.edu 2553105Sstever@eecs.umich.edu /** 2563105Sstever@eecs.umich.edu * Handle a retry signalled by the cache if instruction read failed in 2573105Sstever@eecs.umich.edu * the first attempt. 2588321Ssteve.reinhardt@amd.com */ 2593105Sstever@eecs.umich.edu void recvReqRetry(); 2603105Sstever@eecs.umich.edu 2613105Sstever@eecs.umich.edu private: 2623105Sstever@eecs.umich.edu TraceCPU* owner; 2633105Sstever@eecs.umich.edu }; 2648321Ssteve.reinhardt@amd.com 2658321Ssteve.reinhardt@amd.com /** 2668321Ssteve.reinhardt@amd.com * DcachePort class that interfaces with L1 Data Cache. 2678321Ssteve.reinhardt@amd.com */ 2688321Ssteve.reinhardt@amd.com class DcachePort : public MasterPort 2698321Ssteve.reinhardt@amd.com { 2708321Ssteve.reinhardt@amd.com 2718321Ssteve.reinhardt@amd.com public: 2728321Ssteve.reinhardt@amd.com /** Default constructor. 
*/ 2738321Ssteve.reinhardt@amd.com DcachePort(TraceCPU* _cpu) 2748321Ssteve.reinhardt@amd.com : MasterPort(_cpu->name() + ".dcache_port", _cpu), 2758321Ssteve.reinhardt@amd.com owner(_cpu) 2768321Ssteve.reinhardt@amd.com { } 2778321Ssteve.reinhardt@amd.com 2783105Sstever@eecs.umich.edu public: 2793105Sstever@eecs.umich.edu 2803105Sstever@eecs.umich.edu /** 2813105Sstever@eecs.umich.edu * Receive the timing reponse and call dcacheRecvTimingResp() method 2823105Sstever@eecs.umich.edu * of the dcacheGen to handle completing the load 2833105Sstever@eecs.umich.edu * 2843105Sstever@eecs.umich.edu * @param pkt Pointer to packet received 2853105Sstever@eecs.umich.edu * @return true 2863105Sstever@eecs.umich.edu */ 2873105Sstever@eecs.umich.edu bool recvTimingResp(PacketPtr pkt); 2883105Sstever@eecs.umich.edu 2893105Sstever@eecs.umich.edu /** 2903105Sstever@eecs.umich.edu * Required functionally but do nothing. 2913105Sstever@eecs.umich.edu * 2923105Sstever@eecs.umich.edu * @param pkt Pointer to packet received 2933105Sstever@eecs.umich.edu */ 2943105Sstever@eecs.umich.edu void recvTimingSnoopReq(PacketPtr pkt) 2951585SN/A { } 2961310SN/A 2971310SN/A /** 2981310SN/A * Required functionally but do nothing. 2991310SN/A * 3007673Snate@binkert.org * @param pkt Pointer to packet received 3011310SN/A */ 3021310SN/A void recvFunctionalSnoop(PacketPtr pkt) 3031310SN/A { } 3041310SN/A 3051427SN/A /** 3061310SN/A * Handle a retry signalled by the cache if data access failed in the 3071310SN/A * first attempt. 3082738SN/A */ 3093105Sstever@eecs.umich.edu void recvReqRetry(); 3102738SN/A 3112738SN/A /** 3122740SN/A * Required functionally. 3132740SN/A * 3142740SN/A * @return true since we have to snoop 3152740SN/A */ 3162740SN/A bool isSnooping() const { return true; } 3172740SN/A 3182740SN/A private: 3193105Sstever@eecs.umich.edu TraceCPU* owner; 3201310SN/A }; 3213105Sstever@eecs.umich.edu 3223105Sstever@eecs.umich.edu /** Port to connect to L1 instruction cache. 
*/ 3233105Sstever@eecs.umich.edu IcachePort icachePort; 3243105Sstever@eecs.umich.edu 3253105Sstever@eecs.umich.edu /** Port to connect to L1 data cache. */ 3268321Ssteve.reinhardt@amd.com DcachePort dcachePort; 3273105Sstever@eecs.umich.edu 3283105Sstever@eecs.umich.edu /** Master id for instruction read requests. */ 3293105Sstever@eecs.umich.edu const MasterID instMasterID; 3303105Sstever@eecs.umich.edu 3313105Sstever@eecs.umich.edu /** Master id for data read and write requests. */ 3321310SN/A const MasterID dataMasterID; 3331585SN/A 3347675Snate@binkert.org /** File names for input instruction and data traces. */ 3357675Snate@binkert.org std::string instTraceFile, dataTraceFile; 3367675Snate@binkert.org 3377675Snate@binkert.org /** 3387675Snate@binkert.org * Generator to read protobuf trace containing memory requests at fixed 3397675Snate@binkert.org * timestamps, perform flow control and issue memory requests. If L1 cache 3407675Snate@binkert.org * port sends packet succesfully, determine the tick to send the next 3417675Snate@binkert.org * packet else wait for retry from cache. 3427675Snate@binkert.org */ 3431692SN/A class FixedRetryGen 3441692SN/A { 3451585SN/A 3467528Ssteve.reinhardt@amd.com private: 3477528Ssteve.reinhardt@amd.com 3487528Ssteve.reinhardt@amd.com /** 3491585SN/A * This struct stores a line in the trace file. 
3501585SN/A */ 3511585SN/A struct TraceElement { 3523100SN/A 3533100SN/A /** Specifies if the request is to be a read or a write */ 3543100SN/A MemCmd cmd; 3558596Ssteve.reinhardt@amd.com 3568596Ssteve.reinhardt@amd.com /** The address for the request */ 3578596Ssteve.reinhardt@amd.com Addr addr; 3588596Ssteve.reinhardt@amd.com 3598596Ssteve.reinhardt@amd.com /** The size of the access for the request */ 3608596Ssteve.reinhardt@amd.com Addr blocksize; 3618596Ssteve.reinhardt@amd.com 3628596Ssteve.reinhardt@amd.com /** The time at which the request should be sent */ 3638597Ssteve.reinhardt@amd.com Tick tick; 3648597Ssteve.reinhardt@amd.com 3658597Ssteve.reinhardt@amd.com /** Potential request flags to use */ 3668597Ssteve.reinhardt@amd.com Request::FlagsType flags; 3678597Ssteve.reinhardt@amd.com 3688597Ssteve.reinhardt@amd.com /** Instruction PC */ 3698597Ssteve.reinhardt@amd.com Addr pc; 3708597Ssteve.reinhardt@amd.com 3718597Ssteve.reinhardt@amd.com /** 3728597Ssteve.reinhardt@amd.com * Check validity of this element. 3738597Ssteve.reinhardt@amd.com * 3748597Ssteve.reinhardt@amd.com * @return if this element is valid 3758597Ssteve.reinhardt@amd.com */ 3768597Ssteve.reinhardt@amd.com bool isValid() const { 3778597Ssteve.reinhardt@amd.com return cmd != MemCmd::InvalidCmd; 3788597Ssteve.reinhardt@amd.com } 3798597Ssteve.reinhardt@amd.com 3808597Ssteve.reinhardt@amd.com /** 3818597Ssteve.reinhardt@amd.com * Make this element invalid. 3828597Ssteve.reinhardt@amd.com */ 3838597Ssteve.reinhardt@amd.com void clear() { 3848596Ssteve.reinhardt@amd.com cmd = MemCmd::InvalidCmd; 3858596Ssteve.reinhardt@amd.com } 3868596Ssteve.reinhardt@amd.com }; 3878596Ssteve.reinhardt@amd.com 3888596Ssteve.reinhardt@amd.com /** 3898596Ssteve.reinhardt@amd.com * The InputStream encapsulates a trace file and the 3908596Ssteve.reinhardt@amd.com * internal buffers and populates TraceElements based on 3918596Ssteve.reinhardt@amd.com * the input. 
3928596Ssteve.reinhardt@amd.com */ 3938596Ssteve.reinhardt@amd.com class InputStream 3948596Ssteve.reinhardt@amd.com { 3958596Ssteve.reinhardt@amd.com 3968596Ssteve.reinhardt@amd.com private: 3978840Sandreas.hansson@arm.com 3988596Ssteve.reinhardt@amd.com // Input file stream for the protobuf trace 3998596Ssteve.reinhardt@amd.com ProtoInputStream trace; 4008596Ssteve.reinhardt@amd.com 4018596Ssteve.reinhardt@amd.com public: 4028596Ssteve.reinhardt@amd.com 4038596Ssteve.reinhardt@amd.com /** 4048596Ssteve.reinhardt@amd.com * Create a trace input stream for a given file name. 4058597Ssteve.reinhardt@amd.com * 4068860Sandreas.hansson@arm.com * @param filename Path to the file to read from 4078860Sandreas.hansson@arm.com */ 4088860Sandreas.hansson@arm.com InputStream(const std::string& filename); 4098860Sandreas.hansson@arm.com 4108860Sandreas.hansson@arm.com /** 4118860Sandreas.hansson@arm.com * Reset the stream such that it can be played once 4128860Sandreas.hansson@arm.com * again. 4138860Sandreas.hansson@arm.com */ 4148860Sandreas.hansson@arm.com void reset(); 4158860Sandreas.hansson@arm.com 4168860Sandreas.hansson@arm.com /** 4178860Sandreas.hansson@arm.com * Attempt to read a trace element from the stream, 4188860Sandreas.hansson@arm.com * and also notify the caller if the end of the file 4198596Ssteve.reinhardt@amd.com * was reached. 
4208596Ssteve.reinhardt@amd.com * 4218596Ssteve.reinhardt@amd.com * @param element Trace element to populate 4228596Ssteve.reinhardt@amd.com * @return True if an element could be read successfully 4238596Ssteve.reinhardt@amd.com */ 4248597Ssteve.reinhardt@amd.com bool read(TraceElement* element); 4258596Ssteve.reinhardt@amd.com }; 4268596Ssteve.reinhardt@amd.com 4278596Ssteve.reinhardt@amd.com public: 4288596Ssteve.reinhardt@amd.com /* Constructor */ 4298596Ssteve.reinhardt@amd.com FixedRetryGen(TraceCPU& _owner, const std::string& _name, 4308596Ssteve.reinhardt@amd.com MasterPort& _port, MasterID master_id, 4318596Ssteve.reinhardt@amd.com const std::string& trace_file) 4328596Ssteve.reinhardt@amd.com : owner(_owner), 4338596Ssteve.reinhardt@amd.com port(_port), 4348596Ssteve.reinhardt@amd.com masterID(master_id), 4358596Ssteve.reinhardt@amd.com trace(trace_file), 4368596Ssteve.reinhardt@amd.com genName(owner.name() + ".fixedretry" + _name), 4378596Ssteve.reinhardt@amd.com retryPkt(nullptr), 4388596Ssteve.reinhardt@amd.com delta(0), 4398596Ssteve.reinhardt@amd.com traceComplete(false) 4408597Ssteve.reinhardt@amd.com { 4418597Ssteve.reinhardt@amd.com } 4428597Ssteve.reinhardt@amd.com 4438597Ssteve.reinhardt@amd.com /** 4448597Ssteve.reinhardt@amd.com * Called from TraceCPU init(). Reads the first message from the 4458597Ssteve.reinhardt@amd.com * input trace file and returns the send tick. 4468597Ssteve.reinhardt@amd.com * 4478597Ssteve.reinhardt@amd.com * @return Tick when first packet must be sent 4488597Ssteve.reinhardt@amd.com */ 4498597Ssteve.reinhardt@amd.com Tick init(); 4508596Ssteve.reinhardt@amd.com 4518596Ssteve.reinhardt@amd.com /** 4528596Ssteve.reinhardt@amd.com * This tries to send current or retry packet and returns true if 4538596Ssteve.reinhardt@amd.com * successfull. It calls nextExecute() to read next message. 
4548596Ssteve.reinhardt@amd.com * 4558596Ssteve.reinhardt@amd.com * @return bool true if packet is sent successfully 4568596Ssteve.reinhardt@amd.com */ 4578596Ssteve.reinhardt@amd.com bool tryNext(); 4588596Ssteve.reinhardt@amd.com 4598596Ssteve.reinhardt@amd.com /** Returns name of the FixedRetryGen instance. */ 4608596Ssteve.reinhardt@amd.com const std::string& name() const { return genName; } 4618596Ssteve.reinhardt@amd.com 4623100SN/A /** 4633100SN/A * Creates a new request assigning the request parameters passed by the 4643100SN/A * arguments. Calls the port's sendTimingReq() and returns true if 4654762Snate@binkert.org * the packet was sent succesfully. It is called by tryNext() 4668840Sandreas.hansson@arm.com * 4673100SN/A * @param addr address of request 4683100SN/A * @param size size of request 4693100SN/A * @param cmd if it is a read or write request 4703100SN/A * @param flags associated request flags 4713100SN/A * @param pc instruction PC that generated the request 4723100SN/A * 4733100SN/A * @return true if packet was sent successfully 4747675Snate@binkert.org */ 4757675Snate@binkert.org bool send(Addr addr, unsigned size, const MemCmd& cmd, 4767675Snate@binkert.org Request::FlagsType flags, Addr pc); 4777675Snate@binkert.org 4787675Snate@binkert.org /** Exit the FixedRetryGen. */ 4797675Snate@binkert.org void exit(); 4807675Snate@binkert.org 4817675Snate@binkert.org /** 4827675Snate@binkert.org * Reads a line of the trace file. Returns the tick 4837675Snate@binkert.org * when the next request should be generated. If the end 4847675Snate@binkert.org * of the file has been reached, it returns false. 
4857675Snate@binkert.org * 4867675Snate@binkert.org * @return bool false id end of file has been reached 4877675Snate@binkert.org */ 4887811Ssteve.reinhardt@amd.com bool nextExecute(); 4897675Snate@binkert.org 4907675Snate@binkert.org /** 4918597Ssteve.reinhardt@amd.com * Returns the traceComplete variable which is set when end of the 4928597Ssteve.reinhardt@amd.com * input trace file is reached. 4938597Ssteve.reinhardt@amd.com * 4948597Ssteve.reinhardt@amd.com * @return bool true if traceComplete is set, false otherwise. 4958597Ssteve.reinhardt@amd.com */ 4968597Ssteve.reinhardt@amd.com bool isTraceComplete() { return traceComplete; } 4978597Ssteve.reinhardt@amd.com 4988597Ssteve.reinhardt@amd.com int64_t tickDelta() { return delta; } 4998597Ssteve.reinhardt@amd.com 5008597Ssteve.reinhardt@amd.com void regStats(); 5018597Ssteve.reinhardt@amd.com 5028597Ssteve.reinhardt@amd.com private: 5038737Skoansin.tan@gmail.com 5048597Ssteve.reinhardt@amd.com /** Reference of the TraceCPU. */ 5057673Snate@binkert.org TraceCPU& owner; 5067673Snate@binkert.org 5078840Sandreas.hansson@arm.com /** Reference of the port to be used to issue memory requests. */ 5088840Sandreas.hansson@arm.com MasterPort& port; 5097673Snate@binkert.org 5104762Snate@binkert.org /** MasterID used for the requests being sent. */ 5115610Snate@binkert.org const MasterID masterID; 5127673Snate@binkert.org 5137673Snate@binkert.org /** Input stream used for reading the input trace file. */ 5144762Snate@binkert.org InputStream trace; 5154762Snate@binkert.org 5164762Snate@binkert.org /** String to store the name of the FixedRetryGen. */ 5177673Snate@binkert.org std::string genName; 5187673Snate@binkert.org 5194762Snate@binkert.org /** PacketPtr used to store the packet to retry. 
*/ 5208596Ssteve.reinhardt@amd.com PacketPtr retryPkt; 5218597Ssteve.reinhardt@amd.com 5228597Ssteve.reinhardt@amd.com /** 5238597Ssteve.reinhardt@amd.com * Stores the difference in the send ticks of the current and last 5248597Ssteve.reinhardt@amd.com * packets. Keeping this signed to check overflow to a negative value 5258597Ssteve.reinhardt@amd.com * which will be caught by assert(delta > 0) 5268597Ssteve.reinhardt@amd.com */ 5278597Ssteve.reinhardt@amd.com int64_t delta; 5288597Ssteve.reinhardt@amd.com 5298597Ssteve.reinhardt@amd.com /** 5308596Ssteve.reinhardt@amd.com * Set to true when end of trace is reached. 5318597Ssteve.reinhardt@amd.com */ 5328597Ssteve.reinhardt@amd.com bool traceComplete; 5338597Ssteve.reinhardt@amd.com 5348597Ssteve.reinhardt@amd.com /** Store an element read from the trace to send as the next packet. */ 5358597Ssteve.reinhardt@amd.com TraceElement currElement; 5368597Ssteve.reinhardt@amd.com 5378597Ssteve.reinhardt@amd.com /** Stats for instruction accesses replayed. */ 5388596Ssteve.reinhardt@amd.com Stats::Scalar numSendAttempted; 5398597Ssteve.reinhardt@amd.com Stats::Scalar numSendSucceeded; 5408597Ssteve.reinhardt@amd.com Stats::Scalar numSendFailed; 5418597Ssteve.reinhardt@amd.com Stats::Scalar numRetrySucceeded; 5428597Ssteve.reinhardt@amd.com /** Last simulated tick by the FixedRetryGen */ 5438597Ssteve.reinhardt@amd.com Stats::Scalar instLastTick; 5448597Ssteve.reinhardt@amd.com 5458840Sandreas.hansson@arm.com }; 5468840Sandreas.hansson@arm.com 5478840Sandreas.hansson@arm.com /** 5488597Ssteve.reinhardt@amd.com * The elastic data memory request generator to read protobuf trace 5498597Ssteve.reinhardt@amd.com * containing execution trace annotated with data and ordering 5505488Snate@binkert.org * dependencies. It deduces the time at which to send a load/store request 5517673Snate@binkert.org * by tracking the dependencies. 
It attempts to send a memory request for a 5527673Snate@binkert.org * load/store without performing real execution of micro-ops. If L1 cache 5535488Snate@binkert.org * port sends packet succesfully, the generator checks which instructions 5545488Snate@binkert.org * became dependency free as a result of this and schedules an event 5555488Snate@binkert.org * accordingly. If it fails to send the packet, it waits for a retry from 5563100SN/A * the cache. 5572740SN/A */ 558679SN/A class ElasticDataGen 559679SN/A { 5601692SN/A 5611692SN/A private: 562679SN/A 5631692SN/A /** Node sequence number type. */ 5643100SN/A typedef uint64_t NodeSeqNum; 5654762Snate@binkert.org 5663100SN/A /** Node ROB number type. */ 5678597Ssteve.reinhardt@amd.com typedef uint64_t NodeRobNum; 5688597Ssteve.reinhardt@amd.com 5698597Ssteve.reinhardt@amd.com typedef ProtoMessage::InstDepRecord::RecordType RecordType; 5708597Ssteve.reinhardt@amd.com typedef ProtoMessage::InstDepRecord Record; 5718597Ssteve.reinhardt@amd.com 5728597Ssteve.reinhardt@amd.com /** 5738597Ssteve.reinhardt@amd.com * The struct GraphNode stores an instruction in the trace file. The 5748597Ssteve.reinhardt@amd.com * format of the trace file favours constructing a dependency graph of 5758597Ssteve.reinhardt@amd.com * the execution and this struct is used to encapsulate the request 5768597Ssteve.reinhardt@amd.com * data as well as pointers to its dependent GraphNodes. 5778597Ssteve.reinhardt@amd.com */ 5788597Ssteve.reinhardt@amd.com class GraphNode { 5798597Ssteve.reinhardt@amd.com 5808597Ssteve.reinhardt@amd.com public: 5818597Ssteve.reinhardt@amd.com /** 5828597Ssteve.reinhardt@amd.com * The maximum no. of ROB dependencies. There can be at most 2 5838597Ssteve.reinhardt@amd.com * order dependencies which could exist for a store. For a load 5848597Ssteve.reinhardt@amd.com * and comp node there can be at most one order dependency. 
5858597Ssteve.reinhardt@amd.com */ 5868597Ssteve.reinhardt@amd.com static const uint8_t maxRobDep = 2; 5878597Ssteve.reinhardt@amd.com 5888597Ssteve.reinhardt@amd.com /** Typedef for the array containing the ROB dependencies */ 5898597Ssteve.reinhardt@amd.com typedef std::array<NodeSeqNum, maxRobDep> RobDepArray; 5908597Ssteve.reinhardt@amd.com 5918597Ssteve.reinhardt@amd.com /** Typedef for the array containing the register dependencies */ 5928597Ssteve.reinhardt@amd.com typedef std::array<NodeSeqNum, TheISA::MaxInstSrcRegs> RegDepArray; 5938597Ssteve.reinhardt@amd.com 5948597Ssteve.reinhardt@amd.com /** Instruction sequence number */ 5958597Ssteve.reinhardt@amd.com NodeSeqNum seqNum; 5968597Ssteve.reinhardt@amd.com 5978597Ssteve.reinhardt@amd.com /** ROB occupancy number */ 5988597Ssteve.reinhardt@amd.com NodeRobNum robNum; 5998597Ssteve.reinhardt@amd.com 6008597Ssteve.reinhardt@amd.com /** Type of the node corresponding to the instruction modelled by it */ 6018597Ssteve.reinhardt@amd.com RecordType type; 6028597Ssteve.reinhardt@amd.com 6038597Ssteve.reinhardt@amd.com /** The address for the request if any */ 6048597Ssteve.reinhardt@amd.com Addr physAddr; 6052740SN/A 6062740SN/A /** The virtual address for the request if any */ 6072740SN/A Addr virtAddr; 6082740SN/A 6092740SN/A /** The address space id which is set if the virtual address is set */ 6102740SN/A uint32_t asid; 6112740SN/A 6122740SN/A /** Size of request if any */ 6132740SN/A uint32_t size; 6142740SN/A 6152740SN/A /** Request flags if any */ 6162740SN/A Request::Flags flags; 6172740SN/A 6182740SN/A /** Instruction PC */ 6192740SN/A Addr pc; 6202740SN/A 6212711SN/A /** Array of order dependencies. 
*/ 6222740SN/A RobDepArray robDep; 6232740SN/A 6242740SN/A /** Number of order dependencies */ 6252711SN/A uint8_t numRobDep; 6262740SN/A 6272740SN/A /** Computational delay */ 6287528Ssteve.reinhardt@amd.com uint64_t compDelay; 6292740SN/A 6304762Snate@binkert.org /** 6312740SN/A * Array of register dependencies (incoming) if any. Maximum number 6322712SN/A * of source registers used to set maximum size of the array 6338321Ssteve.reinhardt@amd.com */ 6348321Ssteve.reinhardt@amd.com RegDepArray regDep; 6358321Ssteve.reinhardt@amd.com 6368321Ssteve.reinhardt@amd.com /** Number of register dependencies */ 6378321Ssteve.reinhardt@amd.com uint8_t numRegDep; 6388321Ssteve.reinhardt@amd.com 6398321Ssteve.reinhardt@amd.com /** 6408321Ssteve.reinhardt@amd.com * A vector of nodes dependent (outgoing) on this node. A 6412711SN/A * sequential container is chosen because when dependents become 6427528Ssteve.reinhardt@amd.com * free, they attempt to issue in program order. 6437528Ssteve.reinhardt@amd.com */ 6442740SN/A std::vector<GraphNode *> dependents; 6452740SN/A 6462740SN/A /** Is the node a load */ 6477528Ssteve.reinhardt@amd.com bool isLoad() const { return (type == Record::LOAD); } 6487528Ssteve.reinhardt@amd.com 6497528Ssteve.reinhardt@amd.com /** Is the node a store */ 6507528Ssteve.reinhardt@amd.com bool isStore() const { return (type == Record::STORE); } 6512740SN/A 6522740SN/A /** Is the node a compute (non load/store) node */ 6533105Sstever@eecs.umich.edu bool isComp() const { return (type == Record::COMP); } 6543105Sstever@eecs.umich.edu 6553105Sstever@eecs.umich.edu /** Initialize register dependency array to all zeroes */ 6561692SN/A void clearRegDep(); 6571692SN/A 6581692SN/A /** Initialize register dependency array to all zeroes */ 659679SN/A void clearRobDep(); 6602740SN/A 6612740SN/A /** Remove completed instruction from register dependency array */ 6622740SN/A bool removeRegDep(NodeSeqNum reg_dep); 6632740SN/A 6642740SN/A /** Remove completed instruction 
from order dependency array */ 6651692SN/A bool removeRobDep(NodeSeqNum rob_dep); 6662740SN/A 6672740SN/A /** Check for all dependencies on completed inst */ 6682740SN/A bool removeDepOnInst(NodeSeqNum done_seq_num); 6692740SN/A 6702740SN/A /** Return true if node has a request which is strictly ordered */ 6712740SN/A bool isStrictlyOrdered() const { 6722740SN/A return (flags.isSet(Request::STRICT_ORDER)); 6732740SN/A } 6742740SN/A /** 6752740SN/A * Write out element in trace-compatible format using debug flag 6762740SN/A * TraceCPUData. 6772740SN/A */ 6782740SN/A void writeElementAsTrace() const; 6792740SN/A 6802740SN/A /** Return string specifying the type of the node */ 6811343SN/A std::string typeToStr() const; 6823105Sstever@eecs.umich.edu }; 6833105Sstever@eecs.umich.edu 6843105Sstever@eecs.umich.edu /** Struct to store a ready-to-execute node and its execution tick. */ 6853105Sstever@eecs.umich.edu struct ReadyNode 6863105Sstever@eecs.umich.edu { 6873105Sstever@eecs.umich.edu /** The sequence number of the ready node */ 6883105Sstever@eecs.umich.edu NodeSeqNum seqNum; 6893105Sstever@eecs.umich.edu 6903105Sstever@eecs.umich.edu /** The tick at which the ready node must be executed */ 6913105Sstever@eecs.umich.edu Tick execTick; 6921692SN/A }; 6932738SN/A 6943105Sstever@eecs.umich.edu /** 6952738SN/A * The HardwareResource class models structures that hold the in-flight 6961692SN/A * nodes. When a node becomes dependency free, first check if resources 6971692SN/A * are available to issue it. 6981427SN/A */ 6997528Ssteve.reinhardt@amd.com class HardwareResource 7007528Ssteve.reinhardt@amd.com { 7017528Ssteve.reinhardt@amd.com public: 7027500Ssteve.reinhardt@amd.com /** 7037500Ssteve.reinhardt@amd.com * Constructor that initializes the sizes of the structures. 
7047500Ssteve.reinhardt@amd.com * 7057527Ssteve.reinhardt@amd.com * @param max_rob size of the Reorder Buffer 7067527Ssteve.reinhardt@amd.com * @param max_stores size of Store Buffer 7077500Ssteve.reinhardt@amd.com * @param max_loads size of Load Buffer 7087500Ssteve.reinhardt@amd.com */ 7097500Ssteve.reinhardt@amd.com HardwareResource(uint16_t max_rob, uint16_t max_stores, 7101692SN/A uint16_t max_loads); 7111692SN/A 7121427SN/A /** 7131692SN/A * Occupy appropriate structures for an issued node. 7141692SN/A * 7151692SN/A * @param node_ptr pointer to the issued node 7161692SN/A */ 7171692SN/A void occupy(const GraphNode* new_node); 7181692SN/A 7191692SN/A /** 7201427SN/A * Release appropriate structures for a completed node. 7212738SN/A * 7222738SN/A * @param node_ptr pointer to the completed node 7233105Sstever@eecs.umich.edu */ 7242738SN/A void release(const GraphNode* done_node); 7252738SN/A 7262740SN/A /** Release store buffer entry for a completed store */ 7272740SN/A void releaseStoreBuffer(); 7282740SN/A 7292740SN/A /** 7302740SN/A * Check if structures required to issue a node are free. 7313105Sstever@eecs.umich.edu * 7321692SN/A * @param node_ptr pointer to the node ready to issue 7331310SN/A * @return true if resources are available 7341692SN/A */ 7351587SN/A bool isAvailable(const GraphNode* new_node) const; 7361692SN/A 7371692SN/A /** 7381605SN/A * Check if there are any outstanding requests, i.e. requests for 7391605SN/A * which we are yet to receive a response. 
7407528Ssteve.reinhardt@amd.com * 7418321Ssteve.reinhardt@amd.com * @return true if there is at least one read or write request 7428321Ssteve.reinhardt@amd.com * outstanding 7438321Ssteve.reinhardt@amd.com */ 7443105Sstever@eecs.umich.edu bool awaitingResponse() const; 7451310SN/A 7467528Ssteve.reinhardt@amd.com /** Print resource occupancy for debugging */ 7473105Sstever@eecs.umich.edu void printOccupancy(); 7487528Ssteve.reinhardt@amd.com 7493105Sstever@eecs.umich.edu private: 7501693SN/A /** 7513105Sstever@eecs.umich.edu * The size of the ROB used to throttle the max. number of in-flight 7523105Sstever@eecs.umich.edu * nodes. 7533105Sstever@eecs.umich.edu */ 7541310SN/A const uint16_t sizeROB; 7551310SN/A 7561692SN/A /** 7571692SN/A * The size of store buffer. This is used to throttle the max. number 7581692SN/A * of in-flight stores. 7591692SN/A */ 7601692SN/A const uint16_t sizeStoreBuffer; 7611692SN/A 7621310SN/A /** 7637528Ssteve.reinhardt@amd.com * The size of load buffer. This is used to throttle the max. number 7647528Ssteve.reinhardt@amd.com * of in-flight loads. 7657528Ssteve.reinhardt@amd.com */ 7667528Ssteve.reinhardt@amd.com const uint16_t sizeLoadBuffer; 7677528Ssteve.reinhardt@amd.com 7687528Ssteve.reinhardt@amd.com /** 7697528Ssteve.reinhardt@amd.com * A map from the sequence number to the ROB number of the in- 7707528Ssteve.reinhardt@amd.com * flight nodes. This includes all nodes that are in the readyList 7717528Ssteve.reinhardt@amd.com * plus the loads for which a request has been sent which are not 7727528Ssteve.reinhardt@amd.com * present in the readyList. But such loads are not yet complete 7737528Ssteve.reinhardt@amd.com * and thus occupy resources. 
We need to query the oldest in-flight 7747528Ssteve.reinhardt@amd.com * node and since a map container keeps all its keys sorted using 7757528Ssteve.reinhardt@amd.com * the less than criterion, the first element is the in-flight node 7767528Ssteve.reinhardt@amd.com * with the least sequence number, i.e. the oldest in-flight node. 7778321Ssteve.reinhardt@amd.com */ 7788321Ssteve.reinhardt@amd.com std::map<NodeSeqNum, NodeRobNum> inFlightNodes; 7798321Ssteve.reinhardt@amd.com 7807528Ssteve.reinhardt@amd.com /** The ROB number of the oldest in-flight node */ 7817742Sgblack@eecs.umich.edu NodeRobNum oldestInFlightRobNum; 7827742Sgblack@eecs.umich.edu 7831693SN/A /** Number of ready loads for which request may or may not be sent */ 7841693SN/A uint16_t numInFlightLoads; 7857528Ssteve.reinhardt@amd.com 7861693SN/A /** Number of ready stores for which request may or may not be sent */ 7871693SN/A uint16_t numInFlightStores; 7887528Ssteve.reinhardt@amd.com }; 7897528Ssteve.reinhardt@amd.com 7907528Ssteve.reinhardt@amd.com /** 7918321Ssteve.reinhardt@amd.com * The InputStream encapsulates a trace file and the 7928321Ssteve.reinhardt@amd.com * internal buffers and populates GraphNodes based on 7938321Ssteve.reinhardt@amd.com * the input. 7947528Ssteve.reinhardt@amd.com */ 7957742Sgblack@eecs.umich.edu class InputStream 7967742Sgblack@eecs.umich.edu { 7977742Sgblack@eecs.umich.edu 7987742Sgblack@eecs.umich.edu private: 7997738Sgblack@eecs.umich.edu 8007528Ssteve.reinhardt@amd.com /** Input file stream for the protobuf trace */ 8017528Ssteve.reinhardt@amd.com ProtoInputStream trace; 8021310SN/A 8037528Ssteve.reinhardt@amd.com /** 8047528Ssteve.reinhardt@amd.com * A multiplier for the compute delays in the trace to modulate 8057528Ssteve.reinhardt@amd.com * the Trace CPU frequency either up or down. The Trace CPU's 8067528Ssteve.reinhardt@amd.com * clock domain frequency must also be set to match the expected 8077528Ssteve.reinhardt@amd.com * result of frequency scaling. 
8087528Ssteve.reinhardt@amd.com */ 8097528Ssteve.reinhardt@amd.com const double timeMultiplier; 8107528Ssteve.reinhardt@amd.com 8117528Ssteve.reinhardt@amd.com /** Count of committed ops read from trace plus the filtered ops */ 8128321Ssteve.reinhardt@amd.com uint64_t microOpCount; 8137528Ssteve.reinhardt@amd.com 8147528Ssteve.reinhardt@amd.com /** 8158321Ssteve.reinhardt@amd.com * The window size that is read from the header of the protobuf 8168321Ssteve.reinhardt@amd.com * trace and used to process the dependency trace 8178321Ssteve.reinhardt@amd.com */ 8187528Ssteve.reinhardt@amd.com uint32_t windowSize; 8193105Sstever@eecs.umich.edu public: 8201692SN/A 8212740SN/A /** 8228321Ssteve.reinhardt@amd.com * Create a trace input stream for a given file name. 8231692SN/A * 8241692SN/A * @param filename Path to the file to read from 8251692SN/A * @param time_multiplier used to scale the compute delays 8261692SN/A */ 8271310SN/A InputStream(const std::string& filename, 8281692SN/A const double time_multiplier); 8291692SN/A 8301310SN/A /** 8311692SN/A * Reset the stream such that it can be played once 8321692SN/A * again. 8331310SN/A */ 8341692SN/A void reset(); 8351692SN/A 8361692SN/A /** 8371310SN/A * Attempt to read a trace element from the stream, 8381692SN/A * and also notify the caller if the end of the file 8391692SN/A * was reached. 
8401692SN/A * 8411692SN/A * @param element Trace element to populate 8421692SN/A * @param size of register dependency array stored in the element 8431692SN/A * @return True if an element could be read successfully 8441814SN/A */ 8451692SN/A bool read(GraphNode* element); 8461692SN/A 8471692SN/A /** Get window size from trace */ 8481692SN/A uint32_t getWindowSize() const { return windowSize; } 8491692SN/A 8501692SN/A /** Get number of micro-ops modelled in the TraceCPU replay */ 8511692SN/A uint64_t getMicroOpCount() const { return microOpCount; } 8525952Ssaidi@eecs.umich.edu }; 8531692SN/A 8541692SN/A public: 8551692SN/A /* Constructor */ 8568459SAli.Saidi@ARM.com ElasticDataGen(TraceCPU& _owner, const std::string& _name, 8578459SAli.Saidi@ARM.com MasterPort& _port, MasterID master_id, 8588459SAli.Saidi@ARM.com const std::string& trace_file, TraceCPUParams *params) 8598459SAli.Saidi@ARM.com : owner(_owner), 8608459SAli.Saidi@ARM.com port(_port), 8618459SAli.Saidi@ARM.com masterID(master_id), 8628459SAli.Saidi@ARM.com trace(trace_file, 1.0 / params->freqMultiplier), 8638927Sandreas.hansson@arm.com genName(owner.name() + ".elastic" + _name), 8648927Sandreas.hansson@arm.com retryPkt(nullptr), 8658927Sandreas.hansson@arm.com traceComplete(false), 8668927Sandreas.hansson@arm.com nextRead(false), 8678459SAli.Saidi@ARM.com execComplete(false), 8688459SAli.Saidi@ARM.com windowSize(trace.getWindowSize()), 8698459SAli.Saidi@ARM.com hwResource(params->sizeROB, params->sizeStoreBuffer, 8708459SAli.Saidi@ARM.com params->sizeLoadBuffer) 8718459SAli.Saidi@ARM.com { 8728459SAli.Saidi@ARM.com DPRINTF(TraceCPUData, "Window size in the trace is %d.\n", 8738459SAli.Saidi@ARM.com windowSize); 8748459SAli.Saidi@ARM.com } 8751815SN/A 8761815SN/A /** 8771815SN/A * Called from TraceCPU init(). Reads the first message from the 8787527Ssteve.reinhardt@amd.com * input trace file and returns the send tick. 
8793105Sstever@eecs.umich.edu * 8803105Sstever@eecs.umich.edu * @return Tick when first packet must be sent 8816654Snate@binkert.org */ 8823105Sstever@eecs.umich.edu Tick init(); 8833105Sstever@eecs.umich.edu 8843105Sstever@eecs.umich.edu /** 8853105Sstever@eecs.umich.edu * Adjust traceOffset based on what TraceCPU init() determines on 8863105Sstever@eecs.umich.edu * comparing the offsets in the fetch request and elastic traces. 8873105Sstever@eecs.umich.edu * 8883105Sstever@eecs.umich.edu * @param trace_offset trace offset set by comparing both traces 8893105Sstever@eecs.umich.edu */ 8903107Sstever@eecs.umich.edu void adjustInitTraceOffset(Tick& offset); 8913107Sstever@eecs.umich.edu 8923107Sstever@eecs.umich.edu /** Returns name of the ElasticDataGen instance. */ 8933107Sstever@eecs.umich.edu const std::string& name() const { return genName; } 8943107Sstever@eecs.umich.edu 8953105Sstever@eecs.umich.edu /** Exit the ElasticDataGen. */ 8963105Sstever@eecs.umich.edu void exit(); 8973105Sstever@eecs.umich.edu 8983105Sstever@eecs.umich.edu /** 8995037Smilesck@eecs.umich.edu * Reads a line of the trace file. Returns the tick when the next 9005543Ssaidi@eecs.umich.edu * request should be generated. If the end of the file has been 9011692SN/A * reached, it returns false. 9022738SN/A * 9032738SN/A * @return bool false if end of file has been reached else true 9044081Sbinkertn@umich.edu */ 9055037Smilesck@eecs.umich.edu bool readNextWindow(); 9061692SN/A 9078664SAli.Saidi@ARM.com /** 9087528Ssteve.reinhardt@amd.com * Iterate over the dependencies of a new node and add the new node 9098664SAli.Saidi@ARM.com * to the list of dependents of the parent node. 
9108664SAli.Saidi@ARM.com * 9111692SN/A * @param new_node new node to add to the graph 9128664SAli.Saidi@ARM.com * @tparam dep_array the dependency array of type rob or register, 9133105Sstever@eecs.umich.edu * that is to be iterated, and may get modified 9141692SN/A * @param num_dep the number of dependencies set in the array 9155037Smilesck@eecs.umich.edu * which may get modified during iteration 9165037Smilesck@eecs.umich.edu */ 9171692SN/A template<typename T> void addDepsOnParent(GraphNode *new_node, 9188664SAli.Saidi@ARM.com T& dep_array, 9193105Sstever@eecs.umich.edu uint8_t& num_dep); 9203105Sstever@eecs.umich.edu 9215037Smilesck@eecs.umich.edu /** 9223103Sstever@eecs.umich.edu * This is the main execute function which consumes nodes from the 9235543Ssaidi@eecs.umich.edu * sorted readyList. First attempt to issue the pending dependency-free 9241692SN/A * nodes held in the depFreeQueue. Insert the ready-to-issue nodes into 9258664SAli.Saidi@ARM.com * the readyList. Then iterate through the readyList and when a node 9268664SAli.Saidi@ARM.com * has its execute tick equal to curTick(), execute it. If the node is 9278664SAli.Saidi@ARM.com * a load or a store call executeMemReq() and if it is neither, simply 9288664SAli.Saidi@ARM.com * mark it complete. 9298664SAli.Saidi@ARM.com */ 9308664SAli.Saidi@ARM.com void execute(); 9318664SAli.Saidi@ARM.com 9328664SAli.Saidi@ARM.com /** 9338664SAli.Saidi@ARM.com * Creates a new request for a load or store assigning the request 9349017Sandreas.hansson@arm.com * parameters. Calls the port's sendTimingReq() and returns a packet 9359017Sandreas.hansson@arm.com * if the send failed so that it can be saved for a retry. 
9369017Sandreas.hansson@arm.com * 9379017Sandreas.hansson@arm.com * @param node_ptr pointer to the load or store node to be executed 9388664SAli.Saidi@ARM.com * 9398664SAli.Saidi@ARM.com * @return packet pointer if the request failed and nullptr if it was 9408664SAli.Saidi@ARM.com * sent successfully 9418848Ssteve.reinhardt@amd.com */ 9428848Ssteve.reinhardt@amd.com PacketPtr executeMemReq(GraphNode* node_ptr); 9438848Ssteve.reinhardt@amd.com 9448848Ssteve.reinhardt@amd.com /** 9458848Ssteve.reinhardt@amd.com * Add a ready node to the readyList. When inserting, ensure the nodes 9468848Ssteve.reinhardt@amd.com * are sorted in ascending order of their execute ticks. 9478848Ssteve.reinhardt@amd.com * 9488848Ssteve.reinhardt@amd.com * @param seq_num seq. num of ready node 9498848Ssteve.reinhardt@amd.com * @param exec_tick the execute tick of the ready node 9508848Ssteve.reinhardt@amd.com */ 9518669Ssaidi@eecs.umich.edu void addToSortedReadyList(NodeSeqNum seq_num, Tick exec_tick); 9528848Ssteve.reinhardt@amd.com 9538848Ssteve.reinhardt@amd.com /** Print readyList for debugging using debug flag TraceCPUData. */ 9548664SAli.Saidi@ARM.com void printReadyList(); 9558664SAli.Saidi@ARM.com 9569017Sandreas.hansson@arm.com /** 9579017Sandreas.hansson@arm.com * When a load writeback is received, that is when the load completes, 9589017Sandreas.hansson@arm.com * release the dependents on it. This is called from the dcache port 9599017Sandreas.hansson@arm.com * recvTimingResp(). 9609017Sandreas.hansson@arm.com */ 9618664SAli.Saidi@ARM.com void completeMemAccess(PacketPtr pkt); 9628664SAli.Saidi@ARM.com 9638664SAli.Saidi@ARM.com /** 9648664SAli.Saidi@ARM.com * Returns the execComplete variable which is set when the last 9659017Sandreas.hansson@arm.com * node is executed. 9669017Sandreas.hansson@arm.com * 9679017Sandreas.hansson@arm.com * @return bool true if execComplete is set, false otherwise. 
9688664SAli.Saidi@ARM.com */ 9698664SAli.Saidi@ARM.com bool isExecComplete() const { return execComplete; } 9708664SAli.Saidi@ARM.com 9714762Snate@binkert.org /** 9724762Snate@binkert.org * Attempts to issue a node once the node's source dependencies are 9734762Snate@binkert.org * complete. If resources are available then add it to the readyList, 9744762Snate@binkert.org * otherwise the node is not issued and is stored in depFreeQueue 9757677Snate@binkert.org * until resources become available. 9764762Snate@binkert.org * 9775488Snate@binkert.org * @param node_ptr pointer to node to be issued 9784762Snate@binkert.org * @param first true if this is the first attempt to issue this node 9794762Snate@binkert.org * @return true if node was added to readyList 9804762Snate@binkert.org */ 9814762Snate@binkert.org bool checkAndIssue(const GraphNode* node_ptr, bool first = true); 9824762Snate@binkert.org 9834762Snate@binkert.org /** Get number of micro-ops modelled in the TraceCPU replay */ 9844762Snate@binkert.org uint64_t getMicroOpCount() const { return trace.getMicroOpCount(); } 9856654Snate@binkert.org 9866654Snate@binkert.org void regStats(); 9874762Snate@binkert.org 9884762Snate@binkert.org private: 9894762Snate@binkert.org 9904762Snate@binkert.org /** Reference of the TraceCPU. */ 9914762Snate@binkert.org TraceCPU& owner; 9924762Snate@binkert.org 9934762Snate@binkert.org /** Reference of the port to be used to issue memory requests. */ 9944762Snate@binkert.org MasterPort& port; 9954762Snate@binkert.org 9964762Snate@binkert.org /** MasterID used for the requests being sent. */ 9974762Snate@binkert.org const MasterID masterID; 9984762Snate@binkert.org 9994762Snate@binkert.org /** Input stream used for reading the input trace file. */ 10004762Snate@binkert.org InputStream trace; 10014762Snate@binkert.org 10028912Sandreas.hansson@arm.com /** String to store the name of the FixedRetryGen. 
*/ 10038912Sandreas.hansson@arm.com std::string genName; 10048912Sandreas.hansson@arm.com 10058912Sandreas.hansson@arm.com /** PacketPtr used to store the packet to retry. */ 10068900Sandreas.hansson@arm.com PacketPtr retryPkt; 10078912Sandreas.hansson@arm.com 10084762Snate@binkert.org /** Set to true when end of trace is reached. */ 10094762Snate@binkert.org bool traceComplete; 10102738SN/A 10112740SN/A /** Set to true when the next window of instructions need to be read */ 10122740SN/A bool nextRead; 10132740SN/A 10142740SN/A /** Set true when execution of trace is complete */ 10152740SN/A bool execComplete; 10167526Ssteve.reinhardt@amd.com 10177526Ssteve.reinhardt@amd.com /** 10187526Ssteve.reinhardt@amd.com * Window size within which to check for dependencies. Its value is 10197526Ssteve.reinhardt@amd.com * made equal to the window size used to generate the trace which is 10205244Sgblack@eecs.umich.edu * recorded in the trace header. The dependency graph must be 10215244Sgblack@eecs.umich.edu * populated enough such that when a node completes, its potential 10225244Sgblack@eecs.umich.edu * child node must be found and the dependency removed before the 10234762Snate@binkert.org * completed node itself is removed. Thus as soon as the graph shrinks 10242740SN/A * to become smaller than this window, we read in the next window. 10257526Ssteve.reinhardt@amd.com */ 10262740SN/A const uint32_t windowSize; 10272740SN/A 10282740SN/A /** 10297527Ssteve.reinhardt@amd.com * Hardware resources required to contain in-flight nodes and to 10307527Ssteve.reinhardt@amd.com * throttle issuing of new nodes when resources are not available. 
10317527Ssteve.reinhardt@amd.com */ 10327527Ssteve.reinhardt@amd.com HardwareResource hwResource; 10337527Ssteve.reinhardt@amd.com 10347527Ssteve.reinhardt@amd.com /** Store the depGraph of GraphNodes */ 10357527Ssteve.reinhardt@amd.com std::unordered_map<NodeSeqNum, GraphNode*> depGraph; 10364762Snate@binkert.org 10374762Snate@binkert.org /** 10384762Snate@binkert.org * Queue of dependency-free nodes that are pending issue because 10394762Snate@binkert.org * resources are not available. This is chosen to be FIFO so that 10404762Snate@binkert.org * dependent nodes which become free in program order get pushed 10414762Snate@binkert.org * into the queue in that order. Thus nodes are more likely to 10424762Snate@binkert.org * issue in program order. 10432738SN/A */ 10447527Ssteve.reinhardt@amd.com std::queue<const GraphNode*> depFreeQueue; 10452738SN/A 10463105Sstever@eecs.umich.edu /** List of nodes that are ready to execute */ 10473105Sstever@eecs.umich.edu std::list<ReadyNode> readyList; 10482797SN/A 10494553Sbinkertn@umich.edu /** Stats for data memory accesses replayed. */ 10504553Sbinkertn@umich.edu Stats::Scalar maxDependents; 10514553Sbinkertn@umich.edu Stats::Scalar maxReadyListSize; 10524553Sbinkertn@umich.edu Stats::Scalar numSendAttempted; 10534859Snate@binkert.org Stats::Scalar numSendSucceeded; 10544553Sbinkertn@umich.edu Stats::Scalar numSendFailed; 10552797SN/A Stats::Scalar numRetrySucceeded; 10563202Shsul@eecs.umich.edu Stats::Scalar numSplitReqs; 10573202Shsul@eecs.umich.edu Stats::Scalar numSOLoads; 10583202Shsul@eecs.umich.edu Stats::Scalar numSOStores; 10593202Shsul@eecs.umich.edu /** Tick when ElasticDataGen completes execution */ 10604859Snate@binkert.org Stats::Scalar dataLastTick; 10612797SN/A }; 10622797SN/A 10634859Snate@binkert.org /** Instance of FixedRetryGen to replay instruction read requests. 
*/ 10642797SN/A FixedRetryGen icacheGen; 10653101Sstever@eecs.umich.edu 10663101Sstever@eecs.umich.edu /** Instance of ElasticDataGen to replay data read and write requests. */ 10673101Sstever@eecs.umich.edu ElasticDataGen dcacheGen; 10683101Sstever@eecs.umich.edu 1069679SN/A /** 10706654Snate@binkert.org * This is the control flow that uses the functionality of the icacheGen to 10716654Snate@binkert.org * replay the trace. It calls tryNext(). If it returns true then next event 10726654Snate@binkert.org * is scheduled at curTick() plus delta. If it returns false then delta is 10736654Snate@binkert.org * ignored and control is brought back via recvRetry(). 10746654Snate@binkert.org */ 10756654Snate@binkert.org void schedIcacheNext(); 10767528Ssteve.reinhardt@amd.com 10777528Ssteve.reinhardt@amd.com /** 10787528Ssteve.reinhardt@amd.com * This is the control flow that uses the functionality of the dcacheGen to 10796654Snate@binkert.org * replay the trace. It calls execute(). It checks if execution is complete 10806654Snate@binkert.org * and schedules an event to exit simulation accordingly. 10816654Snate@binkert.org */ 10826654Snate@binkert.org void schedDcacheNext(); 10836654Snate@binkert.org 10846654Snate@binkert.org /** Event for the control flow method schedIcacheNext() */ 10856654Snate@binkert.org EventWrapper<TraceCPU, &TraceCPU::schedIcacheNext> icacheNextEvent; 10866654Snate@binkert.org 10876654Snate@binkert.org /** Event for the control flow method schedDcacheNext() */ 10886654Snate@binkert.org EventWrapper<TraceCPU, &TraceCPU::schedDcacheNext> dcacheNextEvent; 10896654Snate@binkert.org 10906654Snate@binkert.org /** This is called when either generator finishes executing from the trace */ 10916654Snate@binkert.org void checkAndSchedExitEvent(); 10927526Ssteve.reinhardt@amd.com 10937526Ssteve.reinhardt@amd.com /** Set to true when one of the generators finishes replaying its trace. 
*/ 10947526Ssteve.reinhardt@amd.com bool oneTraceComplete; 10957526Ssteve.reinhardt@amd.com 10967528Ssteve.reinhardt@amd.com /** 10977528Ssteve.reinhardt@amd.com * This stores the time offset in the trace, which is taken away from 10987528Ssteve.reinhardt@amd.com * the ready times of requests. This is specially useful because the time 10997528Ssteve.reinhardt@amd.com * offset can be very large if the traces are generated from the middle of 11007528Ssteve.reinhardt@amd.com * a program. 11017528Ssteve.reinhardt@amd.com */ 11027528Ssteve.reinhardt@amd.com Tick traceOffset; 11037528Ssteve.reinhardt@amd.com 11047528Ssteve.reinhardt@amd.com /** 11057528Ssteve.reinhardt@amd.com * Number of Trace CPUs in the system used as a shared variable and passed 11067528Ssteve.reinhardt@amd.com * to the CountedExitEvent event used for counting down exit events. It is 11077528Ssteve.reinhardt@amd.com * incremented in the constructor call so that the total is arrived at 11087528Ssteve.reinhardt@amd.com * automatically. 11097528Ssteve.reinhardt@amd.com */ 11107528Ssteve.reinhardt@amd.com static int numTraceCPUs; 11117528Ssteve.reinhardt@amd.com 11126654Snate@binkert.org /** 11136654Snate@binkert.org * A CountedExitEvent which when serviced decrements the counter. A sim 11146654Snate@binkert.org * exit event is scheduled when the counter equals zero, that is all 11156654Snate@binkert.org * instances of Trace CPU have had their execCompleteEvent serviced. 11166654Snate@binkert.org */ 11176654Snate@binkert.org CountedExitEvent *execCompleteEvent; 11186654Snate@binkert.org 11196654Snate@binkert.org Stats::Scalar numSchedDcacheEvent; 11206654Snate@binkert.org Stats::Scalar numSchedIcacheEvent; 11211528SN/A 11221528SN/A /** Stat for number of simulated micro-ops. */ 11231528SN/A Stats::Scalar numOps; 11244762Snate@binkert.org /** Stat for the CPI. This is really cycles per micro-op and not inst. 
*/ 1125 Stats::Formula cpi; 1126 1127 public: 1128 1129 /** Used to get a reference to the icache port. */ 1130 MasterPort &getInstPort() { return icachePort; } 1131 1132 /** Used to get a reference to the dcache port. */ 1133 MasterPort &getDataPort() { return dcachePort; } 1134 1135 void regStats(); 1136}; 1137#endif // __CPU_TRACE_TRACE_CPU_HH__ 1138