/*
 * Copyright (c) 2013 - 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(name() + ".inst")),
        dataMasterID(params->system->getMasterId(name() + ".data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params->sizeROB, params->sizeStoreBuffer,
                  params->sizeLoadBuffer),
        icacheNextEvent(this),
        dcacheNextEvent(this),
        oneTraceComplete(false),
        firstFetchTick(0),
        execCompleteEvent(nullptr)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
             "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
             "to %d exceeds the max. value of %d.\n",
             params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
             " %d exceeds the max. value of %d.\n",
             params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request and schedule
    // icacheNextEvent at that tick.
    Tick first_icache_tick = icacheGen.init();
    schedule(icacheNextEvent, first_icache_tick);

    // Get the send tick of the first data read/write request and schedule
    // dcacheNextEvent at that tick.
    Tick first_dcache_tick = dcacheGen.init();
    schedule(dcacheNextEvent, first_dcache_tick);

    // The static counter for number of Trace CPUs is correctly set at this
    // point so create an event and pass it.
    execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                             numTraceCPUs);
    // Save the first fetch request tick to dump it as tickOffset
    firstFetchTick = first_icache_tick;
}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if traceComplete. If not, do nothing because sending failed
        // and next event will be scheduled via RecvRetry()
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete to exit sim.
            checkAndSchedExitEvent();
        }
    }
    return;
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());

        // Record stats which are computed at the end of simulation
        tickOffset = firstFetchTick;
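        // numCycles below is the number of CPU clock cycles elapsed between
        // the first fetch request and the completion of both traces.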
        numCycles = (clockEdge() - firstFetchTick) / clockPeriod();
        numOps = dcacheGen.getMicroOpCount();
        schedule(*execCompleteEvent, curTick());
    }
}

void
TraceCPU::regStats()
{

    BaseCPU::regStats();

    numSchedDcacheEvent
        .name(name() + ".numSchedDcacheEvent")
        .desc("Number of events scheduled to trigger data request generator")
        ;

    numSchedIcacheEvent
        .name(name() + ".numSchedIcacheEvent")
        .desc("Number of events scheduled to trigger instruction request "
              "generator")
        ;

    numOps
        .name(name() + ".numOps")
        .desc("Number of micro-ops simulated by the Trace CPU")
        ;

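    // Note: cpi appears to be a formula stat, so the ratio assigned below is
    // evaluated when the stats are dumped rather than at this assignment.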
    cpi
        .name(name() + ".cpi")
        .desc("Cycles per micro-op used as a proxy for CPI")
        .precision(6)
        ;
    cpi = numCycles / numOps;

    tickOffset
        .name(name() + ".tickOffset")
        .desc("The first execution tick for the root node of elastic traces")
        ;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
        .name(name() + ".maxDependents")
        .desc("Max number of dependents observed on a node")
        ;

    maxReadyListSize
        .name(name() + ".maxReadyListSize")
        .desc("Max size of the ready list observed")
        ;

    numSendAttempted
        .name(name() + ".numSendAttempted")
        .desc("Number of first attempts to send a request")
        ;

    numSendSucceeded
        .name(name() + ".numSendSucceeded")
        .desc("Number of successful first attempts")
        ;

    numSendFailed
        .name(name() + ".numSendFailed")
        .desc("Number of failed first attempts")
        ;

    numRetrySucceeded
        .name(name() + ".numRetrySucceeded")
        .desc("Number of successful retries")
        ;

    numSplitReqs
        .name(name() + ".numSplitReqs")
        .desc("Number of split requests")
        ;

    numSOLoads
        .name(name() + ".numSOLoads")
        .desc("Number of strictly ordered loads")
        ;

    numSOStores
        .name(name() + ".numSOStores")
        .desc("Number of strictly ordered stores")
        ;

    dataLastTick
        .name(name() + ".dataLastTick")
        .desc("Last tick simulated from the elastic data trace")
        ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

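    // Two windows are read up front: the second ensures that the dependents
    // of the nodes in the first window are already present in the graph
    // (see the matching comment in execute()).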
    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
            " is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{

    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then end
        // of trace has been reached and traceComplete needs to be set in
        // addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                          T& dep_array, uint8_t& num_dep)
{
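    // T is the fixed-size ROB or register dependency array of the new node;
    // per the convention noted below, entries equal to zero are unused slots.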
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts, maxDependents.value());
        } else {
            // The dependency is not found in the graph, so we consider the
            // execution of the parent complete, i.e. remove this dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                    "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the next free node has its execute
    // tick later than curTick or the end of readyList is reached
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt, send that; else execute the load/store
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList will then be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // when the load is sent, but only after its response is
                // received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as
            // complete as we do not send a request for this case. If it is a
            // store or a comp node we also mark all its dependents complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If cache is not blocked, schedule an event for the first execTick in
    // readyList else retry from cache will schedule the event. If the ready
    // list is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If trace is completely read, readyList is empty and depGraph is empty,
    // set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{

    DPRINTF(TraceCPUData, "Executing memory request %lli (addr %d, pc %#x, "
            "size %d, flags %d).\n", node_ptr->seqNum, node_ptr->addr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
608 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
609 node_ptr->seqNum);
610 return nullptr;
611 }
612
613 // Check if the request spans two cache lines as this condition triggers
614 // an assert fail in the L1 cache. If it does then truncate the size to
615 // access only until the end of that line and ignore the remainder. The
616 // stat counting this is useful to keep a check on how frequently this
617 // happens. If required the code could be revised to mimick splitting such
618 // a request into two.
619 unsigned blk_size = owner.cacheLineSize();
620 Addr blk_offset = (node_ptr->addr & (Addr)(blk_size - 1));
621 if (!(blk_offset + node_ptr->size <= blk_size)) {
622 node_ptr->size = blk_size - blk_offset;
623 ++numSplitReqs;
624 }
625
    // Create a request and the packet containing request
    Request* req = new Request(node_ptr->addr, node_ptr->size, node_ptr->flags,
                               masterID, node_ptr->seqNum,
                               ContextID(0), ThreadID(0));
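    // The trace sequence number doubles as the request's instruction sequence
    // number, so the response can be matched back to its graph node via
    // getReqInstSeqNum() in completeMemAccess().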
    req->setPC(node_ptr->pc);
    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
608 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
609 node_ptr->seqNum);
610 return nullptr;
611 }
612
613 // Check if the request spans two cache lines as this condition triggers
614 // an assert fail in the L1 cache. If it does then truncate the size to
615 // access only until the end of that line and ignore the remainder. The
616 // stat counting this is useful to keep a check on how frequently this
617 // happens. If required the code could be revised to mimick splitting such
618 // a request into two.
619 unsigned blk_size = owner.cacheLineSize();
620 Addr blk_offset = (node_ptr->addr & (Addr)(blk_size - 1));
621 if (!(blk_offset + node_ptr->size <= blk_size)) {
622 node_ptr->size = blk_size - blk_offset;
623 ++numSplitReqs;
624 }
625
626 // Create a request and the packet containing request
627 Request* req = new Request(node_ptr->addr, node_ptr->size, node_ptr->flags,
628 masterID, node_ptr->seqNum,
629 ContextID(0), ThreadID(0));
630 req->setPC(node_ptr->pc);
631 PacketPtr pkt;
632 uint8_t* pkt_data = new uint8_t[req->getSize()];
633 if (node_ptr->isLoad) {
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
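    // 'first' is true on the initial issue attempt for a node. If that
    // attempt fails for lack of resources, the node is queued in depFreeQueue
    // and retried from execute() with first set to false.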
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
667 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
                node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // Only if resources are free, add the node to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli."
                 " Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;

    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to
        // schedule an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have
        // execute tick greater than or equal to curTick. But a new dep-free
        // node might have its execute tick earlier. Therefore, attempt to
        // reschedule. It could happen that the readyList is empty and we got
        // here via a last remaining response. So, either the trace is
        // complete or there are pending nodes in the depFreeQueue. The
        // checking is done in the execute() control flow, so schedule an
        // event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
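    // The list is kept sorted by execTick, with seqNum breaking ties in
    // ascending order. For example, inserting (seq 7, tick 100) into
    // [(5, 100), (6, 200)] yields [(5, 100), (7, 100), (6, 200)].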
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                            maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one whose send failed and is
    // awaiting a retry, skip past it so that it keeps its position at the
    // head of the list.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node then
        // go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                        maxReadyListSize.value());
}

void
TraceCPU::ElasticDataGen::printReadyList() {

    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
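        // M5_VAR_USED marks node_ptr as used so that builds which compile
        // out tracing do not emit an unused-variable warning.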
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
668 node_ptr->robNum);
669 }
670
671 // Check if resources are available to issue the specific node
672 if (hwResource.isAvailable(node_ptr)) {
673 // If resources are free only then add to readyList
674 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
675 " to readyList, occupying resources.\n", node_ptr->seqNum);
676 // Compute the execute tick by adding the compute delay for the node
677 // and add the ready node to the ready list
678 addToSortedReadyList(node_ptr->seqNum,
679 owner.clockEdge() + node_ptr->compDelay);
680 // Account for the resources taken up by this issued node.
681 hwResource.occupy(node_ptr);
682 return true;
683
684 } else {
685 if (first) {
686 // Although dependencies are complete, resources are not available.
687 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
688 " Adding to depFreeQueue.\n", node_ptr->seqNum);
689 depFreeQueue.push(node_ptr);
690 } else {
691 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
692 "Still pending issue.\n", node_ptr->seqNum);
693 }
694 return false;
695 }
696}
697
698void
699TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
700{
701 // Release the resources for this completed node.
702 if (pkt->isWrite()) {
703 // Consider store complete.
704 hwResource.releaseStoreBuffer();
705 // If it is a store response then do nothing since we do not model
706 // dependencies on store completion in the trace. But if we were
707 // blocking execution due to store buffer fullness, we need to schedule
708 // an event and attempt to progress.
709 } else {
710 // If it is a load response then release the dependents waiting on it.
711 // Get pointer to the completed load
712 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
713 assert(graph_itr != depGraph.end());
714 GraphNode* node_ptr = graph_itr->second;
715
716 // Release resources occupied by the load
717 hwResource.release(node_ptr);
718
719 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
720 " dependents..\n", node_ptr->seqNum);
721
722 for (auto child : node_ptr->dependents) {
723 if (child->removeDepOnInst(node_ptr->seqNum)) {
724 checkAndIssue(child);
725 }
726 }
727
728 // clear the dynamically allocated set of dependents
729 (node_ptr->dependents).clear();
730 // delete node
731 delete node_ptr;
732 // remove from graph
733 depGraph.erase(graph_itr);
734 }
735
736 if (DTRACE(TraceCPUData)) {
737 printReadyList();
738 }
739
740 // If the size of the dependency graph is less than the dependency window
741 // then read from the trace file to populate the graph next time we are in
742 // execute.
743 if (depGraph.size() < windowSize && !traceComplete)
744 nextRead = true;
745
746 // If not waiting for retry, attempt to schedule next event
747 if (!retryPkt) {
748 // We might have new dep-free nodes in the list which will have execute
749 // tick greater than or equal to curTick. But a new dep-free node might
750 // have its execute tick earlier. Therefore, attempt to reschedule. It
751 // could happen that the readyList is empty and we got here via a
752 // last remaining response. So, either the trace is complete or there
753 // are pending nodes in the depFreeQueue. The checking is done in the
754 // execute() control flow, so schedule an event to go via that flow.
755 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
756 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
757 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
758 next_event_tick);
759 owner.schedDcacheNextEvent(next_event_tick);
760 }
761}
762
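// Worked example of the rescheduling arithmetic above, with hypothetical
// numbers: assume a 500-tick clock period and curTick() == 1000, so
// owner.clockEdge(Cycles(1)) == 1500. If the earliest ready node has
// execTick == 1200, then
//
//     next_event_tick = std::max(1200, 1500) == 1500
//
// and an empty readyList also yields 1500: execute() is re-entered no
// earlier than the next clock edge in either case.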
763void
764TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
765 Tick exec_tick)
766{
767 ReadyNode ready_node;
768 ready_node.seqNum = seq_num;
769 ready_node.execTick = exec_tick;
770
771 // Iterator to readyList
772 auto itr = readyList.begin();
773
774 // If the readyList is empty, simply insert the new node at the beginning
775 // and return
776 if (itr == readyList.end()) {
777 readyList.insert(itr, ready_node);
778 maxReadyListSize = std::max<double>(readyList.size(),
779 maxReadyListSize.value());
780 return;
781 }
782
783 // If the first node in the list is the one held in retryPkt, i.e. it
784 // failed to execute, skip past it so that it keeps its position at the
785 // head of the list.
786 if (retryPkt)
787 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
788 itr++;
789
790 // Increment the iterator and compare the node pointed to by it to the new
791 // node till the position to insert the new node is found.
792 bool found = false;
793 while (!found && itr != readyList.end()) {
794 // If the execution tick of the new node is less than the node then
795 // this is the position to insert
796 if (exec_tick < itr->execTick)
797 found = true;
798 // If the execution tick of the new node is equal to the node then
799 // sort in ascending order of sequence numbers
800 else if (exec_tick == itr->execTick) {
801 // If the sequence number of the new node is less than the node
802 // then this is the position to insert
803 if (seq_num < itr->seqNum)
804 found = true;
805 // Else go to next node
806 else
807 itr++;
808 }
809 // If the execution tick of the new node is greater than the node then
810 // go to the next node
811 else
812 itr++;
813 }
814 readyList.insert(itr, ready_node);
815 // Update the stat for max size reached of the readyList
816 maxReadyListSize = std::max<double>(readyList.size(),
817 maxReadyListSize.value());
818}
819
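// Illustrative ordering, with hypothetical values: inserting nodes with
// (seqNum, execTick) pairs (7, 1200), (5, 1000) and (6, 1000) in any
// order yields a readyList sorted primarily by execTick and secondarily
// by seqNum:
//
//     (5, 1000), (6, 1000), (7, 1200)
//
// with the one exception that a head node held in retryPkt keeps its
// place at the front.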
820void
821TraceCPU::ElasticDataGen::printReadyList() {
822
823 auto itr = readyList.begin();
824 if (itr == readyList.end()) {
825 DPRINTF(TraceCPUData, "readyList is empty.\n");
826 return;
827 }
828 DPRINTF(TraceCPUData, "Printing readyList:\n");
829 while (itr != readyList.end()) {
830 auto graph_itr = depGraph.find(itr->seqNum);
831 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
832 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
833 node_ptr->typeToStr(), itr->execTick);
834 itr++;
835 }
836 }
837
838TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
839 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
840 : sizeROB(max_rob),
841 sizeStoreBuffer(max_stores),
842 sizeLoadBuffer(max_loads),
843 oldestInFlightRobNum(UINT64_MAX),
844 numInFlightLoads(0),
845 numInFlightStores(0)
846{}
847
848void
849TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
850{
851 // Occupy ROB entry for the issued node
852 // Merely maintain the oldest node, i.e. the numerically least robNum,
853 // by saving it in the variable oldestInFlightRobNum.
854 inFlightNodes[new_node->seqNum] = new_node->robNum;
855 oldestInFlightRobNum = inFlightNodes.begin()->second;
856
857 // Occupy Load/Store Buffer entry for the issued node if applicable
858 if (new_node->isLoad()) {
859 ++numInFlightLoads;
860 } else if (new_node->isStore()) {
861 ++numInFlightStores;
862 } // else it is neither a load nor a store, so no buffer entry is occupied
863
864 printOccupancy();
865 }
866
867void
868TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
869{
870 assert(!inFlightNodes.empty());
871 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
872 done_node->seqNum);
873
874 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
875 inFlightNodes.erase(done_node->seqNum);
876
877 if (inFlightNodes.empty()) {
878 // If we deleted the only in-flight node, reset oldestInFlightRobNum
879 // to its initial (max) value.
880 oldestInFlightRobNum = UINT64_MAX;
881 } else {
882 // Set the oldest in-flight node rob number equal to the first node in
883 // the inFlightNodes since that will have the numerically least value.
884 oldestInFlightRobNum = inFlightNodes.begin()->second;
885 }
886
887 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
888 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
889 oldestInFlightRobNum);
890
891 // A store is considered complete when a request is sent, thus ROB entry is
892 // freed. But it occupies an entry in the Store Buffer until its response
893 // is received. A load is considered complete when a response is received,
894 // thus both ROB and Load Buffer entries can be released.
895 if (done_node->isLoad()) {
896 assert(numInFlightLoads != 0);
897 --numInFlightLoads;
898 }
899 // For normal writes, we send the requests out and clear a store buffer
900 // entry on response. For writes which are strictly ordered, e.g. writes
901 // to device registers, we do that within release(), which is called
902 // when the node is executed and taken off the readyList.
903 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
904 releaseStoreBuffer();
905 }
906}
907
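// Illustrative timeline for the comment above (editorial): a normal store
// frees its ROB entry here when its request is sent, and its store buffer
// entry later in releaseStoreBuffer() when the response returns; a
// strictly ordered store frees both from release(). A load frees its ROB
// and load buffer entries together once its response arrives, via
// completeMemAccess().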
908void
909TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
910{
911 assert(numInFlightStores != 0);
912 --numInFlightStores;
913}
914
915bool
916TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
917 const GraphNode* new_node) const
918{
919 uint16_t num_in_flight_nodes;
920 if (inFlightNodes.empty()) {
921 num_in_flight_nodes = 0;
922 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
923 " #in-flight nodes = 0", new_node->seqNum);
924 } else if (new_node->robNum > oldestInFlightRobNum) {
925 // This is the intuitive case where the new dep-free node is a younger
926 // instruction than the oldest instruction in flight. Thus we make sure
927 // num_in_flight_nodes does not wrap around on the subtraction.
928 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
929 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
930 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
931 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
932 } else {
933 // This is the case where an instruction older than the oldest in-
934 // flight instruction becomes dep-free. Thus we must have already
935 // accounted for the entry in ROB for this new dep-free node.
936 // Immediately after this check returns true, oldestInFlightRobNum will
937 // be updated in occupy(). We simply let this node issue now.
938 num_in_flight_nodes = 0;
939 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
940 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
941 new_node->seqNum, new_node->robNum);
942 }
943 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
944 numInFlightLoads, sizeLoadBuffer,
945 numInFlightStores, sizeStoreBuffer);
946 // Check if resources are available to issue the specific node
947 if (num_in_flight_nodes >= sizeROB) {
948 return false;
949 }
950 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
951 return false;
952 }
953 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
954 return false;
955 }
956 return true;
957 }
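// Worked example of the ROB occupancy check, with hypothetical numbers:
// with sizeROB == 16 and oldestInFlightRobNum == 40, a new node with
// robNum == 58 gives
//
//     num_in_flight_nodes = 58 - 40 == 18 >= 16
//
// so it cannot issue yet, whereas robNum == 50 gives 10 and only the
// load/store buffer checks above remain.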
958
959bool
960TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
961 // Return true if there is at least one read or write request in flight
962 return (numInFlightStores != 0 || numInFlightLoads != 0);
963}
964
965void
966TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
967 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
968 "LQ = %d/%d, SQ = %d/%d.\n",
969 oldestInFlightRobNum,
970 numInFlightLoads, sizeLoadBuffer,
971 numInFlightStores, sizeStoreBuffer);
972}
973
974void
975TraceCPU::FixedRetryGen::regStats()
976{
977 using namespace Stats;
978
979 numSendAttempted
980 .name(name() + ".numSendAttempted")
981 .desc("Number of first attempts to send a request")
982 ;
983
984 numSendSucceeded
985 .name(name() + ".numSendSucceeded")
986 .desc("Number of successful first attempts")
987 ;
988
989 numSendFailed
990 .name(name() + ".numSendFailed")
991 .desc("Number of failed first attempts")
992 ;
993
994 numRetrySucceeded
995 .name(name() + ".numRetrySucceeded")
996 .desc("Number of successful retries")
997 ;
998
999 instLastTick
1000 .name(name() + ".instLastTick")
1001 .desc("Last tick simulated from the fixed inst trace")
1002 ;
1003}
1004
1005Tick
1006TraceCPU::FixedRetryGen::init()
1007{
1008 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1009 " IcacheGen: fixed issue with retry.\n");
1010
1011 if (nextExecute()) {
1012 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1013 return currElement.tick;
1014 } else {
1015 panic("Read of first message in the trace failed.\n");
1016 return MaxTick;
1017 }
1018}
1019
1020bool
1021TraceCPU::FixedRetryGen::tryNext()
1022{
1023 // If there is a retry packet, try to send it
1024 if (retryPkt) {
1025
1026 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1027
1028 if (!port.sendTimingReq(retryPkt)) {
1029 // Still blocked! This should never occur.
1030 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1031 return false;
1032 }
1033 ++numRetrySucceeded;
1034 } else {
1035
1036 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1037
1038 // try sending current element
1039 assert(currElement.isValid());
1040
1041 ++numSendAttempted;
1042
1043 if (!send(currElement.addr, currElement.blocksize,
1044 currElement.cmd, currElement.flags, currElement.pc)) {
1045 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1046 ++numSendFailed;
1047 // return false to indicate not to schedule next event
1048 return false;
1049 } else {
1050 ++numSendSucceeded;
1051 }
1052 }
1053 // The packet, whether retryPkt or currElement, was sent successfully.
1054 // Return true so that the next event is scheduled at the current tick
1055 // plus delta; if there is no next element to send, return false instead.
1056 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1057 "element.\n");
1058 retryPkt = nullptr;
1059 // Read next element into currElement, currElement gets cleared so save the
1060 // tick to calculate delta
1061 Tick last_tick = currElement.tick;
1062 if (nextExecute()) {
1063 assert(currElement.tick >= last_tick);
1064 delta = currElement.tick - last_tick;
1065 }
1066 return !traceComplete;
1067}
1068
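// Sketch of the delta computation above, with hypothetical ticks: if the
// element just sent had tick 1000 and nextExecute() reads an element with
// tick 1600, then
//
//     delta = 1600 - 1000 == 600
//
// and the owner schedules the next icacheNextEvent 600 ticks ahead,
// preserving the fetch spacing recorded in the trace.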
1069void
1070TraceCPU::FixedRetryGen::exit()
1071{
1072 trace.reset();
1073}
1074
1075bool
1076TraceCPU::FixedRetryGen::nextExecute()
1077{
1078 if (traceComplete)
1079 // We are at the end of the file, thus we have no more messages.
1080 // Return false.
1081 return false;
1082
1083
1084 // Reset the currElement to the default values
1085 currElement.clear();
1086
1087 // Read the next message from the trace. If that fails, the end of the
1088 // trace has been reached and traceComplete needs to be set in addition
1089 // to returning false. If successful, the next message is in currElement.
1090 if (!trace.read(&currElement)) {
1091 traceComplete = true;
1092 instLastTick = curTick();
1093 return false;
1094 }
1095
1096 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1097 currElement.cmd.isRead() ? 'r' : 'w',
1098 currElement.addr,
1099 currElement.pc,
1100 currElement.blocksize,
1101 currElement.tick);
1102
1103 return true;
1104}
1105
1106bool
1107TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1108 Request::FlagsType flags, Addr pc)
1109{
1110
1111 // Create new request
1112 Request* req = new Request(addr, size, flags, masterID);
1113 req->setPC(pc);
1114
1115 // If this is not done, it triggers an assert in the L1 cache for an invalid contextId
1116 req->setThreadContext(ContextID(0), ThreadID(0));
1117
1118 // Embed it in a packet
1119 PacketPtr pkt = new Packet(req, cmd);
1120
1121 uint8_t* pkt_data = new uint8_t[req->getSize()];
1122 pkt->dataDynamic(pkt_data);
1123
1124 if (cmd.isWrite()) {
1125 memset(pkt_data, 0xA, req->getSize());
1126 }
1127
1128 // Call MasterPort method to send a timing request for this packet
1129 bool success = port.sendTimingReq(pkt);
1130 if (!success) {
1131 // If it fails, save the packet to retry when a retry is signalled by
1132 // the cache
1133 retryPkt = pkt;
1134 }
1135 return success;
1136}
1137
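// Illustrative call, mirroring tryNext() above: send the current trace
// element and fall back to the retry path on failure:
//
//     if (!send(currElement.addr, currElement.blocksize, currElement.cmd,
//               currElement.flags, currElement.pc)) {
//         // retryPkt now holds the packet; wait for recvReqRetry().
//     }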
1138void
1139TraceCPU::icacheRetryRecvd()
1140{
1141 // Schedule an event to go through the control flow in the same tick as
1142 // retry is received
1143 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1144 " event @%lli.\n", curTick());
1145 schedule(icacheNextEvent, curTick());
1146}
1147
1148void
1149TraceCPU::dcacheRetryRecvd()
1150{
1151 // Schedule an event to go through the execute flow in the same tick as
1152 // retry is received
1153 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1154 " event @%lli.\n", curTick());
1155 schedule(dcacheNextEvent, curTick());
1156}
1157
1158void
1159TraceCPU::schedDcacheNextEvent(Tick when)
1160{
1161 if (!dcacheNextEvent.scheduled()) {
1162 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1163 when);
1164 schedule(dcacheNextEvent, when);
1165 ++numSchedDcacheEvent;
1166 } else if (when < dcacheNextEvent.when()) {
1167 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1168 " to %lli.\n", dcacheNextEvent.when(), when);
1169 reschedule(dcacheNextEvent, when);
1170 }
1171
1172}
1173
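// Worked example, with hypothetical ticks: if dcacheNextEvent is already
// scheduled at tick 2000, schedDcacheNextEvent(1500) pulls it back to
// 1500, while schedDcacheNextEvent(2500) leaves the earlier event alone.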
1174bool
1175TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1176{
1177 // All responses on the instruction fetch side are ignored. Simply delete
1178 // the request and packet to free allocated memory
1179 delete pkt->req;
1180 delete pkt;
1181
1182 return true;
1183}
1184
1185void
1186TraceCPU::IcachePort::recvReqRetry()
1187{
1188 owner->icacheRetryRecvd();
1189}
1190
1191void
1192TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1193{
1194 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1195 dcacheGen.completeMemAccess(pkt);
1196}
1197
1198bool
1199TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1200{
1201 // Handle the responses for data memory requests, which is done inside
1202 // the elastic data generator
1203 owner->dcacheRecvTimingResp(pkt);
1204 // After processing the response delete the request and packet to free
1205 // memory
1206 delete pkt->req;
1207 delete pkt;
1208
1209 return true;
1210}
1211
1212void
1213TraceCPU::DcachePort::recvReqRetry()
1214{
1215 owner->dcacheRetryRecvd();
1216}
1217
1218TraceCPU::ElasticDataGen::InputStream::InputStream(const std::string& filename)
1219 : trace(filename),
1220 microOpCount(0)
1221{
1222 // Create a protobuf message for the header and read it from the stream
1223 ProtoMessage::InstDepRecordHeader header_msg;
1224 if (!trace.read(header_msg)) {
1225 panic("Failed to read packet header from %s\n", filename);
1226 }
1227
1228 if (header_msg.tick_freq() != SimClock::Frequency) {
1229 panic("Trace %s was recorded with a different tick frequency %d\n",
1230 filename, header_msg.tick_freq());
1231 }
1232
1233 // Assign window size equal to the field in the trace that was recorded
1234 // when the data dependency trace was captured in the o3cpu model
1235 windowSize = header_msg.window_size();
1236}
1237
1238void
1239TraceCPU::ElasticDataGen::InputStream::reset()
1240{
1241 trace.reset();
1242}
1243
1244bool
1245TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1246{
1247 ProtoMessage::InstDepRecord pkt_msg;
1248 if (trace.read(pkt_msg)) {
1249 // Required fields
1250 element->seqNum = pkt_msg.seq_num();
1251 element->type = pkt_msg.type();
1252 element->compDelay = pkt_msg.comp_delay();
1253
1254 // Repeated field robDepList
1255 element->clearRobDep();
1256 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1257 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1258 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1259 element->numRobDep += 1;
1260 }
1261
1262 // Repeated field
1263 element->clearRegDep();
1264 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1265 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1266 // An instruction may have both a register and an order dependency
1267 // on the same instruction. In such a case, the register dependency
1268 // is omitted.
1269 bool duplicate = false;
1270 for (int j = 0; j < element->numRobDep; j++) {
1271 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1272 }
1273 if (!duplicate) {
1274 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1275 element->numRegDep += 1;
1276 }
1277 }
1278
1279 // Optional fields
1280 if (pkt_msg.has_addr())
1281 element->addr = pkt_msg.addr();
1282 else
1283 element->addr = 0;
1284
1285 if (pkt_msg.has_size())
1286 element->size = pkt_msg.size();
1287 else
1288 element->size = 0;
1289
1290 if (pkt_msg.has_flags())
1291 element->flags = pkt_msg.flags();
1292 else
1293 element->flags = 0;
1294
1295 if (pkt_msg.has_pc())
1296 element->pc = pkt_msg.pc();
1297 else
1298 element->pc = 0;
1299
1300 // ROB occupancy number
1301 ++microOpCount;
1302 if (pkt_msg.has_weight()) {
1303 microOpCount += pkt_msg.weight();
1304 }
1305 element->robNum = microOpCount;
1306 return true;
1307 }
1308
1309 // We have reached the end of the file
1310 return false;
1311}
1312
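// Worked example of the duplicate elimination above, with hypothetical
// sequence numbers: a record with rob_dep == {5} and reg_dep == {5, 3}
// produces
//
//     element->robDep == {5} and element->regDep == {3}
//
// because the register dependency on 5 duplicates the order dependency
// and is omitted.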
1313bool
1314TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1315{
1316 for (auto& own_reg_dep : regDep) {
1317 if (own_reg_dep == reg_dep) {
1318 // If register dependency is found, make it zero and return true
1319 own_reg_dep = 0;
1320 --numRegDep;
1321 assert(numRegDep >= 0);
1322 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1323 "done.\n", seqNum, reg_dep);
1324 return true;
1325 }
1326 }
1327
1328 // Return false if the dependency is not found
1329 return false;
1330}
1331
1332bool
1333TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1334{
1335 for (auto& own_rob_dep : robDep) {
1336 if (own_rob_dep == rob_dep) {
1337 // If the rob dependency is found, make it zero and return true
1338 own_rob_dep = 0;
1339 --numRobDep;
1340 assert(numRobDep >= 0);
1341 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1342 "done.\n", seqNum, rob_dep);
1343 return true;
1344 }
1345 }
1346 return false;
1347}
1348
1349void
1350TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1351 for (auto& own_reg_dep : regDep) {
1352 own_reg_dep = 0;
1353 }
1354 numRegDep = 0;
1355}
1356
1357void
1358TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1359 for (auto& own_rob_dep : robDep) {
1360 own_rob_dep = 0;
1361 }
1362 numRobDep = 0;
1363}
1364
1365bool
1366TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1367{
1368 // If it is a ROB dependency then remove it
1369 if (!removeRobDep(done_seq_num)) {
1370 // If it is not a ROB dependency then it must be a register
1371 // dependency. If the register dependency is not found, it violates
1372 // an assumption and must be caught by the assert.
1373 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1374 assert(regdep_found);
1375 }
1376 // Return true if the node is dependency free
1377 return (numRobDep == 0 && numRegDep == 0);
1378}
1379
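// Illustrative sketch, with hypothetical values: for a node with
// robDep == {10} and regDep == {8},
//
//     removeDepOnInst(10);  // clears the ROB dependency, returns false
//     removeDepOnInst(8);   // clears the register dependency, returns
//                           // true: the node is now dependency-free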
1380void
1381TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1382{
1383 DPRINTFR(TraceCPUData, "%lli", seqNum);
1387 DPRINTFR(TraceCPUData, ",%s", (isLoad ? "True" : "False"));
1388 DPRINTFR(TraceCPUData, ",%s", (isStore ? "True" : "False"));
1389 if (isLoad || isStore) {
1384 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1385 if (isLoad() || isStore()) {
1390 DPRINTFR(TraceCPUData, ",%i", addr);
1391 DPRINTFR(TraceCPUData, ",%i", size);
1392 DPRINTFR(TraceCPUData, ",%i", flags);
1393 }
1394 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1395 int i = 0;
1396 DPRINTFR(TraceCPUData, "robDep:");
1397 while (robDep[i] != 0) {
1398 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1399 i++;
1400 }
1401 i = 0;
1402 DPRINTFR(TraceCPUData, "regDep:");
1403 while (regDep[i] != 0) {
1404 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1405 i++;
1406 }
1407 auto child_itr = dependents.begin();
1408 DPRINTFR(TraceCPUData, "dependents:");
1409 while (child_itr != dependents.end()) {
1410 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1411 child_itr++;
1412 }
1413
1414 DPRINTFR(TraceCPUData, "\n");
1415}
1416
1386 DPRINTFR(TraceCPUData, ",%i", addr);
1387 DPRINTFR(TraceCPUData, ",%i", size);
1388 DPRINTFR(TraceCPUData, ",%i", flags);
1389 }
1390 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1391 int i = 0;
1392 DPRINTFR(TraceCPUData, "robDep:");
1393 while (robDep[i] != 0) {
1394 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1395 i++;
1396 }
1397 i = 0;
1398 DPRINTFR(TraceCPUData, "regDep:");
1399 while (regDep[i] != 0) {
1400 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1401 i++;
1402 }
1403 auto child_itr = dependents.begin();
1404 DPRINTFR(TraceCPUData, "dependents:");
1405 while (child_itr != dependents.end()) {
1406 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1407 child_itr++;
1408 }
1409
1410 DPRINTFR(TraceCPUData, "\n");
1411}
1412
1413std::string
1414TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1415{
1416 return Record::RecordType_Name(type);
1417}
1418
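// Illustrative output (assuming the protobuf-generated RecordType_Name
// helper): a load node would print as a string such as "LOAD" in the
// readyList and trace dumps above, replacing the old single-letter
// L/S/C encoding.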
1419TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1420 : trace(filename)
1421{
1422 // Create a protobuf message for the header and read it from the stream
1423 ProtoMessage::PacketHeader header_msg;
1424 if (!trace.read(header_msg)) {
1425 panic("Failed to read packet header from %s\n", filename);
1426 }
1427
1428 if (header_msg.tick_freq() != SimClock::Frequency) {
1429 panic("Trace %s was recorded with a different tick frequency %d\n",
1430 filename, header_msg.tick_freq());
1431 }
1432}
1433
1434void
1435TraceCPU::FixedRetryGen::InputStream::reset()
1436{
1437 trace.reset();
1438}
1439
1440bool
1441TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1442{
1443 ProtoMessage::Packet pkt_msg;
1444 if (trace.read(pkt_msg)) {
1445 element->cmd = pkt_msg.cmd();
1446 element->addr = pkt_msg.addr();
1447 element->blocksize = pkt_msg.size();
1448 element->tick = pkt_msg.tick();
1449 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1450 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1451 return true;
1452 }
1453
1454 // We have reached the end of the file
1455 return false;
1456}
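// Illustrative read loop (editorial sketch): drain a fixed-timing trace
// element by element until the end of the file, assuming a hypothetical
// InputStream instance named input:
//
//     TraceCPU::FixedRetryGen::TraceElement elem;
//     while (input.read(&elem)) {
//         // consume elem.cmd, elem.addr, elem.tick, ...
//     }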