trace_cpu.cc: comparison of revisions 12085:de78ea63e0ca and 12680:91f4d6668b4f
1/*
2 * Copyright (c) 2013 - 2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 * Andreas Hansson
39 * Thomas Grass
40 */
41
42#include "cpu/trace/trace_cpu.hh"
43
44#include "sim/sim_exit.hh"
45
46// Declare and initialize the static counter for number of trace CPUs.
47int TraceCPU::numTraceCPUs = 0;
48
49TraceCPU::TraceCPU(TraceCPUParams *params)
50 : BaseCPU(params),
51 icachePort(this),
52 dcachePort(this),
53 instMasterID(params->system->getMasterId(this, "inst")),
54 dataMasterID(params->system->getMasterId(this, "data")),
55 instTraceFile(params->instTraceFile),
56 dataTraceFile(params->dataTraceFile),
57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59 params),
60 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
61 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
62 oneTraceComplete(false),
63 traceOffset(0),
64 execCompleteEvent(nullptr),
65 enableEarlyExit(params->enableEarlyExit),
66 progressMsgInterval(params->progressMsgInterval),
67 progressMsgThreshold(params->progressMsgInterval)
68{
69 // Increment static counter for number of Trace CPUs.
70 ++TraceCPU::numTraceCPUs;
71
72 // Check that the python parameters for sizes of ROB, store buffer and
73 // load buffer do not overflow the corresponding C++ variables.
74 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
75 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
 76    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
 77             "to %d exceeds the max. value of %d.\n",
 78             params->sizeStoreBuffer, UINT16_MAX);
79 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
80 " %d exceeds the max. value of %d.\n",
81 params->sizeLoadBuffer, UINT16_MAX);
82}
83
84TraceCPU::~TraceCPU()
85{
86
87}
88
89TraceCPU*
90TraceCPUParams::create()
91{
92 return new TraceCPU(this);
93}
94
95void
96TraceCPU::updateNumOps(uint64_t rob_num)
97{
98 numOps = rob_num;
99 if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
100 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
101 progressMsgThreshold += progressMsgInterval;
102 }
103}
104
105void
106TraceCPU::takeOverFrom(BaseCPU *oldCPU)
107{
108 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
109 assert(!getInstPort().isConnected());
110 assert(oldCPU->getInstPort().isConnected());
111 BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
112 oldCPU->getInstPort().unbind();
113 getInstPort().bind(inst_peer_port);
114
115 assert(!getDataPort().isConnected());
116 assert(oldCPU->getDataPort().isConnected());
117 BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
118 oldCPU->getDataPort().unbind();
119 getDataPort().bind(data_peer_port);
120}
121
122void
123TraceCPU::init()
124{
125 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
126 "\n", instTraceFile);
127 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
128 dataTraceFile);
129
130 BaseCPU::init();
131
132 // Get the send tick of the first instruction read request
133 Tick first_icache_tick = icacheGen.init();
134
135 // Get the send tick of the first data read/write request
136 Tick first_dcache_tick = dcacheGen.init();
137
138 // Set the trace offset as the minimum of that in both traces
139 traceOffset = std::min(first_icache_tick, first_dcache_tick);
140 inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
141 name(), traceOffset);
142
143 // Schedule next icache and dcache event by subtracting the offset
144 schedule(icacheNextEvent, first_icache_tick - traceOffset);
145 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
146
147 // Adjust the trace offset for the dcache generator's ready nodes
148 // We don't need to do this for the icache generator as it will
149 // send its first request at the first event and schedule subsequent
150 // events using a relative tick delta
151 dcacheGen.adjustInitTraceOffset(traceOffset);
152
153 // If the Trace CPU simulation is configured to exit on any one trace
154 // completion then we don't need a counted event to count down all Trace
155 // CPUs in the system. If not then instantiate a counted event.
156 if (!enableEarlyExit) {
157 // The static counter for number of Trace CPUs is correctly set at
158 // this point so create an event and pass it.
159 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
160 numTraceCPUs);
161 }
162
163}
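
// A worked example of the offset arithmetic above, using hypothetical first
// ticks (1000000 for the instruction trace, 750000 for the data trace):
//
//     Tick first_icache_tick = 1000000;
//     Tick first_dcache_tick = 750000;
//     traceOffset = std::min(first_icache_tick, first_dcache_tick); // 750000
//     // icacheNextEvent is scheduled at 1000000 - 750000 = 250000
//     // dcacheNextEvent is scheduled at  750000 - 750000 = 0
//
// Only the min-and-subtract arithmetic mirrors the code above; the tick
// values themselves are made up for illustration.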
164
165void
166TraceCPU::schedIcacheNext()
167{
168 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
169
170 // Try to send the current packet or a retry packet if there is one
171 bool sched_next = icacheGen.tryNext();
172 // If packet sent successfully, schedule next event
173 if (sched_next) {
174 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
175 "at %d.\n", curTick() + icacheGen.tickDelta());
176 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
177 ++numSchedIcacheEvent;
178 } else {
179 // check if traceComplete. If not, do nothing because sending failed
180 // and next event will be scheduled via RecvRetry()
181 if (icacheGen.isTraceComplete()) {
 182            // If this is the first trace to complete, set the flag. If it is
 183            // already set, both traces are complete and the simulation can exit.
184 checkAndSchedExitEvent();
185 }
186 }
187 return;
188}
189
190void
191TraceCPU::schedDcacheNext()
192{
193 DPRINTF(TraceCPUData, "DcacheGen event.\n");
194
195 // Update stat for numCycles
196 numCycles = clockEdge() / clockPeriod();
197
198 dcacheGen.execute();
199 if (dcacheGen.isExecComplete()) {
200 checkAndSchedExitEvent();
201 }
202}
203
204void
205TraceCPU::checkAndSchedExitEvent()
206{
207 if (!oneTraceComplete) {
208 oneTraceComplete = true;
209 } else {
210 // Schedule event to indicate execution is complete as both
211 // instruction and data access traces have been played back.
212 inform("%s: Execution complete.\n", name());
213 // If the replay is configured to exit early, that is when any one
214 // execution is complete then exit immediately and return. Otherwise,
215 // schedule the counted exit that counts down completion of each Trace
216 // CPU.
217 if (enableEarlyExit) {
218 exitSimLoop("End of trace reached");
219 } else {
220 schedule(*execCompleteEvent, curTick());
221 }
222 }
223}
224
225void
226TraceCPU::regStats()
227{
228
229 BaseCPU::regStats();
230
231 numSchedDcacheEvent
232 .name(name() + ".numSchedDcacheEvent")
233 .desc("Number of events scheduled to trigger data request generator")
234 ;
235
236 numSchedIcacheEvent
237 .name(name() + ".numSchedIcacheEvent")
238 .desc("Number of events scheduled to trigger instruction request generator")
239 ;
240
241 numOps
242 .name(name() + ".numOps")
243 .desc("Number of micro-ops simulated by the Trace CPU")
244 ;
245
246 cpi
247 .name(name() + ".cpi")
248 .desc("Cycles per micro-op used as a proxy for CPI")
249 .precision(6)
250 ;
251 cpi = numCycles/numOps;
252
253 icacheGen.regStats();
254 dcacheGen.regStats();
255}
256
257void
258TraceCPU::ElasticDataGen::regStats()
259{
260 using namespace Stats;
261
262 maxDependents
263 .name(name() + ".maxDependents")
264 .desc("Max number of dependents observed on a node")
265 ;
266
267 maxReadyListSize
268 .name(name() + ".maxReadyListSize")
269 .desc("Max size of the ready list observed")
270 ;
271
272 numSendAttempted
273 .name(name() + ".numSendAttempted")
274 .desc("Number of first attempts to send a request")
275 ;
276
277 numSendSucceeded
278 .name(name() + ".numSendSucceeded")
279 .desc("Number of successful first attempts")
280 ;
281
282 numSendFailed
283 .name(name() + ".numSendFailed")
284 .desc("Number of failed first attempts")
285 ;
286
287 numRetrySucceeded
288 .name(name() + ".numRetrySucceeded")
289 .desc("Number of successful retries")
290 ;
291
292 numSplitReqs
293 .name(name() + ".numSplitReqs")
294 .desc("Number of split requests")
295 ;
296
297 numSOLoads
298 .name(name() + ".numSOLoads")
299 .desc("Number of strictly ordered loads")
300 ;
301
302 numSOStores
303 .name(name() + ".numSOStores")
304 .desc("Number of strictly ordered stores")
305 ;
306
307 dataLastTick
308 .name(name() + ".dataLastTick")
309 .desc("Last tick simulated from the elastic data trace")
310 ;
311}
312
313Tick
314TraceCPU::ElasticDataGen::init()
315{
316 DPRINTF(TraceCPUData, "Initializing data memory request generator "
317 "DcacheGen: elastic issue with retry.\n");
318
319 if (!readNextWindow())
320 panic("Trace has %d elements. It must have at least %d elements.\n",
321 depGraph.size(), 2 * windowSize);
322 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
323 depGraph.size());
324
325 if (!readNextWindow())
326 panic("Trace has %d elements. It must have at least %d elements.\n",
327 depGraph.size(), 2 * windowSize);
328 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
329 depGraph.size());
330
331 // Print readyList
332 if (DTRACE(TraceCPUData)) {
333 printReadyList();
334 }
335 auto free_itr = readyList.begin();
336 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
337 " is %d.\n", free_itr->seqNum, free_itr->execTick);
338 // Return the execute tick of the earliest ready node so that an event
339 // can be scheduled to call execute()
340 return (free_itr->execTick);
341}
342
343void
344TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
345 for (auto& free_node : readyList) {
346 free_node.execTick -= offset;
347 }
348}
349
350void
351TraceCPU::ElasticDataGen::exit()
352{
353 trace.reset();
354}
355
356bool
357TraceCPU::ElasticDataGen::readNextWindow()
358{
359
360 // Read and add next window
361 DPRINTF(TraceCPUData, "Reading next window from file.\n");
362
363 if (traceComplete) {
364 // We are at the end of the file, thus we have no more records.
365 // Return false.
366 return false;
367 }
368
369 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
370 depGraph.size());
371
372 uint32_t num_read = 0;
373 while (num_read != windowSize) {
374
375 // Create a new graph node
376 GraphNode* new_node = new GraphNode;
377
378 // Read the next line to get the next record. If that fails then end of
379 // trace has been reached and traceComplete needs to be set in addition
380 // to returning false.
381 if (!trace.read(new_node)) {
382 DPRINTF(TraceCPUData, "\tTrace complete!\n");
383 traceComplete = true;
384 return false;
385 }
386
387 // Annotate the ROB dependencies of the new node onto the parent nodes.
388 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
389 // Annotate the register dependencies of the new node onto the parent
390 // nodes.
391 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
392
393 num_read++;
394 // Add to map
395 depGraph[new_node->seqNum] = new_node;
396 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
397 // Source dependencies are already complete, check if resources
398 // are available and issue. The execution time is approximated
399 // to current time plus the computational delay.
400 checkAndIssue(new_node);
401 }
402 }
403
404 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
405 depGraph.size());
406 return true;
407}
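
// A sketch of the windowed reading, assuming a hypothetical windowSize of 3
// (the real value comes from the trace header read in
// ElasticDataGen::InputStream::InputStream()):
//
//     init()            : two calls to readNextWindow() load seq. nums 1..6
//     nodes retire      : depGraph shrinks below windowSize (e.g. 2 nodes left)
//     next execute()    : nextRead was set, so readNextWindow() appends 7..9
//
// Keeping roughly two windows in the graph ensures that the dependents of
// every dependency-free node are already present when it is issued.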
408
409template<typename T> void
410TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
411 T& dep_array, uint8_t& num_dep)
412{
413 for (auto& a_dep : dep_array) {
414 // The convention is to set the dependencies starting with the first
415 // index in the ROB and register dependency arrays. Thus, when we reach
 416    // a dependency equal to the initialisation value of zero, we know we
 417    // have iterated over all dependencies and can break.
418 if (a_dep == 0)
419 break;
420 // We look up the valid dependency, i.e. the parent of this node
421 auto parent_itr = depGraph.find(a_dep);
422 if (parent_itr != depGraph.end()) {
423 // If the parent is found, it is yet to be executed. Append a
424 // pointer to the new node to the dependents list of the parent
425 // node.
426 parent_itr->second->dependents.push_back(new_node);
427 auto num_depts = parent_itr->second->dependents.size();
428 maxDependents = std::max<double>(num_depts, maxDependents.value());
429 } else {
430 // The dependency is not found in the graph. So consider
431 // the execution of the parent is complete, i.e. remove this
432 // dependency.
433 a_dep = 0;
434 num_dep--;
435 }
436 }
437}
438
439void
440TraceCPU::ElasticDataGen::execute()
441{
442 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
443 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
444 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
445 depFreeQueue.size());
446 hwResource.printOccupancy();
447
448 // Read next window to make sure that dependents of all dep-free nodes
449 // are in the depGraph
450 if (nextRead) {
451 readNextWindow();
452 nextRead = false;
453 }
454
455 // First attempt to issue the pending dependency-free nodes held
456 // in depFreeQueue. If resources have become available for a node,
457 // then issue it, i.e. add the node to readyList.
458 while (!depFreeQueue.empty()) {
459 if (checkAndIssue(depFreeQueue.front(), false)) {
460 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
461 "%lli.\n", (depFreeQueue.front())->seqNum);
462 depFreeQueue.pop();
463 } else {
464 break;
465 }
466 }
467 // Proceed to execute from readyList
468 auto graph_itr = depGraph.begin();
469 auto free_itr = readyList.begin();
470 // Iterate through readyList until the next free node has its execute
471 // tick later than curTick or the end of readyList is reached
 472    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
473
474 // Get pointer to the node to be executed
475 graph_itr = depGraph.find(free_itr->seqNum);
476 assert(graph_itr != depGraph.end());
477 GraphNode* node_ptr = graph_itr->second;
478
479 // If there is a retryPkt send that else execute the load
480 if (retryPkt) {
481 // The retryPkt must be the request that was created by the
482 // first node in the readyList.
483 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
484 panic("Retry packet's seqence number does not match "
485 "the first node in the readyList.\n");
486 }
487 if (port.sendTimingReq(retryPkt)) {
488 ++numRetrySucceeded;
489 retryPkt = nullptr;
490 }
491 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
492 // If there is no retryPkt, attempt to send a memory request in
493 // case of a load or store node. If the send fails, executeMemReq()
494 // returns a packet pointer, which we save in retryPkt. In case of
495 // a comp node we don't do anything and simply continue as if the
 496            // execution of the comp node succeeded.
497 retryPkt = executeMemReq(node_ptr);
498 }
499 // If the retryPkt or a new load/store node failed, we exit from here
500 // as a retry from cache will bring the control to execute(). The
501 // first node in readyList then, will be the failed node.
502 if (retryPkt) {
503 break;
504 }
505
506 // Proceed to remove dependencies for the successfully executed node.
507 // If it is a load which is not strictly ordered and we sent a
508 // request for it successfully, we do not yet mark any register
509 // dependencies complete. But as per dependency modelling we need
510 // to mark ROB dependencies of load and non load/store nodes which
511 // are based on successful sending of the load as complete.
512 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
513 // If execute succeeded mark its dependents as complete
514 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
515 "dependents..\n", node_ptr->seqNum);
516
517 auto child_itr = (node_ptr->dependents).begin();
518 while (child_itr != (node_ptr->dependents).end()) {
519 // ROB dependency of a store on a load must not be removed
520 // after load is sent but after response is received
521 if (!(*child_itr)->isStore() &&
522 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
523
524 // Check if the child node has become dependency free
525 if ((*child_itr)->numRobDep == 0 &&
526 (*child_itr)->numRegDep == 0) {
527
528 // Source dependencies are complete, check if
529 // resources are available and issue
530 checkAndIssue(*child_itr);
531 }
532 // Remove this child for the sent load and point to new
533 // location of the element following the erased element
534 child_itr = node_ptr->dependents.erase(child_itr);
535 } else {
536 // This child is not dependency-free, point to the next
537 // child
538 child_itr++;
539 }
540 }
541 } else {
542 // If it is a strictly ordered load mark its dependents as complete
543 // as we do not send a request for this case. If it is a store or a
544 // comp node we also mark all its dependents complete.
545 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
546 " up dependents..\n", node_ptr->seqNum);
547
548 for (auto child : node_ptr->dependents) {
549 // If the child node is dependency free removeDepOnInst()
550 // returns true.
551 if (child->removeDepOnInst(node_ptr->seqNum)) {
552 // Source dependencies are complete, check if resources
553 // are available and issue
554 checkAndIssue(child);
555 }
556 }
557 }
558
559 // After executing the node, remove from readyList and delete node.
560 readyList.erase(free_itr);
561 // If it is a cacheable load which was sent, don't delete
562 // just yet. Delete it in completeMemAccess() after the
563 // response is received. If it is an strictly ordered
564 // load, it was not sent and all dependencies were simply
565 // marked complete. Thus it is safe to delete it. For
566 // stores and non load/store nodes all dependencies were
567 // marked complete so it is safe to delete it.
568 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
569 // Release all resources occupied by the completed node
570 hwResource.release(node_ptr);
571 // clear the dynamically allocated set of dependents
572 (node_ptr->dependents).clear();
573 // Update the stat for numOps simulated
574 owner.updateNumOps(node_ptr->robNum);
575 // delete node
576 delete node_ptr;
577 // remove from graph
578 depGraph.erase(graph_itr);
579 }
580 // Point to first node to continue to next iteration of while loop
581 free_itr = readyList.begin();
582 } // end of while loop
583
584 // Print readyList, sizes of queues and resource status after updating
585 if (DTRACE(TraceCPUData)) {
586 printReadyList();
587 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
588 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
589 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
590 depFreeQueue.size());
591 hwResource.printOccupancy();
592 }
593
594 if (retryPkt) {
595 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
596 "event from the cache for seq. num %lli.\n",
597 retryPkt->req->getReqInstSeqNum());
598 return;
599 }
600 // If the size of the dependency graph is less than the dependency window
601 // then read from the trace file to populate the graph next time we are in
602 // execute.
603 if (depGraph.size() < windowSize && !traceComplete)
604 nextRead = true;
605
606 // If cache is not blocked, schedule an event for the first execTick in
607 // readyList else retry from cache will schedule the event. If the ready
608 // list is empty then check if the next pending node has resources
609 // available to issue. If yes, then schedule an event for the next cycle.
610 if (!readyList.empty()) {
611 Tick next_event_tick = std::max(readyList.begin()->execTick,
612 curTick());
613 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
614 next_event_tick);
615 owner.schedDcacheNextEvent(next_event_tick);
616 } else if (readyList.empty() && !depFreeQueue.empty() &&
617 hwResource.isAvailable(depFreeQueue.front())) {
618 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
619 owner.clockEdge(Cycles(1)));
620 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
621 }
622
623 // If trace is completely read, readyList is empty and depGraph is empty,
624 // set execComplete to true
625 if (depGraph.empty() && readyList.empty() && traceComplete &&
626 !hwResource.awaitingResponse()) {
627 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
628 execComplete = true;
629 dataLastTick = curTick();
630 }
631}
632
633PacketPtr
634TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
635{
636
637 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
638 "virt addr %d, pc %#x, size %d, flags %d).\n",
639 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
640 node_ptr->pc, node_ptr->size, node_ptr->flags);
641
642 // If the request is strictly ordered, do not send it. Just return nullptr
 643    // as if it was successfully sent.
644 if (node_ptr->isStrictlyOrdered()) {
645 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
646 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
647 node_ptr->seqNum);
648 return nullptr;
649 }
650
651 // Check if the request spans two cache lines as this condition triggers
652 // an assert fail in the L1 cache. If it does then truncate the size to
653 // access only until the end of that line and ignore the remainder. The
654 // stat counting this is useful to keep a check on how frequently this
 655    // happens. If required, the code could be revised to mimic splitting such
656 // a request into two.
657 unsigned blk_size = owner.cacheLineSize();
658 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
659 if (!(blk_offset + node_ptr->size <= blk_size)) {
660 node_ptr->size = blk_size - blk_offset;
661 ++numSplitReqs;
662 }
663
664 // Create a request and the packet containing request
665 Request* req = new Request(node_ptr->physAddr, node_ptr->size,
666 node_ptr->flags, masterID, node_ptr->seqNum,
667 ContextID(0));
668 req->setPC(node_ptr->pc);
669 // If virtual address is valid, set the asid and virtual address fields
670 // of the request.
671 if (node_ptr->virtAddr != 0) {
672 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
673 node_ptr->flags, masterID, node_ptr->pc);
674 req->setPaddr(node_ptr->physAddr);
675 req->setReqInstSeqNum(node_ptr->seqNum);
676 }
677
678 PacketPtr pkt;
679 uint8_t* pkt_data = new uint8_t[req->getSize()];
680 if (node_ptr->isLoad()) {
681 pkt = Packet::createRead(req);
682 } else {
683 pkt = Packet::createWrite(req);
684 memset(pkt_data, 0xA, req->getSize());
685 }
686 pkt->dataDynamic(pkt_data);
687
688 // Call MasterPort method to send a timing request for this packet
689 bool success = port.sendTimingReq(pkt);
690 ++numSendAttempted;
691
692 if (!success) {
693 // If it fails, return the packet to retry when a retry is signalled by
694 // the cache
695 ++numSendFailed;
696 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
697 return pkt;
698 } else {
 699        // If it succeeds, return nullptr
700 ++numSendSucceeded;
701 return nullptr;
702 }
703}
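
// A worked example of the cache-line truncation above, with hypothetical
// values: a 64-byte cache line and a 16-byte access at physical address
// 0x1003C, which would otherwise straddle two lines:
//
//     unsigned blk_size   = 64;
//     Addr     phys_addr  = 0x1003C;
//     unsigned size       = 16;
//     Addr blk_offset = phys_addr & (Addr)(blk_size - 1); // 0x3C == 60
//     if (!(blk_offset + size <= blk_size))                // 60 + 16 > 64
//         size = blk_size - blk_offset;                    // size becomes 4
//
// The remaining 12 bytes are simply dropped rather than sent as a second
// request, and numSplitReqs records how often this truncation occurs.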
704
705bool
706TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
707{
708 // Assert the node is dependency-free
709 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
710
711 // If this is the first attempt, print a debug message to indicate this.
712 if (first) {
713 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
714 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
715 node_ptr->robNum);
716 }
717
718 // Check if resources are available to issue the specific node
719 if (hwResource.isAvailable(node_ptr)) {
720 // If resources are free only then add to readyList
721 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
722 " to readyList, occupying resources.\n", node_ptr->seqNum);
723 // Compute the execute tick by adding the compute delay for the node
724 // and add the ready node to the ready list
725 addToSortedReadyList(node_ptr->seqNum,
726 owner.clockEdge() + node_ptr->compDelay);
727 // Account for the resources taken up by this issued node.
728 hwResource.occupy(node_ptr);
729 return true;
730
731 } else {
732 if (first) {
733 // Although dependencies are complete, resources are not available.
734 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
735 " Adding to depFreeQueue.\n", node_ptr->seqNum);
736 depFreeQueue.push(node_ptr);
737 } else {
738 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
739 "Still pending issue.\n", node_ptr->seqNum);
740 }
741 return false;
742 }
743}
744
745void
746TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
747{
748 // Release the resources for this completed node.
749 if (pkt->isWrite()) {
750 // Consider store complete.
751 hwResource.releaseStoreBuffer();
752 // If it is a store response then do nothing since we do not model
753 // dependencies on store completion in the trace. But if we were
754 // blocking execution due to store buffer fullness, we need to schedule
755 // an event and attempt to progress.
756 } else {
757 // If it is a load response then release the dependents waiting on it.
758 // Get pointer to the completed load
759 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
760 assert(graph_itr != depGraph.end());
761 GraphNode* node_ptr = graph_itr->second;
762
763 // Release resources occupied by the load
764 hwResource.release(node_ptr);
765
766 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
767 " dependents..\n", node_ptr->seqNum);
768
769 for (auto child : node_ptr->dependents) {
770 if (child->removeDepOnInst(node_ptr->seqNum)) {
771 checkAndIssue(child);
772 }
773 }
774
775 // clear the dynamically allocated set of dependents
776 (node_ptr->dependents).clear();
777 // Update the stat for numOps completed
778 owner.updateNumOps(node_ptr->robNum);
779 // delete node
780 delete node_ptr;
781 // remove from graph
782 depGraph.erase(graph_itr);
783 }
784
785 if (DTRACE(TraceCPUData)) {
786 printReadyList();
787 }
788
789 // If the size of the dependency graph is less than the dependency window
790 // then read from the trace file to populate the graph next time we are in
791 // execute.
792 if (depGraph.size() < windowSize && !traceComplete)
793 nextRead = true;
794
795 // If not waiting for retry, attempt to schedule next event
796 if (!retryPkt) {
797 // We might have new dep-free nodes in the list which will have execute
798 // tick greater than or equal to curTick. But a new dep-free node might
799 // have its execute tick earlier. Therefore, attempt to reschedule. It
800 // could happen that the readyList is empty and we got here via a
801 // last remaining response. So, either the trace is complete or there
802 // are pending nodes in the depFreeQueue. The checking is done in the
803 // execute() control flow, so schedule an event to go via that flow.
804 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
805 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
806 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
807 next_event_tick);
808 owner.schedDcacheNextEvent(next_event_tick);
809 }
810}
811
812void
813TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
814 Tick exec_tick)
815{
816 ReadyNode ready_node;
817 ready_node.seqNum = seq_num;
818 ready_node.execTick = exec_tick;
819
820 // Iterator to readyList
821 auto itr = readyList.begin();
822
823 // If the readyList is empty, simply insert the new node at the beginning
824 // and return
825 if (itr == readyList.end()) {
826 readyList.insert(itr, ready_node);
827 maxReadyListSize = std::max<double>(readyList.size(),
828 maxReadyListSize.value());
829 return;
830 }
831
 832    // If the head of the list is the node whose packet failed to send and is
 833    // held in retryPkt, skip past it so that it keeps its position as the
 834    // first node in the readyList.
835 if (retryPkt)
836 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
837 itr++;
838
839 // Increment the iterator and compare the node pointed to by it to the new
840 // node till the position to insert the new node is found.
841 bool found = false;
842 while (!found && itr != readyList.end()) {
843 // If the execution tick of the new node is less than the node then
844 // this is the position to insert
845 if (exec_tick < itr->execTick)
846 found = true;
847 // If the execution tick of the new node is equal to the node then
848 // sort in ascending order of sequence numbers
849 else if (exec_tick == itr->execTick) {
850 // If the sequence number of the new node is less than the node
851 // then this is the position to insert
852 if (seq_num < itr->seqNum)
853 found = true;
854 // Else go to next node
855 else
856 itr++;
857 }
858 // If the execution tick of the new node is greater than the node then
859 // go to the next node
860 else
861 itr++;
862 }
863 readyList.insert(itr, ready_node);
864 // Update the stat for max size reached of the readyList
865 maxReadyListSize = std::max<double>(readyList.size(),
866 maxReadyListSize.value());
867}
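
// A worked example of the insertion order, with hypothetical entries. The
// readyList is sorted primarily by execute tick and secondarily by sequence
// number, e.g. starting from
//
//     (seqNum 5, execTick 100), (seqNum 9, execTick 100), (seqNum 2, execTick 110)
//
// inserting (seqNum 7, execTick 100) places it between 5 and 9, while
// (seqNum 1, execTick 120) goes to the tail. If the head entry corresponds to
// the packet held in retryPkt, it is skipped so the failed node keeps its
// place at the front.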
868
869void
870TraceCPU::ElasticDataGen::printReadyList() {
871
872 auto itr = readyList.begin();
873 if (itr == readyList.end()) {
874 DPRINTF(TraceCPUData, "readyList is empty.\n");
875 return;
876 }
877 DPRINTF(TraceCPUData, "Printing readyList:\n");
878 while (itr != readyList.end()) {
879 auto graph_itr = depGraph.find(itr->seqNum);
880 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
881 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
882 node_ptr->typeToStr(), itr->execTick);
883 itr++;
884 }
885}
886
887TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
888 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
889 : sizeROB(max_rob),
890 sizeStoreBuffer(max_stores),
891 sizeLoadBuffer(max_loads),
892 oldestInFlightRobNum(UINT64_MAX),
893 numInFlightLoads(0),
894 numInFlightStores(0)
895{}
896
897void
898TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
899{
900 // Occupy ROB entry for the issued node
901 // Merely maintain the oldest node, i.e. numerically least robNum by saving
 902    // it in the variable oldestInFlightRobNum.
903 inFlightNodes[new_node->seqNum] = new_node->robNum;
904 oldestInFlightRobNum = inFlightNodes.begin()->second;
905
906 // Occupy Load/Store Buffer entry for the issued node if applicable
907 if (new_node->isLoad()) {
908 ++numInFlightLoads;
909 } else if (new_node->isStore()) {
910 ++numInFlightStores;
911 } // else if it is a non load/store node, no buffer entry is occupied
912
913 printOccupancy();
914}
915
916void
917TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
918{
919 assert(!inFlightNodes.empty());
920 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
921 done_node->seqNum);
922
923 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
924 inFlightNodes.erase(done_node->seqNum);
925
926 if (inFlightNodes.empty()) {
 927        // If we just deleted the only in-flight node, reset
 928        // oldestInFlightRobNum to its initialized (max) value.
929 oldestInFlightRobNum = UINT64_MAX;
930 } else {
931 // Set the oldest in-flight node rob number equal to the first node in
932 // the inFlightNodes since that will have the numerically least value.
933 oldestInFlightRobNum = inFlightNodes.begin()->second;
934 }
935
936 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
937 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
938 oldestInFlightRobNum);
939
940 // A store is considered complete when a request is sent, thus ROB entry is
941 // freed. But it occupies an entry in the Store Buffer until its response
942 // is received. A load is considered complete when a response is received,
943 // thus both ROB and Load Buffer entries can be released.
944 if (done_node->isLoad()) {
945 assert(numInFlightLoads != 0);
946 --numInFlightLoads;
947 }
948 // For normal writes, we send the requests out and clear a store buffer
 949    // entry on response. For writes which are strictly ordered, e.g.
950 // writes to device registers, we do that within release() which is called
951 // when node is executed and taken off from readyList.
952 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
953 releaseStoreBuffer();
954 }
955}
956
957void
958TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
959{
960 assert(numInFlightStores != 0);
961 --numInFlightStores;
962}
963
964bool
965TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
966 const GraphNode* new_node) const
967{
968 uint16_t num_in_flight_nodes;
969 if (inFlightNodes.empty()) {
970 num_in_flight_nodes = 0;
971 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
972 " #in-flight nodes = 0", new_node->seqNum);
973 } else if (new_node->robNum > oldestInFlightRobNum) {
974 // This is the intuitive case where new dep-free node is younger
975 // instruction than the oldest instruction in-flight. Thus we make sure
976 // in_flight_nodes does not overflow.
977 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
978 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
979 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
980 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
981 } else {
982 // This is the case where an instruction older than the oldest in-
983 // flight instruction becomes dep-free. Thus we must have already
984 // accounted for the entry in ROB for this new dep-free node.
985 // Immediately after this check returns true, oldestInFlightRobNum will
986 // be updated in occupy(). We simply let this node issue now.
987 num_in_flight_nodes = 0;
988 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
989 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
990 new_node->seqNum, new_node->robNum);
991 }
992 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
993 numInFlightLoads, sizeLoadBuffer,
994 numInFlightStores, sizeStoreBuffer);
995 // Check if resources are available to issue the specific node
996 if (num_in_flight_nodes >= sizeROB) {
997 return false;
998 }
999 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
1000 return false;
1001 }
1002 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
1003 return false;
1004 }
1005 return true;
1006}
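
// A worked example of the ROB occupancy check, with hypothetical values
// sizeROB = 8 and oldestInFlightRobNum = 100:
//
//     new_node->robNum = 105 -> num_in_flight_nodes = 105 - 100 = 5 -> can issue
//     new_node->robNum = 109 -> num_in_flight_nodes = 109 - 100 = 9 -> blocked
//     new_node->robNum =  97 -> older than the oldest in flight     -> can issue
//
// Independently of the ROB check, a load is rejected when numInFlightLoads
// has reached sizeLoadBuffer, and a store when numInFlightStores has reached
// sizeStoreBuffer.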
1007
1008bool
1009TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
1010 // Return true if there is at least one read or write request in flight
1011 return (numInFlightStores != 0 || numInFlightLoads != 0);
1012}
1013
1014void
1015TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
1016 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1017 "LQ = %d/%d, SQ = %d/%d.\n",
1018 oldestInFlightRobNum,
1019 numInFlightLoads, sizeLoadBuffer,
1020 numInFlightStores, sizeStoreBuffer);
1021}
1022
1023void
1024TraceCPU::FixedRetryGen::regStats()
1025{
1026 using namespace Stats;
1027
1028 numSendAttempted
1029 .name(name() + ".numSendAttempted")
1030 .desc("Number of first attempts to send a request")
1031 ;
1032
1033 numSendSucceeded
1034 .name(name() + ".numSendSucceeded")
1035 .desc("Number of successful first attempts")
1036 ;
1037
1038 numSendFailed
1039 .name(name() + ".numSendFailed")
1040 .desc("Number of failed first attempts")
1041 ;
1042
1043 numRetrySucceeded
1044 .name(name() + ".numRetrySucceeded")
1045 .desc("Number of successful retries")
1046 ;
1047
1048 instLastTick
1049 .name(name() + ".instLastTick")
1050 .desc("Last tick simulated from the fixed inst trace")
1051 ;
1052}
1053
1054Tick
1055TraceCPU::FixedRetryGen::init()
1056{
1057 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1058 " IcacheGen: fixed issue with retry.\n");
1059
1060 if (nextExecute()) {
1061 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1062 return currElement.tick;
1063 } else {
1064 panic("Read of first message in the trace failed.\n");
1065 return MaxTick;
1066 }
1067}
1068
1069bool
1070TraceCPU::FixedRetryGen::tryNext()
1071{
1072 // If there is a retry packet, try to send it
1073 if (retryPkt) {
1074
1075 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1076
1077 if (!port.sendTimingReq(retryPkt)) {
1078 // Still blocked! This should never occur.
1079 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1080 return false;
1081 }
1082 ++numRetrySucceeded;
1083 } else {
1084
1085 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1086
1087 // try sending current element
1088 assert(currElement.isValid());
1089
1090 ++numSendAttempted;
1091
1092 if (!send(currElement.addr, currElement.blocksize,
1093 currElement.cmd, currElement.flags, currElement.pc)) {
1094 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1095 ++numSendFailed;
1096 // return false to indicate not to schedule next event
1097 return false;
1098 } else {
1099 ++numSendSucceeded;
1100 }
1101 }
1102 // If packet was sent successfully, either retryPkt or currElement, return
1103 // true to indicate to schedule event at current Tick plus delta. If packet
1104 // was sent successfully and there is no next packet to send, return false.
1105 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1106 "element.\n");
1107 retryPkt = nullptr;
1108 // Read next element into currElement, currElement gets cleared so save the
1109 // tick to calculate delta
1110 Tick last_tick = currElement.tick;
1111 if (nextExecute()) {
1112 assert(currElement.tick >= last_tick);
1113 delta = currElement.tick - last_tick;
1114 }
1115 return !traceComplete;
1116}
1117
1118void
1119TraceCPU::FixedRetryGen::exit()
1120{
1121 trace.reset();
1122}
1123
1124bool
1125TraceCPU::FixedRetryGen::nextExecute()
1126{
1127 if (traceComplete)
1128 // We are at the end of the file, thus we have no more messages.
1129 // Return false.
1130 return false;
1131
1132
1133 //Reset the currElement to the default values
1134 currElement.clear();
1135
1136 // Read the next line to get the next message. If that fails then end of
1137 // trace has been reached and traceComplete needs to be set in addition
1138 // to returning false. If successful then next message is in currElement.
1139 if (!trace.read(&currElement)) {
1140 traceComplete = true;
1141 instLastTick = curTick();
1142 return false;
1143 }
1144
1145 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1146 currElement.cmd.isRead() ? 'r' : 'w',
1147 currElement.addr,
1148 currElement.pc,
1149 currElement.blocksize,
1150 currElement.tick);
1151
1152 return true;
1153}
1154
1155bool
1156TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1157 Request::FlagsType flags, Addr pc)
1158{
1159
1160 // Create new request
1161 Request* req = new Request(addr, size, flags, masterID);
1162 req->setPC(pc);
1163
1164 // If this is not done it triggers assert in L1 cache for invalid contextId
1165 req->setContext(ContextID(0));
1166
1167 // Embed it in a packet
1168 PacketPtr pkt = new Packet(req, cmd);
1169
1170 uint8_t* pkt_data = new uint8_t[req->getSize()];
1171 pkt->dataDynamic(pkt_data);
1172
1173 if (cmd.isWrite()) {
1174 memset(pkt_data, 0xA, req->getSize());
1175 }
1176
1177 // Call MasterPort method to send a timing request for this packet
1178 bool success = port.sendTimingReq(pkt);
1179 if (!success) {
1180 // If it fails, save the packet to retry when a retry is signalled by
1181 // the cache
1182 retryPkt = pkt;
1183 }
1184 return success;
1185}
1186
1187void
1188TraceCPU::icacheRetryRecvd()
1189{
1190 // Schedule an event to go through the control flow in the same tick as
1191 // retry is received
1192 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1193 " event @%lli.\n", curTick());
1194 schedule(icacheNextEvent, curTick());
1195}
1196
1197void
1198TraceCPU::dcacheRetryRecvd()
1199{
1200 // Schedule an event to go through the execute flow in the same tick as
1201 // retry is received
1202 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1203 " event @%lli.\n", curTick());
1204 schedule(dcacheNextEvent, curTick());
1205}
1206
1207void
1208TraceCPU::schedDcacheNextEvent(Tick when)
1209{
1210 if (!dcacheNextEvent.scheduled()) {
1211 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1212 when);
1213 schedule(dcacheNextEvent, when);
1214 ++numSchedDcacheEvent;
1215 } else if (when < dcacheNextEvent.when()) {
1216 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1217 " to %lli.\n", dcacheNextEvent.when(), when);
1218 reschedule(dcacheNextEvent, when);
1219 }
1220
1221}
1222
1223bool
1224TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1225{
1226 // All responses on the instruction fetch side are ignored. Simply delete
1227 // the request and packet to free allocated memory
1228 delete pkt->req;
1229 delete pkt;
1230
1231 return true;
1232}
1233
1234void
1235TraceCPU::IcachePort::recvReqRetry()
1236{
1237 owner->icacheRetryRecvd();
1238}
1239
1240void
1241TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1242{
1243 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1244 dcacheGen.completeMemAccess(pkt);
1245}
1246
1247bool
1248TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1249{
1250 // Handle the responses for data memory requests which is done inside the
1251 // elastic data generator
1252 owner->dcacheRecvTimingResp(pkt);
1253 // After processing the response delete the request and packet to free
1254 // memory
1255 delete pkt->req;
1256 delete pkt;
1257
1258 return true;
1259}
1260
1261void
1262TraceCPU::DcachePort::recvReqRetry()
1263{
1264 owner->dcacheRetryRecvd();
1265}
1266
1267TraceCPU::ElasticDataGen::InputStream::InputStream(
1268 const std::string& filename,
1269 const double time_multiplier)
1270 : trace(filename),
1271 timeMultiplier(time_multiplier),
1272 microOpCount(0)
1273{
1274 // Create a protobuf message for the header and read it from the stream
1275 ProtoMessage::InstDepRecordHeader header_msg;
1276    if (!trace.read(header_msg)) {
1277        panic("Failed to read packet header from %s\n", filename);
1278    }
1279
1280    if (header_msg.tick_freq() != SimClock::Frequency) {
1281        panic("Trace %s was recorded with a different tick frequency %d\n",
1282              filename, header_msg.tick_freq());
1283    }
1284
1285    // Assign window size equal to the field in the trace that was recorded
1286    // when the data dependency trace was captured in the o3cpu model
1287    windowSize = header_msg.window_size();
1288}
1289
1290void
1291TraceCPU::ElasticDataGen::InputStream::reset()
1292{
1293 trace.reset();
1294}
1295
1296bool
1297TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1298{
1299 ProtoMessage::InstDepRecord pkt_msg;
1300 if (trace.read(pkt_msg)) {
1301 // Required fields
1302 element->seqNum = pkt_msg.seq_num();
1303 element->type = pkt_msg.type();
1304 // Scale the compute delay to effectively scale the Trace CPU frequency
1305 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1306
1307 // Repeated field robDepList
1308 element->clearRobDep();
1309 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1310 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1311 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1312 element->numRobDep += 1;
1313 }
1314
1315 // Repeated field
1316 element->clearRegDep();
1317 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1318 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1319            // There is a possibility that an instruction has both a register
1320            // and an order dependency on the same instruction. In such a case,
1321            // the register dependency is omitted.
1322 bool duplicate = false;
1323 for (int j = 0; j < element->numRobDep; j++) {
1324 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1325 }
1326 if (!duplicate) {
1327 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1328 element->numRegDep += 1;
1329 }
1330 }
1331
1332 // Optional fields
1333 if (pkt_msg.has_p_addr())
1334 element->physAddr = pkt_msg.p_addr();
1335 else
1336 element->physAddr = 0;
1337
1338 if (pkt_msg.has_v_addr())
1339 element->virtAddr = pkt_msg.v_addr();
1340 else
1341 element->virtAddr = 0;
1342
1343 if (pkt_msg.has_asid())
1344 element->asid = pkt_msg.asid();
1345 else
1346 element->asid = 0;
1347
1348 if (pkt_msg.has_size())
1349 element->size = pkt_msg.size();
1350 else
1351 element->size = 0;
1352
1353 if (pkt_msg.has_flags())
1354 element->flags = pkt_msg.flags();
1355 else
1356 element->flags = 0;
1357
1358 if (pkt_msg.has_pc())
1359 element->pc = pkt_msg.pc();
1360 else
1361 element->pc = 0;
1362
1363 // ROB occupancy number
1364 ++microOpCount;
1365 if (pkt_msg.has_weight()) {
1366 microOpCount += pkt_msg.weight();
1367 }
1368 element->robNum = microOpCount;
1369 return true;
1370 }
1371
1372 // We have reached the end of the file
1373 return false;
1374}
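
// A worked example of how microOpCount yields robNum, with hypothetical
// weights. Assuming three consecutive records where only the second carries
// a weight of 3 (standing for micro-ops elided from the trace):
//
//     record 1: ++microOpCount                  -> microOpCount = 1, robNum = 1
//     record 2: ++microOpCount; microOpCount += 3 -> microOpCount = 5, robNum = 5
//     record 3: ++microOpCount                  -> microOpCount = 6, robNum = 6
//
// The weight values come from the o3cpu that recorded the trace; the numbers
// above are made up purely to show the accumulation.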
1375
1376bool
1377TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1378{
1379 for (auto& own_reg_dep : regDep) {
1380 if (own_reg_dep == reg_dep) {
1381 // If register dependency is found, make it zero and return true
1382 own_reg_dep = 0;
1383 assert(numRegDep > 0);
1384 --numRegDep;
1385 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1386 "done.\n", seqNum, reg_dep);
1387 return true;
1388 }
1389 }
1390
1391 // Return false if the dependency is not found
1392 return false;
1393}
1394
1395bool
1396TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1397{
1398 for (auto& own_rob_dep : robDep) {
1399 if (own_rob_dep == rob_dep) {
1400 // If the rob dependency is found, make it zero and return true
1401 own_rob_dep = 0;
1402 assert(numRobDep > 0);
1403 --numRobDep;
1404 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1405 "done.\n", seqNum, rob_dep);
1406 return true;
1407 }
1408 }
1409 return false;
1410}
1411
1412void
1413TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1414 for (auto& own_reg_dep : regDep) {
1415 own_reg_dep = 0;
1416 }
1417 numRegDep = 0;
1418}
1419
1420void
1421TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1422 for (auto& own_rob_dep : robDep) {
1423 own_rob_dep = 0;
1424 }
1425 numRobDep = 0;
1426}
1427
1428bool
1429TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1430{
1431 // If it is an rob dependency then remove it
1432 if (!removeRobDep(done_seq_num)) {
1433 // If it is not an rob dependency then it must be a register dependency
1434 // If the register dependency is not found, it violates an assumption
1435 // and must be caught by assert.
1436 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1437 assert(regdep_found);
1438 }
1439 // Return true if the node is dependency free
1440 return (numRobDep == 0 && numRegDep == 0);
1441}
1442
1443void
1444TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1445{
1446 DPRINTFR(TraceCPUData, "%lli", seqNum);
1447 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1448 if (isLoad() || isStore()) {
1449 DPRINTFR(TraceCPUData, ",%i", physAddr);
1450 DPRINTFR(TraceCPUData, ",%i", size);
1451 DPRINTFR(TraceCPUData, ",%i", flags);
1452 }
1453 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1454 int i = 0;
1455 DPRINTFR(TraceCPUData, "robDep:");
1456 while (robDep[i] != 0) {
1457 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1458 i++;
1459 }
1460 i = 0;
1461 DPRINTFR(TraceCPUData, "regDep:");
1462 while (regDep[i] != 0) {
1463 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1464 i++;
1465 }
1466 auto child_itr = dependents.begin();
1467 DPRINTFR(TraceCPUData, "dependents:");
1468 while (child_itr != dependents.end()) {
1469 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1470 child_itr++;
1471 }
1472
1473 DPRINTFR(TraceCPUData, "\n");
1474}
1475
1476std::string
1477TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1478{
1479 return Record::RecordType_Name(type);
1480}
1481
1482TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1483 : trace(filename)
1484{
1485 // Create a protobuf message for the header and read it from the stream
1486 ProtoMessage::PacketHeader header_msg;
1487    if (!trace.read(header_msg)) {
1488        panic("Failed to read packet header from %s\n", filename);
1489    }
1490
1491    if (header_msg.tick_freq() != SimClock::Frequency) {
1492        panic("Trace %s was recorded with a different tick frequency %d\n",
1493              filename, header_msg.tick_freq());
1494    }
1495}
1496
1497void
1498TraceCPU::FixedRetryGen::InputStream::reset()
1499{
1500 trace.reset();
1501}
1502
1503bool
1504TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1505{
1506 ProtoMessage::Packet pkt_msg;
1507 if (trace.read(pkt_msg)) {
1508 element->cmd = pkt_msg.cmd();
1509 element->addr = pkt_msg.addr();
1510 element->blocksize = pkt_msg.size();
1511 element->tick = pkt_msg.tick();
1512 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1513 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1514 return true;
1515 }
1516
1517 // We have reached the end of the file
1518 return false;
1519}
55 instTraceFile(params->instTraceFile),
56 dataTraceFile(params->dataTraceFile),
57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59 params),
60 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
61 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
62 oneTraceComplete(false),
63 traceOffset(0),
64 execCompleteEvent(nullptr),
65 enableEarlyExit(params->enableEarlyExit),
66 progressMsgInterval(params->progressMsgInterval),
67 progressMsgThreshold(params->progressMsgInterval)
68{
69 // Increment static counter for number of Trace CPUs.
70 ++TraceCPU::numTraceCPUs;
71
72 // Check that the python parameters for sizes of ROB, store buffer and
73 // load buffer do not overflow the corresponding C++ variables.
74 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
75 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
76 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d "
77 "exceeds the max. value of %d.\n", params->sizeROB,
78 UINT16_MAX);
79 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
80 " %d exceeds the max. value of %d.\n",
81 params->sizeLoadBuffer, UINT16_MAX);
82}
83
84TraceCPU::~TraceCPU()
85{
86
87}
88
89TraceCPU*
90TraceCPUParams::create()
91{
92 return new TraceCPU(this);
93}
94
95void
96TraceCPU::updateNumOps(uint64_t rob_num)
97{
98 numOps = rob_num;
99 if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
100 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
101 progressMsgThreshold += progressMsgInterval;
102 }
103}
104
105void
106TraceCPU::takeOverFrom(BaseCPU *oldCPU)
107{
108 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
109 assert(!getInstPort().isConnected());
110 assert(oldCPU->getInstPort().isConnected());
111 BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
112 oldCPU->getInstPort().unbind();
113 getInstPort().bind(inst_peer_port);
114
115 assert(!getDataPort().isConnected());
116 assert(oldCPU->getDataPort().isConnected());
117 BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
118 oldCPU->getDataPort().unbind();
119 getDataPort().bind(data_peer_port);
120}
121
122void
123TraceCPU::init()
124{
125 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
126 "\n", instTraceFile);
127 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
128 dataTraceFile);
129
130 BaseCPU::init();
131
132 // Get the send tick of the first instruction read request
133 Tick first_icache_tick = icacheGen.init();
134
135 // Get the send tick of the first data read/write request
136 Tick first_dcache_tick = dcacheGen.init();
137
138 // Set the trace offset as the minimum of that in both traces
139 traceOffset = std::min(first_icache_tick, first_dcache_tick);
140 inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
141 name(), traceOffset);
142
143 // Schedule next icache and dcache event by subtracting the offset
144 schedule(icacheNextEvent, first_icache_tick - traceOffset);
145 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
146
147 // Adjust the trace offset for the dcache generator's ready nodes
148 // We don't need to do this for the icache generator as it will
149 // send its first request at the first event and schedule subsequent
150 // events using a relative tick delta
151 dcacheGen.adjustInitTraceOffset(traceOffset);
152
153 // If the Trace CPU simulation is configured to exit on any one trace
154 // completion then we don't need a counted event to count down all Trace
155 // CPUs in the system. If not then instantiate a counted event.
156 if (!enableEarlyExit) {
157 // The static counter for number of Trace CPUs is correctly set at
158 // this point so create an event and pass it.
159 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
160 numTraceCPUs);
161 }
162
163}
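// Worked example of the offset handling above (illustrative ticks): if the
// first icache request is at tick 1000000 and the first dcache request at
// tick 1200000, then traceOffset = 1000000, icacheNextEvent is scheduled at
// tick 0, dcacheNextEvent at tick 200000, and the dcache generator's ready
// nodes have their execute ticks shifted down by 1000000.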
164
165void
166TraceCPU::schedIcacheNext()
167{
168 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
169
170 // Try to send the current packet or a retry packet if there is one
171 bool sched_next = icacheGen.tryNext();
172 // If packet sent successfully, schedule next event
173 if (sched_next) {
174 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
175 "at %d.\n", curTick() + icacheGen.tickDelta());
176 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
177 ++numSchedIcacheEvent;
178 } else {
179 // check if the trace is complete. If not, do nothing because the send
180 // failed and the next event will be scheduled via recvReqRetry()
181 if (icacheGen.isTraceComplete()) {
182 // If this is the first trace to complete, set the flag. If it is
183 // already set then both traces are complete and we can exit sim.
184 checkAndSchedExitEvent();
185 }
186 }
187 return;
188}
189
190void
191TraceCPU::schedDcacheNext()
192{
193 DPRINTF(TraceCPUData, "DcacheGen event.\n");
194
195 // Update stat for numCycles
196 numCycles = clockEdge() / clockPeriod();
197
198 dcacheGen.execute();
199 if (dcacheGen.isExecComplete()) {
200 checkAndSchedExitEvent();
201 }
202}
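// Note on the numCycles update above: clockEdge()/clockPeriod() is the number
// of whole clock periods elapsed up to the current clock edge, e.g. with a
// 500-tick clock period a clock edge at tick 250000 gives numCycles = 500
// (illustrative values).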
203
204void
205TraceCPU::checkAndSchedExitEvent()
206{
207 if (!oneTraceComplete) {
208 oneTraceComplete = true;
209 } else {
210 // Schedule event to indicate execution is complete as both
211 // instruction and data access traces have been played back.
212 inform("%s: Execution complete.\n", name());
213 // If the replay is configured to exit early, that is when any one
214 // execution is complete then exit immediately and return. Otherwise,
215 // schedule the counted exit that counts down completion of each Trace
216 // CPU.
217 if (enableEarlyExit) {
218 exitSimLoop("End of trace reached");
219 } else {
220 schedule(*execCompleteEvent, curTick());
221 }
222 }
223}
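// The two-call pattern above means the first generator to finish only sets
// oneTraceComplete; the second call (when the other trace also finishes)
// either exits immediately in early-exit mode, or schedules execCompleteEvent,
// the counted event that counts down all Trace CPUs in the system (see the
// comment in init()).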
224
225void
226TraceCPU::regStats()
227{
228
229 BaseCPU::regStats();
230
231 numSchedDcacheEvent
232 .name(name() + ".numSchedDcacheEvent")
233 .desc("Number of events scheduled to trigger data request generator")
234 ;
235
236 numSchedIcacheEvent
237 .name(name() + ".numSchedIcacheEvent")
238 .desc("Number of events scheduled to trigger instruction request generator")
239 ;
240
241 numOps
242 .name(name() + ".numOps")
243 .desc("Number of micro-ops simulated by the Trace CPU")
244 ;
245
246 cpi
247 .name(name() + ".cpi")
248 .desc("Cycles per micro-op used as a proxy for CPI")
249 .precision(6)
250 ;
251 cpi = numCycles/numOps;
252
253 icacheGen.regStats();
254 dcacheGen.regStats();
255}
256
257void
258TraceCPU::ElasticDataGen::regStats()
259{
260 using namespace Stats;
261
262 maxDependents
263 .name(name() + ".maxDependents")
264 .desc("Max number of dependents observed on a node")
265 ;
266
267 maxReadyListSize
268 .name(name() + ".maxReadyListSize")
269 .desc("Max size of the ready list observed")
270 ;
271
272 numSendAttempted
273 .name(name() + ".numSendAttempted")
274 .desc("Number of first attempts to send a request")
275 ;
276
277 numSendSucceeded
278 .name(name() + ".numSendSucceeded")
279 .desc("Number of successful first attempts")
280 ;
281
282 numSendFailed
283 .name(name() + ".numSendFailed")
284 .desc("Number of failed first attempts")
285 ;
286
287 numRetrySucceeded
288 .name(name() + ".numRetrySucceeded")
289 .desc("Number of successful retries")
290 ;
291
292 numSplitReqs
293 .name(name() + ".numSplitReqs")
294 .desc("Number of split requests")
295 ;
296
297 numSOLoads
298 .name(name() + ".numSOLoads")
299 .desc("Number of strictly ordered loads")
300 ;
301
302 numSOStores
303 .name(name() + ".numSOStores")
304 .desc("Number of strictly ordered stores")
305 ;
306
307 dataLastTick
308 .name(name() + ".dataLastTick")
309 .desc("Last tick simulated from the elastic data trace")
310 ;
311}
312
313Tick
314TraceCPU::ElasticDataGen::init()
315{
316 DPRINTF(TraceCPUData, "Initializing data memory request generator "
317 "DcacheGen: elastic issue with retry.\n");
318
319 if (!readNextWindow())
320 panic("Trace has %d elements. It must have at least %d elements.\n",
321 depGraph.size(), 2 * windowSize);
322 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
323 depGraph.size());
324
325 if (!readNextWindow())
326 panic("Trace has %d elements. It must have at least %d elements.\n",
327 depGraph.size(), 2 * windowSize);
328 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
329 depGraph.size());
330
331 // Print readyList
332 if (DTRACE(TraceCPUData)) {
333 printReadyList();
334 }
335 auto free_itr = readyList.begin();
336 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
337 " is %d.\n", free_itr->seqNum, free_itr->execTick);
338 // Return the execute tick of the earliest ready node so that an event
339 // can be scheduled to call execute()
340 return (free_itr->execTick);
341}
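// Note: init() reads two windows up front and the panics above require the
// trace to hold at least 2 * windowSize records. This keeps a full window of
// records ahead of the nodes being executed, so dependents of dependency-free
// nodes are already present in depGraph (see the corresponding read at the
// start of execute()).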
342
343void
344TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
345 for (auto& free_node : readyList) {
346 free_node.execTick -= offset;
347 }
348}
349
350void
351TraceCPU::ElasticDataGen::exit()
352{
353 trace.reset();
354}
355
356bool
357TraceCPU::ElasticDataGen::readNextWindow()
358{
359
360 // Read and add next window
361 DPRINTF(TraceCPUData, "Reading next window from file.\n");
362
363 if (traceComplete) {
364 // We are at the end of the file, thus we have no more records.
365 // Return false.
366 return false;
367 }
368
369 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
370 depGraph.size());
371
372 uint32_t num_read = 0;
373 while (num_read != windowSize) {
374
375 // Create a new graph node
376 GraphNode* new_node = new GraphNode;
377
378 // Read the next line to get the next record. If that fails then end of
379 // trace has been reached and traceComplete needs to be set in addition
380 // to returning false.
381 if (!trace.read(new_node)) {
382 DPRINTF(TraceCPUData, "\tTrace complete!\n");
383 traceComplete = true;
384 return false;
385 }
386
387 // Annotate the ROB dependencies of the new node onto the parent nodes.
388 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
389 // Annotate the register dependencies of the new node onto the parent
390 // nodes.
391 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
392
393 num_read++;
394 // Add to map
395 depGraph[new_node->seqNum] = new_node;
396 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
397 // Source dependencies are already complete, check if resources
398 // are available and issue. The execution time is approximated
399 // to current time plus the computational delay.
400 checkAndIssue(new_node);
401 }
402 }
403
404 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
405 depGraph.size());
406 return true;
407}
408
409template<typename T> void
410TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
411 T& dep_array, uint8_t& num_dep)
412{
413 for (auto& a_dep : dep_array) {
414 // The convention is to set the dependencies starting with the first
415 // index in the ROB and register dependency arrays. Thus, when we reach
416 // a dependency equal to the initialisation value of zero, we know we
417 // have iterated over all dependencies and can break.
418 if (a_dep == 0)
419 break;
420 // We look up the valid dependency, i.e. the parent of this node
421 auto parent_itr = depGraph.find(a_dep);
422 if (parent_itr != depGraph.end()) {
423 // If the parent is found, it is yet to be executed. Append a
424 // pointer to the new node to the dependents list of the parent
425 // node.
426 parent_itr->second->dependents.push_back(new_node);
427 auto num_depts = parent_itr->second->dependents.size();
428 maxDependents = std::max<double>(num_depts, maxDependents.value());
429 } else {
430 // The dependency is not found in the graph. So consider
431 // the execution of the parent is complete, i.e. remove this
432 // dependency.
433 a_dep = 0;
434 num_dep--;
435 }
436 }
437}
438
439void
440TraceCPU::ElasticDataGen::execute()
441{
442 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
443 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
444 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
445 depFreeQueue.size());
446 hwResource.printOccupancy();
447
448 // Read next window to make sure that dependents of all dep-free nodes
449 // are in the depGraph
450 if (nextRead) {
451 readNextWindow();
452 nextRead = false;
453 }
454
455 // First attempt to issue the pending dependency-free nodes held
456 // in depFreeQueue. If resources have become available for a node,
457 // then issue it, i.e. add the node to readyList.
458 while (!depFreeQueue.empty()) {
459 if (checkAndIssue(depFreeQueue.front(), false)) {
460 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
461 "%lli.\n", (depFreeQueue.front())->seqNum);
462 depFreeQueue.pop();
463 } else {
464 break;
465 }
466 }
467 // Proceed to execute from readyList
468 auto graph_itr = depGraph.begin();
469 auto free_itr = readyList.begin();
470 // Iterate through readyList until the next free node has its execute
471 // tick later than curTick or the end of readyList is reached
472 while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
473
474 // Get pointer to the node to be executed
475 graph_itr = depGraph.find(free_itr->seqNum);
476 assert(graph_itr != depGraph.end());
477 GraphNode* node_ptr = graph_itr->second;
478
479 // If there is a retryPkt send that else execute the load
480 if (retryPkt) {
481 // The retryPkt must be the request that was created by the
482 // first node in the readyList.
483 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
484 panic("Retry packet's seqence number does not match "
485 "the first node in the readyList.\n");
486 }
487 if (port.sendTimingReq(retryPkt)) {
488 ++numRetrySucceeded;
489 retryPkt = nullptr;
490 }
491 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
492 // If there is no retryPkt, attempt to send a memory request in
493 // case of a load or store node. If the send fails, executeMemReq()
494 // returns a packet pointer, which we save in retryPkt. In case of
495 // a comp node we don't do anything and simply continue as if the
496 // execution of the comp node succeeded.
497 retryPkt = executeMemReq(node_ptr);
498 }
499 // If the retryPkt or a new load/store node failed, we exit from here
500 // as a retry from the cache will bring control back to execute(). The
501 // first node in readyList will then be the failed node.
502 if (retryPkt) {
503 break;
504 }
505
506 // Proceed to remove dependencies for the successfully executed node.
507 // If it is a load which is not strictly ordered and we sent a
508 // request for it successfully, we do not yet mark any register
509 // dependencies complete. But, as per the dependency modelling, we
510 // need to mark the ROB dependencies of load and non-load/store nodes
511 // that are based on the successful sending of the load as complete.
512 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
513 // If execute succeeded mark its dependents as complete
514 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
515 "dependents..\n", node_ptr->seqNum);
516
517 auto child_itr = (node_ptr->dependents).begin();
518 while (child_itr != (node_ptr->dependents).end()) {
519 // ROB dependency of a store on a load must not be removed
520 // after load is sent but after response is received
521 if (!(*child_itr)->isStore() &&
522 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
523
524 // Check if the child node has become dependency free
525 if ((*child_itr)->numRobDep == 0 &&
526 (*child_itr)->numRegDep == 0) {
527
528 // Source dependencies are complete, check if
529 // resources are available and issue
530 checkAndIssue(*child_itr);
531 }
532 // Remove this child from the sent load's dependents and
533 // point to the element following the erased one
534 child_itr = node_ptr->dependents.erase(child_itr);
535 } else {
536 // This child is not dependency-free, point to the next
537 // child
538 child_itr++;
539 }
540 }
541 } else {
542 // If it is a strictly ordered load mark its dependents as complete
543 // as we do not send a request for this case. If it is a store or a
544 // comp node we also mark all its dependents complete.
545 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
546 " up dependents..\n", node_ptr->seqNum);
547
548 for (auto child : node_ptr->dependents) {
549 // If the child node is dependency free removeDepOnInst()
550 // returns true.
551 if (child->removeDepOnInst(node_ptr->seqNum)) {
552 // Source dependencies are complete, check if resources
553 // are available and issue
554 checkAndIssue(child);
555 }
556 }
557 }
558
559 // After executing the node, remove from readyList and delete node.
560 readyList.erase(free_itr);
561 // If it is a cacheable load which was sent, don't delete
562 // just yet. Delete it in completeMemAccess() after the
563 // response is received. If it is a strictly ordered
564 // load, it was not sent and all dependencies were simply
565 // marked complete. Thus it is safe to delete it. For
566 // stores and non load/store nodes all dependencies were
567 // marked complete so it is safe to delete it.
568 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
569 // Release all resources occupied by the completed node
570 hwResource.release(node_ptr);
571 // clear the dynamically allocated set of dependents
572 (node_ptr->dependents).clear();
573 // Update the stat for numOps simulated
574 owner.updateNumOps(node_ptr->robNum);
575 // delete node
576 delete node_ptr;
577 // remove from graph
578 depGraph.erase(graph_itr);
579 }
580 // Point to first node to continue to next iteration of while loop
581 free_itr = readyList.begin();
582 } // end of while loop
583
584 // Print readyList, sizes of queues and resource status after updating
585 if (DTRACE(TraceCPUData)) {
586 printReadyList();
587 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
588 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
589 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
590 depFreeQueue.size());
591 hwResource.printOccupancy();
592 }
593
594 if (retryPkt) {
595 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
596 "event from the cache for seq. num %lli.\n",
597 retryPkt->req->getReqInstSeqNum());
598 return;
599 }
600 // If the size of the dependency graph is less than the dependency window
601 // then read from the trace file to populate the graph next time we are in
602 // execute.
603 if (depGraph.size() < windowSize && !traceComplete)
604 nextRead = true;
605
606 // If cache is not blocked, schedule an event for the first execTick in
607 // readyList else retry from cache will schedule the event. If the ready
608 // list is empty then check if the next pending node has resources
609 // available to issue. If yes, then schedule an event for the next cycle.
610 if (!readyList.empty()) {
611 Tick next_event_tick = std::max(readyList.begin()->execTick,
612 curTick());
613 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
614 next_event_tick);
615 owner.schedDcacheNextEvent(next_event_tick);
616 } else if (readyList.empty() && !depFreeQueue.empty() &&
617 hwResource.isAvailable(depFreeQueue.front())) {
618 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
619 owner.clockEdge(Cycles(1)));
620 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
621 }
622
623 // If trace is completely read, readyList is empty and depGraph is empty,
624 // set execComplete to true
625 if (depGraph.empty() && readyList.empty() && traceComplete &&
626 !hwResource.awaitingResponse()) {
627 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
628 execComplete = true;
629 dataLastTick = curTick();
630 }
631}
632
633PacketPtr
634TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
635{
636
637 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
638 "virt addr %d, pc %#x, size %d, flags %d).\n",
639 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
640 node_ptr->pc, node_ptr->size, node_ptr->flags);
641
642 // If the request is strictly ordered, do not send it. Just return nullptr
643 // as if it was successfully sent.
644 if (node_ptr->isStrictlyOrdered()) {
645 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
646 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
647 node_ptr->seqNum);
648 return nullptr;
649 }
650
651 // Check if the request spans two cache lines as this condition triggers
652 // an assert fail in the L1 cache. If it does then truncate the size to
653 // access only until the end of that line and ignore the remainder. The
654 // stat counting this is useful to keep a check on how frequently this
655 // happens. If required the code could be revised to mimic splitting such
656 // a request into two.
657 unsigned blk_size = owner.cacheLineSize();
658 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
659 if (!(blk_offset + node_ptr->size <= blk_size)) {
660 node_ptr->size = blk_size - blk_offset;
661 ++numSplitReqs;
662 }
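// Worked example of the truncation above (illustrative values): with a
// 64-byte cache line, a 16-byte access to physAddr 0x1038 has
// blk_offset = 0x38 (56); since 56 + 16 > 64 the size is truncated to
// 64 - 56 = 8 bytes and numSplitReqs is incremented.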
663
664 // Create a request and the packet containing request
665 Request* req = new Request(node_ptr->physAddr, node_ptr->size,
666 node_ptr->flags, masterID, node_ptr->seqNum,
667 ContextID(0));
668 req->setPC(node_ptr->pc);
669 // If virtual address is valid, set the asid and virtual address fields
670 // of the request.
671 if (node_ptr->virtAddr != 0) {
672 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
673 node_ptr->flags, masterID, node_ptr->pc);
674 req->setPaddr(node_ptr->physAddr);
675 req->setReqInstSeqNum(node_ptr->seqNum);
676 }
677
678 PacketPtr pkt;
679 uint8_t* pkt_data = new uint8_t[req->getSize()];
680 if (node_ptr->isLoad()) {
681 pkt = Packet::createRead(req);
682 } else {
683 pkt = Packet::createWrite(req);
684 memset(pkt_data, 0xA, req->getSize());
685 }
686 pkt->dataDynamic(pkt_data);
687
688 // Call MasterPort method to send a timing request for this packet
689 bool success = port.sendTimingReq(pkt);
690 ++numSendAttempted;
691
692 if (!success) {
693 // If it fails, return the packet to retry when a retry is signalled by
694 // the cache
695 ++numSendFailed;
696 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
697 return pkt;
698 } else {
699 // If it succeeds, return nullptr
700 ++numSendSucceeded;
701 return nullptr;
702 }
703}
704
705bool
706TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
707{
708 // Assert the node is dependency-free
709 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
710
711 // If this is the first attempt, print a debug message to indicate this.
712 if (first) {
713 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
714 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
715 node_ptr->robNum);
716 }
717
718 // Check if resources are available to issue the specific node
719 if (hwResource.isAvailable(node_ptr)) {
720 // If resources are free only then add to readyList
721 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
722 " to readyList, occupying resources.\n", node_ptr->seqNum);
723 // Compute the execute tick by adding the compute delay for the node
724 // and add the ready node to the ready list
725 addToSortedReadyList(node_ptr->seqNum,
726 owner.clockEdge() + node_ptr->compDelay);
727 // Account for the resources taken up by this issued node.
728 hwResource.occupy(node_ptr);
729 return true;
730
731 } else {
732 if (first) {
733 // Although dependencies are complete, resources are not available.
734 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
735 " Adding to depFreeQueue.\n", node_ptr->seqNum);
736 depFreeQueue.push(node_ptr);
737 } else {
738 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
739 "Still pending issue.\n", node_ptr->seqNum);
740 }
741 return false;
742 }
743}
744
745void
746TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
747{
748 // Release the resources for this completed node.
749 if (pkt->isWrite()) {
750 // Consider store complete.
751 hwResource.releaseStoreBuffer();
752 // If it is a store response then do nothing since we do not model
753 // dependencies on store completion in the trace. But if we were
754 // blocking execution due to store buffer fullness, we need to schedule
755 // an event and attempt to progress.
756 } else {
757 // If it is a load response then release the dependents waiting on it.
758 // Get pointer to the completed load
759 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
760 assert(graph_itr != depGraph.end());
761 GraphNode* node_ptr = graph_itr->second;
762
763 // Release resources occupied by the load
764 hwResource.release(node_ptr);
765
766 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
767 " dependents..\n", node_ptr->seqNum);
768
769 for (auto child : node_ptr->dependents) {
770 if (child->removeDepOnInst(node_ptr->seqNum)) {
771 checkAndIssue(child);
772 }
773 }
774
775 // clear the dynamically allocated set of dependents
776 (node_ptr->dependents).clear();
777 // Update the stat for numOps completed
778 owner.updateNumOps(node_ptr->robNum);
779 // delete node
780 delete node_ptr;
781 // remove from graph
782 depGraph.erase(graph_itr);
783 }
784
785 if (DTRACE(TraceCPUData)) {
786 printReadyList();
787 }
788
789 // If the size of the dependency graph is less than the dependency window
790 // then read from the trace file to populate the graph next time we are in
791 // execute.
792 if (depGraph.size() < windowSize && !traceComplete)
793 nextRead = true;
794
795 // If not waiting for retry, attempt to schedule next event
796 if (!retryPkt) {
797 // We might have new dep-free nodes in the list which will have execute
798 // tick greater than or equal to curTick. But a new dep-free node might
799 // have its execute tick earlier. Therefore, attempt to reschedule. It
800 // could happen that the readyList is empty and we got here via a
801 // last remaining response. So, either the trace is complete or there
802 // are pending nodes in the depFreeQueue. The checking is done in the
803 // execute() control flow, so schedule an event to go via that flow.
804 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
805 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
806 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
807 next_event_tick);
808 owner.schedDcacheNextEvent(next_event_tick);
809 }
810}
811
812void
813TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
814 Tick exec_tick)
815{
816 ReadyNode ready_node;
817 ready_node.seqNum = seq_num;
818 ready_node.execTick = exec_tick;
819
820 // Iterator to readyList
821 auto itr = readyList.begin();
822
823 // If the readyList is empty, simply insert the new node at the beginning
824 // and return
825 if (itr == readyList.end()) {
826 readyList.insert(itr, ready_node);
827 maxReadyListSize = std::max<double>(readyList.size(),
828 maxReadyListSize.value());
829 return;
830 }
831
832 // If the first node in the list is the one that failed to send, i.e. it
833 // matches the pending retryPkt, skip past it so that it keeps its
834 // position at the head of the list.
835 if (retryPkt)
836 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
837 itr++;
838
839 // Increment the iterator and compare the node pointed to by it to the new
840 // node till the position to insert the new node is found.
841 bool found = false;
842 while (!found && itr != readyList.end()) {
843 // If the execution tick of the new node is less than the node then
844 // this is the position to insert
845 if (exec_tick < itr->execTick)
846 found = true;
847 // If the execution tick of the new node is equal to the node then
848 // sort in ascending order of sequence numbers
849 else if (exec_tick == itr->execTick) {
850 // If the sequence number of the new node is less than the node
851 // then this is the position to insert
852 if (seq_num < itr->seqNum)
853 found = true;
854 // Else go to next node
855 else
856 itr++;
857 }
858 // If the execution tick of the new node is greater than the node then
859 // go to the next node
860 else
861 itr++;
862 }
863 readyList.insert(itr, ready_node);
864 // Update the stat for max size reached of the readyList
865 maxReadyListSize = std::max<double>(readyList.size(),
866 maxReadyListSize.value());
867}
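// Worked example of the ordering above (illustrative entries): readyList is
// kept sorted by execTick with ties broken by ascending seqNum, so inserting
// (seqNum 12, tick 1000) into [(5, 900), (7, 1000), (9, 1200)] yields
// [(5, 900), (7, 1000), (12, 1000), (9, 1200)].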
868
869void
870TraceCPU::ElasticDataGen::printReadyList() {
871
872 auto itr = readyList.begin();
873 if (itr == readyList.end()) {
874 DPRINTF(TraceCPUData, "readyList is empty.\n");
875 return;
876 }
877 DPRINTF(TraceCPUData, "Printing readyList:\n");
878 while (itr != readyList.end()) {
879 auto graph_itr = depGraph.find(itr->seqNum);
880 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
881 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
882 node_ptr->typeToStr(), itr->execTick);
883 itr++;
884 }
885}
886
887TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
888 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
889 : sizeROB(max_rob),
890 sizeStoreBuffer(max_stores),
891 sizeLoadBuffer(max_loads),
892 oldestInFlightRobNum(UINT64_MAX),
893 numInFlightLoads(0),
894 numInFlightStores(0)
895{}
896
897void
898TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
899{
900 // Occupy ROB entry for the issued node
901 // Merely maintain the oldest node, i.e. the numerically least robNum, by
902 // saving it in the variable oldestInFlightRobNum.
903 inFlightNodes[new_node->seqNum] = new_node->robNum;
904 oldestInFlightRobNum = inFlightNodes.begin()->second;
905
906 // Occupy Load/Store Buffer entry for the issued node if applicable
907 if (new_node->isLoad()) {
908 ++numInFlightLoads;
909 } else if (new_node->isStore()) {
910 ++numInFlightStores;
911 } // else if it is a non load/store node, no buffer entry is occupied
912
913 printOccupancy();
914}
915
916void
917TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
918{
919 assert(!inFlightNodes.empty());
920 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
921 done_node->seqNum);
922
923 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
924 inFlightNodes.erase(done_node->seqNum);
925
926 if (inFlightNodes.empty()) {
927 // If we deleted the only in-flight node then oldestInFlightRobNum is
928 // set back to its initialised (max) value.
929 oldestInFlightRobNum = UINT64_MAX;
930 } else {
931 // Set the oldest in-flight node rob number equal to the first node in
932 // the inFlightNodes since that will have the numerically least value.
933 oldestInFlightRobNum = inFlightNodes.begin()->second;
934 }
935
936 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
937 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
938 oldestInFlightRobNum);
939
940 // A store is considered complete when a request is sent, thus ROB entry is
941 // freed. But it occupies an entry in the Store Buffer until its response
942 // is received. A load is considered complete when a response is received,
943 // thus both ROB and Load Buffer entries can be released.
944 if (done_node->isLoad()) {
945 assert(numInFlightLoads != 0);
946 --numInFlightLoads;
947 }
948 // For normal writes, we send the requests out and clear a store buffer
949 // entry on response. For writes which are strictly ordered, for e.g.
950 // writes to device registers, we do that within release() which is called
951 // when node is executed and taken off from readyList.
952 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
953 releaseStoreBuffer();
954 }
955}
956
957void
958TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
959{
960 assert(numInFlightStores != 0);
961 --numInFlightStores;
962}
963
964bool
965TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
966 const GraphNode* new_node) const
967{
968 uint16_t num_in_flight_nodes;
969 if (inFlightNodes.empty()) {
970 num_in_flight_nodes = 0;
971 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
972 " #in-flight nodes = 0", new_node->seqNum);
973 } else if (new_node->robNum > oldestInFlightRobNum) {
974 // This is the intuitive case where the new dep-free node is a younger
975 // instruction than the oldest instruction in flight. Thus we make sure
976 // num_in_flight_nodes does not overflow.
977 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
978 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
979 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
980 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
981 } else {
982 // This is the case where an instruction older than the oldest in-
983 // flight instruction becomes dep-free. Thus we must have already
984 // accounted for the entry in ROB for this new dep-free node.
985 // Immediately after this check returns true, oldestInFlightRobNum will
986 // be updated in occupy(). We simply let this node issue now.
987 num_in_flight_nodes = 0;
988 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
989 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
990 new_node->seqNum, new_node->robNum);
991 }
992 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
993 numInFlightLoads, sizeLoadBuffer,
994 numInFlightStores, sizeStoreBuffer);
995 // Check if resources are available to issue the specific node
996 if (num_in_flight_nodes >= sizeROB) {
997 return false;
998 }
999 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
1000 return false;
1001 }
1002 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
1003 return false;
1004 }
1005 return true;
1006}
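// Worked example of the ROB check above (illustrative numbers): with
// sizeROB = 128, if the oldest in-flight node has robNum 100 and the new
// dependency-free node has robNum 228, then num_in_flight_nodes = 128, which
// is not less than sizeROB, so the node cannot issue until older nodes retire.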
1007
1008bool
1009TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
1010 // Return true if there is at least one read or write request in flight
1011 return (numInFlightStores != 0 || numInFlightLoads != 0);
1012}
1013
1014void
1015TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
1016 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1017 "LQ = %d/%d, SQ = %d/%d.\n",
1018 oldestInFlightRobNum,
1019 numInFlightLoads, sizeLoadBuffer,
1020 numInFlightStores, sizeStoreBuffer);
1021}
1022
1023void
1024TraceCPU::FixedRetryGen::regStats()
1025{
1026 using namespace Stats;
1027
1028 numSendAttempted
1029 .name(name() + ".numSendAttempted")
1030 .desc("Number of first attempts to send a request")
1031 ;
1032
1033 numSendSucceeded
1034 .name(name() + ".numSendSucceeded")
1035 .desc("Number of successful first attempts")
1036 ;
1037
1038 numSendFailed
1039 .name(name() + ".numSendFailed")
1040 .desc("Number of failed first attempts")
1041 ;
1042
1043 numRetrySucceeded
1044 .name(name() + ".numRetrySucceeded")
1045 .desc("Number of successful retries")
1046 ;
1047
1048 instLastTick
1049 .name(name() + ".instLastTick")
1050 .desc("Last tick simulated from the fixed inst trace")
1051 ;
1052}
1053
1054Tick
1055TraceCPU::FixedRetryGen::init()
1056{
1057 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1058 " IcacheGen: fixed issue with retry.\n");
1059
1060 if (nextExecute()) {
1061 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1062 return currElement.tick;
1063 } else {
1064 panic("Read of first message in the trace failed.\n");
1065 return MaxTick;
1066 }
1067}
1068
1069bool
1070TraceCPU::FixedRetryGen::tryNext()
1071{
1072 // If there is a retry packet, try to send it
1073 if (retryPkt) {
1074
1075 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1076
1077 if (!port.sendTimingReq(retryPkt)) {
1078 // Still blocked! This should never occur.
1079 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1080 return false;
1081 }
1082 ++numRetrySucceeded;
1083 } else {
1084
1085 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1086
1087 // try sending current element
1088 assert(currElement.isValid());
1089
1090 ++numSendAttempted;
1091
1092 if (!send(currElement.addr, currElement.blocksize,
1093 currElement.cmd, currElement.flags, currElement.pc)) {
1094 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1095 ++numSendFailed;
1096 // return false to indicate not to schedule next event
1097 return false;
1098 } else {
1099 ++numSendSucceeded;
1100 }
1101 }
1102 // If packet was sent successfully, either retryPkt or currElement, return
1103 // true to indicate to schedule event at current Tick plus delta. If packet
1104 // was sent successfully and there is no next packet to send, return false.
1105 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1106 "element.\n");
1107 retryPkt = nullptr;
1108 // Read the next element into currElement. Since currElement is cleared
1109 // by the read, save its tick first so that the delta can be calculated
1110 Tick last_tick = currElement.tick;
1111 if (nextExecute()) {
1112 assert(currElement.tick >= last_tick);
1113 delta = currElement.tick - last_tick;
1114 }
1115 return !traceComplete;
1116}
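// Illustration of the delta computed above (illustrative ticks): if the
// element just sent had tick 5000 and the next element read has tick 5400,
// delta = 400; the owner then uses tickDelta() to schedule the next icache
// event that many ticks after the current tick (see schedIcacheNext()).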
1117
1118void
1119TraceCPU::FixedRetryGen::exit()
1120{
1121 trace.reset();
1122}
1123
1124bool
1125TraceCPU::FixedRetryGen::nextExecute()
1126{
1127 if (traceComplete)
1128 // We are at the end of the file, thus we have no more messages.
1129 // Return false.
1130 return false;
1131
1132
1133 //Reset the currElement to the default values
1134 currElement.clear();
1135
1136 // Read the next line to get the next message. If that fails then end of
1137 // trace has been reached and traceComplete needs to be set in addition
1138 // to returning false. If successful then next message is in currElement.
1139 if (!trace.read(&currElement)) {
1140 traceComplete = true;
1141 instLastTick = curTick();
1142 return false;
1143 }
1144
1145 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1146 currElement.cmd.isRead() ? 'r' : 'w',
1147 currElement.addr,
1148 currElement.pc,
1149 currElement.blocksize,
1150 currElement.tick);
1151
1152 return true;
1153}
1154
1155bool
1156TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1157 Request::FlagsType flags, Addr pc)
1158{
1159
1160 // Create new request
1161 Request* req = new Request(addr, size, flags, masterID);
1162 req->setPC(pc);
1163
1164 // If this is not done it triggers assert in L1 cache for invalid contextId
1165 req->setContext(ContextID(0));
1166
1167 // Embed it in a packet
1168 PacketPtr pkt = new Packet(req, cmd);
1169
1170 uint8_t* pkt_data = new uint8_t[req->getSize()];
1171 pkt->dataDynamic(pkt_data);
1172
1173 if (cmd.isWrite()) {
1174 memset(pkt_data, 0xA, req->getSize());
1175 }
1176
1177 // Call MasterPort method to send a timing request for this packet
1178 bool success = port.sendTimingReq(pkt);
1179 if (!success) {
1180 // If it fails, save the packet to retry when a retry is signalled by
1181 // the cache
1182 retryPkt = pkt;
1183 }
1184 return success;
1185}
1186
1187void
1188TraceCPU::icacheRetryRecvd()
1189{
1190 // Schedule an event to go through the control flow in the same tick as
1191 // retry is received
1192 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1193 " event @%lli.\n", curTick());
1194 schedule(icacheNextEvent, curTick());
1195}
1196
1197void
1198TraceCPU::dcacheRetryRecvd()
1199{
1200 // Schedule an event to go through the execute flow in the same tick as
1201 // retry is received
1202 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1203 " event @%lli.\n", curTick());
1204 schedule(dcacheNextEvent, curTick());
1205}
1206
1207void
1208TraceCPU::schedDcacheNextEvent(Tick when)
1209{
1210 if (!dcacheNextEvent.scheduled()) {
1211 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1212 when);
1213 schedule(dcacheNextEvent, when);
1214 ++numSchedDcacheEvent;
1215 } else if (when < dcacheNextEvent.when()) {
1216 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1217 " to %lli.\n", dcacheNextEvent.when(), when);
1218 reschedule(dcacheNextEvent, when);
1219 }
1220
1221}
1222
1223bool
1224TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1225{
1226 // All responses on the instruction fetch side are ignored. Simply delete
1227 // the request and packet to free allocated memory
1228 delete pkt->req;
1229 delete pkt;
1230
1231 return true;
1232}
1233
1234void
1235TraceCPU::IcachePort::recvReqRetry()
1236{
1237 owner->icacheRetryRecvd();
1238}
1239
1240void
1241TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1242{
1243 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1244 dcacheGen.completeMemAccess(pkt);
1245}
1246
1247bool
1248TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1249{
1250 // Handle the responses for data memory requests which is done inside the
1251 // elastic data generator
1252 owner->dcacheRecvTimingResp(pkt);
1253 // After processing the response delete the request and packet to free
1254 // memory
1255 delete pkt->req;
1256 delete pkt;
1257
1258 return true;
1259}
1260
1261void
1262TraceCPU::DcachePort::recvReqRetry()
1263{
1264 owner->dcacheRetryRecvd();
1265}
1266
1267TraceCPU::ElasticDataGen::InputStream::InputStream(
1268 const std::string& filename,
1269 const double time_multiplier)
1270 : trace(filename),
1271 timeMultiplier(time_multiplier),
1272 microOpCount(0)
1273{
1274 // Create a protobuf message for the header and read it from the stream
1275 ProtoMessage::InstDepRecordHeader header_msg;
1276 if (!trace.read(header_msg)) {
1277 panic("Failed to read packet header from %s\n", filename);
1278 }
1279
1280 if (header_msg.tick_freq() != SimClock::Frequency) {
1281 panic("Trace %s was recorded with a different tick frequency %d\n",
1282 filename, header_msg.tick_freq());
1283 }
1284
1285 // Assign window size equal to the field in the trace that was recorded
1286 // when the data dependency trace was captured in the o3cpu model
1287 windowSize = header_msg.window_size();
1288}
1289
1290void
1291TraceCPU::ElasticDataGen::InputStream::reset()
1292{
1293 trace.reset();
1294}
1295
1296bool
1297TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1298{
1299 ProtoMessage::InstDepRecord pkt_msg;
1300 if (trace.read(pkt_msg)) {
1301 // Required fields
1302 element->seqNum = pkt_msg.seq_num();
1303 element->type = pkt_msg.type();
1304 // Scale the compute delay to effectively scale the Trace CPU frequency
1305 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1306
1307 // Repeated field robDepList
1308 element->clearRobDep();
1309 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1310 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1311 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1312 element->numRobDep += 1;
1313 }
1314
1315 // Repeated field
1316 element->clearRegDep();
1317 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1318 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1319 // There is a possibility that an instruction has both a register
1320 // and an order dependency on another instruction. In such a case,
1321 // the register dependency is omitted
1322 bool duplicate = false;
1323 for (int j = 0; j < element->numRobDep; j++) {
1324 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1325 }
1326 if (!duplicate) {
1327 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1328 element->numRegDep += 1;
1329 }
1330 }
1331
1332 // Optional fields
1333 if (pkt_msg.has_p_addr())
1334 element->physAddr = pkt_msg.p_addr();
1335 else
1336 element->physAddr = 0;
1337
1338 if (pkt_msg.has_v_addr())
1339 element->virtAddr = pkt_msg.v_addr();
1340 else
1341 element->virtAddr = 0;
1342
1343 if (pkt_msg.has_asid())
1344 element->asid = pkt_msg.asid();
1345 else
1346 element->asid = 0;
1347
1348 if (pkt_msg.has_size())
1349 element->size = pkt_msg.size();
1350 else
1351 element->size = 0;
1352
1353 if (pkt_msg.has_flags())
1354 element->flags = pkt_msg.flags();
1355 else
1356 element->flags = 0;
1357
1358 if (pkt_msg.has_pc())
1359 element->pc = pkt_msg.pc();
1360 else
1361 element->pc = 0;
1362
1363 // ROB occupancy number
1364 ++microOpCount;
1365 if (pkt_msg.has_weight()) {
1366 microOpCount += pkt_msg.weight();
1367 }
1368 element->robNum = microOpCount;
1369 return true;
1370 }
1371
1372 // We have reached the end of the file
1373 return false;
1374}
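// Note on the robNum accounting above: microOpCount advances by one per
// record plus the record's optional weight, and that running count becomes
// the node's robNum; e.g. a record with weight 3 moves microOpCount from
// 10 to 14 (illustrative values) and the node gets robNum 14.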
1375
1376bool
1377TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1378{
1379 for (auto& own_reg_dep : regDep) {
1380 if (own_reg_dep == reg_dep) {
1381 // If register dependency is found, make it zero and return true
1382 own_reg_dep = 0;
1383 assert(numRegDep > 0);
1384 --numRegDep;
1385 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1386 "done.\n", seqNum, reg_dep);
1387 return true;
1388 }
1389 }
1390
1391 // Return false if the dependency is not found
1392 return false;
1393}
1394
1395bool
1396TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1397{
1398 for (auto& own_rob_dep : robDep) {
1399 if (own_rob_dep == rob_dep) {
1400 // If the rob dependency is found, make it zero and return true
1401 own_rob_dep = 0;
1402 assert(numRobDep > 0);
1403 --numRobDep;
1404 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1405 "done.\n", seqNum, rob_dep);
1406 return true;
1407 }
1408 }
1409 return false;
1410}
1411
1412void
1413TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1414 for (auto& own_reg_dep : regDep) {
1415 own_reg_dep = 0;
1416 }
1417 numRegDep = 0;
1418}
1419
1420void
1421TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1422 for (auto& own_rob_dep : robDep) {
1423 own_rob_dep = 0;
1424 }
1425 numRobDep = 0;
1426}
1427
1428bool
1429TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1430{
1431 // If it is an rob dependency then remove it
1432 if (!removeRobDep(done_seq_num)) {
1433 // If it is not an rob dependency then it must be a register dependency
1434 // If the register dependency is not found, it violates an assumption
1435 // and must be caught by assert.
1436 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1437 assert(regdep_found);
1438 }
1439 // Return true if the node is dependency free
1440 return (numRobDep == 0 && numRegDep == 0);
1441}
1442
1443void
1444TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1445{
1446 DPRINTFR(TraceCPUData, "%lli", seqNum);
1447 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1448 if (isLoad() || isStore()) {
1449 DPRINTFR(TraceCPUData, ",%i", physAddr);
1450 DPRINTFR(TraceCPUData, ",%i", size);
1451 DPRINTFR(TraceCPUData, ",%i", flags);
1452 }
1453 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1454 int i = 0;
1455 DPRINTFR(TraceCPUData, "robDep:");
1456 while (robDep[i] != 0) {
1457 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1458 i++;
1459 }
1460 i = 0;
1461 DPRINTFR(TraceCPUData, "regDep:");
1462 while (regDep[i] != 0) {
1463 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1464 i++;
1465 }
1466 auto child_itr = dependents.begin();
1467 DPRINTFR(TraceCPUData, "dependents:");
1468 while (child_itr != dependents.end()) {
1469 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1470 child_itr++;
1471 }
1472
1473 DPRINTFR(TraceCPUData, "\n");
1474}
1475
1476std::string
1477TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1478{
1479 return Record::RecordType_Name(type);
1480}
1481
1482TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1483 : trace(filename)
1484{
1485 // Create a protobuf message for the header and read it from the stream
1486 ProtoMessage::PacketHeader header_msg;
1487 if (!trace.read(header_msg)) {
1488 panic("Failed to read packet header from %s\n", filename);
1489 }
1490
1491 if (header_msg.tick_freq() != SimClock::Frequency) {
1492 panic("Trace %s was recorded with a different tick frequency %d\n",
1493 filename, header_msg.tick_freq());
1494 }
1495}
1496
1497void
1498TraceCPU::FixedRetryGen::InputStream::reset()
1499{
1500 trace.reset();
1501}
1502
1503bool
1504TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1505{
1506 ProtoMessage::Packet pkt_msg;
1507 if (trace.read(pkt_msg)) {
1508 element->cmd = pkt_msg.cmd();
1509 element->addr = pkt_msg.addr();
1510 element->blocksize = pkt_msg.size();
1511 element->tick = pkt_msg.tick();
1512 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1513 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1514 return true;
1515 }
1516
1517 // We have reached the end of the file
1518 return false;
1519}