trace_cpu.cc revision 11633
/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(name() + ".inst")),
        dataMasterID(params->system->getMasterId(name() + ".data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params),
        icacheNextEvent(this),
        dcacheNextEvent(this),
        oneTraceComplete(false),
        traceOffset(0),
        execCompleteEvent(nullptr),
        enableEarlyExit(params->enableEarlyExit)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
             "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
             "to %d exceeds the max. value of %d.\n",
             params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
             " %d exceeds the max. value of %d.\n",
             params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request
    Tick first_icache_tick = icacheGen.init();

    // Get the send tick of the first data read/write request
    Tick first_dcache_tick = dcacheGen.init();

    // Set the trace offset as the minimum of that in both traces
    traceOffset = std::min(first_icache_tick, first_dcache_tick);
    inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
           name(), traceOffset);

    // Schedule next icache and dcache event by subtracting the offset
    schedule(icacheNextEvent, first_icache_tick - traceOffset);
    schedule(dcacheNextEvent, first_dcache_tick - traceOffset);

    // Adjust the trace offset for the dcache generator's ready nodes
    // We don't need to do this for the icache generator as it will
    // send its first request at the first event and schedule subsequent
    // events using a relative tick delta
    dcacheGen.adjustInitTraceOffset(traceOffset);

    // If the Trace CPU simulation is configured to exit on any one trace
    // completion then we don't need a counted event to count down all Trace
    // CPUs in the system. If not then instantiate a counted event.
    if (!enableEarlyExit) {
        // The static counter for number of Trace CPUs is correctly set at
        // this point so create an event and pass it.
        execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                 numTraceCPUs);
    }

}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // check if traceComplete. If not, do nothing because sending failed
        // and next event will be scheduled via RecvRetry()
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete to exit sim.
            checkAndSchedExitEvent();
        }
    }
    return;
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    // Update stat for numCycles
    numCycles = clockEdge() / clockPeriod();

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());
        // If the replay is configured to exit early, that is when any one
        // execution is complete then exit immediately and return. Otherwise,
        // schedule the counted exit that counts down completion of each Trace
        // CPU.
        if (enableEarlyExit) {
            exitSimLoop("End of trace reached");
        } else {
            schedule(*execCompleteEvent, curTick());
        }
    }
}

void
TraceCPU::regStats()
{

    BaseCPU::regStats();

    numSchedDcacheEvent
        .name(name() + ".numSchedDcacheEvent")
        .desc("Number of events scheduled to trigger data request generator")
        ;

    numSchedIcacheEvent
        .name(name() + ".numSchedIcacheEvent")
        .desc("Number of events scheduled to trigger instruction request generator")
        ;

    numOps
        .name(name() + ".numOps")
        .desc("Number of micro-ops simulated by the Trace CPU")
        ;

    cpi
        .name(name() + ".cpi")
        .desc("Cycles per micro-op used as a proxy for CPI")
        .precision(6)
        ;
    cpi = numCycles/numOps;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
        .name(name() + ".maxDependents")
        .desc("Max number of dependents observed on a node")
        ;

    maxReadyListSize
        .name(name() + ".maxReadyListSize")
        .desc("Max size of the ready list observed")
        ;

    numSendAttempted
        .name(name() + ".numSendAttempted")
        .desc("Number of first attempts to send a request")
        ;

    numSendSucceeded
        .name(name() + ".numSendSucceeded")
        .desc("Number of successful first attempts")
        ;

    numSendFailed
        .name(name() + ".numSendFailed")
        .desc("Number of failed first attempts")
        ;

    numRetrySucceeded
        .name(name() + ".numRetrySucceeded")
        .desc("Number of successful retries")
        ;

    numSplitReqs
        .name(name() + ".numSplitReqs")
        .desc("Number of split requests")
        ;

    numSOLoads
        .name(name() + ".numSOLoads")
        .desc("Number of strictly ordered loads")
        ;

    numSOStores
        .name(name() + ".numSOStores")
        .desc("Number of strictly ordered stores")
        ;

    dataLastTick
        .name(name() + ".dataLastTick")
        .desc("Last tick simulated from the elastic data trace")
        ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
            " is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
    for (auto& free_node : readyList) {
        free_node.execTick -= offset;
    }
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{

    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then end
        // of trace has been reached and traceComplete needs to be set in
        // addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                          T& dep_array, uint8_t& num_dep)
{
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts, maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider the
            // execution of the parent to be complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                    "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the end of readyList is reached or the
    // next free node has its execute tick later than curTick
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails,
            // executeMemReq() returns a packet pointer, which we save in
            // retryPkt. In case of a comp node we don't do anything and
            // simply continue as if the execution of the comp node
            // succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList then, will be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // after load is sent but after response is received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as
            // complete as we do not send a request for this case. If it is
            // a store or a comp node we also mark all its dependents
            // complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // Update the stat for numOps simulated
            owner.updateNumOps(node_ptr->robNum);
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are
    // in execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If cache is not blocked, schedule an event for the first execTick in
    // readyList else retry from cache will schedule the event. If the ready
    // list is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If trace is completely read, readyList is empty and depGraph is empty,
    // set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{

    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return
    // nullptr as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting
    // such a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }

    // Create a request and the packet containing request
    Request* req = new Request(node_ptr->physAddr, node_ptr->size,
                               node_ptr->flags, masterID, node_ptr->seqNum,
                               ContextID(0));
    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the asid and virtual address fields
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                     node_ptr->flags, masterID, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
                 " dependency free.\n", node_ptr->seqNum,
                 node_ptr->typeToStr(), node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli."
                 " Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;

    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to
        // schedule an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on
        // it. Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking"
                " up dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // Update the stat for numOps completed
        owner.updateNumOps(node_ptr->robNum);
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are
    // in execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have
        // execute tick greater than or equal to curTick. But a new dep-free
        // node might have its execute tick earlier. Therefore, attempt to
        // reschedule. It could happen that the readyList is empty and we
        // got here via a last remaining response. So, either the trace is
        // complete or there are pending nodes in the depFreeQueue. The
        // checking is done in the execute() control flow, so schedule an
        // event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                            maxReadyListSize.value());
        return;
    }

    // If the new node has its execution tick equal to the first node in the
    // list then go to the next node. If the first node in the list failed
    // to execute, its position as the first is thus maintained.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the
    // new node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node
        // then go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                        maxReadyListSize.value());
}

void
TraceCPU::ElasticDataGen::printReadyList() {

    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
                 node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
    : sizeROB(max_rob),
      sizeStoreBuffer(max_stores),
      sizeLoadBuffer(max_loads),
      oldestInFlightRobNum(UINT64_MAX),
      numInFlightLoads(0),
      numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy ROB entry for the issued node
    // Merely maintain the oldest node, i.e. numerically least robNum by
    // saving it in the variable oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
             done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we delete the only in-flight node then oldestInFlightRobNum
        // is reset to its initialised (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node
        // in the inFlightNodes since that will have the numerically least
        // value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
             "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
             oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus the ROB
    // entry is freed. But it occupies an entry in the Store Buffer until its
    // response is received. A load is considered complete when a response is
    // received, thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release() which is called when
    // the node is executed and taken off the readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a
        // younger instruction than the oldest instruction in-flight. Thus
        // we make sure num_in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
                 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum
        // will be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
num %lli:" 977 " new oldestInFlightRobNum = %d, #in-flight nodes ignored", 978 new_node->seqNum, new_node->robNum); 979 } 980 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n", 981 numInFlightLoads, sizeLoadBuffer, 982 numInFlightStores, sizeStoreBuffer); 983 // Check if resources are available to issue the specific node 984 if (num_in_flight_nodes >= sizeROB) { 985 return false; 986 } 987 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) { 988 return false; 989 } 990 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) { 991 return false; 992 } 993 return true; 994} 995 996bool 997TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const { 998 // Return true if there is at least one read or write request in flight 999 return (numInFlightStores != 0 || numInFlightLoads != 0); 1000} 1001 1002void 1003TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() { 1004 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, " 1005 "LQ = %d/%d, SQ = %d/%d.\n", 1006 oldestInFlightRobNum, 1007 numInFlightLoads, sizeLoadBuffer, 1008 numInFlightStores, sizeStoreBuffer); 1009} 1010 1011void 1012TraceCPU::FixedRetryGen::regStats() 1013{ 1014 using namespace Stats; 1015 1016 numSendAttempted 1017 .name(name() + ".numSendAttempted") 1018 .desc("Number of first attempts to send a request") 1019 ; 1020 1021 numSendSucceeded 1022 .name(name() + ".numSendSucceeded") 1023 .desc("Number of successful first attempts") 1024 ; 1025 1026 numSendFailed 1027 .name(name() + ".numSendFailed") 1028 .desc("Number of failed first attempts") 1029 ; 1030 1031 numRetrySucceeded 1032 .name(name() + ".numRetrySucceeded") 1033 .desc("Number of successful retries") 1034 ; 1035 1036 instLastTick 1037 .name(name() + ".instLastTick") 1038 .desc("Last tick simulated from the fixed inst trace") 1039 ; 1040} 1041 1042Tick 1043TraceCPU::FixedRetryGen::init() 1044{ 1045 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator" 1046 " IcacheGen: fixed issue with retry.\n"); 1047 1048 if (nextExecute()) { 1049 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick); 1050 return currElement.tick; 1051 } else { 1052 panic("Read of first message in the trace failed.\n"); 1053 return MaxTick; 1054 } 1055} 1056 1057bool 1058TraceCPU::FixedRetryGen::tryNext() 1059{ 1060 // If there is a retry packet, try to send it 1061 if (retryPkt) { 1062 1063 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n"); 1064 1065 if (!port.sendTimingReq(retryPkt)) { 1066 // Still blocked! This should never occur. 1067 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n"); 1068 return false; 1069 } 1070 ++numRetrySucceeded; 1071 } else { 1072 1073 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n"); 1074 1075 // try sending current element 1076 assert(currElement.isValid()); 1077 1078 ++numSendAttempted; 1079 1080 if (!send(currElement.addr, currElement.blocksize, 1081 currElement.cmd, currElement.flags, currElement.pc)) { 1082 DPRINTF(TraceCPUInst, "currElement sending failed.\n"); 1083 ++numSendFailed; 1084 // return false to indicate not to schedule next event 1085 return false; 1086 } else { 1087 ++numSendSucceeded; 1088 } 1089 } 1090 // If packet was sent successfully, either retryPkt or currElement, return 1091 // true to indicate to schedule event at current Tick plus delta. If packet 1092 // was sent successfully and there is no next packet to send, return false. 
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
            "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement, currElement gets cleared so save
    // the tick to calculate delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;


    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
                              Request::FlagsType flags, Addr pc)
{

    // Create new request
    Request* req = new Request(addr, size, flags, masterID);
    req->setPC(pc);

    // If this is not done it triggers an assert in the L1 cache for an
    // invalid contextId
    req->setContext(ContextID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }

}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the request and packet to free allocated memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests which is done inside the
    // elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the request and packet to free
    // memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename,
    const double time_multiplier)
    : trace(filename),
      timeMultiplier(time_multiplier),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }

    // Assign window size equal to the field in the trace that was recorded
    // when the data dependency trace was captured in the o3cpu model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU
        // frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both a register
            // and an order dependency on an instruction. In such a case, the
            // register dependency is omitted.
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_asid())
            element->asid = pkt_msg.asid();
        else
            element->asid = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If register dependency is found, make it zero and return true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
                     "done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the rob dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                     "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is an rob dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not an rob dependency then it must be a register
        // dependency. If the register dependency is not found, it violates
        // an assumption and must be caught by assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}