// trace_cpu.cc revision 11253
/*
 * Copyright (c) 2013 - 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;
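// In brief, as implemented below: a TraceCPU replays two traces captured
// from a detailed CPU model. The instruction fetch trace is replayed by a
// FixedRetryGen on the icache port at the timestamps recorded in the trace,
// while the data access trace is replayed by an ElasticDataGen on the
// dcache port, honouring ROB and register dependencies as well as ROB,
// load buffer and store buffer sizes, so that replay timing adapts to the
// memory system being simulated.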
TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(name() + ".inst")),
        dataMasterID(params->system->getMasterId(name() + ".data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params->sizeROB, params->sizeStoreBuffer,
                  params->sizeLoadBuffer),
        icacheNextEvent(this),
        dcacheNextEvent(this),
        oneTraceComplete(false),
        firstFetchTick(0),
        execCompleteEvent(nullptr)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
             "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
             "to %d exceeds the max. value of %d.\n",
             params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
             " %d exceeds the max. value of %d.\n",
             params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request and schedule
    // icacheNextEvent at that tick.
    Tick first_icache_tick = icacheGen.init();
    schedule(icacheNextEvent, first_icache_tick);

    // Get the send tick of the first data read/write request and schedule
    // dcacheNextEvent at that tick.
    Tick first_dcache_tick = dcacheGen.init();
    schedule(dcacheNextEvent, first_dcache_tick);

    // The static counter for the number of Trace CPUs is correctly set at
    // this point so create an event and pass it.
    execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                             numTraceCPUs);

    // Save the first fetch request tick to dump it as tickOffset
    firstFetchTick = first_icache_tick;
}
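// Note on the exit event created above: a CountedExitEvent wraps a shared
// count, here initialised to the number of Trace CPUs in the system. Each
// TraceCPU schedules the event once when both of its traces finish, which
// decrements the count; the simulation loop exits only when the count
// reaches zero, i.e. when every TraceCPU has completed playback.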
void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because
        // sending failed and the next event will be scheduled via
        // recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If
            // it is already set then both traces are complete to exit sim.
            checkAndSchedExitEvent();
        }
    }
    return;
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());

        // Record stats which are computed at the end of simulation
        tickOffset = firstFetchTick;
        numCycles = (clockEdge() - firstFetchTick) / clockPeriod();
        numOps = dcacheGen.getMicroOpCount();
        schedule(*execCompleteEvent, curTick());
    }
}
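// Example of the handshake above: if the instruction trace finishes first,
// its call to checkAndSchedExitEvent() only sets oneTraceComplete. When the
// data trace finishes later, the second call records the end-of-run stats
// and schedules execCompleteEvent, signalling this TraceCPU's completion.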
void
TraceCPU::regStats()
{

    BaseCPU::regStats();

    numSchedDcacheEvent
        .name(name() + ".numSchedDcacheEvent")
        .desc("Number of events scheduled to trigger data request generator")
        ;

    numSchedIcacheEvent
        .name(name() + ".numSchedIcacheEvent")
        .desc("Number of events scheduled to trigger instruction request "
              "generator")
        ;

    numOps
        .name(name() + ".numOps")
        .desc("Number of micro-ops simulated by the Trace CPU")
        ;

    cpi
        .name(name() + ".cpi")
        .desc("Cycles per micro-op used as a proxy for CPI")
        .precision(6)
        ;
    cpi = numCycles / numOps;

    tickOffset
        .name(name() + ".tickOffset")
        .desc("The first execution tick for the root node of elastic traces")
        ;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
        .name(name() + ".maxDependents")
        .desc("Max number of dependents observed on a node")
        ;

    maxReadyListSize
        .name(name() + ".maxReadyListSize")
        .desc("Max size of the ready list observed")
        ;

    numSendAttempted
        .name(name() + ".numSendAttempted")
        .desc("Number of first attempts to send a request")
        ;

    numSendSucceeded
        .name(name() + ".numSendSucceeded")
        .desc("Number of successful first attempts")
        ;

    numSendFailed
        .name(name() + ".numSendFailed")
        .desc("Number of failed first attempts")
        ;

    numRetrySucceeded
        .name(name() + ".numRetrySucceeded")
        .desc("Number of successful retries")
        ;

    numSplitReqs
        .name(name() + ".numSplitReqs")
        .desc("Number of split requests")
        ;

    numSOLoads
        .name(name() + ".numSOLoads")
        .desc("Number of strictly ordered loads")
        ;

    numSOStores
        .name(name() + ".numSOStores")
        .desc("Number of strictly ordered stores")
        ;

    dataLastTick
        .name(name() + ".dataLastTick")
        .desc("Last tick simulated from the elastic data trace")
        ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency-free node "
            "%lli is %d.\n", free_itr->seqNum, free_itr->execTick);

    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{

    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then the
        // end of trace has been reached and traceComplete needs to be set
        // in addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the
        // parent nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}
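// Worked example of the windowing above, with illustrative numbers (the
// real windowSize comes from the trace header): with windowSize = 100,
// init() reads two windows so depGraph holds 200 nodes before execution
// starts. Whenever the graph drains below 100 nodes, the nextRead flag in
// execute() triggers another read of 100 records. This keeps the
// dependents of every dependency-free node resident in depGraph.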
template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                          T& dep_array, uint8_t& num_dep)
{
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts,
                                             maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider the
            // execution of the parent complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read the next window to make sure that dependents of all dep-free
    // nodes are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                    "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }

    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the end of the list is reached or the
    // next free node has its execute tick later than curTick. The end check
    // comes first so that the iterator is never dereferenced at the end.
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that, else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails,
            // executeMemReq() returns a packet pointer, which we save in
            // retryPkt. In case of a comp node we don't do anything and
            // simply continue as if the execution of the comp node
            // succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from
        // here as a retry from the cache will bring the control back to
        // execute(). The first node in readyList then will be the failed
        // node.
        if (retryPkt) {
            break;
        }
        // Proceed to remove dependencies for the successfully executed
        // node. If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need to
        // mark ROB dependencies of load and non load/store nodes which are
        // based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // The ROB dependency of a store on a load must not be
                // removed after the load is sent but after its response is
                // received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency-free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to the
                    // new location of the element following the erased
                    // element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as
            // complete as we do not send a request for this case. If it is
            // a store or a comp node we also mark all its dependents
            // complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency-free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove it from readyList and delete it.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // it just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to the first node to continue to the next iteration of the
        // while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency
    // window then read from the trace file to populate the graph next time
    // we are in execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If the cache is not blocked, schedule an event for the first execTick
    // in readyList; else a retry from the cache will schedule the event. If
    // the ready list is empty then check if the next pending node has
    // resources available to issue. If yes, then schedule an event for the
    // next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If the trace is completely read, readyList is empty and depGraph is
    // empty, set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}
PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{

    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return
    // nullptr as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting
    // such a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }

    // Create a request and the packet containing the request
    Request* req = new Request(node_ptr->physAddr, node_ptr->size,
                               node_ptr->flags, masterID, node_ptr->seqNum,
                               ContextID(0), ThreadID(0));
    req->setPC(node_ptr->pc);
    // If the virtual address is valid, set the asid and virtual address
    // fields of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                     node_ptr->flags, masterID, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call the MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}
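// Example of the truncation above, with illustrative numbers (the real
// line size comes from owner.cacheLineSize()): with a 64-byte cache line,
// an 8-byte access at block offset 60 would cross into the next line, so
// its size is truncated to 4 bytes (60 + 4 == 64) and numSplitReqs is
// incremented.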
bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is "
                 "now dependency free.\n", node_ptr->seqNum,
                 node_ptr->typeToStr(), node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
                 "Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;

    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}
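// Issue flow in brief: a node whose dependencies are all complete either
// goes straight to readyList via checkAndIssue() above (resources free), or
// is parked in depFreeQueue on its first attempt. execute() drains
// depFreeQueue in FIFO order at the start of every invocation, so
// resource-blocked nodes issue as soon as ROB, load buffer or store buffer
// entries free up.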
void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider the store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to
        // schedule an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on
        // it. Get a pointer to the completed load.
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking"
                " up dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency
    // window then read from the trace file to populate the graph next time
    // we are in execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for a retry, attempt to schedule the next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have
        // their execute tick greater than or equal to curTick. But a new
        // dep-free node might have its execute tick earlier. Therefore,
        // attempt to reschedule. It could happen that the readyList is
        // empty and we got here via the last remaining response. So, either
        // the trace is complete or there are pending nodes in the
        // depFreeQueue. The checking is done in the execute() control flow,
        // so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ?
            owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the
    // beginning and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                            maxReadyListSize.value());
        return;
    }

    // If the first node in the list failed to execute (it has a pending
    // retry packet), its position as the first node is maintained: skip
    // past it before comparing execution ticks.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the
    // new node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to the next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node
        // then go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for the max size reached by the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                        maxReadyListSize.value());
}
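// Example of the ordering above: readyList is kept sorted by
// (execTick, seqNum). Inserting (seq 10, tick 100) into the list
// [(seq 4, tick 100), (seq 2, tick 105)] yields
// [(seq 4, tick 100), (seq 10, tick 100), (seq 2, tick 105)]: equal ticks
// are broken by ascending sequence number, and a failed head node with a
// pending retry packet is never displaced from the front.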
void
TraceCPU::ElasticDataGen::printReadyList() {

    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
                 node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
    : sizeROB(max_rob),
      sizeStoreBuffer(max_stores),
      sizeLoadBuffer(max_loads),
      oldestInFlightRobNum(UINT64_MAX),
      numInFlightLoads(0),
      numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy an ROB entry for the issued node.
    // Merely maintain the oldest node, i.e. the numerically least robNum,
    // by saving it in the variable oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy a Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from "
             "inFlightNodes..\n", done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we deleted the only in-flight node, then oldestInFlightRobNum
        // is reset to its initialized (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node's rob number equal to the first
        // node in inFlightNodes since that will have the numerically least
        // value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
             "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
             oldestInFlightRobNum);

    // A store is considered complete when its request is sent, at which
    // point its ROB entry is freed. But it occupies an entry in the Store
    // Buffer until its response is received. A load is considered complete
    // when a response is received, thus both ROB and Load Buffer entries
    // can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release() which is called when
    // the node is executed and taken off the readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}
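// Worked example for the ROB occupancy check in isAvailable() below, with
// illustrative numbers: suppose oldestInFlightRobNum = 1000 and a dep-free
// node has robNum = 1040. Then 40 ROB entries are counted as in flight;
// with sizeROB = 32 the node is held back, while with sizeROB = 64 it may
// issue, subject to the load/store buffer checks.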
bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num "
                 "%lli: #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a
        // younger instruction than the oldest instruction in flight. Thus
        // we make sure num_in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num "
                 "%lli: #in-flight nodes = %d - %d = %d", new_node->seqNum,
                 new_node->robNum, oldestInFlightRobNum,
                 num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in the ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum
        // will be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num "
                 "%lli: new oldestInFlightRobNum = %d, #in-flight nodes "
                 "ignored", new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
             "LQ = %d/%d, SQ = %d/%d.\n",
             oldestInFlightRobNum,
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
}

void
TraceCPU::FixedRetryGen::regStats()
{
    using namespace Stats;

    numSendAttempted
        .name(name() + ".numSendAttempted")
        .desc("Number of first attempts to send a request")
        ;

    numSendSucceeded
        .name(name() + ".numSendSucceeded")
        .desc("Number of successful first attempts")
        ;

    numSendFailed
        .name(name() + ".numSendFailed")
        .desc("Number of failed first attempts")
        ;

    numRetrySucceeded
        .name(name() + ".numRetrySucceeded")
        .desc("Number of successful retries")
        ;

    instLastTick
        .name(name() + ".instLastTick")
        .desc("Last tick simulated from the fixed inst trace")
        ;
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}
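// Contrast with ElasticDataGen: FixedRetryGen replays the instruction-side
// trace at the timestamps recorded in it ("fixed issue"), only stretching
// time when the cache refuses a packet and a retry is needed. There is no
// dependency tracking on this side; responses are simply discarded in
// IcachePort::recvTimingResp() further below.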
bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {

        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++numRetrySucceeded;
    } else {

        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // Try sending the current element
        assert(currElement.isValid());

        ++numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                  currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++numSendFailed;
            // Return false to indicate not to schedule the next event
            return false;
        } else {
            ++numSendSucceeded;
        }
    }
    // The packet, either retryPkt or currElement, was sent successfully.
    // Return true to indicate that an event should be scheduled at the
    // current tick plus delta. If there is no next packet to send, return
    // false instead.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
            "element.\n");
    retryPkt = nullptr;
    // Read the next element into currElement; currElement gets cleared so
    // save the tick to calculate the delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then the
    // end of trace has been reached and traceComplete needs to be set in
    // addition to returning false. If successful then the next message is
    // in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
                              Request::FlagsType flags, Addr pc)
{

    // Create a new request
    Request* req = new Request(addr, size, flags, masterID);
    req->setPC(pc);

    // If this is not done it triggers an assert in the L1 cache for an
    // invalid contextId
    req->setThreadContext(ContextID(0), ThreadID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call the MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled
        // by the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // the retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}
void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // the retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }

}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply
    // delete the request and packet to free the allocated memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests, which is done inside
    // the elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the request and packet to free
    // memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(const std::string& filename)
    : trace(filename),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    // Check that the trace was recorded at the same tick frequency as this
    // simulation is running at.
    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }

    // Assign the window size equal to the field in the trace that was
    // recorded when the data dependency trace was captured in the o3cpu
    // model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}
bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        element->compDelay = pkt_msg.comp_delay();

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field regDepList
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both a
            // register and an order dependency on another instruction. In
            // such a case, the register dependency is omitted.
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_asid())
            element->asid = pkt_msg.asid();
        else
            element->asid = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If the register dependency is found, zero it and return true
            own_reg_dep = 0;
            --numRegDep;
            assert(numRegDep >= 0);
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                     "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the ROB dependency is found, zero it and return true
            own_rob_dep = 0;
            --numRobDep;
            assert(numRobDep >= 0);
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                     "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}
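// Example for removeDepOnInst() below: a node with robDep = {5, 0, ...}
// and regDep = {3, 7, 0, ...} has three outstanding dependencies. A call
// with done_seq_num = 5 zeroes the ROB entry and returns false since two
// register dependencies remain; subsequent calls with 3 and then 7 clear
// those, and the final call returns true, marking the node dependency-free.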
bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is an ROB dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not an ROB dependency then it must be a register
        // dependency. If the register dependency is not found, it violates
        // an assumption and must be caught by the assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency-free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    // Check that the trace was recorded at the same tick frequency as this
    // simulation is running at.
    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}