// trace_cpu.cc (gem5, revision 11252)
/*
 * Copyright (c) 2013 - 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for the number of Trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    : BaseCPU(params),
      icachePort(this),
      dcachePort(this),
      instMasterID(params->system->getMasterId(name() + ".inst")),
      dataMasterID(params->system->getMasterId(name() + ".data")),
      instTraceFile(params->instTraceFile),
      dataTraceFile(params->dataTraceFile),
      icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
      dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                params->sizeROB, params->sizeStoreBuffer,
                params->sizeLoadBuffer),
      icacheNextEvent(this),
      dcacheNextEvent(this),
      oneTraceComplete(false),
      firstFetchTick(0),
      execCompleteEvent(nullptr)
{
    // Increment the static counter for the number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for the sizes of ROB, store buffer
    // and load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
             "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
             "to %d exceeds the max. value of %d.\n",
             params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
             " %d exceeds the max. value of %d.\n",
             params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request and schedule
    // icacheNextEvent at that tick.
    Tick first_icache_tick = icacheGen.init();
    schedule(icacheNextEvent, first_icache_tick);

    // Get the send tick of the first data read/write request and schedule
    // dcacheNextEvent at that tick.
    Tick first_dcache_tick = dcacheGen.init();
    schedule(dcacheNextEvent, first_dcache_tick);

    // The static counter for the number of Trace CPUs is correctly set at
    // this point so create an event and pass it.
    execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                             numTraceCPUs);
    // Save the first fetch request tick to dump it as tickOffset.
    firstFetchTick = first_icache_tick;
}
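// Illustrative note (added, not in the original source): execCompleteEvent
// implements the shared exit logic. With, say, numTraceCPUs == 2, each Trace
// CPU schedules its CountedExitEvent once when both of its traces finish;
// the event decrements the shared counter, so the simulation only exits
// after the second (last) Trace CPU has scheduled it.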
void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If the packet was sent successfully, schedule the next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because
        // sending failed and the next event will be scheduled via
        // recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If
            // it is already set then both traces are complete and we can
            // exit the simulation.
            checkAndSchedExitEvent();
        }
    }
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());

        // Record stats which are computed at the end of simulation
        tickOffset = firstFetchTick;
        numCycles = (clockEdge() - firstFetchTick) / clockPeriod();
        numOps = dcacheGen.getMicroOpCount();
        schedule(*execCompleteEvent, curTick());
    }
}

void
TraceCPU::regStats()
{

    BaseCPU::regStats();

    numSchedDcacheEvent
        .name(name() + ".numSchedDcacheEvent")
        .desc("Number of events scheduled to trigger data request generator")
        ;

    numSchedIcacheEvent
        .name(name() + ".numSchedIcacheEvent")
        .desc("Number of events scheduled to trigger instruction request "
              "generator")
        ;

    numOps
        .name(name() + ".numOps")
        .desc("Number of micro-ops simulated by the Trace CPU")
        ;

    cpi
        .name(name() + ".cpi")
        .desc("Cycles per micro-op used as a proxy for CPI")
        .precision(6)
        ;
    cpi = numCycles / numOps;

    tickOffset
        .name(name() + ".tickOffset")
        .desc("The first execution tick for the root node of elastic traces")
        ;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
        .name(name() + ".maxDependents")
        .desc("Max number of dependents observed on a node")
        ;

    maxReadyListSize
        .name(name() + ".maxReadyListSize")
        .desc("Max size of the ready list observed")
        ;

    numSendAttempted
        .name(name() + ".numSendAttempted")
        .desc("Number of first attempts to send a request")
        ;

    numSendSucceeded
        .name(name() + ".numSendSucceeded")
        .desc("Number of successful first attempts")
        ;

    numSendFailed
        .name(name() + ".numSendFailed")
        .desc("Number of failed first attempts")
        ;

    numRetrySucceeded
        .name(name() + ".numRetrySucceeded")
        .desc("Number of successful retries")
        ;

    numSplitReqs
        .name(name() + ".numSplitReqs")
        .desc("Number of split requests")
        ;

    numSOLoads
        .name(name() + ".numSOLoads")
        .desc("Number of strictly ordered loads")
        ;

    numSOStores
        .name(name() + ".numSOStores")
        .desc("Number of strictly ordered stores")
        ;

    dataLastTick
        .name(name() + ".dataLastTick")
        .desc("Last tick simulated from the elastic data trace")
        ;
}
Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node "
            "%lli is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute().
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{

    // Read and add the next window of nodes
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then the
        // end of trace has been reached and traceComplete needs to be set
        // in addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            delete new_node;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the
        // parent nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}
template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                          T& dep_array, uint8_t& num_dep)
{
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts,
                                             maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider the
            // execution of the parent complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}
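// Illustrative example (added, not in the original source): suppose a newly
// read node has robDep = {42, 57, 0, ...}. The loop above links it as a
// dependent of nodes 42 and 57 if they are still in depGraph. If node 42
// has already completed and left the graph, its entry is zeroed and num_dep
// drops, so the new node becomes dependency-free as soon as 57 completes.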
void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read the next window to make sure that dependents of all dep-free
    // nodes are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                    "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the end of readyList is reached or
    // the next free node has its execute tick later than curTick. Note that
    // the end-of-list check must come first to avoid dereferencing the end
    // iterator.
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that, else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails,
            // executeMemReq() returns a packet pointer, which we save in
            // retryPkt. In case of a comp node we don't do anything and
            // simply continue as if the execution of the comp node
            // succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from
        // here as a retry from cache will bring the control back to
        // execute(). The first node in readyList will then be the failed
        // node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed
        // node. If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // after the load is sent but after its response is received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as
            // complete as we do not send a request for this case. If it is
            // a store or a comp node we also mark all its dependents
            // complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency
    // window then read from the trace file to populate the graph next time
    // we are in execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If the cache is not blocked, schedule an event for the first execTick
    // in readyList else a retry from cache will schedule the event. If the
    // readyList is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next
    // cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If the trace is completely read, readyList is empty and depGraph is
    // empty, set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}
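// Illustrative retry flow (added note, sequence number hypothetical): if the
// request for node 88 fails in executeMemReq(), the packet is kept in
// retryPkt and node 88 stays at the head of readyList; no new event is
// scheduled. When the cache later calls DcachePort::recvReqRetry(),
// dcacheRetryRecvd() schedules dcacheNextEvent at curTick(), execute() runs
// again, and the retryPkt branch above resends the same packet before any
// younger node is allowed to issue.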
PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{

    DPRINTF(TraceCPUData, "Executing memory request %lli (addr %d, pc %#x, "
            "size %d, flags %d).\n", node_ptr->seqNum, node_ptr->addr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return
    // nullptr as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting
    // such a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->addr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }

    // Create a request and the packet containing the request
    Request* req = new Request(node_ptr->addr, node_ptr->size,
                               node_ptr->flags, masterID, node_ptr->seqNum,
                               ContextID(0), ThreadID(0));
    req->setPC(node_ptr->pc);
    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call the MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}
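// Worked example of the truncation above (added note, illustrative values):
// with a 64-byte cache line, an 8-byte access at addr 0x107C has
// blk_offset = 0x107C & 0x3F = 60, and 60 + 8 > 64, so the access is
// truncated to 64 - 60 = 4 bytes and numSplitReqs is incremented; the
// remaining 4 bytes on the next line are simply not requested.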
bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr,
                                        bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is "
                 "now dependency free.\n", node_ptr->seqNum,
                 node_ptr->typeToStr(), node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
                 "Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;

    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to
        // schedule an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on
        // it. Get pointer to the completed load.
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking"
                " up dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency
    // window then read from the trace file to populate the graph next time
    // we are in execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have
        // execute tick greater than or equal to curTick. But a new dep-free
        // node might have its execute tick earlier. Therefore, attempt to
        // reschedule. It could happen that the readyList is empty and we
        // got here via a last remaining response. So, either the trace is
        // complete or there are pending nodes in the depFreeQueue. The
        // checking is done in the execute() control flow, so schedule an
        // event to go via that flow.
        Tick next_event_tick = readyList.empty() ?
            owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick,
                     owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}
void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the
    // beginning and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                            maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one that failed to execute, i.e.
    // it created the pending retryPkt, skip past it so that its position at
    // the head of the list is maintained.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the
    // new node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node
        // then go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                        maxReadyListSize.value());
}
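// Worked example (added note, illustrative values): inserting nodes with
// (seqNum, execTick) pairs (12, 1000), (10, 1500), (14, 1000) in that order
// yields readyList = [(12, 1000), (14, 1000), (10, 1500)]: the primary sort
// key is execTick, with ties broken by ascending seqNum. If node 12 had a
// pending retryPkt, even a new node with an earlier tick would be inserted
// behind it, keeping the failed node at the head for resending.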
void
TraceCPU::ElasticDataGen::printReadyList()
{
    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
                 node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
    : sizeROB(max_rob),
      sizeStoreBuffer(max_stores),
      sizeLoadBuffer(max_loads),
      oldestInFlightRobNum(UINT64_MAX),
      numInFlightLoads(0),
      numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy the ROB entry for the issued node. Merely maintain the oldest
    // node, i.e. the numerically least robNum, by saving it in the variable
    // oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy a Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(
    const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from "
             "inFlightNodes..\n", done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we delete the only in-flight node then oldestInFlightRobNum is
        // set back to its initialized (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node
        // in the inFlightNodes since that will have the numerically least
        // value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
             "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
             oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus the ROB
    // entry is freed. But it occupies an entry in the Store Buffer until
    // its response is received. A load is considered complete when a
    // response is received, thus both ROB and Load Buffer entries can be
    // released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release() which is called when
    // the node is executed and taken off the readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}
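// Timeline sketch (added note): for a normal cacheable store, occupy()
// takes a ROB entry and a store buffer entry at issue; release() frees the
// ROB entry once the request is sent, while releaseStoreBuffer() frees the
// store buffer entry only when the write response returns via
// completeMemAccess(). For a load, both the ROB and load buffer entries are
// held until the read response is received.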
bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num "
                 "%lli: #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a
        // younger instruction than the oldest instruction in-flight. Thus
        // we make sure num_in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num "
                 "%lli: #in-flight nodes = %d - %d = %d", new_node->seqNum,
                 new_node->robNum, oldestInFlightRobNum,
                 num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in the ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum
        // will be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num "
                 "%lli: new oldestInFlightRobNum = %d, #in-flight nodes "
                 "ignored", new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}
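// Worked example (added note, illustrative values): with sizeROB = 40,
// oldestInFlightRobNum = 100 and a new dep-free node with robNum = 130, the
// ROB holds 130 - 100 = 30 in-flight entries, so the check passes. A node
// with robNum = 145 would see 45 >= 40 and be held back in depFreeQueue
// until the oldest in-flight node retires and oldestInFlightRobNum moves
// up.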
bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
             "LQ = %d/%d, SQ = %d/%d.\n",
             oldestInFlightRobNum,
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
}

void
TraceCPU::FixedRetryGen::regStats()
{
    using namespace Stats;

    numSendAttempted
        .name(name() + ".numSendAttempted")
        .desc("Number of first attempts to send a request")
        ;

    numSendSucceeded
        .name(name() + ".numSendSucceeded")
        .desc("Number of successful first attempts")
        ;

    numSendFailed
        .name(name() + ".numSendFailed")
        .desc("Number of failed first attempts")
        ;

    numRetrySucceeded
        .name(name() + ".numRetrySucceeded")
        .desc("Number of successful retries")
        ;

    instLastTick
        .name(name() + ".instLastTick")
        .desc("Last tick simulated from the fixed inst trace")
        ;
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {

        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++numRetrySucceeded;
    } else {

        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                  currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++numSendSucceeded;
        }
    }
    // If the packet, either retryPkt or currElement, was sent successfully,
    // return true to indicate that an event should be scheduled at the
    // current tick plus delta. If it was sent successfully but there is no
    // next packet to send, return false.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
            "element.\n");
    retryPkt = nullptr;
    // Read the next element into currElement; currElement gets cleared so
    // save the tick to calculate the delta.
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}
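// Timing sketch (added note, illustrative values): if the element just sent
// was recorded at tick 1000 and the next trace element at tick 1500, delta
// becomes 500; schedIcacheNext() then schedules the next icacheNextEvent at
// curTick() + tickDelta(), replaying fetches with the fixed inter-request
// gaps captured in the trace.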
void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then the
    // end of trace has been reached and traceComplete needs to be set in
    // addition to returning false. If successful then the next message is
    // in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
                              Request::FlagsType flags, Addr pc)
{

    // Create new request
    Request* req = new Request(addr, size, flags, masterID);
    req->setPC(pc);

    // If this is not done it triggers an assert in the L1 cache for an
    // invalid contextId
    req->setThreadContext(ContextID(0), ThreadID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call the MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled
        // by the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // the retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // the retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }
}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply
    // delete the request and packet to free the allocated memory.
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests which is done inside
    // the elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the request and packet to free
    // memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename)
    : trace(filename),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream.
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    // The tick frequency check was previously unreachable as it sat after
    // the panic in the read-failure branch; it belongs on the success path.
    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }

    // Assign the window size equal to the field in the trace that was
    // recorded when the data dependency trace was captured in the o3cpu
    // model.
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}
bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        element->compDelay = pkt_msg.comp_delay();

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field regDepList
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both a
            // register and an order dependency on another instruction. In
            // such a case, the register dependency is omitted.
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_addr())
            element->addr = pkt_msg.addr();
        else
            element->addr = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}
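// Worked example (added note, illustrative values): if a record carries
// rob_dep = {21} and reg_dep = {21, 24}, the loop above drops the duplicate
// register dependency on 21, leaving robDep = {21} and regDep = {24}. This
// avoids counting the same parent twice, which would otherwise leave the
// node waiting on a dependency that removeDepOnInst() can only clear once.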
bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If the register dependency is found, make it zero and return
            // true
            own_reg_dep = 0;
            --numRegDep;
            assert(numRegDep >= 0);
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                     "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the ROB dependency is found, make it zero and return true
            own_rob_dep = 0;
            --numRobDep;
            assert(numRobDep >= 0);
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                     "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep()
{
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep()
{
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is a ROB dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not a ROB dependency then it must be a register
        // dependency. If the register dependency is not found, it violates
        // an assumption and must be caught by the assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", addr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(
    const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream.
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    // As in the elastic input stream above, the tick frequency check was
    // previously unreachable inside the read-failure branch.
    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}