1/* 2 * Copyright (c) 2013 - 2016 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Radhika Jagtap 38 * Andreas Hansson 39 * Thomas Grass 40 */ 41 42#include "cpu/trace/trace_cpu.hh" 43 44#include "sim/sim_exit.hh" 45 46// Declare and initialize the static counter for number of trace CPUs. 47int TraceCPU::numTraceCPUs = 0; 48 49TraceCPU::TraceCPU(TraceCPUParams *params) 50 : BaseCPU(params), 51 icachePort(this), 52 dcachePort(this), 53 instMasterID(params->system->getMasterId(this, "inst")), 54 dataMasterID(params->system->getMasterId(this, "data")), 55 instTraceFile(params->instTraceFile), 56 dataTraceFile(params->dataTraceFile), 57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile), 58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile, 59 params), 60 icacheNextEvent([this]{ schedIcacheNext(); }, name()), 61 dcacheNextEvent([this]{ schedDcacheNext(); }, name()), 62 oneTraceComplete(false), 63 traceOffset(0), 64 execCompleteEvent(nullptr), 65 enableEarlyExit(params->enableEarlyExit), 66 progressMsgInterval(params->progressMsgInterval), 67 progressMsgThreshold(params->progressMsgInterval) 68{ 69 // Increment static counter for number of Trace CPUs. 70 ++TraceCPU::numTraceCPUs; 71 72 // Check that the python parameters for sizes of ROB, store buffer and 73 // load buffer do not overflow the corresponding C++ variables. 74 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the " 75 "max. value of %d.\n", params->sizeROB, UINT16_MAX); 76 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d " 77 "exceeds the max. value of %d.\n", params->sizeROB, 78 UINT16_MAX); 79 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to" 80 " %d exceeds the max. value of %d.\n", 81 params->sizeLoadBuffer, UINT16_MAX); 82} 83 84TraceCPU::~TraceCPU() 85{ 86 87} 88 89TraceCPU* 90TraceCPUParams::create() 91{ 92 return new TraceCPU(this); 93} 94 95void 96TraceCPU::updateNumOps(uint64_t rob_num) 97{ 98 numOps = rob_num; 99 if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) { 100 inform("%s: %i insts committed\n", name(), progressMsgThreshold); 101 progressMsgThreshold += progressMsgInterval; 102 } 103} 104 105void 106TraceCPU::takeOverFrom(BaseCPU *oldCPU) 107{ 108 // Unbind the ports of the old CPU and bind the ports of the TraceCPU. 109 getInstPort().takeOverFrom(&oldCPU->getInstPort()); 110 getDataPort().takeOverFrom(&oldCPU->getDataPort()); 111} 112 113void 114TraceCPU::init() 115{ 116 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"." 117 "\n", instTraceFile); 118 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n", 119 dataTraceFile); 120 121 BaseCPU::init(); 122 123 // Get the send tick of the first instruction read request 124 Tick first_icache_tick = icacheGen.init(); 125 126 // Get the send tick of the first data read/write request 127 Tick first_dcache_tick = dcacheGen.init(); 128 129 // Set the trace offset as the minimum of that in both traces 130 traceOffset = std::min(first_icache_tick, first_dcache_tick); 131 inform("%s: Time offset (tick) found as min of both traces is %lli.\n", 132 name(), traceOffset); 133 134 // Schedule next icache and dcache event by subtracting the offset 135 schedule(icacheNextEvent, first_icache_tick - traceOffset); 136 schedule(dcacheNextEvent, first_dcache_tick - traceOffset); 137 138 // Adjust the trace offset for the dcache generator's ready nodes 139 // We don't need to do this for the icache generator as it will 140 // send its first request at the first event and schedule subsequent 141 // events using a relative tick delta 142 dcacheGen.adjustInitTraceOffset(traceOffset); 143 144 // If the Trace CPU simulation is configured to exit on any one trace 145 // completion then we don't need a counted event to count down all Trace 146 // CPUs in the system. If not then instantiate a counted event. 147 if (!enableEarlyExit) { 148 // The static counter for number of Trace CPUs is correctly set at 149 // this point so create an event and pass it. 150 execCompleteEvent = new CountedExitEvent("end of all traces reached.", 151 numTraceCPUs); 152 } 153 154} 155 156void 157TraceCPU::schedIcacheNext() 158{ 159 DPRINTF(TraceCPUInst, "IcacheGen event.\n"); 160 161 // Try to send the current packet or a retry packet if there is one 162 bool sched_next = icacheGen.tryNext(); 163 // If packet sent successfully, schedule next event 164 if (sched_next) { 165 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event " 166 "at %d.\n", curTick() + icacheGen.tickDelta()); 167 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta()); 168 ++numSchedIcacheEvent; 169 } else { 170 // check if traceComplete. If not, do nothing because sending failed 171 // and next event will be scheduled via RecvRetry() 172 if (icacheGen.isTraceComplete()) { 173 // If this is the first trace to complete, set the variable. If it 174 // is already set then both traces are complete to exit sim. 175 checkAndSchedExitEvent(); 176 } 177 } 178 return; 179} 180 181void 182TraceCPU::schedDcacheNext() 183{ 184 DPRINTF(TraceCPUData, "DcacheGen event.\n"); 185 186 // Update stat for numCycles 187 numCycles = clockEdge() / clockPeriod(); 188 189 dcacheGen.execute(); 190 if (dcacheGen.isExecComplete()) { 191 checkAndSchedExitEvent(); 192 } 193} 194 195void 196TraceCPU::checkAndSchedExitEvent() 197{ 198 if (!oneTraceComplete) { 199 oneTraceComplete = true; 200 } else { 201 // Schedule event to indicate execution is complete as both 202 // instruction and data access traces have been played back. 203 inform("%s: Execution complete.\n", name()); 204 // If the replay is configured to exit early, that is when any one 205 // execution is complete then exit immediately and return. Otherwise, 206 // schedule the counted exit that counts down completion of each Trace 207 // CPU. 208 if (enableEarlyExit) { 209 exitSimLoop("End of trace reached"); 210 } else { 211 schedule(*execCompleteEvent, curTick()); 212 } 213 } 214} 215 216void 217TraceCPU::regStats() 218{ 219 220 BaseCPU::regStats(); 221 222 numSchedDcacheEvent 223 .name(name() + ".numSchedDcacheEvent") 224 .desc("Number of events scheduled to trigger data request generator") 225 ; 226 227 numSchedIcacheEvent 228 .name(name() + ".numSchedIcacheEvent") 229 .desc("Number of events scheduled to trigger instruction request generator") 230 ; 231 232 numOps 233 .name(name() + ".numOps") 234 .desc("Number of micro-ops simulated by the Trace CPU") 235 ; 236 237 cpi 238 .name(name() + ".cpi") 239 .desc("Cycles per micro-op used as a proxy for CPI") 240 .precision(6) 241 ; 242 cpi = numCycles/numOps; 243 244 icacheGen.regStats(); 245 dcacheGen.regStats(); 246} 247 248void 249TraceCPU::ElasticDataGen::regStats() 250{ 251 using namespace Stats; 252 253 maxDependents 254 .name(name() + ".maxDependents") 255 .desc("Max number of dependents observed on a node") 256 ; 257 258 maxReadyListSize 259 .name(name() + ".maxReadyListSize") 260 .desc("Max size of the ready list observed") 261 ; 262 263 numSendAttempted 264 .name(name() + ".numSendAttempted") 265 .desc("Number of first attempts to send a request") 266 ; 267 268 numSendSucceeded 269 .name(name() + ".numSendSucceeded") 270 .desc("Number of successful first attempts") 271 ; 272 273 numSendFailed 274 .name(name() + ".numSendFailed") 275 .desc("Number of failed first attempts") 276 ; 277 278 numRetrySucceeded 279 .name(name() + ".numRetrySucceeded") 280 .desc("Number of successful retries") 281 ; 282 283 numSplitReqs 284 .name(name() + ".numSplitReqs") 285 .desc("Number of split requests") 286 ; 287 288 numSOLoads 289 .name(name() + ".numSOLoads") 290 .desc("Number of strictly ordered loads") 291 ; 292 293 numSOStores 294 .name(name() + ".numSOStores") 295 .desc("Number of strictly ordered stores") 296 ; 297 298 dataLastTick 299 .name(name() + ".dataLastTick") 300 .desc("Last tick simulated from the elastic data trace") 301 ; 302} 303 304Tick 305TraceCPU::ElasticDataGen::init() 306{ 307 DPRINTF(TraceCPUData, "Initializing data memory request generator " 308 "DcacheGen: elastic issue with retry.\n"); 309 310 if (!readNextWindow()) 311 panic("Trace has %d elements. It must have at least %d elements.\n", 312 depGraph.size(), 2 * windowSize); 313 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n", 314 depGraph.size()); 315 316 if (!readNextWindow()) 317 panic("Trace has %d elements. It must have at least %d elements.\n", 318 depGraph.size(), 2 * windowSize); 319 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n", 320 depGraph.size()); 321 322 // Print readyList 323 if (DTRACE(TraceCPUData)) { 324 printReadyList(); 325 } 326 auto free_itr = readyList.begin(); 327 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli" 328 " is %d.\n", free_itr->seqNum, free_itr->execTick); 329 // Return the execute tick of the earliest ready node so that an event 330 // can be scheduled to call execute() 331 return (free_itr->execTick); 332} 333 334void 335TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) { 336 for (auto& free_node : readyList) { 337 free_node.execTick -= offset; 338 } 339} 340 341void 342TraceCPU::ElasticDataGen::exit() 343{ 344 trace.reset(); 345} 346 347bool 348TraceCPU::ElasticDataGen::readNextWindow() 349{ 350 351 // Read and add next window 352 DPRINTF(TraceCPUData, "Reading next window from file.\n"); 353 354 if (traceComplete) { 355 // We are at the end of the file, thus we have no more records. 356 // Return false. 357 return false; 358 } 359 360 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n", 361 depGraph.size()); 362 363 uint32_t num_read = 0; 364 while (num_read != windowSize) { 365 366 // Create a new graph node 367 GraphNode* new_node = new GraphNode; 368 369 // Read the next line to get the next record. If that fails then end of 370 // trace has been reached and traceComplete needs to be set in addition 371 // to returning false. 372 if (!trace.read(new_node)) { 373 DPRINTF(TraceCPUData, "\tTrace complete!\n"); 374 traceComplete = true; 375 return false; 376 } 377 378 // Annotate the ROB dependencies of the new node onto the parent nodes. 379 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep); 380 // Annotate the register dependencies of the new node onto the parent 381 // nodes. 382 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep); 383 384 num_read++; 385 // Add to map 386 depGraph[new_node->seqNum] = new_node; 387 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) { 388 // Source dependencies are already complete, check if resources 389 // are available and issue. The execution time is approximated 390 // to current time plus the computational delay. 391 checkAndIssue(new_node); 392 } 393 } 394 395 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n", 396 depGraph.size()); 397 return true; 398} 399 400template<typename T> void 401TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, 402 T& dep_array, uint8_t& num_dep) 403{ 404 for (auto& a_dep : dep_array) { 405 // The convention is to set the dependencies starting with the first 406 // index in the ROB and register dependency arrays. Thus, when we reach 407 // a dependency equal to the initialisation value of zero, we know have 408 // iterated over all dependencies and can break. 409 if (a_dep == 0) 410 break; 411 // We look up the valid dependency, i.e. the parent of this node 412 auto parent_itr = depGraph.find(a_dep); 413 if (parent_itr != depGraph.end()) { 414 // If the parent is found, it is yet to be executed. Append a 415 // pointer to the new node to the dependents list of the parent 416 // node. 417 parent_itr->second->dependents.push_back(new_node); 418 auto num_depts = parent_itr->second->dependents.size(); 419 maxDependents = std::max<double>(num_depts, maxDependents.value()); 420 } else { 421 // The dependency is not found in the graph. So consider 422 // the execution of the parent is complete, i.e. remove this 423 // dependency. 424 a_dep = 0; 425 num_dep--; 426 } 427 } 428} 429 430void 431TraceCPU::ElasticDataGen::execute() 432{ 433 DPRINTF(TraceCPUData, "Execute start occupancy:\n"); 434 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " 435 "depFreeQueue = %d ,", depGraph.size(), readyList.size(), 436 depFreeQueue.size()); 437 hwResource.printOccupancy(); 438 439 // Read next window to make sure that dependents of all dep-free nodes 440 // are in the depGraph 441 if (nextRead) { 442 readNextWindow(); 443 nextRead = false; 444 } 445 446 // First attempt to issue the pending dependency-free nodes held 447 // in depFreeQueue. If resources have become available for a node, 448 // then issue it, i.e. add the node to readyList. 449 while (!depFreeQueue.empty()) { 450 if (checkAndIssue(depFreeQueue.front(), false)) { 451 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num " 452 "%lli.\n", (depFreeQueue.front())->seqNum); 453 depFreeQueue.pop(); 454 } else { 455 break; 456 } 457 } 458 // Proceed to execute from readyList 459 auto graph_itr = depGraph.begin(); 460 auto free_itr = readyList.begin(); 461 // Iterate through readyList until the next free node has its execute 462 // tick later than curTick or the end of readyList is reached 463 while (free_itr->execTick <= curTick() && free_itr != readyList.end()) { 464 465 // Get pointer to the node to be executed 466 graph_itr = depGraph.find(free_itr->seqNum); 467 assert(graph_itr != depGraph.end()); 468 GraphNode* node_ptr = graph_itr->second; 469 470 // If there is a retryPkt send that else execute the load 471 if (retryPkt) { 472 // The retryPkt must be the request that was created by the 473 // first node in the readyList. 474 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) { 475 panic("Retry packet's seqence number does not match " 476 "the first node in the readyList.\n"); 477 } 478 if (port.sendTimingReq(retryPkt)) { 479 ++numRetrySucceeded; 480 retryPkt = nullptr; 481 } 482 } else if (node_ptr->isLoad() || node_ptr->isStore()) { 483 // If there is no retryPkt, attempt to send a memory request in 484 // case of a load or store node. If the send fails, executeMemReq() 485 // returns a packet pointer, which we save in retryPkt. In case of 486 // a comp node we don't do anything and simply continue as if the 487 // execution of the comp node succedded. 488 retryPkt = executeMemReq(node_ptr); 489 } 490 // If the retryPkt or a new load/store node failed, we exit from here 491 // as a retry from cache will bring the control to execute(). The 492 // first node in readyList then, will be the failed node. 493 if (retryPkt) { 494 break; 495 } 496 497 // Proceed to remove dependencies for the successfully executed node. 498 // If it is a load which is not strictly ordered and we sent a 499 // request for it successfully, we do not yet mark any register 500 // dependencies complete. But as per dependency modelling we need 501 // to mark ROB dependencies of load and non load/store nodes which 502 // are based on successful sending of the load as complete. 503 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) { 504 // If execute succeeded mark its dependents as complete 505 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up " 506 "dependents..\n", node_ptr->seqNum); 507 508 auto child_itr = (node_ptr->dependents).begin(); 509 while (child_itr != (node_ptr->dependents).end()) { 510 // ROB dependency of a store on a load must not be removed 511 // after load is sent but after response is received 512 if (!(*child_itr)->isStore() && 513 (*child_itr)->removeRobDep(node_ptr->seqNum)) { 514 515 // Check if the child node has become dependency free 516 if ((*child_itr)->numRobDep == 0 && 517 (*child_itr)->numRegDep == 0) { 518 519 // Source dependencies are complete, check if 520 // resources are available and issue 521 checkAndIssue(*child_itr); 522 } 523 // Remove this child for the sent load and point to new 524 // location of the element following the erased element 525 child_itr = node_ptr->dependents.erase(child_itr); 526 } else { 527 // This child is not dependency-free, point to the next 528 // child 529 child_itr++; 530 } 531 } 532 } else { 533 // If it is a strictly ordered load mark its dependents as complete 534 // as we do not send a request for this case. If it is a store or a 535 // comp node we also mark all its dependents complete. 536 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking" 537 " up dependents..\n", node_ptr->seqNum); 538 539 for (auto child : node_ptr->dependents) { 540 // If the child node is dependency free removeDepOnInst() 541 // returns true. 542 if (child->removeDepOnInst(node_ptr->seqNum)) { 543 // Source dependencies are complete, check if resources 544 // are available and issue 545 checkAndIssue(child); 546 } 547 } 548 } 549 550 // After executing the node, remove from readyList and delete node. 551 readyList.erase(free_itr); 552 // If it is a cacheable load which was sent, don't delete 553 // just yet. Delete it in completeMemAccess() after the 554 // response is received. If it is an strictly ordered 555 // load, it was not sent and all dependencies were simply 556 // marked complete. Thus it is safe to delete it. For 557 // stores and non load/store nodes all dependencies were 558 // marked complete so it is safe to delete it. 559 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) { 560 // Release all resources occupied by the completed node 561 hwResource.release(node_ptr); 562 // clear the dynamically allocated set of dependents 563 (node_ptr->dependents).clear(); 564 // Update the stat for numOps simulated 565 owner.updateNumOps(node_ptr->robNum); 566 // delete node 567 delete node_ptr; 568 // remove from graph 569 depGraph.erase(graph_itr); 570 } 571 // Point to first node to continue to next iteration of while loop 572 free_itr = readyList.begin(); 573 } // end of while loop 574 575 // Print readyList, sizes of queues and resource status after updating 576 if (DTRACE(TraceCPUData)) { 577 printReadyList(); 578 DPRINTF(TraceCPUData, "Execute end occupancy:\n"); 579 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " 580 "depFreeQueue = %d ,", depGraph.size(), readyList.size(), 581 depFreeQueue.size()); 582 hwResource.printOccupancy(); 583 } 584 585 if (retryPkt) { 586 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry" 587 "event from the cache for seq. num %lli.\n", 588 retryPkt->req->getReqInstSeqNum()); 589 return; 590 } 591 // If the size of the dependency graph is less than the dependency window 592 // then read from the trace file to populate the graph next time we are in 593 // execute. 594 if (depGraph.size() < windowSize && !traceComplete) 595 nextRead = true; 596 597 // If cache is not blocked, schedule an event for the first execTick in 598 // readyList else retry from cache will schedule the event. If the ready 599 // list is empty then check if the next pending node has resources 600 // available to issue. If yes, then schedule an event for the next cycle. 601 if (!readyList.empty()) { 602 Tick next_event_tick = std::max(readyList.begin()->execTick, 603 curTick()); 604 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 605 next_event_tick); 606 owner.schedDcacheNextEvent(next_event_tick); 607 } else if (readyList.empty() && !depFreeQueue.empty() && 608 hwResource.isAvailable(depFreeQueue.front())) { 609 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 610 owner.clockEdge(Cycles(1))); 611 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1))); 612 } 613 614 // If trace is completely read, readyList is empty and depGraph is empty, 615 // set execComplete to true 616 if (depGraph.empty() && readyList.empty() && traceComplete && 617 !hwResource.awaitingResponse()) { 618 DPRINTF(TraceCPUData, "\tExecution Complete!\n"); 619 execComplete = true; 620 dataLastTick = curTick(); 621 } 622} 623 624PacketPtr 625TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) 626{ 627 628 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, " 629 "virt addr %d, pc %#x, size %d, flags %d).\n", 630 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr, 631 node_ptr->pc, node_ptr->size, node_ptr->flags); 632 633 // If the request is strictly ordered, do not send it. Just return nullptr 634 // as if it was succesfully sent. 635 if (node_ptr->isStrictlyOrdered()) { 636 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores; 637 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n", 638 node_ptr->seqNum); 639 return nullptr; 640 } 641 642 // Check if the request spans two cache lines as this condition triggers 643 // an assert fail in the L1 cache. If it does then truncate the size to 644 // access only until the end of that line and ignore the remainder. The 645 // stat counting this is useful to keep a check on how frequently this 646 // happens. If required the code could be revised to mimick splitting such 647 // a request into two. 648 unsigned blk_size = owner.cacheLineSize(); 649 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1)); 650 if (!(blk_offset + node_ptr->size <= blk_size)) { 651 node_ptr->size = blk_size - blk_offset; 652 ++numSplitReqs; 653 } 654 655 // Create a request and the packet containing request 656 auto req = std::make_shared<Request>( 657 node_ptr->physAddr, node_ptr->size, 658 node_ptr->flags, masterID, node_ptr->seqNum, 659 ContextID(0)); 660 661 req->setPC(node_ptr->pc); 662 // If virtual address is valid, set the asid and virtual address fields 663 // of the request. 664 if (node_ptr->virtAddr != 0) { 665 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size, 666 node_ptr->flags, masterID, node_ptr->pc); 667 req->setPaddr(node_ptr->physAddr); 668 req->setReqInstSeqNum(node_ptr->seqNum); 669 } 670 671 PacketPtr pkt; 672 uint8_t* pkt_data = new uint8_t[req->getSize()]; 673 if (node_ptr->isLoad()) { 674 pkt = Packet::createRead(req); 675 } else { 676 pkt = Packet::createWrite(req); 677 memset(pkt_data, 0xA, req->getSize()); 678 } 679 pkt->dataDynamic(pkt_data); 680 681 // Call MasterPort method to send a timing request for this packet 682 bool success = port.sendTimingReq(pkt); 683 ++numSendAttempted; 684 685 if (!success) { 686 // If it fails, return the packet to retry when a retry is signalled by 687 // the cache 688 ++numSendFailed; 689 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n"); 690 return pkt; 691 } else { 692 // It is succeeds, return nullptr 693 ++numSendSucceeded; 694 return nullptr; 695 } 696} 697 698bool 699TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first) 700{ 701 // Assert the node is dependency-free 702 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0); 703 704 // If this is the first attempt, print a debug message to indicate this. 705 if (first) { 706 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now" 707 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(), 708 node_ptr->robNum); 709 } 710 711 // Check if resources are available to issue the specific node 712 if (hwResource.isAvailable(node_ptr)) { 713 // If resources are free only then add to readyList 714 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding" 715 " to readyList, occupying resources.\n", node_ptr->seqNum); 716 // Compute the execute tick by adding the compute delay for the node 717 // and add the ready node to the ready list 718 addToSortedReadyList(node_ptr->seqNum, 719 owner.clockEdge() + node_ptr->compDelay); 720 // Account for the resources taken up by this issued node. 721 hwResource.occupy(node_ptr); 722 return true; 723 724 } else { 725 if (first) { 726 // Although dependencies are complete, resources are not available. 727 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli." 728 " Adding to depFreeQueue.\n", node_ptr->seqNum); 729 depFreeQueue.push(node_ptr); 730 } else { 731 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. " 732 "Still pending issue.\n", node_ptr->seqNum); 733 } 734 return false; 735 } 736} 737 738void 739TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt) 740{ 741 // Release the resources for this completed node. 742 if (pkt->isWrite()) { 743 // Consider store complete. 744 hwResource.releaseStoreBuffer(); 745 // If it is a store response then do nothing since we do not model 746 // dependencies on store completion in the trace. But if we were 747 // blocking execution due to store buffer fullness, we need to schedule 748 // an event and attempt to progress. 749 } else { 750 // If it is a load response then release the dependents waiting on it. 751 // Get pointer to the completed load 752 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum()); 753 assert(graph_itr != depGraph.end()); 754 GraphNode* node_ptr = graph_itr->second; 755 756 // Release resources occupied by the load 757 hwResource.release(node_ptr); 758 759 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up" 760 " dependents..\n", node_ptr->seqNum); 761 762 for (auto child : node_ptr->dependents) { 763 if (child->removeDepOnInst(node_ptr->seqNum)) { 764 checkAndIssue(child); 765 } 766 } 767 768 // clear the dynamically allocated set of dependents 769 (node_ptr->dependents).clear(); 770 // Update the stat for numOps completed 771 owner.updateNumOps(node_ptr->robNum); 772 // delete node 773 delete node_ptr; 774 // remove from graph 775 depGraph.erase(graph_itr); 776 } 777 778 if (DTRACE(TraceCPUData)) { 779 printReadyList(); 780 } 781 782 // If the size of the dependency graph is less than the dependency window 783 // then read from the trace file to populate the graph next time we are in 784 // execute. 785 if (depGraph.size() < windowSize && !traceComplete) 786 nextRead = true; 787 788 // If not waiting for retry, attempt to schedule next event 789 if (!retryPkt) { 790 // We might have new dep-free nodes in the list which will have execute 791 // tick greater than or equal to curTick. But a new dep-free node might 792 // have its execute tick earlier. Therefore, attempt to reschedule. It 793 // could happen that the readyList is empty and we got here via a 794 // last remaining response. So, either the trace is complete or there 795 // are pending nodes in the depFreeQueue. The checking is done in the 796 // execute() control flow, so schedule an event to go via that flow. 797 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) : 798 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1))); 799 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 800 next_event_tick); 801 owner.schedDcacheNextEvent(next_event_tick); 802 } 803} 804 805void 806TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num, 807 Tick exec_tick) 808{ 809 ReadyNode ready_node; 810 ready_node.seqNum = seq_num; 811 ready_node.execTick = exec_tick; 812 813 // Iterator to readyList 814 auto itr = readyList.begin(); 815 816 // If the readyList is empty, simply insert the new node at the beginning 817 // and return 818 if (itr == readyList.end()) { 819 readyList.insert(itr, ready_node); 820 maxReadyListSize = std::max<double>(readyList.size(), 821 maxReadyListSize.value()); 822 return; 823 } 824 825 // If the new node has its execution tick equal to the first node in the 826 // list then go to the next node. If the first node in the list failed 827 // to execute, its position as the first is thus maintained. 828 if (retryPkt) 829 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum) 830 itr++; 831 832 // Increment the iterator and compare the node pointed to by it to the new 833 // node till the position to insert the new node is found. 834 bool found = false; 835 while (!found && itr != readyList.end()) { 836 // If the execution tick of the new node is less than the node then 837 // this is the position to insert 838 if (exec_tick < itr->execTick) 839 found = true; 840 // If the execution tick of the new node is equal to the node then 841 // sort in ascending order of sequence numbers 842 else if (exec_tick == itr->execTick) { 843 // If the sequence number of the new node is less than the node 844 // then this is the position to insert 845 if (seq_num < itr->seqNum) 846 found = true; 847 // Else go to next node 848 else 849 itr++; 850 } 851 // If the execution tick of the new node is greater than the node then 852 // go to the next node 853 else 854 itr++; 855 } 856 readyList.insert(itr, ready_node); 857 // Update the stat for max size reached of the readyList 858 maxReadyListSize = std::max<double>(readyList.size(), 859 maxReadyListSize.value()); 860} 861 862void 863TraceCPU::ElasticDataGen::printReadyList() { 864 865 auto itr = readyList.begin(); 866 if (itr == readyList.end()) { 867 DPRINTF(TraceCPUData, "readyList is empty.\n"); 868 return; 869 } 870 DPRINTF(TraceCPUData, "Printing readyList:\n"); 871 while (itr != readyList.end()) { 872 auto graph_itr = depGraph.find(itr->seqNum); 873 GraphNode* node_ptr M5_VAR_USED = graph_itr->second; 874 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum, 875 node_ptr->typeToStr(), itr->execTick); 876 itr++; 877 } 878} 879 880TraceCPU::ElasticDataGen::HardwareResource::HardwareResource( 881 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) 882 : sizeROB(max_rob), 883 sizeStoreBuffer(max_stores), 884 sizeLoadBuffer(max_loads), 885 oldestInFlightRobNum(UINT64_MAX), 886 numInFlightLoads(0), 887 numInFlightStores(0) 888{} 889 890void 891TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node) 892{ 893 // Occupy ROB entry for the issued node 894 // Merely maintain the oldest node, i.e. numerically least robNum by saving 895 // it in the variable oldestInFLightRobNum. 896 inFlightNodes[new_node->seqNum] = new_node->robNum; 897 oldestInFlightRobNum = inFlightNodes.begin()->second; 898 899 // Occupy Load/Store Buffer entry for the issued node if applicable 900 if (new_node->isLoad()) { 901 ++numInFlightLoads; 902 } else if (new_node->isStore()) { 903 ++numInFlightStores; 904 } // else if it is a non load/store node, no buffer entry is occupied 905 906 printOccupancy(); 907} 908 909void 910TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node) 911{ 912 assert(!inFlightNodes.empty()); 913 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n", 914 done_node->seqNum); 915 916 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end()); 917 inFlightNodes.erase(done_node->seqNum); 918 919 if (inFlightNodes.empty()) { 920 // If we delete the only in-flight node and then the 921 // oldestInFlightRobNum is set to it's initialized (max) value. 922 oldestInFlightRobNum = UINT64_MAX; 923 } else { 924 // Set the oldest in-flight node rob number equal to the first node in 925 // the inFlightNodes since that will have the numerically least value. 926 oldestInFlightRobNum = inFlightNodes.begin()->second; 927 } 928 929 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, " 930 "oldestInFlightRobNum = %d\n", inFlightNodes.size(), 931 oldestInFlightRobNum); 932 933 // A store is considered complete when a request is sent, thus ROB entry is 934 // freed. But it occupies an entry in the Store Buffer until its response 935 // is received. A load is considered complete when a response is received, 936 // thus both ROB and Load Buffer entries can be released. 937 if (done_node->isLoad()) { 938 assert(numInFlightLoads != 0); 939 --numInFlightLoads; 940 } 941 // For normal writes, we send the requests out and clear a store buffer 942 // entry on response. For writes which are strictly ordered, for e.g. 943 // writes to device registers, we do that within release() which is called 944 // when node is executed and taken off from readyList. 945 if (done_node->isStore() && done_node->isStrictlyOrdered()) { 946 releaseStoreBuffer(); 947 } 948} 949 950void 951TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer() 952{ 953 assert(numInFlightStores != 0); 954 --numInFlightStores; 955} 956 957bool 958TraceCPU::ElasticDataGen::HardwareResource::isAvailable( 959 const GraphNode* new_node) const 960{ 961 uint16_t num_in_flight_nodes; 962 if (inFlightNodes.empty()) { 963 num_in_flight_nodes = 0; 964 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 965 " #in-flight nodes = 0", new_node->seqNum); 966 } else if (new_node->robNum > oldestInFlightRobNum) { 967 // This is the intuitive case where new dep-free node is younger 968 // instruction than the oldest instruction in-flight. Thus we make sure 969 // in_flight_nodes does not overflow. 970 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum; 971 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 972 " #in-flight nodes = %d - %d = %d", new_node->seqNum, 973 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes); 974 } else { 975 // This is the case where an instruction older than the oldest in- 976 // flight instruction becomes dep-free. Thus we must have already 977 // accounted for the entry in ROB for this new dep-free node. 978 // Immediately after this check returns true, oldestInFlightRobNum will 979 // be updated in occupy(). We simply let this node issue now. 980 num_in_flight_nodes = 0; 981 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 982 " new oldestInFlightRobNum = %d, #in-flight nodes ignored", 983 new_node->seqNum, new_node->robNum); 984 } 985 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n", 986 numInFlightLoads, sizeLoadBuffer, 987 numInFlightStores, sizeStoreBuffer); 988 // Check if resources are available to issue the specific node 989 if (num_in_flight_nodes >= sizeROB) { 990 return false; 991 } 992 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) { 993 return false; 994 } 995 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) { 996 return false; 997 } 998 return true; 999} 1000 1001bool 1002TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const { 1003 // Return true if there is at least one read or write request in flight 1004 return (numInFlightStores != 0 || numInFlightLoads != 0); 1005} 1006 1007void 1008TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() { 1009 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, " 1010 "LQ = %d/%d, SQ = %d/%d.\n", 1011 oldestInFlightRobNum, 1012 numInFlightLoads, sizeLoadBuffer, 1013 numInFlightStores, sizeStoreBuffer); 1014} 1015 1016void 1017TraceCPU::FixedRetryGen::regStats() 1018{ 1019 using namespace Stats; 1020 1021 numSendAttempted 1022 .name(name() + ".numSendAttempted") 1023 .desc("Number of first attempts to send a request") 1024 ; 1025 1026 numSendSucceeded 1027 .name(name() + ".numSendSucceeded") 1028 .desc("Number of successful first attempts") 1029 ; 1030 1031 numSendFailed 1032 .name(name() + ".numSendFailed") 1033 .desc("Number of failed first attempts") 1034 ; 1035 1036 numRetrySucceeded 1037 .name(name() + ".numRetrySucceeded") 1038 .desc("Number of successful retries") 1039 ; 1040 1041 instLastTick 1042 .name(name() + ".instLastTick") 1043 .desc("Last tick simulated from the fixed inst trace") 1044 ; 1045} 1046 1047Tick 1048TraceCPU::FixedRetryGen::init() 1049{ 1050 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator" 1051 " IcacheGen: fixed issue with retry.\n"); 1052 1053 if (nextExecute()) { 1054 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick); 1055 return currElement.tick; 1056 } else { 1057 panic("Read of first message in the trace failed.\n"); 1058 return MaxTick; 1059 } 1060} 1061 1062bool 1063TraceCPU::FixedRetryGen::tryNext() 1064{ 1065 // If there is a retry packet, try to send it 1066 if (retryPkt) { 1067 1068 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n"); 1069 1070 if (!port.sendTimingReq(retryPkt)) { 1071 // Still blocked! This should never occur. 1072 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n"); 1073 return false; 1074 } 1075 ++numRetrySucceeded; 1076 } else { 1077 1078 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n"); 1079 1080 // try sending current element 1081 assert(currElement.isValid()); 1082 1083 ++numSendAttempted; 1084 1085 if (!send(currElement.addr, currElement.blocksize, 1086 currElement.cmd, currElement.flags, currElement.pc)) { 1087 DPRINTF(TraceCPUInst, "currElement sending failed.\n"); 1088 ++numSendFailed; 1089 // return false to indicate not to schedule next event 1090 return false; 1091 } else { 1092 ++numSendSucceeded; 1093 } 1094 } 1095 // If packet was sent successfully, either retryPkt or currElement, return 1096 // true to indicate to schedule event at current Tick plus delta. If packet 1097 // was sent successfully and there is no next packet to send, return false. 1098 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next " 1099 "element.\n"); 1100 retryPkt = nullptr; 1101 // Read next element into currElement, currElement gets cleared so save the 1102 // tick to calculate delta 1103 Tick last_tick = currElement.tick; 1104 if (nextExecute()) { 1105 assert(currElement.tick >= last_tick); 1106 delta = currElement.tick - last_tick; 1107 } 1108 return !traceComplete; 1109} 1110 1111void 1112TraceCPU::FixedRetryGen::exit() 1113{ 1114 trace.reset(); 1115} 1116 1117bool 1118TraceCPU::FixedRetryGen::nextExecute() 1119{ 1120 if (traceComplete) 1121 // We are at the end of the file, thus we have no more messages. 1122 // Return false. 1123 return false; 1124 1125 1126 //Reset the currElement to the default values 1127 currElement.clear(); 1128 1129 // Read the next line to get the next message. If that fails then end of 1130 // trace has been reached and traceComplete needs to be set in addition 1131 // to returning false. If successful then next message is in currElement. 1132 if (!trace.read(&currElement)) { 1133 traceComplete = true; 1134 instLastTick = curTick(); 1135 return false; 1136 } 1137 1138 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n", 1139 currElement.cmd.isRead() ? 'r' : 'w', 1140 currElement.addr, 1141 currElement.pc, 1142 currElement.blocksize, 1143 currElement.tick); 1144 1145 return true; 1146} 1147 1148bool 1149TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd, 1150 Request::FlagsType flags, Addr pc) 1151{ 1152 1153 // Create new request 1154 auto req = std::make_shared<Request>(addr, size, flags, masterID); 1155 req->setPC(pc); 1156 1157 // If this is not done it triggers assert in L1 cache for invalid contextId 1158 req->setContext(ContextID(0)); 1159 1160 // Embed it in a packet 1161 PacketPtr pkt = new Packet(req, cmd); 1162 1163 uint8_t* pkt_data = new uint8_t[req->getSize()]; 1164 pkt->dataDynamic(pkt_data); 1165 1166 if (cmd.isWrite()) { 1167 memset(pkt_data, 0xA, req->getSize()); 1168 } 1169 1170 // Call MasterPort method to send a timing request for this packet 1171 bool success = port.sendTimingReq(pkt); 1172 if (!success) { 1173 // If it fails, save the packet to retry when a retry is signalled by 1174 // the cache 1175 retryPkt = pkt; 1176 } 1177 return success; 1178} 1179 1180void 1181TraceCPU::icacheRetryRecvd() 1182{ 1183 // Schedule an event to go through the control flow in the same tick as 1184 // retry is received 1185 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen" 1186 " event @%lli.\n", curTick()); 1187 schedule(icacheNextEvent, curTick()); 1188} 1189 1190void 1191TraceCPU::dcacheRetryRecvd() 1192{ 1193 // Schedule an event to go through the execute flow in the same tick as 1194 // retry is received 1195 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen" 1196 " event @%lli.\n", curTick()); 1197 schedule(dcacheNextEvent, curTick()); 1198} 1199 1200void 1201TraceCPU::schedDcacheNextEvent(Tick when) 1202{ 1203 if (!dcacheNextEvent.scheduled()) { 1204 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n", 1205 when); 1206 schedule(dcacheNextEvent, when); 1207 ++numSchedDcacheEvent; 1208 } else if (when < dcacheNextEvent.when()) { 1209 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli" 1210 " to %lli.\n", dcacheNextEvent.when(), when); 1211 reschedule(dcacheNextEvent, when); 1212 } 1213 1214} 1215 1216bool 1217TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt) 1218{ 1219 // All responses on the instruction fetch side are ignored. Simply delete 1220 // the packet to free allocated memory 1221 delete pkt; 1222 1223 return true; 1224} 1225 1226void 1227TraceCPU::IcachePort::recvReqRetry() 1228{ 1229 owner->icacheRetryRecvd(); 1230} 1231 1232void 1233TraceCPU::dcacheRecvTimingResp(PacketPtr pkt) 1234{ 1235 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n"); 1236 dcacheGen.completeMemAccess(pkt); 1237} 1238 1239bool 1240TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt) 1241{ 1242 // Handle the responses for data memory requests which is done inside the 1243 // elastic data generator 1244 owner->dcacheRecvTimingResp(pkt); 1245 // After processing the response delete the packet to free 1246 // memory 1247 delete pkt; 1248 1249 return true; 1250} 1251 1252void 1253TraceCPU::DcachePort::recvReqRetry() 1254{ 1255 owner->dcacheRetryRecvd(); 1256} 1257 1258TraceCPU::ElasticDataGen::InputStream::InputStream( 1259 const std::string& filename, 1260 const double time_multiplier) 1261 : trace(filename), 1262 timeMultiplier(time_multiplier), 1263 microOpCount(0) 1264{ 1265 // Create a protobuf message for the header and read it from the stream 1266 ProtoMessage::InstDepRecordHeader header_msg; 1267 if (!trace.read(header_msg)) { 1268 panic("Failed to read packet header from %s\n", filename); 1269 1270 if (header_msg.tick_freq() != SimClock::Frequency) { 1271 panic("Trace %s was recorded with a different tick frequency %d\n", 1272 header_msg.tick_freq()); 1273 } 1274 } else { 1275 // Assign window size equal to the field in the trace that was recorded 1276 // when the data dependency trace was captured in the o3cpu model 1277 windowSize = header_msg.window_size(); 1278 } 1279} 1280 1281void 1282TraceCPU::ElasticDataGen::InputStream::reset() 1283{ 1284 trace.reset(); 1285} 1286 1287bool 1288TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element) 1289{ 1290 ProtoMessage::InstDepRecord pkt_msg; 1291 if (trace.read(pkt_msg)) { 1292 // Required fields 1293 element->seqNum = pkt_msg.seq_num(); 1294 element->type = pkt_msg.type(); 1295 // Scale the compute delay to effectively scale the Trace CPU frequency 1296 element->compDelay = pkt_msg.comp_delay() * timeMultiplier; 1297 1298 // Repeated field robDepList 1299 element->clearRobDep(); 1300 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep); 1301 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) { 1302 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i); 1303 element->numRobDep += 1; 1304 } 1305 1306 // Repeated field 1307 element->clearRegDep(); 1308 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs); 1309 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) { 1310 // There is a possibility that an instruction has both, a register 1311 // and order dependency on an instruction. In such a case, the 1312 // register dependency is omitted 1313 bool duplicate = false; 1314 for (int j = 0; j < element->numRobDep; j++) { 1315 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]); 1316 } 1317 if (!duplicate) { 1318 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i); 1319 element->numRegDep += 1; 1320 } 1321 } 1322 1323 // Optional fields 1324 if (pkt_msg.has_p_addr()) 1325 element->physAddr = pkt_msg.p_addr(); 1326 else 1327 element->physAddr = 0; 1328 1329 if (pkt_msg.has_v_addr()) 1330 element->virtAddr = pkt_msg.v_addr(); 1331 else 1332 element->virtAddr = 0; 1333 1334 if (pkt_msg.has_asid()) 1335 element->asid = pkt_msg.asid(); 1336 else 1337 element->asid = 0; 1338 1339 if (pkt_msg.has_size()) 1340 element->size = pkt_msg.size(); 1341 else 1342 element->size = 0; 1343 1344 if (pkt_msg.has_flags()) 1345 element->flags = pkt_msg.flags(); 1346 else 1347 element->flags = 0; 1348 1349 if (pkt_msg.has_pc()) 1350 element->pc = pkt_msg.pc(); 1351 else 1352 element->pc = 0; 1353 1354 // ROB occupancy number 1355 ++microOpCount; 1356 if (pkt_msg.has_weight()) { 1357 microOpCount += pkt_msg.weight(); 1358 } 1359 element->robNum = microOpCount; 1360 return true; 1361 } 1362 1363 // We have reached the end of the file 1364 return false; 1365} 1366 1367bool 1368TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep) 1369{ 1370 for (auto& own_reg_dep : regDep) { 1371 if (own_reg_dep == reg_dep) { 1372 // If register dependency is found, make it zero and return true 1373 own_reg_dep = 0; 1374 assert(numRegDep > 0); 1375 --numRegDep; 1376 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli " 1377 "done.\n", seqNum, reg_dep); 1378 return true; 1379 } 1380 } 1381 1382 // Return false if the dependency is not found 1383 return false; 1384} 1385 1386bool 1387TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep) 1388{ 1389 for (auto& own_rob_dep : robDep) { 1390 if (own_rob_dep == rob_dep) { 1391 // If the rob dependency is found, make it zero and return true 1392 own_rob_dep = 0; 1393 assert(numRobDep > 0); 1394 --numRobDep; 1395 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli " 1396 "done.\n", seqNum, rob_dep); 1397 return true; 1398 } 1399 } 1400 return false; 1401} 1402 1403void 1404TraceCPU::ElasticDataGen::GraphNode::clearRegDep() { 1405 for (auto& own_reg_dep : regDep) { 1406 own_reg_dep = 0; 1407 } 1408 numRegDep = 0; 1409} 1410 1411void 1412TraceCPU::ElasticDataGen::GraphNode::clearRobDep() { 1413 for (auto& own_rob_dep : robDep) { 1414 own_rob_dep = 0; 1415 } 1416 numRobDep = 0; 1417} 1418 1419bool 1420TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num) 1421{ 1422 // If it is an rob dependency then remove it 1423 if (!removeRobDep(done_seq_num)) { 1424 // If it is not an rob dependency then it must be a register dependency 1425 // If the register dependency is not found, it violates an assumption 1426 // and must be caught by assert. 1427 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num); 1428 assert(regdep_found); 1429 } 1430 // Return true if the node is dependency free 1431 return (numRobDep == 0 && numRegDep == 0); 1432} 1433 1434void 1435TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const 1436{ 1437 DPRINTFR(TraceCPUData, "%lli", seqNum); 1438 DPRINTFR(TraceCPUData, ",%s", typeToStr()); 1439 if (isLoad() || isStore()) { 1440 DPRINTFR(TraceCPUData, ",%i", physAddr); 1441 DPRINTFR(TraceCPUData, ",%i", size); 1442 DPRINTFR(TraceCPUData, ",%i", flags); 1443 } 1444 DPRINTFR(TraceCPUData, ",%lli", compDelay); 1445 int i = 0; 1446 DPRINTFR(TraceCPUData, "robDep:"); 1447 while (robDep[i] != 0) { 1448 DPRINTFR(TraceCPUData, ",%lli", robDep[i]); 1449 i++; 1450 } 1451 i = 0; 1452 DPRINTFR(TraceCPUData, "regDep:"); 1453 while (regDep[i] != 0) { 1454 DPRINTFR(TraceCPUData, ",%lli", regDep[i]); 1455 i++; 1456 } 1457 auto child_itr = dependents.begin(); 1458 DPRINTFR(TraceCPUData, "dependents:"); 1459 while (child_itr != dependents.end()) { 1460 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum); 1461 child_itr++; 1462 } 1463 1464 DPRINTFR(TraceCPUData, "\n"); 1465} 1466 1467std::string 1468TraceCPU::ElasticDataGen::GraphNode::typeToStr() const 1469{ 1470 return Record::RecordType_Name(type); 1471} 1472 1473TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename) 1474 : trace(filename) 1475{ 1476 // Create a protobuf message for the header and read it from the stream 1477 ProtoMessage::PacketHeader header_msg; 1478 if (!trace.read(header_msg)) { 1479 panic("Failed to read packet header from %s\n", filename); 1480 1481 if (header_msg.tick_freq() != SimClock::Frequency) { 1482 panic("Trace %s was recorded with a different tick frequency %d\n", 1483 header_msg.tick_freq()); 1484 } 1485 } 1486} 1487 1488void 1489TraceCPU::FixedRetryGen::InputStream::reset() 1490{ 1491 trace.reset(); 1492} 1493 1494bool 1495TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element) 1496{ 1497 ProtoMessage::Packet pkt_msg; 1498 if (trace.read(pkt_msg)) { 1499 element->cmd = pkt_msg.cmd(); 1500 element->addr = pkt_msg.addr(); 1501 element->blocksize = pkt_msg.size(); 1502 element->tick = pkt_msg.tick(); 1503 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0; 1504 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0; 1505 return true; 1506 } 1507 1508 // We have reached the end of the file 1509 return false; 1510} 1511