trace_cpu.cc revision 11632
1/* 2 * Copyright (c) 2013 - 2016 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Radhika Jagtap 38 * Andreas Hansson 39 * Thomas Grass 40 */ 41 42#include "cpu/trace/trace_cpu.hh" 43 44#include "sim/sim_exit.hh" 45 46// Declare and initialize the static counter for number of trace CPUs. 47int TraceCPU::numTraceCPUs = 0; 48 49TraceCPU::TraceCPU(TraceCPUParams *params) 50 : BaseCPU(params), 51 icachePort(this), 52 dcachePort(this), 53 instMasterID(params->system->getMasterId(name() + ".inst")), 54 dataMasterID(params->system->getMasterId(name() + ".data")), 55 instTraceFile(params->instTraceFile), 56 dataTraceFile(params->dataTraceFile), 57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile), 58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile, 59 params), 60 icacheNextEvent(this), 61 dcacheNextEvent(this), 62 oneTraceComplete(false), 63 traceOffset(0), 64 execCompleteEvent(nullptr) 65{ 66 // Increment static counter for number of Trace CPUs. 67 ++TraceCPU::numTraceCPUs; 68 69 // Check that the python parameters for sizes of ROB, store buffer and load 70 // buffer do not overflow the corresponding C++ variables. 71 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the " 72 "max. value of %d.\n", params->sizeROB, UINT16_MAX); 73 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d " 74 "exceeds the max. value of %d.\n", params->sizeROB, 75 UINT16_MAX); 76 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to" 77 " %d exceeds the max. value of %d.\n", 78 params->sizeLoadBuffer, UINT16_MAX); 79} 80 81TraceCPU::~TraceCPU() 82{ 83 84} 85 86TraceCPU* 87TraceCPUParams::create() 88{ 89 return new TraceCPU(this); 90} 91 92void 93TraceCPU::takeOverFrom(BaseCPU *oldCPU) 94{ 95 // Unbind the ports of the old CPU and bind the ports of the TraceCPU. 96 assert(!getInstPort().isConnected()); 97 assert(oldCPU->getInstPort().isConnected()); 98 BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort(); 99 oldCPU->getInstPort().unbind(); 100 getInstPort().bind(inst_peer_port); 101 102 assert(!getDataPort().isConnected()); 103 assert(oldCPU->getDataPort().isConnected()); 104 BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort(); 105 oldCPU->getDataPort().unbind(); 106 getDataPort().bind(data_peer_port); 107} 108 109void 110TraceCPU::init() 111{ 112 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"." 113 "\n", instTraceFile); 114 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n", 115 dataTraceFile); 116 117 BaseCPU::init(); 118 119 // Get the send tick of the first instruction read request 120 Tick first_icache_tick = icacheGen.init(); 121 122 // Get the send tick of the first data read/write request 123 Tick first_dcache_tick = dcacheGen.init(); 124 125 // Set the trace offset as the minimum of that in both traces 126 traceOffset = std::min(first_icache_tick, first_dcache_tick); 127 inform("%s: Time offset (tick) found as min of both traces is %lli.\n", 128 name(), traceOffset); 129 130 // Schedule next icache and dcache event by subtracting the offset 131 schedule(icacheNextEvent, first_icache_tick - traceOffset); 132 schedule(dcacheNextEvent, first_dcache_tick - traceOffset); 133 134 // Adjust the trace offset for the dcache generator's ready nodes 135 // We don't need to do this for the icache generator as it will 136 // send its first request at the first event and schedule subsequent 137 // events using a relative tick delta 138 dcacheGen.adjustInitTraceOffset(traceOffset); 139 140 // The static counter for number of Trace CPUs is correctly set at this 141 // point so create an event and pass it. 142 execCompleteEvent = new CountedExitEvent("end of all traces reached.", 143 numTraceCPUs); 144} 145 146void 147TraceCPU::schedIcacheNext() 148{ 149 DPRINTF(TraceCPUInst, "IcacheGen event.\n"); 150 151 // Try to send the current packet or a retry packet if there is one 152 bool sched_next = icacheGen.tryNext(); 153 // If packet sent successfully, schedule next event 154 if (sched_next) { 155 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event " 156 "at %d.\n", curTick() + icacheGen.tickDelta()); 157 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta()); 158 ++numSchedIcacheEvent; 159 } else { 160 // check if traceComplete. If not, do nothing because sending failed 161 // and next event will be scheduled via RecvRetry() 162 if (icacheGen.isTraceComplete()) { 163 // If this is the first trace to complete, set the variable. If it 164 // is already set then both traces are complete to exit sim. 165 checkAndSchedExitEvent(); 166 } 167 } 168 return; 169} 170 171void 172TraceCPU::schedDcacheNext() 173{ 174 DPRINTF(TraceCPUData, "DcacheGen event.\n"); 175 176 // Update stat for numCycles 177 numCycles = clockEdge() / clockPeriod(); 178 179 dcacheGen.execute(); 180 if (dcacheGen.isExecComplete()) { 181 checkAndSchedExitEvent(); 182 } 183} 184 185void 186TraceCPU::checkAndSchedExitEvent() 187{ 188 if (!oneTraceComplete) { 189 oneTraceComplete = true; 190 } else { 191 // Schedule event to indicate execution is complete as both 192 // instruction and data access traces have been played back. 193 inform("%s: Execution complete.\n", name()); 194 schedule(*execCompleteEvent, curTick()); 195 } 196} 197 198void 199TraceCPU::regStats() 200{ 201 202 BaseCPU::regStats(); 203 204 numSchedDcacheEvent 205 .name(name() + ".numSchedDcacheEvent") 206 .desc("Number of events scheduled to trigger data request generator") 207 ; 208 209 numSchedIcacheEvent 210 .name(name() + ".numSchedIcacheEvent") 211 .desc("Number of events scheduled to trigger instruction request generator") 212 ; 213 214 numOps 215 .name(name() + ".numOps") 216 .desc("Number of micro-ops simulated by the Trace CPU") 217 ; 218 219 cpi 220 .name(name() + ".cpi") 221 .desc("Cycles per micro-op used as a proxy for CPI") 222 .precision(6) 223 ; 224 cpi = numCycles/numOps; 225 226 icacheGen.regStats(); 227 dcacheGen.regStats(); 228} 229 230void 231TraceCPU::ElasticDataGen::regStats() 232{ 233 using namespace Stats; 234 235 maxDependents 236 .name(name() + ".maxDependents") 237 .desc("Max number of dependents observed on a node") 238 ; 239 240 maxReadyListSize 241 .name(name() + ".maxReadyListSize") 242 .desc("Max size of the ready list observed") 243 ; 244 245 numSendAttempted 246 .name(name() + ".numSendAttempted") 247 .desc("Number of first attempts to send a request") 248 ; 249 250 numSendSucceeded 251 .name(name() + ".numSendSucceeded") 252 .desc("Number of successful first attempts") 253 ; 254 255 numSendFailed 256 .name(name() + ".numSendFailed") 257 .desc("Number of failed first attempts") 258 ; 259 260 numRetrySucceeded 261 .name(name() + ".numRetrySucceeded") 262 .desc("Number of successful retries") 263 ; 264 265 numSplitReqs 266 .name(name() + ".numSplitReqs") 267 .desc("Number of split requests") 268 ; 269 270 numSOLoads 271 .name(name() + ".numSOLoads") 272 .desc("Number of strictly ordered loads") 273 ; 274 275 numSOStores 276 .name(name() + ".numSOStores") 277 .desc("Number of strictly ordered stores") 278 ; 279 280 dataLastTick 281 .name(name() + ".dataLastTick") 282 .desc("Last tick simulated from the elastic data trace") 283 ; 284} 285 286Tick 287TraceCPU::ElasticDataGen::init() 288{ 289 DPRINTF(TraceCPUData, "Initializing data memory request generator " 290 "DcacheGen: elastic issue with retry.\n"); 291 292 if (!readNextWindow()) 293 panic("Trace has %d elements. It must have at least %d elements.\n", 294 depGraph.size(), 2 * windowSize); 295 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n", 296 depGraph.size()); 297 298 if (!readNextWindow()) 299 panic("Trace has %d elements. It must have at least %d elements.\n", 300 depGraph.size(), 2 * windowSize); 301 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n", 302 depGraph.size()); 303 304 // Print readyList 305 if (DTRACE(TraceCPUData)) { 306 printReadyList(); 307 } 308 auto free_itr = readyList.begin(); 309 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli" 310 " is %d.\n", free_itr->seqNum, free_itr->execTick); 311 // Return the execute tick of the earliest ready node so that an event 312 // can be scheduled to call execute() 313 return (free_itr->execTick); 314} 315 316void 317TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) { 318 for (auto& free_node : readyList) { 319 free_node.execTick -= offset; 320 } 321} 322 323void 324TraceCPU::ElasticDataGen::exit() 325{ 326 trace.reset(); 327} 328 329bool 330TraceCPU::ElasticDataGen::readNextWindow() 331{ 332 333 // Read and add next window 334 DPRINTF(TraceCPUData, "Reading next window from file.\n"); 335 336 if (traceComplete) { 337 // We are at the end of the file, thus we have no more records. 338 // Return false. 339 return false; 340 } 341 342 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n", 343 depGraph.size()); 344 345 uint32_t num_read = 0; 346 while (num_read != windowSize) { 347 348 // Create a new graph node 349 GraphNode* new_node = new GraphNode; 350 351 // Read the next line to get the next record. If that fails then end of 352 // trace has been reached and traceComplete needs to be set in addition 353 // to returning false. 354 if (!trace.read(new_node)) { 355 DPRINTF(TraceCPUData, "\tTrace complete!\n"); 356 traceComplete = true; 357 return false; 358 } 359 360 // Annotate the ROB dependencies of the new node onto the parent nodes. 361 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep); 362 // Annotate the register dependencies of the new node onto the parent 363 // nodes. 364 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep); 365 366 num_read++; 367 // Add to map 368 depGraph[new_node->seqNum] = new_node; 369 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) { 370 // Source dependencies are already complete, check if resources 371 // are available and issue. The execution time is approximated 372 // to current time plus the computational delay. 373 checkAndIssue(new_node); 374 } 375 } 376 377 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n", 378 depGraph.size()); 379 return true; 380} 381 382template<typename T> void 383TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, 384 T& dep_array, uint8_t& num_dep) 385{ 386 for (auto& a_dep : dep_array) { 387 // The convention is to set the dependencies starting with the first 388 // index in the ROB and register dependency arrays. Thus, when we reach 389 // a dependency equal to the initialisation value of zero, we know have 390 // iterated over all dependencies and can break. 391 if (a_dep == 0) 392 break; 393 // We look up the valid dependency, i.e. the parent of this node 394 auto parent_itr = depGraph.find(a_dep); 395 if (parent_itr != depGraph.end()) { 396 // If the parent is found, it is yet to be executed. Append a 397 // pointer to the new node to the dependents list of the parent 398 // node. 399 parent_itr->second->dependents.push_back(new_node); 400 auto num_depts = parent_itr->second->dependents.size(); 401 maxDependents = std::max<double>(num_depts, maxDependents.value()); 402 } else { 403 // The dependency is not found in the graph. So consider 404 // the execution of the parent is complete, i.e. remove this 405 // dependency. 406 a_dep = 0; 407 num_dep--; 408 } 409 } 410} 411 412void 413TraceCPU::ElasticDataGen::execute() 414{ 415 DPRINTF(TraceCPUData, "Execute start occupancy:\n"); 416 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " 417 "depFreeQueue = %d ,", depGraph.size(), readyList.size(), 418 depFreeQueue.size()); 419 hwResource.printOccupancy(); 420 421 // Read next window to make sure that dependents of all dep-free nodes 422 // are in the depGraph 423 if (nextRead) { 424 readNextWindow(); 425 nextRead = false; 426 } 427 428 // First attempt to issue the pending dependency-free nodes held 429 // in depFreeQueue. If resources have become available for a node, 430 // then issue it, i.e. add the node to readyList. 431 while (!depFreeQueue.empty()) { 432 if (checkAndIssue(depFreeQueue.front(), false)) { 433 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num " 434 "%lli.\n", (depFreeQueue.front())->seqNum); 435 depFreeQueue.pop(); 436 } else { 437 break; 438 } 439 } 440 // Proceed to execute from readyList 441 auto graph_itr = depGraph.begin(); 442 auto free_itr = readyList.begin(); 443 // Iterate through readyList until the next free node has its execute 444 // tick later than curTick or the end of readyList is reached 445 while (free_itr->execTick <= curTick() && free_itr != readyList.end()) { 446 447 // Get pointer to the node to be executed 448 graph_itr = depGraph.find(free_itr->seqNum); 449 assert(graph_itr != depGraph.end()); 450 GraphNode* node_ptr = graph_itr->second; 451 452 // If there is a retryPkt send that else execute the load 453 if (retryPkt) { 454 // The retryPkt must be the request that was created by the 455 // first node in the readyList. 456 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) { 457 panic("Retry packet's seqence number does not match " 458 "the first node in the readyList.\n"); 459 } 460 if (port.sendTimingReq(retryPkt)) { 461 ++numRetrySucceeded; 462 retryPkt = nullptr; 463 } 464 } else if (node_ptr->isLoad() || node_ptr->isStore()) { 465 // If there is no retryPkt, attempt to send a memory request in 466 // case of a load or store node. If the send fails, executeMemReq() 467 // returns a packet pointer, which we save in retryPkt. In case of 468 // a comp node we don't do anything and simply continue as if the 469 // execution of the comp node succedded. 470 retryPkt = executeMemReq(node_ptr); 471 } 472 // If the retryPkt or a new load/store node failed, we exit from here 473 // as a retry from cache will bring the control to execute(). The 474 // first node in readyList then, will be the failed node. 475 if (retryPkt) { 476 break; 477 } 478 479 // Proceed to remove dependencies for the successfully executed node. 480 // If it is a load which is not strictly ordered and we sent a 481 // request for it successfully, we do not yet mark any register 482 // dependencies complete. But as per dependency modelling we need 483 // to mark ROB dependencies of load and non load/store nodes which 484 // are based on successful sending of the load as complete. 485 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) { 486 // If execute succeeded mark its dependents as complete 487 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up " 488 "dependents..\n", node_ptr->seqNum); 489 490 auto child_itr = (node_ptr->dependents).begin(); 491 while (child_itr != (node_ptr->dependents).end()) { 492 // ROB dependency of a store on a load must not be removed 493 // after load is sent but after response is received 494 if (!(*child_itr)->isStore() && 495 (*child_itr)->removeRobDep(node_ptr->seqNum)) { 496 497 // Check if the child node has become dependency free 498 if ((*child_itr)->numRobDep == 0 && 499 (*child_itr)->numRegDep == 0) { 500 501 // Source dependencies are complete, check if 502 // resources are available and issue 503 checkAndIssue(*child_itr); 504 } 505 // Remove this child for the sent load and point to new 506 // location of the element following the erased element 507 child_itr = node_ptr->dependents.erase(child_itr); 508 } else { 509 // This child is not dependency-free, point to the next 510 // child 511 child_itr++; 512 } 513 } 514 } else { 515 // If it is a strictly ordered load mark its dependents as complete 516 // as we do not send a request for this case. If it is a store or a 517 // comp node we also mark all its dependents complete. 518 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking" 519 " up dependents..\n", node_ptr->seqNum); 520 521 for (auto child : node_ptr->dependents) { 522 // If the child node is dependency free removeDepOnInst() 523 // returns true. 524 if (child->removeDepOnInst(node_ptr->seqNum)) { 525 // Source dependencies are complete, check if resources 526 // are available and issue 527 checkAndIssue(child); 528 } 529 } 530 } 531 532 // After executing the node, remove from readyList and delete node. 533 readyList.erase(free_itr); 534 // If it is a cacheable load which was sent, don't delete 535 // just yet. Delete it in completeMemAccess() after the 536 // response is received. If it is an strictly ordered 537 // load, it was not sent and all dependencies were simply 538 // marked complete. Thus it is safe to delete it. For 539 // stores and non load/store nodes all dependencies were 540 // marked complete so it is safe to delete it. 541 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) { 542 // Release all resources occupied by the completed node 543 hwResource.release(node_ptr); 544 // clear the dynamically allocated set of dependents 545 (node_ptr->dependents).clear(); 546 // Update the stat for numOps simulated 547 owner.updateNumOps(node_ptr->robNum); 548 // delete node 549 delete node_ptr; 550 // remove from graph 551 depGraph.erase(graph_itr); 552 } 553 // Point to first node to continue to next iteration of while loop 554 free_itr = readyList.begin(); 555 } // end of while loop 556 557 // Print readyList, sizes of queues and resource status after updating 558 if (DTRACE(TraceCPUData)) { 559 printReadyList(); 560 DPRINTF(TraceCPUData, "Execute end occupancy:\n"); 561 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " 562 "depFreeQueue = %d ,", depGraph.size(), readyList.size(), 563 depFreeQueue.size()); 564 hwResource.printOccupancy(); 565 } 566 567 if (retryPkt) { 568 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry" 569 "event from the cache for seq. num %lli.\n", 570 retryPkt->req->getReqInstSeqNum()); 571 return; 572 } 573 // If the size of the dependency graph is less than the dependency window 574 // then read from the trace file to populate the graph next time we are in 575 // execute. 576 if (depGraph.size() < windowSize && !traceComplete) 577 nextRead = true; 578 579 // If cache is not blocked, schedule an event for the first execTick in 580 // readyList else retry from cache will schedule the event. If the ready 581 // list is empty then check if the next pending node has resources 582 // available to issue. If yes, then schedule an event for the next cycle. 583 if (!readyList.empty()) { 584 Tick next_event_tick = std::max(readyList.begin()->execTick, 585 curTick()); 586 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 587 next_event_tick); 588 owner.schedDcacheNextEvent(next_event_tick); 589 } else if (readyList.empty() && !depFreeQueue.empty() && 590 hwResource.isAvailable(depFreeQueue.front())) { 591 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 592 owner.clockEdge(Cycles(1))); 593 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1))); 594 } 595 596 // If trace is completely read, readyList is empty and depGraph is empty, 597 // set execComplete to true 598 if (depGraph.empty() && readyList.empty() && traceComplete && 599 !hwResource.awaitingResponse()) { 600 DPRINTF(TraceCPUData, "\tExecution Complete!\n"); 601 execComplete = true; 602 dataLastTick = curTick(); 603 } 604} 605 606PacketPtr 607TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) 608{ 609 610 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, " 611 "virt addr %d, pc %#x, size %d, flags %d).\n", 612 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr, 613 node_ptr->pc, node_ptr->size, node_ptr->flags); 614 615 // If the request is strictly ordered, do not send it. Just return nullptr 616 // as if it was succesfully sent. 617 if (node_ptr->isStrictlyOrdered()) { 618 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores; 619 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n", 620 node_ptr->seqNum); 621 return nullptr; 622 } 623 624 // Check if the request spans two cache lines as this condition triggers 625 // an assert fail in the L1 cache. If it does then truncate the size to 626 // access only until the end of that line and ignore the remainder. The 627 // stat counting this is useful to keep a check on how frequently this 628 // happens. If required the code could be revised to mimick splitting such 629 // a request into two. 630 unsigned blk_size = owner.cacheLineSize(); 631 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1)); 632 if (!(blk_offset + node_ptr->size <= blk_size)) { 633 node_ptr->size = blk_size - blk_offset; 634 ++numSplitReqs; 635 } 636 637 // Create a request and the packet containing request 638 Request* req = new Request(node_ptr->physAddr, node_ptr->size, 639 node_ptr->flags, masterID, node_ptr->seqNum, 640 ContextID(0)); 641 req->setPC(node_ptr->pc); 642 // If virtual address is valid, set the asid and virtual address fields 643 // of the request. 644 if (node_ptr->virtAddr != 0) { 645 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size, 646 node_ptr->flags, masterID, node_ptr->pc); 647 req->setPaddr(node_ptr->physAddr); 648 req->setReqInstSeqNum(node_ptr->seqNum); 649 } 650 651 PacketPtr pkt; 652 uint8_t* pkt_data = new uint8_t[req->getSize()]; 653 if (node_ptr->isLoad()) { 654 pkt = Packet::createRead(req); 655 } else { 656 pkt = Packet::createWrite(req); 657 memset(pkt_data, 0xA, req->getSize()); 658 } 659 pkt->dataDynamic(pkt_data); 660 661 // Call MasterPort method to send a timing request for this packet 662 bool success = port.sendTimingReq(pkt); 663 ++numSendAttempted; 664 665 if (!success) { 666 // If it fails, return the packet to retry when a retry is signalled by 667 // the cache 668 ++numSendFailed; 669 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n"); 670 return pkt; 671 } else { 672 // It is succeeds, return nullptr 673 ++numSendSucceeded; 674 return nullptr; 675 } 676} 677 678bool 679TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first) 680{ 681 // Assert the node is dependency-free 682 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0); 683 684 // If this is the first attempt, print a debug message to indicate this. 685 if (first) { 686 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now" 687 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(), 688 node_ptr->robNum); 689 } 690 691 // Check if resources are available to issue the specific node 692 if (hwResource.isAvailable(node_ptr)) { 693 // If resources are free only then add to readyList 694 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding" 695 " to readyList, occupying resources.\n", node_ptr->seqNum); 696 // Compute the execute tick by adding the compute delay for the node 697 // and add the ready node to the ready list 698 addToSortedReadyList(node_ptr->seqNum, 699 owner.clockEdge() + node_ptr->compDelay); 700 // Account for the resources taken up by this issued node. 701 hwResource.occupy(node_ptr); 702 return true; 703 704 } else { 705 if (first) { 706 // Although dependencies are complete, resources are not available. 707 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli." 708 " Adding to depFreeQueue.\n", node_ptr->seqNum); 709 depFreeQueue.push(node_ptr); 710 } else { 711 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. " 712 "Still pending issue.\n", node_ptr->seqNum); 713 } 714 return false; 715 } 716} 717 718void 719TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt) 720{ 721 // Release the resources for this completed node. 722 if (pkt->isWrite()) { 723 // Consider store complete. 724 hwResource.releaseStoreBuffer(); 725 // If it is a store response then do nothing since we do not model 726 // dependencies on store completion in the trace. But if we were 727 // blocking execution due to store buffer fullness, we need to schedule 728 // an event and attempt to progress. 729 } else { 730 // If it is a load response then release the dependents waiting on it. 731 // Get pointer to the completed load 732 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum()); 733 assert(graph_itr != depGraph.end()); 734 GraphNode* node_ptr = graph_itr->second; 735 736 // Release resources occupied by the load 737 hwResource.release(node_ptr); 738 739 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up" 740 " dependents..\n", node_ptr->seqNum); 741 742 for (auto child : node_ptr->dependents) { 743 if (child->removeDepOnInst(node_ptr->seqNum)) { 744 checkAndIssue(child); 745 } 746 } 747 748 // clear the dynamically allocated set of dependents 749 (node_ptr->dependents).clear(); 750 // Update the stat for numOps completed 751 owner.updateNumOps(node_ptr->robNum); 752 // delete node 753 delete node_ptr; 754 // remove from graph 755 depGraph.erase(graph_itr); 756 } 757 758 if (DTRACE(TraceCPUData)) { 759 printReadyList(); 760 } 761 762 // If the size of the dependency graph is less than the dependency window 763 // then read from the trace file to populate the graph next time we are in 764 // execute. 765 if (depGraph.size() < windowSize && !traceComplete) 766 nextRead = true; 767 768 // If not waiting for retry, attempt to schedule next event 769 if (!retryPkt) { 770 // We might have new dep-free nodes in the list which will have execute 771 // tick greater than or equal to curTick. But a new dep-free node might 772 // have its execute tick earlier. Therefore, attempt to reschedule. It 773 // could happen that the readyList is empty and we got here via a 774 // last remaining response. So, either the trace is complete or there 775 // are pending nodes in the depFreeQueue. The checking is done in the 776 // execute() control flow, so schedule an event to go via that flow. 777 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) : 778 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1))); 779 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 780 next_event_tick); 781 owner.schedDcacheNextEvent(next_event_tick); 782 } 783} 784 785void 786TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num, 787 Tick exec_tick) 788{ 789 ReadyNode ready_node; 790 ready_node.seqNum = seq_num; 791 ready_node.execTick = exec_tick; 792 793 // Iterator to readyList 794 auto itr = readyList.begin(); 795 796 // If the readyList is empty, simply insert the new node at the beginning 797 // and return 798 if (itr == readyList.end()) { 799 readyList.insert(itr, ready_node); 800 maxReadyListSize = std::max<double>(readyList.size(), 801 maxReadyListSize.value()); 802 return; 803 } 804 805 // If the new node has its execution tick equal to the first node in the 806 // list then go to the next node. If the first node in the list failed 807 // to execute, its position as the first is thus maintained. 808 if (retryPkt) 809 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum) 810 itr++; 811 812 // Increment the iterator and compare the node pointed to by it to the new 813 // node till the position to insert the new node is found. 814 bool found = false; 815 while (!found && itr != readyList.end()) { 816 // If the execution tick of the new node is less than the node then 817 // this is the position to insert 818 if (exec_tick < itr->execTick) 819 found = true; 820 // If the execution tick of the new node is equal to the node then 821 // sort in ascending order of sequence numbers 822 else if (exec_tick == itr->execTick) { 823 // If the sequence number of the new node is less than the node 824 // then this is the position to insert 825 if (seq_num < itr->seqNum) 826 found = true; 827 // Else go to next node 828 else 829 itr++; 830 } 831 // If the execution tick of the new node is greater than the node then 832 // go to the next node 833 else 834 itr++; 835 } 836 readyList.insert(itr, ready_node); 837 // Update the stat for max size reached of the readyList 838 maxReadyListSize = std::max<double>(readyList.size(), 839 maxReadyListSize.value()); 840} 841 842void 843TraceCPU::ElasticDataGen::printReadyList() { 844 845 auto itr = readyList.begin(); 846 if (itr == readyList.end()) { 847 DPRINTF(TraceCPUData, "readyList is empty.\n"); 848 return; 849 } 850 DPRINTF(TraceCPUData, "Printing readyList:\n"); 851 while (itr != readyList.end()) { 852 auto graph_itr = depGraph.find(itr->seqNum); 853 GraphNode* node_ptr M5_VAR_USED = graph_itr->second; 854 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum, 855 node_ptr->typeToStr(), itr->execTick); 856 itr++; 857 } 858} 859 860TraceCPU::ElasticDataGen::HardwareResource::HardwareResource( 861 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) 862 : sizeROB(max_rob), 863 sizeStoreBuffer(max_stores), 864 sizeLoadBuffer(max_loads), 865 oldestInFlightRobNum(UINT64_MAX), 866 numInFlightLoads(0), 867 numInFlightStores(0) 868{} 869 870void 871TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node) 872{ 873 // Occupy ROB entry for the issued node 874 // Merely maintain the oldest node, i.e. numerically least robNum by saving 875 // it in the variable oldestInFLightRobNum. 876 inFlightNodes[new_node->seqNum] = new_node->robNum; 877 oldestInFlightRobNum = inFlightNodes.begin()->second; 878 879 // Occupy Load/Store Buffer entry for the issued node if applicable 880 if (new_node->isLoad()) { 881 ++numInFlightLoads; 882 } else if (new_node->isStore()) { 883 ++numInFlightStores; 884 } // else if it is a non load/store node, no buffer entry is occupied 885 886 printOccupancy(); 887} 888 889void 890TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node) 891{ 892 assert(!inFlightNodes.empty()); 893 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n", 894 done_node->seqNum); 895 896 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end()); 897 inFlightNodes.erase(done_node->seqNum); 898 899 if (inFlightNodes.empty()) { 900 // If we delete the only in-flight node and then the 901 // oldestInFlightRobNum is set to it's initialized (max) value. 902 oldestInFlightRobNum = UINT64_MAX; 903 } else { 904 // Set the oldest in-flight node rob number equal to the first node in 905 // the inFlightNodes since that will have the numerically least value. 906 oldestInFlightRobNum = inFlightNodes.begin()->second; 907 } 908 909 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, " 910 "oldestInFlightRobNum = %d\n", inFlightNodes.size(), 911 oldestInFlightRobNum); 912 913 // A store is considered complete when a request is sent, thus ROB entry is 914 // freed. But it occupies an entry in the Store Buffer until its response 915 // is received. A load is considered complete when a response is received, 916 // thus both ROB and Load Buffer entries can be released. 917 if (done_node->isLoad()) { 918 assert(numInFlightLoads != 0); 919 --numInFlightLoads; 920 } 921 // For normal writes, we send the requests out and clear a store buffer 922 // entry on response. For writes which are strictly ordered, for e.g. 923 // writes to device registers, we do that within release() which is called 924 // when node is executed and taken off from readyList. 925 if (done_node->isStore() && done_node->isStrictlyOrdered()) { 926 releaseStoreBuffer(); 927 } 928} 929 930void 931TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer() 932{ 933 assert(numInFlightStores != 0); 934 --numInFlightStores; 935} 936 937bool 938TraceCPU::ElasticDataGen::HardwareResource::isAvailable( 939 const GraphNode* new_node) const 940{ 941 uint16_t num_in_flight_nodes; 942 if (inFlightNodes.empty()) { 943 num_in_flight_nodes = 0; 944 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 945 " #in-flight nodes = 0", new_node->seqNum); 946 } else if (new_node->robNum > oldestInFlightRobNum) { 947 // This is the intuitive case where new dep-free node is younger 948 // instruction than the oldest instruction in-flight. Thus we make sure 949 // in_flight_nodes does not overflow. 950 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum; 951 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 952 " #in-flight nodes = %d - %d = %d", new_node->seqNum, 953 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes); 954 } else { 955 // This is the case where an instruction older than the oldest in- 956 // flight instruction becomes dep-free. Thus we must have already 957 // accounted for the entry in ROB for this new dep-free node. 958 // Immediately after this check returns true, oldestInFlightRobNum will 959 // be updated in occupy(). We simply let this node issue now. 960 num_in_flight_nodes = 0; 961 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 962 " new oldestInFlightRobNum = %d, #in-flight nodes ignored", 963 new_node->seqNum, new_node->robNum); 964 } 965 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n", 966 numInFlightLoads, sizeLoadBuffer, 967 numInFlightStores, sizeStoreBuffer); 968 // Check if resources are available to issue the specific node 969 if (num_in_flight_nodes >= sizeROB) { 970 return false; 971 } 972 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) { 973 return false; 974 } 975 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) { 976 return false; 977 } 978 return true; 979} 980 981bool 982TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const { 983 // Return true if there is at least one read or write request in flight 984 return (numInFlightStores != 0 || numInFlightLoads != 0); 985} 986 987void 988TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() { 989 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, " 990 "LQ = %d/%d, SQ = %d/%d.\n", 991 oldestInFlightRobNum, 992 numInFlightLoads, sizeLoadBuffer, 993 numInFlightStores, sizeStoreBuffer); 994} 995 996void 997TraceCPU::FixedRetryGen::regStats() 998{ 999 using namespace Stats; 1000 1001 numSendAttempted 1002 .name(name() + ".numSendAttempted") 1003 .desc("Number of first attempts to send a request") 1004 ; 1005 1006 numSendSucceeded 1007 .name(name() + ".numSendSucceeded") 1008 .desc("Number of successful first attempts") 1009 ; 1010 1011 numSendFailed 1012 .name(name() + ".numSendFailed") 1013 .desc("Number of failed first attempts") 1014 ; 1015 1016 numRetrySucceeded 1017 .name(name() + ".numRetrySucceeded") 1018 .desc("Number of successful retries") 1019 ; 1020 1021 instLastTick 1022 .name(name() + ".instLastTick") 1023 .desc("Last tick simulated from the fixed inst trace") 1024 ; 1025} 1026 1027Tick 1028TraceCPU::FixedRetryGen::init() 1029{ 1030 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator" 1031 " IcacheGen: fixed issue with retry.\n"); 1032 1033 if (nextExecute()) { 1034 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick); 1035 return currElement.tick; 1036 } else { 1037 panic("Read of first message in the trace failed.\n"); 1038 return MaxTick; 1039 } 1040} 1041 1042bool 1043TraceCPU::FixedRetryGen::tryNext() 1044{ 1045 // If there is a retry packet, try to send it 1046 if (retryPkt) { 1047 1048 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n"); 1049 1050 if (!port.sendTimingReq(retryPkt)) { 1051 // Still blocked! This should never occur. 1052 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n"); 1053 return false; 1054 } 1055 ++numRetrySucceeded; 1056 } else { 1057 1058 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n"); 1059 1060 // try sending current element 1061 assert(currElement.isValid()); 1062 1063 ++numSendAttempted; 1064 1065 if (!send(currElement.addr, currElement.blocksize, 1066 currElement.cmd, currElement.flags, currElement.pc)) { 1067 DPRINTF(TraceCPUInst, "currElement sending failed.\n"); 1068 ++numSendFailed; 1069 // return false to indicate not to schedule next event 1070 return false; 1071 } else { 1072 ++numSendSucceeded; 1073 } 1074 } 1075 // If packet was sent successfully, either retryPkt or currElement, return 1076 // true to indicate to schedule event at current Tick plus delta. If packet 1077 // was sent successfully and there is no next packet to send, return false. 1078 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next " 1079 "element.\n"); 1080 retryPkt = nullptr; 1081 // Read next element into currElement, currElement gets cleared so save the 1082 // tick to calculate delta 1083 Tick last_tick = currElement.tick; 1084 if (nextExecute()) { 1085 assert(currElement.tick >= last_tick); 1086 delta = currElement.tick - last_tick; 1087 } 1088 return !traceComplete; 1089} 1090 1091void 1092TraceCPU::FixedRetryGen::exit() 1093{ 1094 trace.reset(); 1095} 1096 1097bool 1098TraceCPU::FixedRetryGen::nextExecute() 1099{ 1100 if (traceComplete) 1101 // We are at the end of the file, thus we have no more messages. 1102 // Return false. 1103 return false; 1104 1105 1106 //Reset the currElement to the default values 1107 currElement.clear(); 1108 1109 // Read the next line to get the next message. If that fails then end of 1110 // trace has been reached and traceComplete needs to be set in addition 1111 // to returning false. If successful then next message is in currElement. 1112 if (!trace.read(&currElement)) { 1113 traceComplete = true; 1114 instLastTick = curTick(); 1115 return false; 1116 } 1117 1118 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n", 1119 currElement.cmd.isRead() ? 'r' : 'w', 1120 currElement.addr, 1121 currElement.pc, 1122 currElement.blocksize, 1123 currElement.tick); 1124 1125 return true; 1126} 1127 1128bool 1129TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd, 1130 Request::FlagsType flags, Addr pc) 1131{ 1132 1133 // Create new request 1134 Request* req = new Request(addr, size, flags, masterID); 1135 req->setPC(pc); 1136 1137 // If this is not done it triggers assert in L1 cache for invalid contextId 1138 req->setContext(ContextID(0)); 1139 1140 // Embed it in a packet 1141 PacketPtr pkt = new Packet(req, cmd); 1142 1143 uint8_t* pkt_data = new uint8_t[req->getSize()]; 1144 pkt->dataDynamic(pkt_data); 1145 1146 if (cmd.isWrite()) { 1147 memset(pkt_data, 0xA, req->getSize()); 1148 } 1149 1150 // Call MasterPort method to send a timing request for this packet 1151 bool success = port.sendTimingReq(pkt); 1152 if (!success) { 1153 // If it fails, save the packet to retry when a retry is signalled by 1154 // the cache 1155 retryPkt = pkt; 1156 } 1157 return success; 1158} 1159 1160void 1161TraceCPU::icacheRetryRecvd() 1162{ 1163 // Schedule an event to go through the control flow in the same tick as 1164 // retry is received 1165 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen" 1166 " event @%lli.\n", curTick()); 1167 schedule(icacheNextEvent, curTick()); 1168} 1169 1170void 1171TraceCPU::dcacheRetryRecvd() 1172{ 1173 // Schedule an event to go through the execute flow in the same tick as 1174 // retry is received 1175 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen" 1176 " event @%lli.\n", curTick()); 1177 schedule(dcacheNextEvent, curTick()); 1178} 1179 1180void 1181TraceCPU::schedDcacheNextEvent(Tick when) 1182{ 1183 if (!dcacheNextEvent.scheduled()) { 1184 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n", 1185 when); 1186 schedule(dcacheNextEvent, when); 1187 ++numSchedDcacheEvent; 1188 } else if (when < dcacheNextEvent.when()) { 1189 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli" 1190 " to %lli.\n", dcacheNextEvent.when(), when); 1191 reschedule(dcacheNextEvent, when); 1192 } 1193 1194} 1195 1196bool 1197TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt) 1198{ 1199 // All responses on the instruction fetch side are ignored. Simply delete 1200 // the request and packet to free allocated memory 1201 delete pkt->req; 1202 delete pkt; 1203 1204 return true; 1205} 1206 1207void 1208TraceCPU::IcachePort::recvReqRetry() 1209{ 1210 owner->icacheRetryRecvd(); 1211} 1212 1213void 1214TraceCPU::dcacheRecvTimingResp(PacketPtr pkt) 1215{ 1216 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n"); 1217 dcacheGen.completeMemAccess(pkt); 1218} 1219 1220bool 1221TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt) 1222{ 1223 // Handle the responses for data memory requests which is done inside the 1224 // elastic data generator 1225 owner->dcacheRecvTimingResp(pkt); 1226 // After processing the response delete the request and packet to free 1227 // memory 1228 delete pkt->req; 1229 delete pkt; 1230 1231 return true; 1232} 1233 1234void 1235TraceCPU::DcachePort::recvReqRetry() 1236{ 1237 owner->dcacheRetryRecvd(); 1238} 1239 1240TraceCPU::ElasticDataGen::InputStream::InputStream( 1241 const std::string& filename, 1242 const double time_multiplier) 1243 : trace(filename), 1244 timeMultiplier(time_multiplier), 1245 microOpCount(0) 1246{ 1247 // Create a protobuf message for the header and read it from the stream 1248 ProtoMessage::InstDepRecordHeader header_msg; 1249 if (!trace.read(header_msg)) { 1250 panic("Failed to read packet header from %s\n", filename); 1251 1252 if (header_msg.tick_freq() != SimClock::Frequency) { 1253 panic("Trace %s was recorded with a different tick frequency %d\n", 1254 header_msg.tick_freq()); 1255 } 1256 } else { 1257 // Assign window size equal to the field in the trace that was recorded 1258 // when the data dependency trace was captured in the o3cpu model 1259 windowSize = header_msg.window_size(); 1260 } 1261} 1262 1263void 1264TraceCPU::ElasticDataGen::InputStream::reset() 1265{ 1266 trace.reset(); 1267} 1268 1269bool 1270TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element) 1271{ 1272 ProtoMessage::InstDepRecord pkt_msg; 1273 if (trace.read(pkt_msg)) { 1274 // Required fields 1275 element->seqNum = pkt_msg.seq_num(); 1276 element->type = pkt_msg.type(); 1277 // Scale the compute delay to effectively scale the Trace CPU frequency 1278 element->compDelay = pkt_msg.comp_delay() * timeMultiplier; 1279 1280 // Repeated field robDepList 1281 element->clearRobDep(); 1282 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep); 1283 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) { 1284 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i); 1285 element->numRobDep += 1; 1286 } 1287 1288 // Repeated field 1289 element->clearRegDep(); 1290 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs); 1291 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) { 1292 // There is a possibility that an instruction has both, a register 1293 // and order dependency on an instruction. In such a case, the 1294 // register dependency is omitted 1295 bool duplicate = false; 1296 for (int j = 0; j < element->numRobDep; j++) { 1297 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]); 1298 } 1299 if (!duplicate) { 1300 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i); 1301 element->numRegDep += 1; 1302 } 1303 } 1304 1305 // Optional fields 1306 if (pkt_msg.has_p_addr()) 1307 element->physAddr = pkt_msg.p_addr(); 1308 else 1309 element->physAddr = 0; 1310 1311 if (pkt_msg.has_v_addr()) 1312 element->virtAddr = pkt_msg.v_addr(); 1313 else 1314 element->virtAddr = 0; 1315 1316 if (pkt_msg.has_asid()) 1317 element->asid = pkt_msg.asid(); 1318 else 1319 element->asid = 0; 1320 1321 if (pkt_msg.has_size()) 1322 element->size = pkt_msg.size(); 1323 else 1324 element->size = 0; 1325 1326 if (pkt_msg.has_flags()) 1327 element->flags = pkt_msg.flags(); 1328 else 1329 element->flags = 0; 1330 1331 if (pkt_msg.has_pc()) 1332 element->pc = pkt_msg.pc(); 1333 else 1334 element->pc = 0; 1335 1336 // ROB occupancy number 1337 ++microOpCount; 1338 if (pkt_msg.has_weight()) { 1339 microOpCount += pkt_msg.weight(); 1340 } 1341 element->robNum = microOpCount; 1342 return true; 1343 } 1344 1345 // We have reached the end of the file 1346 return false; 1347} 1348 1349bool 1350TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep) 1351{ 1352 for (auto& own_reg_dep : regDep) { 1353 if (own_reg_dep == reg_dep) { 1354 // If register dependency is found, make it zero and return true 1355 own_reg_dep = 0; 1356 assert(numRegDep > 0); 1357 --numRegDep; 1358 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli " 1359 "done.\n", seqNum, reg_dep); 1360 return true; 1361 } 1362 } 1363 1364 // Return false if the dependency is not found 1365 return false; 1366} 1367 1368bool 1369TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep) 1370{ 1371 for (auto& own_rob_dep : robDep) { 1372 if (own_rob_dep == rob_dep) { 1373 // If the rob dependency is found, make it zero and return true 1374 own_rob_dep = 0; 1375 assert(numRobDep > 0); 1376 --numRobDep; 1377 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli " 1378 "done.\n", seqNum, rob_dep); 1379 return true; 1380 } 1381 } 1382 return false; 1383} 1384 1385void 1386TraceCPU::ElasticDataGen::GraphNode::clearRegDep() { 1387 for (auto& own_reg_dep : regDep) { 1388 own_reg_dep = 0; 1389 } 1390 numRegDep = 0; 1391} 1392 1393void 1394TraceCPU::ElasticDataGen::GraphNode::clearRobDep() { 1395 for (auto& own_rob_dep : robDep) { 1396 own_rob_dep = 0; 1397 } 1398 numRobDep = 0; 1399} 1400 1401bool 1402TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num) 1403{ 1404 // If it is an rob dependency then remove it 1405 if (!removeRobDep(done_seq_num)) { 1406 // If it is not an rob dependency then it must be a register dependency 1407 // If the register dependency is not found, it violates an assumption 1408 // and must be caught by assert. 1409 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num); 1410 assert(regdep_found); 1411 } 1412 // Return true if the node is dependency free 1413 return (numRobDep == 0 && numRegDep == 0); 1414} 1415 1416void 1417TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const 1418{ 1419 DPRINTFR(TraceCPUData, "%lli", seqNum); 1420 DPRINTFR(TraceCPUData, ",%s", typeToStr()); 1421 if (isLoad() || isStore()) { 1422 DPRINTFR(TraceCPUData, ",%i", physAddr); 1423 DPRINTFR(TraceCPUData, ",%i", size); 1424 DPRINTFR(TraceCPUData, ",%i", flags); 1425 } 1426 DPRINTFR(TraceCPUData, ",%lli", compDelay); 1427 int i = 0; 1428 DPRINTFR(TraceCPUData, "robDep:"); 1429 while (robDep[i] != 0) { 1430 DPRINTFR(TraceCPUData, ",%lli", robDep[i]); 1431 i++; 1432 } 1433 i = 0; 1434 DPRINTFR(TraceCPUData, "regDep:"); 1435 while (regDep[i] != 0) { 1436 DPRINTFR(TraceCPUData, ",%lli", regDep[i]); 1437 i++; 1438 } 1439 auto child_itr = dependents.begin(); 1440 DPRINTFR(TraceCPUData, "dependents:"); 1441 while (child_itr != dependents.end()) { 1442 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum); 1443 child_itr++; 1444 } 1445 1446 DPRINTFR(TraceCPUData, "\n"); 1447} 1448 1449std::string 1450TraceCPU::ElasticDataGen::GraphNode::typeToStr() const 1451{ 1452 return Record::RecordType_Name(type); 1453} 1454 1455TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename) 1456 : trace(filename) 1457{ 1458 // Create a protobuf message for the header and read it from the stream 1459 ProtoMessage::PacketHeader header_msg; 1460 if (!trace.read(header_msg)) { 1461 panic("Failed to read packet header from %s\n", filename); 1462 1463 if (header_msg.tick_freq() != SimClock::Frequency) { 1464 panic("Trace %s was recorded with a different tick frequency %d\n", 1465 header_msg.tick_freq()); 1466 } 1467 } 1468} 1469 1470void 1471TraceCPU::FixedRetryGen::InputStream::reset() 1472{ 1473 trace.reset(); 1474} 1475 1476bool 1477TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element) 1478{ 1479 ProtoMessage::Packet pkt_msg; 1480 if (trace.read(pkt_msg)) { 1481 element->cmd = pkt_msg.cmd(); 1482 element->addr = pkt_msg.addr(); 1483 element->blocksize = pkt_msg.size(); 1484 element->tick = pkt_msg.tick(); 1485 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0; 1486 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0; 1487 return true; 1488 } 1489 1490 // We have reached the end of the file 1491 return false; 1492} 1493