trace_cpu.cc revision 11631
1/* 2 * Copyright (c) 2013 - 2016 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Radhika Jagtap 38 * Andreas Hansson 39 * Thomas Grass 40 */ 41 42#include "cpu/trace/trace_cpu.hh" 43 44#include "sim/sim_exit.hh" 45 46// Declare and initialize the static counter for number of trace CPUs. 47int TraceCPU::numTraceCPUs = 0; 48 49TraceCPU::TraceCPU(TraceCPUParams *params) 50 : BaseCPU(params), 51 icachePort(this), 52 dcachePort(this), 53 instMasterID(params->system->getMasterId(name() + ".inst")), 54 dataMasterID(params->system->getMasterId(name() + ".data")), 55 instTraceFile(params->instTraceFile), 56 dataTraceFile(params->dataTraceFile), 57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile), 58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile, 59 params), 60 icacheNextEvent(this), 61 dcacheNextEvent(this), 62 oneTraceComplete(false), 63 firstFetchTick(0), 64 execCompleteEvent(nullptr) 65{ 66 // Increment static counter for number of Trace CPUs. 67 ++TraceCPU::numTraceCPUs; 68 69 // Check that the python parameters for sizes of ROB, store buffer and load 70 // buffer do not overflow the corresponding C++ variables. 71 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the " 72 "max. value of %d.\n", params->sizeROB, UINT16_MAX); 73 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d " 74 "exceeds the max. value of %d.\n", params->sizeROB, 75 UINT16_MAX); 76 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to" 77 " %d exceeds the max. value of %d.\n", 78 params->sizeLoadBuffer, UINT16_MAX); 79} 80 81TraceCPU::~TraceCPU() 82{ 83 84} 85 86TraceCPU* 87TraceCPUParams::create() 88{ 89 return new TraceCPU(this); 90} 91 92void 93TraceCPU::takeOverFrom(BaseCPU *oldCPU) 94{ 95 // Unbind the ports of the old CPU and bind the ports of the TraceCPU. 96 assert(!getInstPort().isConnected()); 97 assert(oldCPU->getInstPort().isConnected()); 98 BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort(); 99 oldCPU->getInstPort().unbind(); 100 getInstPort().bind(inst_peer_port); 101 102 assert(!getDataPort().isConnected()); 103 assert(oldCPU->getDataPort().isConnected()); 104 BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort(); 105 oldCPU->getDataPort().unbind(); 106 getDataPort().bind(data_peer_port); 107} 108 109void 110TraceCPU::init() 111{ 112 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"." 113 "\n", instTraceFile); 114 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n", 115 dataTraceFile); 116 117 BaseCPU::init(); 118 119 // Get the send tick of the first instruction read request and schedule 120 // icacheNextEvent at that tick. 121 Tick first_icache_tick = icacheGen.init(); 122 schedule(icacheNextEvent, first_icache_tick); 123 124 // Get the send tick of the first data read/write request and schedule 125 // dcacheNextEvent at that tick. 126 Tick first_dcache_tick = dcacheGen.init(); 127 schedule(dcacheNextEvent, first_dcache_tick); 128 129 // The static counter for number of Trace CPUs is correctly set at this 130 // point so create an event and pass it. 131 execCompleteEvent = new CountedExitEvent("end of all traces reached.", 132 numTraceCPUs); 133 // Save the first fetch request tick to dump it as tickOffset 134 firstFetchTick = first_icache_tick; 135} 136 137void 138TraceCPU::schedIcacheNext() 139{ 140 DPRINTF(TraceCPUInst, "IcacheGen event.\n"); 141 142 // Try to send the current packet or a retry packet if there is one 143 bool sched_next = icacheGen.tryNext(); 144 // If packet sent successfully, schedule next event 145 if (sched_next) { 146 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event " 147 "at %d.\n", curTick() + icacheGen.tickDelta()); 148 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta()); 149 ++numSchedIcacheEvent; 150 } else { 151 // check if traceComplete. If not, do nothing because sending failed 152 // and next event will be scheduled via RecvRetry() 153 if (icacheGen.isTraceComplete()) { 154 // If this is the first trace to complete, set the variable. If it 155 // is already set then both traces are complete to exit sim. 156 checkAndSchedExitEvent(); 157 } 158 } 159 return; 160} 161 162void 163TraceCPU::schedDcacheNext() 164{ 165 DPRINTF(TraceCPUData, "DcacheGen event.\n"); 166 167 dcacheGen.execute(); 168 if (dcacheGen.isExecComplete()) { 169 checkAndSchedExitEvent(); 170 } 171} 172 173void 174TraceCPU::checkAndSchedExitEvent() 175{ 176 if (!oneTraceComplete) { 177 oneTraceComplete = true; 178 } else { 179 // Schedule event to indicate execution is complete as both 180 // instruction and data access traces have been played back. 181 inform("%s: Execution complete.\n", name()); 182 183 // Record stats which are computed at the end of simulation 184 tickOffset = firstFetchTick; 185 numCycles = (clockEdge() - firstFetchTick) / clockPeriod(); 186 numOps = dcacheGen.getMicroOpCount(); 187 schedule(*execCompleteEvent, curTick()); 188 } 189} 190 191void 192TraceCPU::regStats() 193{ 194 195 BaseCPU::regStats(); 196 197 numSchedDcacheEvent 198 .name(name() + ".numSchedDcacheEvent") 199 .desc("Number of events scheduled to trigger data request generator") 200 ; 201 202 numSchedIcacheEvent 203 .name(name() + ".numSchedIcacheEvent") 204 .desc("Number of events scheduled to trigger instruction request generator") 205 ; 206 207 numOps 208 .name(name() + ".numOps") 209 .desc("Number of micro-ops simulated by the Trace CPU") 210 ; 211 212 cpi 213 .name(name() + ".cpi") 214 .desc("Cycles per micro-op used as a proxy for CPI") 215 .precision(6) 216 ; 217 cpi = numCycles/numOps; 218 219 tickOffset 220 .name(name() + ".tickOffset") 221 .desc("The first execution tick for the root node of elastic traces") 222 ; 223 224 icacheGen.regStats(); 225 dcacheGen.regStats(); 226} 227 228void 229TraceCPU::ElasticDataGen::regStats() 230{ 231 using namespace Stats; 232 233 maxDependents 234 .name(name() + ".maxDependents") 235 .desc("Max number of dependents observed on a node") 236 ; 237 238 maxReadyListSize 239 .name(name() + ".maxReadyListSize") 240 .desc("Max size of the ready list observed") 241 ; 242 243 numSendAttempted 244 .name(name() + ".numSendAttempted") 245 .desc("Number of first attempts to send a request") 246 ; 247 248 numSendSucceeded 249 .name(name() + ".numSendSucceeded") 250 .desc("Number of successful first attempts") 251 ; 252 253 numSendFailed 254 .name(name() + ".numSendFailed") 255 .desc("Number of failed first attempts") 256 ; 257 258 numRetrySucceeded 259 .name(name() + ".numRetrySucceeded") 260 .desc("Number of successful retries") 261 ; 262 263 numSplitReqs 264 .name(name() + ".numSplitReqs") 265 .desc("Number of split requests") 266 ; 267 268 numSOLoads 269 .name(name() + ".numSOLoads") 270 .desc("Number of strictly ordered loads") 271 ; 272 273 numSOStores 274 .name(name() + ".numSOStores") 275 .desc("Number of strictly ordered stores") 276 ; 277 278 dataLastTick 279 .name(name() + ".dataLastTick") 280 .desc("Last tick simulated from the elastic data trace") 281 ; 282} 283 284Tick 285TraceCPU::ElasticDataGen::init() 286{ 287 DPRINTF(TraceCPUData, "Initializing data memory request generator " 288 "DcacheGen: elastic issue with retry.\n"); 289 290 if (!readNextWindow()) 291 panic("Trace has %d elements. It must have at least %d elements.\n", 292 depGraph.size(), 2 * windowSize); 293 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n", 294 depGraph.size()); 295 296 if (!readNextWindow()) 297 panic("Trace has %d elements. It must have at least %d elements.\n", 298 depGraph.size(), 2 * windowSize); 299 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n", 300 depGraph.size()); 301 302 // Print readyList 303 if (DTRACE(TraceCPUData)) { 304 printReadyList(); 305 } 306 auto free_itr = readyList.begin(); 307 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli" 308 " is %d.\n", free_itr->seqNum, free_itr->execTick); 309 // Return the execute tick of the earliest ready node so that an event 310 // can be scheduled to call execute() 311 return (free_itr->execTick); 312} 313 314void 315TraceCPU::ElasticDataGen::exit() 316{ 317 trace.reset(); 318} 319 320bool 321TraceCPU::ElasticDataGen::readNextWindow() 322{ 323 324 // Read and add next window 325 DPRINTF(TraceCPUData, "Reading next window from file.\n"); 326 327 if (traceComplete) { 328 // We are at the end of the file, thus we have no more records. 329 // Return false. 330 return false; 331 } 332 333 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n", 334 depGraph.size()); 335 336 uint32_t num_read = 0; 337 while (num_read != windowSize) { 338 339 // Create a new graph node 340 GraphNode* new_node = new GraphNode; 341 342 // Read the next line to get the next record. If that fails then end of 343 // trace has been reached and traceComplete needs to be set in addition 344 // to returning false. 345 if (!trace.read(new_node)) { 346 DPRINTF(TraceCPUData, "\tTrace complete!\n"); 347 traceComplete = true; 348 return false; 349 } 350 351 // Annotate the ROB dependencies of the new node onto the parent nodes. 352 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep); 353 // Annotate the register dependencies of the new node onto the parent 354 // nodes. 355 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep); 356 357 num_read++; 358 // Add to map 359 depGraph[new_node->seqNum] = new_node; 360 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) { 361 // Source dependencies are already complete, check if resources 362 // are available and issue. The execution time is approximated 363 // to current time plus the computational delay. 364 checkAndIssue(new_node); 365 } 366 } 367 368 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n", 369 depGraph.size()); 370 return true; 371} 372 373template<typename T> void 374TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, 375 T& dep_array, uint8_t& num_dep) 376{ 377 for (auto& a_dep : dep_array) { 378 // The convention is to set the dependencies starting with the first 379 // index in the ROB and register dependency arrays. Thus, when we reach 380 // a dependency equal to the initialisation value of zero, we know have 381 // iterated over all dependencies and can break. 382 if (a_dep == 0) 383 break; 384 // We look up the valid dependency, i.e. the parent of this node 385 auto parent_itr = depGraph.find(a_dep); 386 if (parent_itr != depGraph.end()) { 387 // If the parent is found, it is yet to be executed. Append a 388 // pointer to the new node to the dependents list of the parent 389 // node. 390 parent_itr->second->dependents.push_back(new_node); 391 auto num_depts = parent_itr->second->dependents.size(); 392 maxDependents = std::max<double>(num_depts, maxDependents.value()); 393 } else { 394 // The dependency is not found in the graph. So consider 395 // the execution of the parent is complete, i.e. remove this 396 // dependency. 397 a_dep = 0; 398 num_dep--; 399 } 400 } 401} 402 403void 404TraceCPU::ElasticDataGen::execute() 405{ 406 DPRINTF(TraceCPUData, "Execute start occupancy:\n"); 407 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " 408 "depFreeQueue = %d ,", depGraph.size(), readyList.size(), 409 depFreeQueue.size()); 410 hwResource.printOccupancy(); 411 412 // Read next window to make sure that dependents of all dep-free nodes 413 // are in the depGraph 414 if (nextRead) { 415 readNextWindow(); 416 nextRead = false; 417 } 418 419 // First attempt to issue the pending dependency-free nodes held 420 // in depFreeQueue. If resources have become available for a node, 421 // then issue it, i.e. add the node to readyList. 422 while (!depFreeQueue.empty()) { 423 if (checkAndIssue(depFreeQueue.front(), false)) { 424 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num " 425 "%lli.\n", (depFreeQueue.front())->seqNum); 426 depFreeQueue.pop(); 427 } else { 428 break; 429 } 430 } 431 // Proceed to execute from readyList 432 auto graph_itr = depGraph.begin(); 433 auto free_itr = readyList.begin(); 434 // Iterate through readyList until the next free node has its execute 435 // tick later than curTick or the end of readyList is reached 436 while (free_itr->execTick <= curTick() && free_itr != readyList.end()) { 437 438 // Get pointer to the node to be executed 439 graph_itr = depGraph.find(free_itr->seqNum); 440 assert(graph_itr != depGraph.end()); 441 GraphNode* node_ptr = graph_itr->second; 442 443 // If there is a retryPkt send that else execute the load 444 if (retryPkt) { 445 // The retryPkt must be the request that was created by the 446 // first node in the readyList. 447 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) { 448 panic("Retry packet's seqence number does not match " 449 "the first node in the readyList.\n"); 450 } 451 if (port.sendTimingReq(retryPkt)) { 452 ++numRetrySucceeded; 453 retryPkt = nullptr; 454 } 455 } else if (node_ptr->isLoad() || node_ptr->isStore()) { 456 // If there is no retryPkt, attempt to send a memory request in 457 // case of a load or store node. If the send fails, executeMemReq() 458 // returns a packet pointer, which we save in retryPkt. In case of 459 // a comp node we don't do anything and simply continue as if the 460 // execution of the comp node succedded. 461 retryPkt = executeMemReq(node_ptr); 462 } 463 // If the retryPkt or a new load/store node failed, we exit from here 464 // as a retry from cache will bring the control to execute(). The 465 // first node in readyList then, will be the failed node. 466 if (retryPkt) { 467 break; 468 } 469 470 // Proceed to remove dependencies for the successfully executed node. 471 // If it is a load which is not strictly ordered and we sent a 472 // request for it successfully, we do not yet mark any register 473 // dependencies complete. But as per dependency modelling we need 474 // to mark ROB dependencies of load and non load/store nodes which 475 // are based on successful sending of the load as complete. 476 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) { 477 // If execute succeeded mark its dependents as complete 478 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up " 479 "dependents..\n", node_ptr->seqNum); 480 481 auto child_itr = (node_ptr->dependents).begin(); 482 while (child_itr != (node_ptr->dependents).end()) { 483 // ROB dependency of a store on a load must not be removed 484 // after load is sent but after response is received 485 if (!(*child_itr)->isStore() && 486 (*child_itr)->removeRobDep(node_ptr->seqNum)) { 487 488 // Check if the child node has become dependency free 489 if ((*child_itr)->numRobDep == 0 && 490 (*child_itr)->numRegDep == 0) { 491 492 // Source dependencies are complete, check if 493 // resources are available and issue 494 checkAndIssue(*child_itr); 495 } 496 // Remove this child for the sent load and point to new 497 // location of the element following the erased element 498 child_itr = node_ptr->dependents.erase(child_itr); 499 } else { 500 // This child is not dependency-free, point to the next 501 // child 502 child_itr++; 503 } 504 } 505 } else { 506 // If it is a strictly ordered load mark its dependents as complete 507 // as we do not send a request for this case. If it is a store or a 508 // comp node we also mark all its dependents complete. 509 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking" 510 " up dependents..\n", node_ptr->seqNum); 511 512 for (auto child : node_ptr->dependents) { 513 // If the child node is dependency free removeDepOnInst() 514 // returns true. 515 if (child->removeDepOnInst(node_ptr->seqNum)) { 516 // Source dependencies are complete, check if resources 517 // are available and issue 518 checkAndIssue(child); 519 } 520 } 521 } 522 523 // After executing the node, remove from readyList and delete node. 524 readyList.erase(free_itr); 525 // If it is a cacheable load which was sent, don't delete 526 // just yet. Delete it in completeMemAccess() after the 527 // response is received. If it is an strictly ordered 528 // load, it was not sent and all dependencies were simply 529 // marked complete. Thus it is safe to delete it. For 530 // stores and non load/store nodes all dependencies were 531 // marked complete so it is safe to delete it. 532 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) { 533 // Release all resources occupied by the completed node 534 hwResource.release(node_ptr); 535 // clear the dynamically allocated set of dependents 536 (node_ptr->dependents).clear(); 537 // delete node 538 delete node_ptr; 539 // remove from graph 540 depGraph.erase(graph_itr); 541 } 542 // Point to first node to continue to next iteration of while loop 543 free_itr = readyList.begin(); 544 } // end of while loop 545 546 // Print readyList, sizes of queues and resource status after updating 547 if (DTRACE(TraceCPUData)) { 548 printReadyList(); 549 DPRINTF(TraceCPUData, "Execute end occupancy:\n"); 550 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " 551 "depFreeQueue = %d ,", depGraph.size(), readyList.size(), 552 depFreeQueue.size()); 553 hwResource.printOccupancy(); 554 } 555 556 if (retryPkt) { 557 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry" 558 "event from the cache for seq. num %lli.\n", 559 retryPkt->req->getReqInstSeqNum()); 560 return; 561 } 562 // If the size of the dependency graph is less than the dependency window 563 // then read from the trace file to populate the graph next time we are in 564 // execute. 565 if (depGraph.size() < windowSize && !traceComplete) 566 nextRead = true; 567 568 // If cache is not blocked, schedule an event for the first execTick in 569 // readyList else retry from cache will schedule the event. If the ready 570 // list is empty then check if the next pending node has resources 571 // available to issue. If yes, then schedule an event for the next cycle. 572 if (!readyList.empty()) { 573 Tick next_event_tick = std::max(readyList.begin()->execTick, 574 curTick()); 575 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 576 next_event_tick); 577 owner.schedDcacheNextEvent(next_event_tick); 578 } else if (readyList.empty() && !depFreeQueue.empty() && 579 hwResource.isAvailable(depFreeQueue.front())) { 580 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 581 owner.clockEdge(Cycles(1))); 582 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1))); 583 } 584 585 // If trace is completely read, readyList is empty and depGraph is empty, 586 // set execComplete to true 587 if (depGraph.empty() && readyList.empty() && traceComplete && 588 !hwResource.awaitingResponse()) { 589 DPRINTF(TraceCPUData, "\tExecution Complete!\n"); 590 execComplete = true; 591 dataLastTick = curTick(); 592 } 593} 594 595PacketPtr 596TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) 597{ 598 599 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, " 600 "virt addr %d, pc %#x, size %d, flags %d).\n", 601 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr, 602 node_ptr->pc, node_ptr->size, node_ptr->flags); 603 604 // If the request is strictly ordered, do not send it. Just return nullptr 605 // as if it was succesfully sent. 606 if (node_ptr->isStrictlyOrdered()) { 607 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores; 608 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n", 609 node_ptr->seqNum); 610 return nullptr; 611 } 612 613 // Check if the request spans two cache lines as this condition triggers 614 // an assert fail in the L1 cache. If it does then truncate the size to 615 // access only until the end of that line and ignore the remainder. The 616 // stat counting this is useful to keep a check on how frequently this 617 // happens. If required the code could be revised to mimick splitting such 618 // a request into two. 619 unsigned blk_size = owner.cacheLineSize(); 620 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1)); 621 if (!(blk_offset + node_ptr->size <= blk_size)) { 622 node_ptr->size = blk_size - blk_offset; 623 ++numSplitReqs; 624 } 625 626 // Create a request and the packet containing request 627 Request* req = new Request(node_ptr->physAddr, node_ptr->size, 628 node_ptr->flags, masterID, node_ptr->seqNum, 629 ContextID(0)); 630 req->setPC(node_ptr->pc); 631 // If virtual address is valid, set the asid and virtual address fields 632 // of the request. 633 if (node_ptr->virtAddr != 0) { 634 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size, 635 node_ptr->flags, masterID, node_ptr->pc); 636 req->setPaddr(node_ptr->physAddr); 637 req->setReqInstSeqNum(node_ptr->seqNum); 638 } 639 640 PacketPtr pkt; 641 uint8_t* pkt_data = new uint8_t[req->getSize()]; 642 if (node_ptr->isLoad()) { 643 pkt = Packet::createRead(req); 644 } else { 645 pkt = Packet::createWrite(req); 646 memset(pkt_data, 0xA, req->getSize()); 647 } 648 pkt->dataDynamic(pkt_data); 649 650 // Call MasterPort method to send a timing request for this packet 651 bool success = port.sendTimingReq(pkt); 652 ++numSendAttempted; 653 654 if (!success) { 655 // If it fails, return the packet to retry when a retry is signalled by 656 // the cache 657 ++numSendFailed; 658 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n"); 659 return pkt; 660 } else { 661 // It is succeeds, return nullptr 662 ++numSendSucceeded; 663 return nullptr; 664 } 665} 666 667bool 668TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first) 669{ 670 // Assert the node is dependency-free 671 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0); 672 673 // If this is the first attempt, print a debug message to indicate this. 674 if (first) { 675 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now" 676 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(), 677 node_ptr->robNum); 678 } 679 680 // Check if resources are available to issue the specific node 681 if (hwResource.isAvailable(node_ptr)) { 682 // If resources are free only then add to readyList 683 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding" 684 " to readyList, occupying resources.\n", node_ptr->seqNum); 685 // Compute the execute tick by adding the compute delay for the node 686 // and add the ready node to the ready list 687 addToSortedReadyList(node_ptr->seqNum, 688 owner.clockEdge() + node_ptr->compDelay); 689 // Account for the resources taken up by this issued node. 690 hwResource.occupy(node_ptr); 691 return true; 692 693 } else { 694 if (first) { 695 // Although dependencies are complete, resources are not available. 696 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli." 697 " Adding to depFreeQueue.\n", node_ptr->seqNum); 698 depFreeQueue.push(node_ptr); 699 } else { 700 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. " 701 "Still pending issue.\n", node_ptr->seqNum); 702 } 703 return false; 704 } 705} 706 707void 708TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt) 709{ 710 // Release the resources for this completed node. 711 if (pkt->isWrite()) { 712 // Consider store complete. 713 hwResource.releaseStoreBuffer(); 714 // If it is a store response then do nothing since we do not model 715 // dependencies on store completion in the trace. But if we were 716 // blocking execution due to store buffer fullness, we need to schedule 717 // an event and attempt to progress. 718 } else { 719 // If it is a load response then release the dependents waiting on it. 720 // Get pointer to the completed load 721 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum()); 722 assert(graph_itr != depGraph.end()); 723 GraphNode* node_ptr = graph_itr->second; 724 725 // Release resources occupied by the load 726 hwResource.release(node_ptr); 727 728 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up" 729 " dependents..\n", node_ptr->seqNum); 730 731 for (auto child : node_ptr->dependents) { 732 if (child->removeDepOnInst(node_ptr->seqNum)) { 733 checkAndIssue(child); 734 } 735 } 736 737 // clear the dynamically allocated set of dependents 738 (node_ptr->dependents).clear(); 739 // delete node 740 delete node_ptr; 741 // remove from graph 742 depGraph.erase(graph_itr); 743 } 744 745 if (DTRACE(TraceCPUData)) { 746 printReadyList(); 747 } 748 749 // If the size of the dependency graph is less than the dependency window 750 // then read from the trace file to populate the graph next time we are in 751 // execute. 752 if (depGraph.size() < windowSize && !traceComplete) 753 nextRead = true; 754 755 // If not waiting for retry, attempt to schedule next event 756 if (!retryPkt) { 757 // We might have new dep-free nodes in the list which will have execute 758 // tick greater than or equal to curTick. But a new dep-free node might 759 // have its execute tick earlier. Therefore, attempt to reschedule. It 760 // could happen that the readyList is empty and we got here via a 761 // last remaining response. So, either the trace is complete or there 762 // are pending nodes in the depFreeQueue. The checking is done in the 763 // execute() control flow, so schedule an event to go via that flow. 764 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) : 765 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1))); 766 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 767 next_event_tick); 768 owner.schedDcacheNextEvent(next_event_tick); 769 } 770} 771 772void 773TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num, 774 Tick exec_tick) 775{ 776 ReadyNode ready_node; 777 ready_node.seqNum = seq_num; 778 ready_node.execTick = exec_tick; 779 780 // Iterator to readyList 781 auto itr = readyList.begin(); 782 783 // If the readyList is empty, simply insert the new node at the beginning 784 // and return 785 if (itr == readyList.end()) { 786 readyList.insert(itr, ready_node); 787 maxReadyListSize = std::max<double>(readyList.size(), 788 maxReadyListSize.value()); 789 return; 790 } 791 792 // If the new node has its execution tick equal to the first node in the 793 // list then go to the next node. If the first node in the list failed 794 // to execute, its position as the first is thus maintained. 795 if (retryPkt) 796 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum) 797 itr++; 798 799 // Increment the iterator and compare the node pointed to by it to the new 800 // node till the position to insert the new node is found. 801 bool found = false; 802 while (!found && itr != readyList.end()) { 803 // If the execution tick of the new node is less than the node then 804 // this is the position to insert 805 if (exec_tick < itr->execTick) 806 found = true; 807 // If the execution tick of the new node is equal to the node then 808 // sort in ascending order of sequence numbers 809 else if (exec_tick == itr->execTick) { 810 // If the sequence number of the new node is less than the node 811 // then this is the position to insert 812 if (seq_num < itr->seqNum) 813 found = true; 814 // Else go to next node 815 else 816 itr++; 817 } 818 // If the execution tick of the new node is greater than the node then 819 // go to the next node 820 else 821 itr++; 822 } 823 readyList.insert(itr, ready_node); 824 // Update the stat for max size reached of the readyList 825 maxReadyListSize = std::max<double>(readyList.size(), 826 maxReadyListSize.value()); 827} 828 829void 830TraceCPU::ElasticDataGen::printReadyList() { 831 832 auto itr = readyList.begin(); 833 if (itr == readyList.end()) { 834 DPRINTF(TraceCPUData, "readyList is empty.\n"); 835 return; 836 } 837 DPRINTF(TraceCPUData, "Printing readyList:\n"); 838 while (itr != readyList.end()) { 839 auto graph_itr = depGraph.find(itr->seqNum); 840 GraphNode* node_ptr M5_VAR_USED = graph_itr->second; 841 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum, 842 node_ptr->typeToStr(), itr->execTick); 843 itr++; 844 } 845} 846 847TraceCPU::ElasticDataGen::HardwareResource::HardwareResource( 848 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) 849 : sizeROB(max_rob), 850 sizeStoreBuffer(max_stores), 851 sizeLoadBuffer(max_loads), 852 oldestInFlightRobNum(UINT64_MAX), 853 numInFlightLoads(0), 854 numInFlightStores(0) 855{} 856 857void 858TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node) 859{ 860 // Occupy ROB entry for the issued node 861 // Merely maintain the oldest node, i.e. numerically least robNum by saving 862 // it in the variable oldestInFLightRobNum. 863 inFlightNodes[new_node->seqNum] = new_node->robNum; 864 oldestInFlightRobNum = inFlightNodes.begin()->second; 865 866 // Occupy Load/Store Buffer entry for the issued node if applicable 867 if (new_node->isLoad()) { 868 ++numInFlightLoads; 869 } else if (new_node->isStore()) { 870 ++numInFlightStores; 871 } // else if it is a non load/store node, no buffer entry is occupied 872 873 printOccupancy(); 874} 875 876void 877TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node) 878{ 879 assert(!inFlightNodes.empty()); 880 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n", 881 done_node->seqNum); 882 883 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end()); 884 inFlightNodes.erase(done_node->seqNum); 885 886 if (inFlightNodes.empty()) { 887 // If we delete the only in-flight node and then the 888 // oldestInFlightRobNum is set to it's initialized (max) value. 889 oldestInFlightRobNum = UINT64_MAX; 890 } else { 891 // Set the oldest in-flight node rob number equal to the first node in 892 // the inFlightNodes since that will have the numerically least value. 893 oldestInFlightRobNum = inFlightNodes.begin()->second; 894 } 895 896 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, " 897 "oldestInFlightRobNum = %d\n", inFlightNodes.size(), 898 oldestInFlightRobNum); 899 900 // A store is considered complete when a request is sent, thus ROB entry is 901 // freed. But it occupies an entry in the Store Buffer until its response 902 // is received. A load is considered complete when a response is received, 903 // thus both ROB and Load Buffer entries can be released. 904 if (done_node->isLoad()) { 905 assert(numInFlightLoads != 0); 906 --numInFlightLoads; 907 } 908 // For normal writes, we send the requests out and clear a store buffer 909 // entry on response. For writes which are strictly ordered, for e.g. 910 // writes to device registers, we do that within release() which is called 911 // when node is executed and taken off from readyList. 912 if (done_node->isStore() && done_node->isStrictlyOrdered()) { 913 releaseStoreBuffer(); 914 } 915} 916 917void 918TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer() 919{ 920 assert(numInFlightStores != 0); 921 --numInFlightStores; 922} 923 924bool 925TraceCPU::ElasticDataGen::HardwareResource::isAvailable( 926 const GraphNode* new_node) const 927{ 928 uint16_t num_in_flight_nodes; 929 if (inFlightNodes.empty()) { 930 num_in_flight_nodes = 0; 931 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 932 " #in-flight nodes = 0", new_node->seqNum); 933 } else if (new_node->robNum > oldestInFlightRobNum) { 934 // This is the intuitive case where new dep-free node is younger 935 // instruction than the oldest instruction in-flight. Thus we make sure 936 // in_flight_nodes does not overflow. 937 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum; 938 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 939 " #in-flight nodes = %d - %d = %d", new_node->seqNum, 940 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes); 941 } else { 942 // This is the case where an instruction older than the oldest in- 943 // flight instruction becomes dep-free. Thus we must have already 944 // accounted for the entry in ROB for this new dep-free node. 945 // Immediately after this check returns true, oldestInFlightRobNum will 946 // be updated in occupy(). We simply let this node issue now. 947 num_in_flight_nodes = 0; 948 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 949 " new oldestInFlightRobNum = %d, #in-flight nodes ignored", 950 new_node->seqNum, new_node->robNum); 951 } 952 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n", 953 numInFlightLoads, sizeLoadBuffer, 954 numInFlightStores, sizeStoreBuffer); 955 // Check if resources are available to issue the specific node 956 if (num_in_flight_nodes >= sizeROB) { 957 return false; 958 } 959 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) { 960 return false; 961 } 962 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) { 963 return false; 964 } 965 return true; 966} 967 968bool 969TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const { 970 // Return true if there is at least one read or write request in flight 971 return (numInFlightStores != 0 || numInFlightLoads != 0); 972} 973 974void 975TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() { 976 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, " 977 "LQ = %d/%d, SQ = %d/%d.\n", 978 oldestInFlightRobNum, 979 numInFlightLoads, sizeLoadBuffer, 980 numInFlightStores, sizeStoreBuffer); 981} 982 983void 984TraceCPU::FixedRetryGen::regStats() 985{ 986 using namespace Stats; 987 988 numSendAttempted 989 .name(name() + ".numSendAttempted") 990 .desc("Number of first attempts to send a request") 991 ; 992 993 numSendSucceeded 994 .name(name() + ".numSendSucceeded") 995 .desc("Number of successful first attempts") 996 ; 997 998 numSendFailed 999 .name(name() + ".numSendFailed") 1000 .desc("Number of failed first attempts") 1001 ; 1002 1003 numRetrySucceeded 1004 .name(name() + ".numRetrySucceeded") 1005 .desc("Number of successful retries") 1006 ; 1007 1008 instLastTick 1009 .name(name() + ".instLastTick") 1010 .desc("Last tick simulated from the fixed inst trace") 1011 ; 1012} 1013 1014Tick 1015TraceCPU::FixedRetryGen::init() 1016{ 1017 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator" 1018 " IcacheGen: fixed issue with retry.\n"); 1019 1020 if (nextExecute()) { 1021 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick); 1022 return currElement.tick; 1023 } else { 1024 panic("Read of first message in the trace failed.\n"); 1025 return MaxTick; 1026 } 1027} 1028 1029bool 1030TraceCPU::FixedRetryGen::tryNext() 1031{ 1032 // If there is a retry packet, try to send it 1033 if (retryPkt) { 1034 1035 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n"); 1036 1037 if (!port.sendTimingReq(retryPkt)) { 1038 // Still blocked! This should never occur. 1039 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n"); 1040 return false; 1041 } 1042 ++numRetrySucceeded; 1043 } else { 1044 1045 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n"); 1046 1047 // try sending current element 1048 assert(currElement.isValid()); 1049 1050 ++numSendAttempted; 1051 1052 if (!send(currElement.addr, currElement.blocksize, 1053 currElement.cmd, currElement.flags, currElement.pc)) { 1054 DPRINTF(TraceCPUInst, "currElement sending failed.\n"); 1055 ++numSendFailed; 1056 // return false to indicate not to schedule next event 1057 return false; 1058 } else { 1059 ++numSendSucceeded; 1060 } 1061 } 1062 // If packet was sent successfully, either retryPkt or currElement, return 1063 // true to indicate to schedule event at current Tick plus delta. If packet 1064 // was sent successfully and there is no next packet to send, return false. 1065 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next " 1066 "element.\n"); 1067 retryPkt = nullptr; 1068 // Read next element into currElement, currElement gets cleared so save the 1069 // tick to calculate delta 1070 Tick last_tick = currElement.tick; 1071 if (nextExecute()) { 1072 assert(currElement.tick >= last_tick); 1073 delta = currElement.tick - last_tick; 1074 } 1075 return !traceComplete; 1076} 1077 1078void 1079TraceCPU::FixedRetryGen::exit() 1080{ 1081 trace.reset(); 1082} 1083 1084bool 1085TraceCPU::FixedRetryGen::nextExecute() 1086{ 1087 if (traceComplete) 1088 // We are at the end of the file, thus we have no more messages. 1089 // Return false. 1090 return false; 1091 1092 1093 //Reset the currElement to the default values 1094 currElement.clear(); 1095 1096 // Read the next line to get the next message. If that fails then end of 1097 // trace has been reached and traceComplete needs to be set in addition 1098 // to returning false. If successful then next message is in currElement. 1099 if (!trace.read(&currElement)) { 1100 traceComplete = true; 1101 instLastTick = curTick(); 1102 return false; 1103 } 1104 1105 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n", 1106 currElement.cmd.isRead() ? 'r' : 'w', 1107 currElement.addr, 1108 currElement.pc, 1109 currElement.blocksize, 1110 currElement.tick); 1111 1112 return true; 1113} 1114 1115bool 1116TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd, 1117 Request::FlagsType flags, Addr pc) 1118{ 1119 1120 // Create new request 1121 Request* req = new Request(addr, size, flags, masterID); 1122 req->setPC(pc); 1123 1124 // If this is not done it triggers assert in L1 cache for invalid contextId 1125 req->setContext(ContextID(0)); 1126 1127 // Embed it in a packet 1128 PacketPtr pkt = new Packet(req, cmd); 1129 1130 uint8_t* pkt_data = new uint8_t[req->getSize()]; 1131 pkt->dataDynamic(pkt_data); 1132 1133 if (cmd.isWrite()) { 1134 memset(pkt_data, 0xA, req->getSize()); 1135 } 1136 1137 // Call MasterPort method to send a timing request for this packet 1138 bool success = port.sendTimingReq(pkt); 1139 if (!success) { 1140 // If it fails, save the packet to retry when a retry is signalled by 1141 // the cache 1142 retryPkt = pkt; 1143 } 1144 return success; 1145} 1146 1147void 1148TraceCPU::icacheRetryRecvd() 1149{ 1150 // Schedule an event to go through the control flow in the same tick as 1151 // retry is received 1152 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen" 1153 " event @%lli.\n", curTick()); 1154 schedule(icacheNextEvent, curTick()); 1155} 1156 1157void 1158TraceCPU::dcacheRetryRecvd() 1159{ 1160 // Schedule an event to go through the execute flow in the same tick as 1161 // retry is received 1162 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen" 1163 " event @%lli.\n", curTick()); 1164 schedule(dcacheNextEvent, curTick()); 1165} 1166 1167void 1168TraceCPU::schedDcacheNextEvent(Tick when) 1169{ 1170 if (!dcacheNextEvent.scheduled()) { 1171 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n", 1172 when); 1173 schedule(dcacheNextEvent, when); 1174 ++numSchedDcacheEvent; 1175 } else if (when < dcacheNextEvent.when()) { 1176 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli" 1177 " to %lli.\n", dcacheNextEvent.when(), when); 1178 reschedule(dcacheNextEvent, when); 1179 } 1180 1181} 1182 1183bool 1184TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt) 1185{ 1186 // All responses on the instruction fetch side are ignored. Simply delete 1187 // the request and packet to free allocated memory 1188 delete pkt->req; 1189 delete pkt; 1190 1191 return true; 1192} 1193 1194void 1195TraceCPU::IcachePort::recvReqRetry() 1196{ 1197 owner->icacheRetryRecvd(); 1198} 1199 1200void 1201TraceCPU::dcacheRecvTimingResp(PacketPtr pkt) 1202{ 1203 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n"); 1204 dcacheGen.completeMemAccess(pkt); 1205} 1206 1207bool 1208TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt) 1209{ 1210 // Handle the responses for data memory requests which is done inside the 1211 // elastic data generator 1212 owner->dcacheRecvTimingResp(pkt); 1213 // After processing the response delete the request and packet to free 1214 // memory 1215 delete pkt->req; 1216 delete pkt; 1217 1218 return true; 1219} 1220 1221void 1222TraceCPU::DcachePort::recvReqRetry() 1223{ 1224 owner->dcacheRetryRecvd(); 1225} 1226 1227TraceCPU::ElasticDataGen::InputStream::InputStream( 1228 const std::string& filename, 1229 const double time_multiplier) 1230 : trace(filename), 1231 timeMultiplier(time_multiplier), 1232 microOpCount(0) 1233{ 1234 // Create a protobuf message for the header and read it from the stream 1235 ProtoMessage::InstDepRecordHeader header_msg; 1236 if (!trace.read(header_msg)) { 1237 panic("Failed to read packet header from %s\n", filename); 1238 1239 if (header_msg.tick_freq() != SimClock::Frequency) { 1240 panic("Trace %s was recorded with a different tick frequency %d\n", 1241 header_msg.tick_freq()); 1242 } 1243 } else { 1244 // Assign window size equal to the field in the trace that was recorded 1245 // when the data dependency trace was captured in the o3cpu model 1246 windowSize = header_msg.window_size(); 1247 } 1248} 1249 1250void 1251TraceCPU::ElasticDataGen::InputStream::reset() 1252{ 1253 trace.reset(); 1254} 1255 1256bool 1257TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element) 1258{ 1259 ProtoMessage::InstDepRecord pkt_msg; 1260 if (trace.read(pkt_msg)) { 1261 // Required fields 1262 element->seqNum = pkt_msg.seq_num(); 1263 element->type = pkt_msg.type(); 1264 // Scale the compute delay to effectively scale the Trace CPU frequency 1265 element->compDelay = pkt_msg.comp_delay() * timeMultiplier; 1266 1267 // Repeated field robDepList 1268 element->clearRobDep(); 1269 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep); 1270 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) { 1271 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i); 1272 element->numRobDep += 1; 1273 } 1274 1275 // Repeated field 1276 element->clearRegDep(); 1277 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs); 1278 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) { 1279 // There is a possibility that an instruction has both, a register 1280 // and order dependency on an instruction. In such a case, the 1281 // register dependency is omitted 1282 bool duplicate = false; 1283 for (int j = 0; j < element->numRobDep; j++) { 1284 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]); 1285 } 1286 if (!duplicate) { 1287 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i); 1288 element->numRegDep += 1; 1289 } 1290 } 1291 1292 // Optional fields 1293 if (pkt_msg.has_p_addr()) 1294 element->physAddr = pkt_msg.p_addr(); 1295 else 1296 element->physAddr = 0; 1297 1298 if (pkt_msg.has_v_addr()) 1299 element->virtAddr = pkt_msg.v_addr(); 1300 else 1301 element->virtAddr = 0; 1302 1303 if (pkt_msg.has_asid()) 1304 element->asid = pkt_msg.asid(); 1305 else 1306 element->asid = 0; 1307 1308 if (pkt_msg.has_size()) 1309 element->size = pkt_msg.size(); 1310 else 1311 element->size = 0; 1312 1313 if (pkt_msg.has_flags()) 1314 element->flags = pkt_msg.flags(); 1315 else 1316 element->flags = 0; 1317 1318 if (pkt_msg.has_pc()) 1319 element->pc = pkt_msg.pc(); 1320 else 1321 element->pc = 0; 1322 1323 // ROB occupancy number 1324 ++microOpCount; 1325 if (pkt_msg.has_weight()) { 1326 microOpCount += pkt_msg.weight(); 1327 } 1328 element->robNum = microOpCount; 1329 return true; 1330 } 1331 1332 // We have reached the end of the file 1333 return false; 1334} 1335 1336bool 1337TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep) 1338{ 1339 for (auto& own_reg_dep : regDep) { 1340 if (own_reg_dep == reg_dep) { 1341 // If register dependency is found, make it zero and return true 1342 own_reg_dep = 0; 1343 assert(numRegDep > 0); 1344 --numRegDep; 1345 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli " 1346 "done.\n", seqNum, reg_dep); 1347 return true; 1348 } 1349 } 1350 1351 // Return false if the dependency is not found 1352 return false; 1353} 1354 1355bool 1356TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep) 1357{ 1358 for (auto& own_rob_dep : robDep) { 1359 if (own_rob_dep == rob_dep) { 1360 // If the rob dependency is found, make it zero and return true 1361 own_rob_dep = 0; 1362 assert(numRobDep > 0); 1363 --numRobDep; 1364 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli " 1365 "done.\n", seqNum, rob_dep); 1366 return true; 1367 } 1368 } 1369 return false; 1370} 1371 1372void 1373TraceCPU::ElasticDataGen::GraphNode::clearRegDep() { 1374 for (auto& own_reg_dep : regDep) { 1375 own_reg_dep = 0; 1376 } 1377 numRegDep = 0; 1378} 1379 1380void 1381TraceCPU::ElasticDataGen::GraphNode::clearRobDep() { 1382 for (auto& own_rob_dep : robDep) { 1383 own_rob_dep = 0; 1384 } 1385 numRobDep = 0; 1386} 1387 1388bool 1389TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num) 1390{ 1391 // If it is an rob dependency then remove it 1392 if (!removeRobDep(done_seq_num)) { 1393 // If it is not an rob dependency then it must be a register dependency 1394 // If the register dependency is not found, it violates an assumption 1395 // and must be caught by assert. 1396 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num); 1397 assert(regdep_found); 1398 } 1399 // Return true if the node is dependency free 1400 return (numRobDep == 0 && numRegDep == 0); 1401} 1402 1403void 1404TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const 1405{ 1406 DPRINTFR(TraceCPUData, "%lli", seqNum); 1407 DPRINTFR(TraceCPUData, ",%s", typeToStr()); 1408 if (isLoad() || isStore()) { 1409 DPRINTFR(TraceCPUData, ",%i", physAddr); 1410 DPRINTFR(TraceCPUData, ",%i", size); 1411 DPRINTFR(TraceCPUData, ",%i", flags); 1412 } 1413 DPRINTFR(TraceCPUData, ",%lli", compDelay); 1414 int i = 0; 1415 DPRINTFR(TraceCPUData, "robDep:"); 1416 while (robDep[i] != 0) { 1417 DPRINTFR(TraceCPUData, ",%lli", robDep[i]); 1418 i++; 1419 } 1420 i = 0; 1421 DPRINTFR(TraceCPUData, "regDep:"); 1422 while (regDep[i] != 0) { 1423 DPRINTFR(TraceCPUData, ",%lli", regDep[i]); 1424 i++; 1425 } 1426 auto child_itr = dependents.begin(); 1427 DPRINTFR(TraceCPUData, "dependents:"); 1428 while (child_itr != dependents.end()) { 1429 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum); 1430 child_itr++; 1431 } 1432 1433 DPRINTFR(TraceCPUData, "\n"); 1434} 1435 1436std::string 1437TraceCPU::ElasticDataGen::GraphNode::typeToStr() const 1438{ 1439 return Record::RecordType_Name(type); 1440} 1441 1442TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename) 1443 : trace(filename) 1444{ 1445 // Create a protobuf message for the header and read it from the stream 1446 ProtoMessage::PacketHeader header_msg; 1447 if (!trace.read(header_msg)) { 1448 panic("Failed to read packet header from %s\n", filename); 1449 1450 if (header_msg.tick_freq() != SimClock::Frequency) { 1451 panic("Trace %s was recorded with a different tick frequency %d\n", 1452 header_msg.tick_freq()); 1453 } 1454 } 1455} 1456 1457void 1458TraceCPU::FixedRetryGen::InputStream::reset() 1459{ 1460 trace.reset(); 1461} 1462 1463bool 1464TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element) 1465{ 1466 ProtoMessage::Packet pkt_msg; 1467 if (trace.read(pkt_msg)) { 1468 element->cmd = pkt_msg.cmd(); 1469 element->addr = pkt_msg.addr(); 1470 element->blocksize = pkt_msg.size(); 1471 element->tick = pkt_msg.tick(); 1472 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0; 1473 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0; 1474 return true; 1475 } 1476 1477 // We have reached the end of the file 1478 return false; 1479} 1480