trace_cpu.cc revision 12749
111986Sandreas.sandberg@arm.com/* 212391Sjason@lowepower.com * Copyright (c) 2013 - 2016 ARM Limited 311986Sandreas.sandberg@arm.com * All rights reserved 411986Sandreas.sandberg@arm.com * 511986Sandreas.sandberg@arm.com * The license below extends only to copyright in the software and shall 612391Sjason@lowepower.com * not be construed as granting a license to any other intellectual 712391Sjason@lowepower.com * property including but not limited to intellectual property relating 814299Sbbruce@ucdavis.edu * to a hardware implementation of the functionality of the software 914299Sbbruce@ucdavis.edu * licensed hereunder. You may use the software subject to the license 1014299Sbbruce@ucdavis.edu * terms below provided that you ensure that this notice is replicated 1114299Sbbruce@ucdavis.edu * unmodified and in its entirety in all distributions of the software, 1214299Sbbruce@ucdavis.edu * modified or unmodified, in source code or in binary form. 1312391Sjason@lowepower.com * 1412391Sjason@lowepower.com * Redistribution and use in source and binary forms, with or without 1512391Sjason@lowepower.com * modification, are permitted provided that the following conditions are 1612391Sjason@lowepower.com * met: redistributions of source code must retain the above copyright 1712391Sjason@lowepower.com * notice, this list of conditions and the following disclaimer; 1812391Sjason@lowepower.com * redistributions in binary form must reproduce the above copyright 1911986Sandreas.sandberg@arm.com * notice, this list of conditions and the following disclaimer in the 2011986Sandreas.sandberg@arm.com * documentation and/or other materials provided with the distribution; 2111986Sandreas.sandberg@arm.com * neither the name of the copyright holders nor the names of its 2212391Sjason@lowepower.com * contributors may be used to endorse or promote products derived from 2311986Sandreas.sandberg@arm.com * this software without specific prior written permission. 2412391Sjason@lowepower.com * 2512391Sjason@lowepower.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2612391Sjason@lowepower.com * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2711986Sandreas.sandberg@arm.com * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2812391Sjason@lowepower.com * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 2912391Sjason@lowepower.com * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 3012391Sjason@lowepower.com * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 3112391Sjason@lowepower.com * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 3212391Sjason@lowepower.com * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 3312391Sjason@lowepower.com * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 3411986Sandreas.sandberg@arm.com * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 3511986Sandreas.sandberg@arm.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3611986Sandreas.sandberg@arm.com * 3712391Sjason@lowepower.com * Authors: Radhika Jagtap 3812037Sandreas.sandberg@arm.com * Andreas Hansson 3912037Sandreas.sandberg@arm.com * Thomas Grass 4011986Sandreas.sandberg@arm.com */ 4112391Sjason@lowepower.com 4212391Sjason@lowepower.com#include "cpu/trace/trace_cpu.hh" 4311986Sandreas.sandberg@arm.com 4412391Sjason@lowepower.com#include "sim/sim_exit.hh" 4512391Sjason@lowepower.com 4611986Sandreas.sandberg@arm.com// Declare and initialize the static counter for number of trace CPUs. 4711986Sandreas.sandberg@arm.comint TraceCPU::numTraceCPUs = 0; 4811986Sandreas.sandberg@arm.com 4911986Sandreas.sandberg@arm.comTraceCPU::TraceCPU(TraceCPUParams *params) 5012391Sjason@lowepower.com : BaseCPU(params), 5111986Sandreas.sandberg@arm.com icachePort(this), 5211986Sandreas.sandberg@arm.com dcachePort(this), 5311986Sandreas.sandberg@arm.com instMasterID(params->system->getMasterId(this, "inst")), 5412391Sjason@lowepower.com dataMasterID(params->system->getMasterId(this, "data")), 5512037Sandreas.sandberg@arm.com instTraceFile(params->instTraceFile), 5612037Sandreas.sandberg@arm.com dataTraceFile(params->dataTraceFile), 5712037Sandreas.sandberg@arm.com icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile), 5812391Sjason@lowepower.com dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile, 5912391Sjason@lowepower.com params), 6012391Sjason@lowepower.com icacheNextEvent([this]{ schedIcacheNext(); }, name()), 6112391Sjason@lowepower.com dcacheNextEvent([this]{ schedDcacheNext(); }, name()), 6212037Sandreas.sandberg@arm.com oneTraceComplete(false), 6312037Sandreas.sandberg@arm.com traceOffset(0), 6412037Sandreas.sandberg@arm.com execCompleteEvent(nullptr), 6512037Sandreas.sandberg@arm.com enableEarlyExit(params->enableEarlyExit), 6612037Sandreas.sandberg@arm.com progressMsgInterval(params->progressMsgInterval), 6712037Sandreas.sandberg@arm.com progressMsgThreshold(params->progressMsgInterval) 6812037Sandreas.sandberg@arm.com{ 6912037Sandreas.sandberg@arm.com // Increment static counter for number of Trace CPUs. 7012037Sandreas.sandberg@arm.com ++TraceCPU::numTraceCPUs; 7112037Sandreas.sandberg@arm.com 7212037Sandreas.sandberg@arm.com // Check that the python parameters for sizes of ROB, store buffer and 7312037Sandreas.sandberg@arm.com // load buffer do not overflow the corresponding C++ variables. 7412037Sandreas.sandberg@arm.com fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the " 7512037Sandreas.sandberg@arm.com "max. value of %d.\n", params->sizeROB, UINT16_MAX); 7612037Sandreas.sandberg@arm.com fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d " 7712037Sandreas.sandberg@arm.com "exceeds the max. value of %d.\n", params->sizeROB, 7812037Sandreas.sandberg@arm.com UINT16_MAX); 7912037Sandreas.sandberg@arm.com fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to" 8012037Sandreas.sandberg@arm.com " %d exceeds the max. value of %d.\n", 8112037Sandreas.sandberg@arm.com params->sizeLoadBuffer, UINT16_MAX); 8212037Sandreas.sandberg@arm.com} 8312037Sandreas.sandberg@arm.com 8412037Sandreas.sandberg@arm.comTraceCPU::~TraceCPU() 8512037Sandreas.sandberg@arm.com{ 8612037Sandreas.sandberg@arm.com 8712037Sandreas.sandberg@arm.com} 8812037Sandreas.sandberg@arm.com 8912037Sandreas.sandberg@arm.comTraceCPU* 9012037Sandreas.sandberg@arm.comTraceCPUParams::create() 9112037Sandreas.sandberg@arm.com{ 9212037Sandreas.sandberg@arm.com return new TraceCPU(this); 9312037Sandreas.sandberg@arm.com} 9412037Sandreas.sandberg@arm.com 9512037Sandreas.sandberg@arm.comvoid 9614299Sbbruce@ucdavis.eduTraceCPU::updateNumOps(uint64_t rob_num) 9712037Sandreas.sandberg@arm.com{ 9812037Sandreas.sandberg@arm.com numOps = rob_num; 9912037Sandreas.sandberg@arm.com if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) { 10012037Sandreas.sandberg@arm.com inform("%s: %i insts committed\n", name(), progressMsgThreshold); 10112037Sandreas.sandberg@arm.com progressMsgThreshold += progressMsgInterval; 10212037Sandreas.sandberg@arm.com } 10312037Sandreas.sandberg@arm.com} 10414299Sbbruce@ucdavis.edu 10512037Sandreas.sandberg@arm.comvoid 10612037Sandreas.sandberg@arm.comTraceCPU::takeOverFrom(BaseCPU *oldCPU) 10712037Sandreas.sandberg@arm.com{ 10814299Sbbruce@ucdavis.edu // Unbind the ports of the old CPU and bind the ports of the TraceCPU. 10914299Sbbruce@ucdavis.edu assert(!getInstPort().isConnected()); 11014299Sbbruce@ucdavis.edu assert(oldCPU->getInstPort().isConnected()); 11114299Sbbruce@ucdavis.edu BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort(); 11214299Sbbruce@ucdavis.edu oldCPU->getInstPort().unbind(); 11314299Sbbruce@ucdavis.edu getInstPort().bind(inst_peer_port); 11414299Sbbruce@ucdavis.edu 11514299Sbbruce@ucdavis.edu assert(!getDataPort().isConnected()); 11614299Sbbruce@ucdavis.edu assert(oldCPU->getDataPort().isConnected()); 11714299Sbbruce@ucdavis.edu BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort(); 11814299Sbbruce@ucdavis.edu oldCPU->getDataPort().unbind(); 11914299Sbbruce@ucdavis.edu getDataPort().bind(data_peer_port); 12014299Sbbruce@ucdavis.edu} 12114299Sbbruce@ucdavis.edu 12214299Sbbruce@ucdavis.eduvoid 12314299Sbbruce@ucdavis.eduTraceCPU::init() 12414299Sbbruce@ucdavis.edu{ 12514299Sbbruce@ucdavis.edu DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"." 12614299Sbbruce@ucdavis.edu "\n", instTraceFile); 12714299Sbbruce@ucdavis.edu DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n", 12814299Sbbruce@ucdavis.edu dataTraceFile); 12914299Sbbruce@ucdavis.edu 13014299Sbbruce@ucdavis.edu BaseCPU::init(); 13114299Sbbruce@ucdavis.edu 13214299Sbbruce@ucdavis.edu // Get the send tick of the first instruction read request 13314299Sbbruce@ucdavis.edu Tick first_icache_tick = icacheGen.init(); 13414299Sbbruce@ucdavis.edu 13514299Sbbruce@ucdavis.edu // Get the send tick of the first data read/write request 13614299Sbbruce@ucdavis.edu Tick first_dcache_tick = dcacheGen.init(); 13714299Sbbruce@ucdavis.edu 13814299Sbbruce@ucdavis.edu // Set the trace offset as the minimum of that in both traces 13914299Sbbruce@ucdavis.edu traceOffset = std::min(first_icache_tick, first_dcache_tick); 14014299Sbbruce@ucdavis.edu inform("%s: Time offset (tick) found as min of both traces is %lli.\n", 14114299Sbbruce@ucdavis.edu name(), traceOffset); 14214299Sbbruce@ucdavis.edu 14314299Sbbruce@ucdavis.edu // Schedule next icache and dcache event by subtracting the offset 14414299Sbbruce@ucdavis.edu schedule(icacheNextEvent, first_icache_tick - traceOffset); 14514299Sbbruce@ucdavis.edu schedule(dcacheNextEvent, first_dcache_tick - traceOffset); 14614299Sbbruce@ucdavis.edu 14714299Sbbruce@ucdavis.edu // Adjust the trace offset for the dcache generator's ready nodes 148 // We don't need to do this for the icache generator as it will 149 // send its first request at the first event and schedule subsequent 150 // events using a relative tick delta 151 dcacheGen.adjustInitTraceOffset(traceOffset); 152 153 // If the Trace CPU simulation is configured to exit on any one trace 154 // completion then we don't need a counted event to count down all Trace 155 // CPUs in the system. If not then instantiate a counted event. 156 if (!enableEarlyExit) { 157 // The static counter for number of Trace CPUs is correctly set at 158 // this point so create an event and pass it. 159 execCompleteEvent = new CountedExitEvent("end of all traces reached.", 160 numTraceCPUs); 161 } 162 163} 164 165void 166TraceCPU::schedIcacheNext() 167{ 168 DPRINTF(TraceCPUInst, "IcacheGen event.\n"); 169 170 // Try to send the current packet or a retry packet if there is one 171 bool sched_next = icacheGen.tryNext(); 172 // If packet sent successfully, schedule next event 173 if (sched_next) { 174 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event " 175 "at %d.\n", curTick() + icacheGen.tickDelta()); 176 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta()); 177 ++numSchedIcacheEvent; 178 } else { 179 // check if traceComplete. If not, do nothing because sending failed 180 // and next event will be scheduled via RecvRetry() 181 if (icacheGen.isTraceComplete()) { 182 // If this is the first trace to complete, set the variable. If it 183 // is already set then both traces are complete to exit sim. 184 checkAndSchedExitEvent(); 185 } 186 } 187 return; 188} 189 190void 191TraceCPU::schedDcacheNext() 192{ 193 DPRINTF(TraceCPUData, "DcacheGen event.\n"); 194 195 // Update stat for numCycles 196 numCycles = clockEdge() / clockPeriod(); 197 198 dcacheGen.execute(); 199 if (dcacheGen.isExecComplete()) { 200 checkAndSchedExitEvent(); 201 } 202} 203 204void 205TraceCPU::checkAndSchedExitEvent() 206{ 207 if (!oneTraceComplete) { 208 oneTraceComplete = true; 209 } else { 210 // Schedule event to indicate execution is complete as both 211 // instruction and data access traces have been played back. 212 inform("%s: Execution complete.\n", name()); 213 // If the replay is configured to exit early, that is when any one 214 // execution is complete then exit immediately and return. Otherwise, 215 // schedule the counted exit that counts down completion of each Trace 216 // CPU. 217 if (enableEarlyExit) { 218 exitSimLoop("End of trace reached"); 219 } else { 220 schedule(*execCompleteEvent, curTick()); 221 } 222 } 223} 224 225void 226TraceCPU::regStats() 227{ 228 229 BaseCPU::regStats(); 230 231 numSchedDcacheEvent 232 .name(name() + ".numSchedDcacheEvent") 233 .desc("Number of events scheduled to trigger data request generator") 234 ; 235 236 numSchedIcacheEvent 237 .name(name() + ".numSchedIcacheEvent") 238 .desc("Number of events scheduled to trigger instruction request generator") 239 ; 240 241 numOps 242 .name(name() + ".numOps") 243 .desc("Number of micro-ops simulated by the Trace CPU") 244 ; 245 246 cpi 247 .name(name() + ".cpi") 248 .desc("Cycles per micro-op used as a proxy for CPI") 249 .precision(6) 250 ; 251 cpi = numCycles/numOps; 252 253 icacheGen.regStats(); 254 dcacheGen.regStats(); 255} 256 257void 258TraceCPU::ElasticDataGen::regStats() 259{ 260 using namespace Stats; 261 262 maxDependents 263 .name(name() + ".maxDependents") 264 .desc("Max number of dependents observed on a node") 265 ; 266 267 maxReadyListSize 268 .name(name() + ".maxReadyListSize") 269 .desc("Max size of the ready list observed") 270 ; 271 272 numSendAttempted 273 .name(name() + ".numSendAttempted") 274 .desc("Number of first attempts to send a request") 275 ; 276 277 numSendSucceeded 278 .name(name() + ".numSendSucceeded") 279 .desc("Number of successful first attempts") 280 ; 281 282 numSendFailed 283 .name(name() + ".numSendFailed") 284 .desc("Number of failed first attempts") 285 ; 286 287 numRetrySucceeded 288 .name(name() + ".numRetrySucceeded") 289 .desc("Number of successful retries") 290 ; 291 292 numSplitReqs 293 .name(name() + ".numSplitReqs") 294 .desc("Number of split requests") 295 ; 296 297 numSOLoads 298 .name(name() + ".numSOLoads") 299 .desc("Number of strictly ordered loads") 300 ; 301 302 numSOStores 303 .name(name() + ".numSOStores") 304 .desc("Number of strictly ordered stores") 305 ; 306 307 dataLastTick 308 .name(name() + ".dataLastTick") 309 .desc("Last tick simulated from the elastic data trace") 310 ; 311} 312 313Tick 314TraceCPU::ElasticDataGen::init() 315{ 316 DPRINTF(TraceCPUData, "Initializing data memory request generator " 317 "DcacheGen: elastic issue with retry.\n"); 318 319 if (!readNextWindow()) 320 panic("Trace has %d elements. It must have at least %d elements.\n", 321 depGraph.size(), 2 * windowSize); 322 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n", 323 depGraph.size()); 324 325 if (!readNextWindow()) 326 panic("Trace has %d elements. It must have at least %d elements.\n", 327 depGraph.size(), 2 * windowSize); 328 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n", 329 depGraph.size()); 330 331 // Print readyList 332 if (DTRACE(TraceCPUData)) { 333 printReadyList(); 334 } 335 auto free_itr = readyList.begin(); 336 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli" 337 " is %d.\n", free_itr->seqNum, free_itr->execTick); 338 // Return the execute tick of the earliest ready node so that an event 339 // can be scheduled to call execute() 340 return (free_itr->execTick); 341} 342 343void 344TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) { 345 for (auto& free_node : readyList) { 346 free_node.execTick -= offset; 347 } 348} 349 350void 351TraceCPU::ElasticDataGen::exit() 352{ 353 trace.reset(); 354} 355 356bool 357TraceCPU::ElasticDataGen::readNextWindow() 358{ 359 360 // Read and add next window 361 DPRINTF(TraceCPUData, "Reading next window from file.\n"); 362 363 if (traceComplete) { 364 // We are at the end of the file, thus we have no more records. 365 // Return false. 366 return false; 367 } 368 369 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n", 370 depGraph.size()); 371 372 uint32_t num_read = 0; 373 while (num_read != windowSize) { 374 375 // Create a new graph node 376 GraphNode* new_node = new GraphNode; 377 378 // Read the next line to get the next record. If that fails then end of 379 // trace has been reached and traceComplete needs to be set in addition 380 // to returning false. 381 if (!trace.read(new_node)) { 382 DPRINTF(TraceCPUData, "\tTrace complete!\n"); 383 traceComplete = true; 384 return false; 385 } 386 387 // Annotate the ROB dependencies of the new node onto the parent nodes. 388 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep); 389 // Annotate the register dependencies of the new node onto the parent 390 // nodes. 391 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep); 392 393 num_read++; 394 // Add to map 395 depGraph[new_node->seqNum] = new_node; 396 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) { 397 // Source dependencies are already complete, check if resources 398 // are available and issue. The execution time is approximated 399 // to current time plus the computational delay. 400 checkAndIssue(new_node); 401 } 402 } 403 404 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n", 405 depGraph.size()); 406 return true; 407} 408 409template<typename T> void 410TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, 411 T& dep_array, uint8_t& num_dep) 412{ 413 for (auto& a_dep : dep_array) { 414 // The convention is to set the dependencies starting with the first 415 // index in the ROB and register dependency arrays. Thus, when we reach 416 // a dependency equal to the initialisation value of zero, we know have 417 // iterated over all dependencies and can break. 418 if (a_dep == 0) 419 break; 420 // We look up the valid dependency, i.e. the parent of this node 421 auto parent_itr = depGraph.find(a_dep); 422 if (parent_itr != depGraph.end()) { 423 // If the parent is found, it is yet to be executed. Append a 424 // pointer to the new node to the dependents list of the parent 425 // node. 426 parent_itr->second->dependents.push_back(new_node); 427 auto num_depts = parent_itr->second->dependents.size(); 428 maxDependents = std::max<double>(num_depts, maxDependents.value()); 429 } else { 430 // The dependency is not found in the graph. So consider 431 // the execution of the parent is complete, i.e. remove this 432 // dependency. 433 a_dep = 0; 434 num_dep--; 435 } 436 } 437} 438 439void 440TraceCPU::ElasticDataGen::execute() 441{ 442 DPRINTF(TraceCPUData, "Execute start occupancy:\n"); 443 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " 444 "depFreeQueue = %d ,", depGraph.size(), readyList.size(), 445 depFreeQueue.size()); 446 hwResource.printOccupancy(); 447 448 // Read next window to make sure that dependents of all dep-free nodes 449 // are in the depGraph 450 if (nextRead) { 451 readNextWindow(); 452 nextRead = false; 453 } 454 455 // First attempt to issue the pending dependency-free nodes held 456 // in depFreeQueue. If resources have become available for a node, 457 // then issue it, i.e. add the node to readyList. 458 while (!depFreeQueue.empty()) { 459 if (checkAndIssue(depFreeQueue.front(), false)) { 460 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num " 461 "%lli.\n", (depFreeQueue.front())->seqNum); 462 depFreeQueue.pop(); 463 } else { 464 break; 465 } 466 } 467 // Proceed to execute from readyList 468 auto graph_itr = depGraph.begin(); 469 auto free_itr = readyList.begin(); 470 // Iterate through readyList until the next free node has its execute 471 // tick later than curTick or the end of readyList is reached 472 while (free_itr->execTick <= curTick() && free_itr != readyList.end()) { 473 474 // Get pointer to the node to be executed 475 graph_itr = depGraph.find(free_itr->seqNum); 476 assert(graph_itr != depGraph.end()); 477 GraphNode* node_ptr = graph_itr->second; 478 479 // If there is a retryPkt send that else execute the load 480 if (retryPkt) { 481 // The retryPkt must be the request that was created by the 482 // first node in the readyList. 483 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) { 484 panic("Retry packet's seqence number does not match " 485 "the first node in the readyList.\n"); 486 } 487 if (port.sendTimingReq(retryPkt)) { 488 ++numRetrySucceeded; 489 retryPkt = nullptr; 490 } 491 } else if (node_ptr->isLoad() || node_ptr->isStore()) { 492 // If there is no retryPkt, attempt to send a memory request in 493 // case of a load or store node. If the send fails, executeMemReq() 494 // returns a packet pointer, which we save in retryPkt. In case of 495 // a comp node we don't do anything and simply continue as if the 496 // execution of the comp node succedded. 497 retryPkt = executeMemReq(node_ptr); 498 } 499 // If the retryPkt or a new load/store node failed, we exit from here 500 // as a retry from cache will bring the control to execute(). The 501 // first node in readyList then, will be the failed node. 502 if (retryPkt) { 503 break; 504 } 505 506 // Proceed to remove dependencies for the successfully executed node. 507 // If it is a load which is not strictly ordered and we sent a 508 // request for it successfully, we do not yet mark any register 509 // dependencies complete. But as per dependency modelling we need 510 // to mark ROB dependencies of load and non load/store nodes which 511 // are based on successful sending of the load as complete. 512 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) { 513 // If execute succeeded mark its dependents as complete 514 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up " 515 "dependents..\n", node_ptr->seqNum); 516 517 auto child_itr = (node_ptr->dependents).begin(); 518 while (child_itr != (node_ptr->dependents).end()) { 519 // ROB dependency of a store on a load must not be removed 520 // after load is sent but after response is received 521 if (!(*child_itr)->isStore() && 522 (*child_itr)->removeRobDep(node_ptr->seqNum)) { 523 524 // Check if the child node has become dependency free 525 if ((*child_itr)->numRobDep == 0 && 526 (*child_itr)->numRegDep == 0) { 527 528 // Source dependencies are complete, check if 529 // resources are available and issue 530 checkAndIssue(*child_itr); 531 } 532 // Remove this child for the sent load and point to new 533 // location of the element following the erased element 534 child_itr = node_ptr->dependents.erase(child_itr); 535 } else { 536 // This child is not dependency-free, point to the next 537 // child 538 child_itr++; 539 } 540 } 541 } else { 542 // If it is a strictly ordered load mark its dependents as complete 543 // as we do not send a request for this case. If it is a store or a 544 // comp node we also mark all its dependents complete. 545 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking" 546 " up dependents..\n", node_ptr->seqNum); 547 548 for (auto child : node_ptr->dependents) { 549 // If the child node is dependency free removeDepOnInst() 550 // returns true. 551 if (child->removeDepOnInst(node_ptr->seqNum)) { 552 // Source dependencies are complete, check if resources 553 // are available and issue 554 checkAndIssue(child); 555 } 556 } 557 } 558 559 // After executing the node, remove from readyList and delete node. 560 readyList.erase(free_itr); 561 // If it is a cacheable load which was sent, don't delete 562 // just yet. Delete it in completeMemAccess() after the 563 // response is received. If it is an strictly ordered 564 // load, it was not sent and all dependencies were simply 565 // marked complete. Thus it is safe to delete it. For 566 // stores and non load/store nodes all dependencies were 567 // marked complete so it is safe to delete it. 568 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) { 569 // Release all resources occupied by the completed node 570 hwResource.release(node_ptr); 571 // clear the dynamically allocated set of dependents 572 (node_ptr->dependents).clear(); 573 // Update the stat for numOps simulated 574 owner.updateNumOps(node_ptr->robNum); 575 // delete node 576 delete node_ptr; 577 // remove from graph 578 depGraph.erase(graph_itr); 579 } 580 // Point to first node to continue to next iteration of while loop 581 free_itr = readyList.begin(); 582 } // end of while loop 583 584 // Print readyList, sizes of queues and resource status after updating 585 if (DTRACE(TraceCPUData)) { 586 printReadyList(); 587 DPRINTF(TraceCPUData, "Execute end occupancy:\n"); 588 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " 589 "depFreeQueue = %d ,", depGraph.size(), readyList.size(), 590 depFreeQueue.size()); 591 hwResource.printOccupancy(); 592 } 593 594 if (retryPkt) { 595 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry" 596 "event from the cache for seq. num %lli.\n", 597 retryPkt->req->getReqInstSeqNum()); 598 return; 599 } 600 // If the size of the dependency graph is less than the dependency window 601 // then read from the trace file to populate the graph next time we are in 602 // execute. 603 if (depGraph.size() < windowSize && !traceComplete) 604 nextRead = true; 605 606 // If cache is not blocked, schedule an event for the first execTick in 607 // readyList else retry from cache will schedule the event. If the ready 608 // list is empty then check if the next pending node has resources 609 // available to issue. If yes, then schedule an event for the next cycle. 610 if (!readyList.empty()) { 611 Tick next_event_tick = std::max(readyList.begin()->execTick, 612 curTick()); 613 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 614 next_event_tick); 615 owner.schedDcacheNextEvent(next_event_tick); 616 } else if (readyList.empty() && !depFreeQueue.empty() && 617 hwResource.isAvailable(depFreeQueue.front())) { 618 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 619 owner.clockEdge(Cycles(1))); 620 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1))); 621 } 622 623 // If trace is completely read, readyList is empty and depGraph is empty, 624 // set execComplete to true 625 if (depGraph.empty() && readyList.empty() && traceComplete && 626 !hwResource.awaitingResponse()) { 627 DPRINTF(TraceCPUData, "\tExecution Complete!\n"); 628 execComplete = true; 629 dataLastTick = curTick(); 630 } 631} 632 633PacketPtr 634TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) 635{ 636 637 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, " 638 "virt addr %d, pc %#x, size %d, flags %d).\n", 639 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr, 640 node_ptr->pc, node_ptr->size, node_ptr->flags); 641 642 // If the request is strictly ordered, do not send it. Just return nullptr 643 // as if it was succesfully sent. 644 if (node_ptr->isStrictlyOrdered()) { 645 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores; 646 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n", 647 node_ptr->seqNum); 648 return nullptr; 649 } 650 651 // Check if the request spans two cache lines as this condition triggers 652 // an assert fail in the L1 cache. If it does then truncate the size to 653 // access only until the end of that line and ignore the remainder. The 654 // stat counting this is useful to keep a check on how frequently this 655 // happens. If required the code could be revised to mimick splitting such 656 // a request into two. 657 unsigned blk_size = owner.cacheLineSize(); 658 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1)); 659 if (!(blk_offset + node_ptr->size <= blk_size)) { 660 node_ptr->size = blk_size - blk_offset; 661 ++numSplitReqs; 662 } 663 664 // Create a request and the packet containing request 665 auto req = std::make_shared<Request>( 666 node_ptr->physAddr, node_ptr->size, 667 node_ptr->flags, masterID, node_ptr->seqNum, 668 ContextID(0)); 669 670 req->setPC(node_ptr->pc); 671 // If virtual address is valid, set the asid and virtual address fields 672 // of the request. 673 if (node_ptr->virtAddr != 0) { 674 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size, 675 node_ptr->flags, masterID, node_ptr->pc); 676 req->setPaddr(node_ptr->physAddr); 677 req->setReqInstSeqNum(node_ptr->seqNum); 678 } 679 680 PacketPtr pkt; 681 uint8_t* pkt_data = new uint8_t[req->getSize()]; 682 if (node_ptr->isLoad()) { 683 pkt = Packet::createRead(req); 684 } else { 685 pkt = Packet::createWrite(req); 686 memset(pkt_data, 0xA, req->getSize()); 687 } 688 pkt->dataDynamic(pkt_data); 689 690 // Call MasterPort method to send a timing request for this packet 691 bool success = port.sendTimingReq(pkt); 692 ++numSendAttempted; 693 694 if (!success) { 695 // If it fails, return the packet to retry when a retry is signalled by 696 // the cache 697 ++numSendFailed; 698 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n"); 699 return pkt; 700 } else { 701 // It is succeeds, return nullptr 702 ++numSendSucceeded; 703 return nullptr; 704 } 705} 706 707bool 708TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first) 709{ 710 // Assert the node is dependency-free 711 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0); 712 713 // If this is the first attempt, print a debug message to indicate this. 714 if (first) { 715 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now" 716 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(), 717 node_ptr->robNum); 718 } 719 720 // Check if resources are available to issue the specific node 721 if (hwResource.isAvailable(node_ptr)) { 722 // If resources are free only then add to readyList 723 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding" 724 " to readyList, occupying resources.\n", node_ptr->seqNum); 725 // Compute the execute tick by adding the compute delay for the node 726 // and add the ready node to the ready list 727 addToSortedReadyList(node_ptr->seqNum, 728 owner.clockEdge() + node_ptr->compDelay); 729 // Account for the resources taken up by this issued node. 730 hwResource.occupy(node_ptr); 731 return true; 732 733 } else { 734 if (first) { 735 // Although dependencies are complete, resources are not available. 736 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli." 737 " Adding to depFreeQueue.\n", node_ptr->seqNum); 738 depFreeQueue.push(node_ptr); 739 } else { 740 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. " 741 "Still pending issue.\n", node_ptr->seqNum); 742 } 743 return false; 744 } 745} 746 747void 748TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt) 749{ 750 // Release the resources for this completed node. 751 if (pkt->isWrite()) { 752 // Consider store complete. 753 hwResource.releaseStoreBuffer(); 754 // If it is a store response then do nothing since we do not model 755 // dependencies on store completion in the trace. But if we were 756 // blocking execution due to store buffer fullness, we need to schedule 757 // an event and attempt to progress. 758 } else { 759 // If it is a load response then release the dependents waiting on it. 760 // Get pointer to the completed load 761 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum()); 762 assert(graph_itr != depGraph.end()); 763 GraphNode* node_ptr = graph_itr->second; 764 765 // Release resources occupied by the load 766 hwResource.release(node_ptr); 767 768 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up" 769 " dependents..\n", node_ptr->seqNum); 770 771 for (auto child : node_ptr->dependents) { 772 if (child->removeDepOnInst(node_ptr->seqNum)) { 773 checkAndIssue(child); 774 } 775 } 776 777 // clear the dynamically allocated set of dependents 778 (node_ptr->dependents).clear(); 779 // Update the stat for numOps completed 780 owner.updateNumOps(node_ptr->robNum); 781 // delete node 782 delete node_ptr; 783 // remove from graph 784 depGraph.erase(graph_itr); 785 } 786 787 if (DTRACE(TraceCPUData)) { 788 printReadyList(); 789 } 790 791 // If the size of the dependency graph is less than the dependency window 792 // then read from the trace file to populate the graph next time we are in 793 // execute. 794 if (depGraph.size() < windowSize && !traceComplete) 795 nextRead = true; 796 797 // If not waiting for retry, attempt to schedule next event 798 if (!retryPkt) { 799 // We might have new dep-free nodes in the list which will have execute 800 // tick greater than or equal to curTick. But a new dep-free node might 801 // have its execute tick earlier. Therefore, attempt to reschedule. It 802 // could happen that the readyList is empty and we got here via a 803 // last remaining response. So, either the trace is complete or there 804 // are pending nodes in the depFreeQueue. The checking is done in the 805 // execute() control flow, so schedule an event to go via that flow. 806 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) : 807 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1))); 808 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", 809 next_event_tick); 810 owner.schedDcacheNextEvent(next_event_tick); 811 } 812} 813 814void 815TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num, 816 Tick exec_tick) 817{ 818 ReadyNode ready_node; 819 ready_node.seqNum = seq_num; 820 ready_node.execTick = exec_tick; 821 822 // Iterator to readyList 823 auto itr = readyList.begin(); 824 825 // If the readyList is empty, simply insert the new node at the beginning 826 // and return 827 if (itr == readyList.end()) { 828 readyList.insert(itr, ready_node); 829 maxReadyListSize = std::max<double>(readyList.size(), 830 maxReadyListSize.value()); 831 return; 832 } 833 834 // If the new node has its execution tick equal to the first node in the 835 // list then go to the next node. If the first node in the list failed 836 // to execute, its position as the first is thus maintained. 837 if (retryPkt) 838 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum) 839 itr++; 840 841 // Increment the iterator and compare the node pointed to by it to the new 842 // node till the position to insert the new node is found. 843 bool found = false; 844 while (!found && itr != readyList.end()) { 845 // If the execution tick of the new node is less than the node then 846 // this is the position to insert 847 if (exec_tick < itr->execTick) 848 found = true; 849 // If the execution tick of the new node is equal to the node then 850 // sort in ascending order of sequence numbers 851 else if (exec_tick == itr->execTick) { 852 // If the sequence number of the new node is less than the node 853 // then this is the position to insert 854 if (seq_num < itr->seqNum) 855 found = true; 856 // Else go to next node 857 else 858 itr++; 859 } 860 // If the execution tick of the new node is greater than the node then 861 // go to the next node 862 else 863 itr++; 864 } 865 readyList.insert(itr, ready_node); 866 // Update the stat for max size reached of the readyList 867 maxReadyListSize = std::max<double>(readyList.size(), 868 maxReadyListSize.value()); 869} 870 871void 872TraceCPU::ElasticDataGen::printReadyList() { 873 874 auto itr = readyList.begin(); 875 if (itr == readyList.end()) { 876 DPRINTF(TraceCPUData, "readyList is empty.\n"); 877 return; 878 } 879 DPRINTF(TraceCPUData, "Printing readyList:\n"); 880 while (itr != readyList.end()) { 881 auto graph_itr = depGraph.find(itr->seqNum); 882 GraphNode* node_ptr M5_VAR_USED = graph_itr->second; 883 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum, 884 node_ptr->typeToStr(), itr->execTick); 885 itr++; 886 } 887} 888 889TraceCPU::ElasticDataGen::HardwareResource::HardwareResource( 890 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) 891 : sizeROB(max_rob), 892 sizeStoreBuffer(max_stores), 893 sizeLoadBuffer(max_loads), 894 oldestInFlightRobNum(UINT64_MAX), 895 numInFlightLoads(0), 896 numInFlightStores(0) 897{} 898 899void 900TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node) 901{ 902 // Occupy ROB entry for the issued node 903 // Merely maintain the oldest node, i.e. numerically least robNum by saving 904 // it in the variable oldestInFLightRobNum. 905 inFlightNodes[new_node->seqNum] = new_node->robNum; 906 oldestInFlightRobNum = inFlightNodes.begin()->second; 907 908 // Occupy Load/Store Buffer entry for the issued node if applicable 909 if (new_node->isLoad()) { 910 ++numInFlightLoads; 911 } else if (new_node->isStore()) { 912 ++numInFlightStores; 913 } // else if it is a non load/store node, no buffer entry is occupied 914 915 printOccupancy(); 916} 917 918void 919TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node) 920{ 921 assert(!inFlightNodes.empty()); 922 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n", 923 done_node->seqNum); 924 925 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end()); 926 inFlightNodes.erase(done_node->seqNum); 927 928 if (inFlightNodes.empty()) { 929 // If we delete the only in-flight node and then the 930 // oldestInFlightRobNum is set to it's initialized (max) value. 931 oldestInFlightRobNum = UINT64_MAX; 932 } else { 933 // Set the oldest in-flight node rob number equal to the first node in 934 // the inFlightNodes since that will have the numerically least value. 935 oldestInFlightRobNum = inFlightNodes.begin()->second; 936 } 937 938 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, " 939 "oldestInFlightRobNum = %d\n", inFlightNodes.size(), 940 oldestInFlightRobNum); 941 942 // A store is considered complete when a request is sent, thus ROB entry is 943 // freed. But it occupies an entry in the Store Buffer until its response 944 // is received. A load is considered complete when a response is received, 945 // thus both ROB and Load Buffer entries can be released. 946 if (done_node->isLoad()) { 947 assert(numInFlightLoads != 0); 948 --numInFlightLoads; 949 } 950 // For normal writes, we send the requests out and clear a store buffer 951 // entry on response. For writes which are strictly ordered, for e.g. 952 // writes to device registers, we do that within release() which is called 953 // when node is executed and taken off from readyList. 954 if (done_node->isStore() && done_node->isStrictlyOrdered()) { 955 releaseStoreBuffer(); 956 } 957} 958 959void 960TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer() 961{ 962 assert(numInFlightStores != 0); 963 --numInFlightStores; 964} 965 966bool 967TraceCPU::ElasticDataGen::HardwareResource::isAvailable( 968 const GraphNode* new_node) const 969{ 970 uint16_t num_in_flight_nodes; 971 if (inFlightNodes.empty()) { 972 num_in_flight_nodes = 0; 973 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 974 " #in-flight nodes = 0", new_node->seqNum); 975 } else if (new_node->robNum > oldestInFlightRobNum) { 976 // This is the intuitive case where new dep-free node is younger 977 // instruction than the oldest instruction in-flight. Thus we make sure 978 // in_flight_nodes does not overflow. 979 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum; 980 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 981 " #in-flight nodes = %d - %d = %d", new_node->seqNum, 982 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes); 983 } else { 984 // This is the case where an instruction older than the oldest in- 985 // flight instruction becomes dep-free. Thus we must have already 986 // accounted for the entry in ROB for this new dep-free node. 987 // Immediately after this check returns true, oldestInFlightRobNum will 988 // be updated in occupy(). We simply let this node issue now. 989 num_in_flight_nodes = 0; 990 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" 991 " new oldestInFlightRobNum = %d, #in-flight nodes ignored", 992 new_node->seqNum, new_node->robNum); 993 } 994 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n", 995 numInFlightLoads, sizeLoadBuffer, 996 numInFlightStores, sizeStoreBuffer); 997 // Check if resources are available to issue the specific node 998 if (num_in_flight_nodes >= sizeROB) { 999 return false; 1000 } 1001 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) { 1002 return false; 1003 } 1004 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) { 1005 return false; 1006 } 1007 return true; 1008} 1009 1010bool 1011TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const { 1012 // Return true if there is at least one read or write request in flight 1013 return (numInFlightStores != 0 || numInFlightLoads != 0); 1014} 1015 1016void 1017TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() { 1018 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, " 1019 "LQ = %d/%d, SQ = %d/%d.\n", 1020 oldestInFlightRobNum, 1021 numInFlightLoads, sizeLoadBuffer, 1022 numInFlightStores, sizeStoreBuffer); 1023} 1024 1025void 1026TraceCPU::FixedRetryGen::regStats() 1027{ 1028 using namespace Stats; 1029 1030 numSendAttempted 1031 .name(name() + ".numSendAttempted") 1032 .desc("Number of first attempts to send a request") 1033 ; 1034 1035 numSendSucceeded 1036 .name(name() + ".numSendSucceeded") 1037 .desc("Number of successful first attempts") 1038 ; 1039 1040 numSendFailed 1041 .name(name() + ".numSendFailed") 1042 .desc("Number of failed first attempts") 1043 ; 1044 1045 numRetrySucceeded 1046 .name(name() + ".numRetrySucceeded") 1047 .desc("Number of successful retries") 1048 ; 1049 1050 instLastTick 1051 .name(name() + ".instLastTick") 1052 .desc("Last tick simulated from the fixed inst trace") 1053 ; 1054} 1055 1056Tick 1057TraceCPU::FixedRetryGen::init() 1058{ 1059 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator" 1060 " IcacheGen: fixed issue with retry.\n"); 1061 1062 if (nextExecute()) { 1063 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick); 1064 return currElement.tick; 1065 } else { 1066 panic("Read of first message in the trace failed.\n"); 1067 return MaxTick; 1068 } 1069} 1070 1071bool 1072TraceCPU::FixedRetryGen::tryNext() 1073{ 1074 // If there is a retry packet, try to send it 1075 if (retryPkt) { 1076 1077 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n"); 1078 1079 if (!port.sendTimingReq(retryPkt)) { 1080 // Still blocked! This should never occur. 1081 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n"); 1082 return false; 1083 } 1084 ++numRetrySucceeded; 1085 } else { 1086 1087 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n"); 1088 1089 // try sending current element 1090 assert(currElement.isValid()); 1091 1092 ++numSendAttempted; 1093 1094 if (!send(currElement.addr, currElement.blocksize, 1095 currElement.cmd, currElement.flags, currElement.pc)) { 1096 DPRINTF(TraceCPUInst, "currElement sending failed.\n"); 1097 ++numSendFailed; 1098 // return false to indicate not to schedule next event 1099 return false; 1100 } else { 1101 ++numSendSucceeded; 1102 } 1103 } 1104 // If packet was sent successfully, either retryPkt or currElement, return 1105 // true to indicate to schedule event at current Tick plus delta. If packet 1106 // was sent successfully and there is no next packet to send, return false. 1107 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next " 1108 "element.\n"); 1109 retryPkt = nullptr; 1110 // Read next element into currElement, currElement gets cleared so save the 1111 // tick to calculate delta 1112 Tick last_tick = currElement.tick; 1113 if (nextExecute()) { 1114 assert(currElement.tick >= last_tick); 1115 delta = currElement.tick - last_tick; 1116 } 1117 return !traceComplete; 1118} 1119 1120void 1121TraceCPU::FixedRetryGen::exit() 1122{ 1123 trace.reset(); 1124} 1125 1126bool 1127TraceCPU::FixedRetryGen::nextExecute() 1128{ 1129 if (traceComplete) 1130 // We are at the end of the file, thus we have no more messages. 1131 // Return false. 1132 return false; 1133 1134 1135 //Reset the currElement to the default values 1136 currElement.clear(); 1137 1138 // Read the next line to get the next message. If that fails then end of 1139 // trace has been reached and traceComplete needs to be set in addition 1140 // to returning false. If successful then next message is in currElement. 1141 if (!trace.read(&currElement)) { 1142 traceComplete = true; 1143 instLastTick = curTick(); 1144 return false; 1145 } 1146 1147 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n", 1148 currElement.cmd.isRead() ? 'r' : 'w', 1149 currElement.addr, 1150 currElement.pc, 1151 currElement.blocksize, 1152 currElement.tick); 1153 1154 return true; 1155} 1156 1157bool 1158TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd, 1159 Request::FlagsType flags, Addr pc) 1160{ 1161 1162 // Create new request 1163 auto req = std::make_shared<Request>(addr, size, flags, masterID); 1164 req->setPC(pc); 1165 1166 // If this is not done it triggers assert in L1 cache for invalid contextId 1167 req->setContext(ContextID(0)); 1168 1169 // Embed it in a packet 1170 PacketPtr pkt = new Packet(req, cmd); 1171 1172 uint8_t* pkt_data = new uint8_t[req->getSize()]; 1173 pkt->dataDynamic(pkt_data); 1174 1175 if (cmd.isWrite()) { 1176 memset(pkt_data, 0xA, req->getSize()); 1177 } 1178 1179 // Call MasterPort method to send a timing request for this packet 1180 bool success = port.sendTimingReq(pkt); 1181 if (!success) { 1182 // If it fails, save the packet to retry when a retry is signalled by 1183 // the cache 1184 retryPkt = pkt; 1185 } 1186 return success; 1187} 1188 1189void 1190TraceCPU::icacheRetryRecvd() 1191{ 1192 // Schedule an event to go through the control flow in the same tick as 1193 // retry is received 1194 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen" 1195 " event @%lli.\n", curTick()); 1196 schedule(icacheNextEvent, curTick()); 1197} 1198 1199void 1200TraceCPU::dcacheRetryRecvd() 1201{ 1202 // Schedule an event to go through the execute flow in the same tick as 1203 // retry is received 1204 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen" 1205 " event @%lli.\n", curTick()); 1206 schedule(dcacheNextEvent, curTick()); 1207} 1208 1209void 1210TraceCPU::schedDcacheNextEvent(Tick when) 1211{ 1212 if (!dcacheNextEvent.scheduled()) { 1213 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n", 1214 when); 1215 schedule(dcacheNextEvent, when); 1216 ++numSchedDcacheEvent; 1217 } else if (when < dcacheNextEvent.when()) { 1218 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli" 1219 " to %lli.\n", dcacheNextEvent.when(), when); 1220 reschedule(dcacheNextEvent, when); 1221 } 1222 1223} 1224 1225bool 1226TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt) 1227{ 1228 // All responses on the instruction fetch side are ignored. Simply delete 1229 // the packet to free allocated memory 1230 delete pkt; 1231 1232 return true; 1233} 1234 1235void 1236TraceCPU::IcachePort::recvReqRetry() 1237{ 1238 owner->icacheRetryRecvd(); 1239} 1240 1241void 1242TraceCPU::dcacheRecvTimingResp(PacketPtr pkt) 1243{ 1244 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n"); 1245 dcacheGen.completeMemAccess(pkt); 1246} 1247 1248bool 1249TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt) 1250{ 1251 // Handle the responses for data memory requests which is done inside the 1252 // elastic data generator 1253 owner->dcacheRecvTimingResp(pkt); 1254 // After processing the response delete the packet to free 1255 // memory 1256 delete pkt; 1257 1258 return true; 1259} 1260 1261void 1262TraceCPU::DcachePort::recvReqRetry() 1263{ 1264 owner->dcacheRetryRecvd(); 1265} 1266 1267TraceCPU::ElasticDataGen::InputStream::InputStream( 1268 const std::string& filename, 1269 const double time_multiplier) 1270 : trace(filename), 1271 timeMultiplier(time_multiplier), 1272 microOpCount(0) 1273{ 1274 // Create a protobuf message for the header and read it from the stream 1275 ProtoMessage::InstDepRecordHeader header_msg; 1276 if (!trace.read(header_msg)) { 1277 panic("Failed to read packet header from %s\n", filename); 1278 1279 if (header_msg.tick_freq() != SimClock::Frequency) { 1280 panic("Trace %s was recorded with a different tick frequency %d\n", 1281 header_msg.tick_freq()); 1282 } 1283 } else { 1284 // Assign window size equal to the field in the trace that was recorded 1285 // when the data dependency trace was captured in the o3cpu model 1286 windowSize = header_msg.window_size(); 1287 } 1288} 1289 1290void 1291TraceCPU::ElasticDataGen::InputStream::reset() 1292{ 1293 trace.reset(); 1294} 1295 1296bool 1297TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element) 1298{ 1299 ProtoMessage::InstDepRecord pkt_msg; 1300 if (trace.read(pkt_msg)) { 1301 // Required fields 1302 element->seqNum = pkt_msg.seq_num(); 1303 element->type = pkt_msg.type(); 1304 // Scale the compute delay to effectively scale the Trace CPU frequency 1305 element->compDelay = pkt_msg.comp_delay() * timeMultiplier; 1306 1307 // Repeated field robDepList 1308 element->clearRobDep(); 1309 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep); 1310 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) { 1311 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i); 1312 element->numRobDep += 1; 1313 } 1314 1315 // Repeated field 1316 element->clearRegDep(); 1317 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs); 1318 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) { 1319 // There is a possibility that an instruction has both, a register 1320 // and order dependency on an instruction. In such a case, the 1321 // register dependency is omitted 1322 bool duplicate = false; 1323 for (int j = 0; j < element->numRobDep; j++) { 1324 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]); 1325 } 1326 if (!duplicate) { 1327 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i); 1328 element->numRegDep += 1; 1329 } 1330 } 1331 1332 // Optional fields 1333 if (pkt_msg.has_p_addr()) 1334 element->physAddr = pkt_msg.p_addr(); 1335 else 1336 element->physAddr = 0; 1337 1338 if (pkt_msg.has_v_addr()) 1339 element->virtAddr = pkt_msg.v_addr(); 1340 else 1341 element->virtAddr = 0; 1342 1343 if (pkt_msg.has_asid()) 1344 element->asid = pkt_msg.asid(); 1345 else 1346 element->asid = 0; 1347 1348 if (pkt_msg.has_size()) 1349 element->size = pkt_msg.size(); 1350 else 1351 element->size = 0; 1352 1353 if (pkt_msg.has_flags()) 1354 element->flags = pkt_msg.flags(); 1355 else 1356 element->flags = 0; 1357 1358 if (pkt_msg.has_pc()) 1359 element->pc = pkt_msg.pc(); 1360 else 1361 element->pc = 0; 1362 1363 // ROB occupancy number 1364 ++microOpCount; 1365 if (pkt_msg.has_weight()) { 1366 microOpCount += pkt_msg.weight(); 1367 } 1368 element->robNum = microOpCount; 1369 return true; 1370 } 1371 1372 // We have reached the end of the file 1373 return false; 1374} 1375 1376bool 1377TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep) 1378{ 1379 for (auto& own_reg_dep : regDep) { 1380 if (own_reg_dep == reg_dep) { 1381 // If register dependency is found, make it zero and return true 1382 own_reg_dep = 0; 1383 assert(numRegDep > 0); 1384 --numRegDep; 1385 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli " 1386 "done.\n", seqNum, reg_dep); 1387 return true; 1388 } 1389 } 1390 1391 // Return false if the dependency is not found 1392 return false; 1393} 1394 1395bool 1396TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep) 1397{ 1398 for (auto& own_rob_dep : robDep) { 1399 if (own_rob_dep == rob_dep) { 1400 // If the rob dependency is found, make it zero and return true 1401 own_rob_dep = 0; 1402 assert(numRobDep > 0); 1403 --numRobDep; 1404 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli " 1405 "done.\n", seqNum, rob_dep); 1406 return true; 1407 } 1408 } 1409 return false; 1410} 1411 1412void 1413TraceCPU::ElasticDataGen::GraphNode::clearRegDep() { 1414 for (auto& own_reg_dep : regDep) { 1415 own_reg_dep = 0; 1416 } 1417 numRegDep = 0; 1418} 1419 1420void 1421TraceCPU::ElasticDataGen::GraphNode::clearRobDep() { 1422 for (auto& own_rob_dep : robDep) { 1423 own_rob_dep = 0; 1424 } 1425 numRobDep = 0; 1426} 1427 1428bool 1429TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num) 1430{ 1431 // If it is an rob dependency then remove it 1432 if (!removeRobDep(done_seq_num)) { 1433 // If it is not an rob dependency then it must be a register dependency 1434 // If the register dependency is not found, it violates an assumption 1435 // and must be caught by assert. 1436 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num); 1437 assert(regdep_found); 1438 } 1439 // Return true if the node is dependency free 1440 return (numRobDep == 0 && numRegDep == 0); 1441} 1442 1443void 1444TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const 1445{ 1446 DPRINTFR(TraceCPUData, "%lli", seqNum); 1447 DPRINTFR(TraceCPUData, ",%s", typeToStr()); 1448 if (isLoad() || isStore()) { 1449 DPRINTFR(TraceCPUData, ",%i", physAddr); 1450 DPRINTFR(TraceCPUData, ",%i", size); 1451 DPRINTFR(TraceCPUData, ",%i", flags); 1452 } 1453 DPRINTFR(TraceCPUData, ",%lli", compDelay); 1454 int i = 0; 1455 DPRINTFR(TraceCPUData, "robDep:"); 1456 while (robDep[i] != 0) { 1457 DPRINTFR(TraceCPUData, ",%lli", robDep[i]); 1458 i++; 1459 } 1460 i = 0; 1461 DPRINTFR(TraceCPUData, "regDep:"); 1462 while (regDep[i] != 0) { 1463 DPRINTFR(TraceCPUData, ",%lli", regDep[i]); 1464 i++; 1465 } 1466 auto child_itr = dependents.begin(); 1467 DPRINTFR(TraceCPUData, "dependents:"); 1468 while (child_itr != dependents.end()) { 1469 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum); 1470 child_itr++; 1471 } 1472 1473 DPRINTFR(TraceCPUData, "\n"); 1474} 1475 1476std::string 1477TraceCPU::ElasticDataGen::GraphNode::typeToStr() const 1478{ 1479 return Record::RecordType_Name(type); 1480} 1481 1482TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename) 1483 : trace(filename) 1484{ 1485 // Create a protobuf message for the header and read it from the stream 1486 ProtoMessage::PacketHeader header_msg; 1487 if (!trace.read(header_msg)) { 1488 panic("Failed to read packet header from %s\n", filename); 1489 1490 if (header_msg.tick_freq() != SimClock::Frequency) { 1491 panic("Trace %s was recorded with a different tick frequency %d\n", 1492 header_msg.tick_freq()); 1493 } 1494 } 1495} 1496 1497void 1498TraceCPU::FixedRetryGen::InputStream::reset() 1499{ 1500 trace.reset(); 1501} 1502 1503bool 1504TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element) 1505{ 1506 ProtoMessage::Packet pkt_msg; 1507 if (trace.read(pkt_msg)) { 1508 element->cmd = pkt_msg.cmd(); 1509 element->addr = pkt_msg.addr(); 1510 element->blocksize = pkt_msg.size(); 1511 element->tick = pkt_msg.tick(); 1512 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0; 1513 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0; 1514 return true; 1515 } 1516 1517 // We have reached the end of the file 1518 return false; 1519} 1520