elastic_trace.cc revision 12104
1/* 2 * Copyright (c) 2013 - 2015 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Radhika Jagtap 38 * Andreas Hansson 39 * Thomas Grass 40 */ 41 42#include "cpu/o3/probe/elastic_trace.hh" 43 44#include "base/callback.hh" 45#include "base/output.hh" 46#include "base/trace.hh" 47#include "cpu/reg_class.hh" 48#include "debug/ElasticTrace.hh" 49#include "mem/packet.hh" 50 51ElasticTrace::ElasticTrace(const ElasticTraceParams* params) 52 : ProbeListenerObject(params), 53 regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()), 54 firstWin(true), 55 lastClearedSeqNum(0), 56 depWindowSize(params->depWindowSize), 57 dataTraceStream(nullptr), 58 instTraceStream(nullptr), 59 startTraceInst(params->startTraceInst), 60 allProbesReg(false), 61 traceVirtAddr(params->traceVirtAddr) 62{ 63 cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager); 64 fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\ 65 "support dependency tracing.\n", name()); 66 67 fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\ 68 "Recommended size is 3x ROB size in the O3CPU.\n"); 69 70 fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\ 71 "single-threaded workload only", cpu->numThreads, name()); 72 // Initialize the protobuf output stream 73 fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\ 74 "trace file path to instFetchTraceFile"); 75 fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\ 76 "trace file path to dataDepTraceFile"); 77 std::string filename = simout.resolve(name() + "." + 78 params->instFetchTraceFile); 79 instTraceStream = new ProtoOutputStream(filename); 80 filename = simout.resolve(name() + "." + params->dataDepTraceFile); 81 dataTraceStream = new ProtoOutputStream(filename); 82 // Create a protobuf message for the header and write it to the stream 83 ProtoMessage::PacketHeader inst_pkt_header; 84 inst_pkt_header.set_obj_id(name()); 85 inst_pkt_header.set_tick_freq(SimClock::Frequency); 86 instTraceStream->write(inst_pkt_header); 87 // Create a protobuf message for the header and write it to 88 // the stream 89 ProtoMessage::InstDepRecordHeader data_rec_header; 90 data_rec_header.set_obj_id(name()); 91 data_rec_header.set_tick_freq(SimClock::Frequency); 92 data_rec_header.set_window_size(depWindowSize); 93 dataTraceStream->write(data_rec_header); 94 // Register a callback to flush trace records and close the output streams. 95 Callback* cb = new MakeCallback<ElasticTrace, 96 &ElasticTrace::flushTraces>(this); 97 registerExitCallback(cb); 98} 99 100void 101ElasticTrace::regProbeListeners() 102{ 103 inform("@%llu: regProbeListeners() called, startTraceInst = %llu", 104 curTick(), startTraceInst); 105 if (startTraceInst == 0) { 106 // If we want to start tracing from the start of the simulation, 107 // register all elastic trace probes now. 108 regEtraceListeners(); 109 } else { 110 // Schedule an event to register all elastic trace probes when 111 // specified no. of instructions are committed. 112 cpu->comInstEventQueue[(ThreadID)0]->schedule(®EtraceListenersEvent, 113 startTraceInst); 114 } 115} 116 117void 118ElasticTrace::regEtraceListeners() 119{ 120 assert(!allProbesReg); 121 inform("@%llu: No. of instructions committed = %llu, registering elastic" 122 " probe listeners", curTick(), cpu->numSimulatedInsts()); 123 // Create new listeners: provide method to be called upon a notify() for 124 // each probe point. 125 listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this, 126 "FetchRequest", &ElasticTrace::fetchReqTrace)); 127 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 128 "Execute", &ElasticTrace::recordExecTick)); 129 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 130 "ToCommit", &ElasticTrace::recordToCommTick)); 131 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 132 "Rename", &ElasticTrace::updateRegDep)); 133 listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this, 134 "SquashInRename", &ElasticTrace::removeRegDepMapEntry)); 135 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 136 "Squash", &ElasticTrace::addSquashedInst)); 137 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 138 "Commit", &ElasticTrace::addCommittedInst)); 139 allProbesReg = true; 140} 141 142void 143ElasticTrace::fetchReqTrace(const RequestPtr &req) 144{ 145 146 DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n", 147 (MemCmd::ReadReq), 148 req->getPC(), req->getVaddr(), req->getPaddr(), 149 req->getFlags(), req->getSize(), curTick()); 150 151 // Create a protobuf message including the request fields necessary to 152 // recreate the request in the TraceCPU. 153 ProtoMessage::Packet inst_fetch_pkt; 154 inst_fetch_pkt.set_tick(curTick()); 155 inst_fetch_pkt.set_cmd(MemCmd::ReadReq); 156 inst_fetch_pkt.set_pc(req->getPC()); 157 inst_fetch_pkt.set_flags(req->getFlags()); 158 inst_fetch_pkt.set_addr(req->getPaddr()); 159 inst_fetch_pkt.set_size(req->getSize()); 160 // Write the message to the stream. 161 instTraceStream->write(inst_fetch_pkt); 162} 163 164void 165ElasticTrace::recordExecTick(const DynInstPtr &dyn_inst) 166{ 167 168 // In a corner case, a retired instruction is propagated backward to the 169 // IEW instruction queue to handle some side-channel information. But we 170 // must not process an instruction again. So we test the sequence number 171 // against the lastClearedSeqNum and skip adding the instruction for such 172 // corner cases. 173 if (dyn_inst->seqNum <= lastClearedSeqNum) { 174 DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \ 175 has already retired (mostly squashed)", dyn_inst->seqNum); 176 // Do nothing as program has proceeded and this inst has been 177 // propagated backwards to handle something. 178 return; 179 } 180 181 DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum, 182 curTick()); 183 // Either the execution info object will already exist if this 184 // instruction had a register dependency recorded in the rename probe 185 // listener before entering execute stage or it will not exist and will 186 // need to be created here. 187 InstExecInfo* exec_info_ptr; 188 auto itr_exec_info = tempStore.find(dyn_inst->seqNum); 189 if (itr_exec_info != tempStore.end()) { 190 exec_info_ptr = itr_exec_info->second; 191 } else { 192 exec_info_ptr = new InstExecInfo; 193 tempStore[dyn_inst->seqNum] = exec_info_ptr; 194 } 195 196 exec_info_ptr->executeTick = curTick(); 197 maxTempStoreSize = std::max(tempStore.size(), 198 (std::size_t)maxTempStoreSize.value()); 199} 200 201void 202ElasticTrace::recordToCommTick(const DynInstPtr &dyn_inst) 203{ 204 // If tracing has just been enabled then the instruction at this stage of 205 // execution is far enough that we cannot gather info about its past like 206 // the tick it started execution. Simply return until we see an instruction 207 // that is found in the tempStore. 208 auto itr_exec_info = tempStore.find(dyn_inst->seqNum); 209 if (itr_exec_info == tempStore.end()) { 210 DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store," 211 " skipping.\n", dyn_inst->seqNum); 212 return; 213 } 214 215 DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum, 216 curTick()); 217 InstExecInfo* exec_info_ptr = itr_exec_info->second; 218 exec_info_ptr->toCommitTick = curTick(); 219 220} 221 222void 223ElasticTrace::updateRegDep(const DynInstPtr &dyn_inst) 224{ 225 // Get the sequence number of the instruction 226 InstSeqNum seq_num = dyn_inst->seqNum; 227 228 assert(dyn_inst->seqNum > lastClearedSeqNum); 229 230 // Since this is the first probe activated in the pipeline, create 231 // a new execution info object to track this instruction as it 232 // progresses through the pipeline. 233 InstExecInfo* exec_info_ptr = new InstExecInfo; 234 tempStore[seq_num] = exec_info_ptr; 235 236 // Loop through the source registers and look up the dependency map. If 237 // the source register entry is found in the dependency map, add a 238 // dependency on the last writer. 239 int8_t max_regs = dyn_inst->numSrcRegs(); 240 for (int src_idx = 0; src_idx < max_regs; src_idx++) { 241 // Get the physical register index of the i'th source register. 242 PhysRegIndex src_reg = dyn_inst->renamedSrcRegIdx(src_idx); 243 DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg %i\n", seq_num, 244 src_reg); 245 auto itr_last_writer = physRegDepMap.find(src_reg); 246 if (itr_last_writer != physRegDepMap.end()) { 247 InstSeqNum last_writer = itr_last_writer->second; 248 // Additionally the dependency distance is kept less than the window 249 // size parameter to limit the memory allocation to nodes in the 250 // graph. If the window were tending to infinite we would have to 251 // load a large number of node objects during replay. 252 if (seq_num - last_writer < depWindowSize) { 253 // Record a physical register dependency. 254 exec_info_ptr->physRegDepSet.insert(last_writer); 255 } 256 } 257 } 258 259 // Loop through the destination registers of this instruction and update 260 // the physical register dependency map for last writers to registers. 261 max_regs = dyn_inst->numDestRegs(); 262 for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) { 263 // For data dependency tracking the register must be an int, float or 264 // CC register and not a Misc register. 265 RegId dest_reg = dyn_inst->destRegIdx(dest_idx); 266 if (dest_reg.isRenameable() && 267 !dest_reg.isZeroReg()) { 268 // Get the physical register index of the i'th destination 269 // register. 270 PhysRegIndex phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx); 271 DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg %i\n", 272 seq_num, dest_reg.regIdx); 273 physRegDepMap[phys_dest_reg] = seq_num; 274 } 275 } 276 maxPhysRegDepMapSize = std::max(physRegDepMap.size(), 277 (std::size_t)maxPhysRegDepMapSize.value()); 278} 279 280void 281ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair) 282{ 283 DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n", 284 inst_reg_pair.second); 285 auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second); 286 if (itr_regdep_map != physRegDepMap.end()) 287 physRegDepMap.erase(itr_regdep_map); 288} 289 290void 291ElasticTrace::addSquashedInst(const DynInstPtr &head_inst) 292{ 293 // If the squashed instruction was squashed before being processed by 294 // execute stage then it will not be in the temporary store. In this case 295 // do nothing and return. 296 auto itr_exec_info = tempStore.find(head_inst->seqNum); 297 if (itr_exec_info == tempStore.end()) 298 return; 299 300 // If there is a squashed load for which a read request was 301 // sent before it got squashed then add it to the trace. 302 DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n", 303 head_inst->seqNum); 304 // Get pointer to the execution info object corresponding to the inst. 305 InstExecInfo* exec_info_ptr = itr_exec_info->second; 306 if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick && 307 exec_info_ptr->toCommitTick != MaxTick && 308 head_inst->hasRequest() && 309 head_inst->getFault() == NoFault) { 310 // Add record to depTrace with commit parameter as false. 311 addDepTraceRecord(head_inst, exec_info_ptr, false); 312 } 313 // As the information contained is no longer needed, remove the execution 314 // info object from the temporary store. 315 clearTempStoreUntil(head_inst); 316} 317 318void 319ElasticTrace::addCommittedInst(const DynInstPtr &head_inst) 320{ 321 DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n", 322 head_inst->seqNum); 323 324 // Add the instruction to the depTrace. 325 if (!head_inst->isNop()) { 326 327 // If tracing has just been enabled then the instruction at this stage 328 // of execution is far enough that we cannot gather info about its past 329 // like the tick it started execution. Simply return until we see an 330 // instruction that is found in the tempStore. 331 auto itr_temp_store = tempStore.find(head_inst->seqNum); 332 if (itr_temp_store == tempStore.end()) { 333 DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp " 334 "store, skipping.\n", head_inst->seqNum); 335 return; 336 } 337 338 // Get pointer to the execution info object corresponding to the inst. 339 InstExecInfo* exec_info_ptr = itr_temp_store->second; 340 assert(exec_info_ptr->executeTick != MaxTick); 341 assert(exec_info_ptr->toCommitTick != MaxTick); 342 343 // Check if the instruction had a fault, if it predicated false and 344 // thus previous register values were restored or if it was a 345 // load/store that did not have a request (e.g. when the size of the 346 // request is zero). In all these cases the instruction is set as 347 // executed and is picked up by the commit probe listener. But a 348 // request is not issued and registers are not written. So practically, 349 // skipping these should not hurt as execution would not stall on them. 350 // Alternatively, these could be included merely as a compute node in 351 // the graph. Removing these for now. If correlation accuracy needs to 352 // be improved in future these can be turned into comp nodes at the 353 // cost of bigger traces. 354 if (head_inst->getFault() != NoFault) { 355 DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so " 356 "skip adding it to the trace\n", 357 (head_inst->isMemRef() ? "Load/store" : "Comp inst."), 358 head_inst->seqNum); 359 } else if (head_inst->isMemRef() && !head_inst->hasRequest()) { 360 DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so " 361 "skip adding it to the trace\n", head_inst->seqNum); 362 } else if (!head_inst->readPredicate()) { 363 DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so " 364 "skip adding it to the trace\n", 365 (head_inst->isMemRef() ? "Load/store" : "Comp inst."), 366 head_inst->seqNum); 367 } else { 368 // Add record to depTrace with commit parameter as true. 369 addDepTraceRecord(head_inst, exec_info_ptr, true); 370 } 371 } 372 // As the information contained is no longer needed, remove the execution 373 // info object from the temporary store. 374 clearTempStoreUntil(head_inst); 375} 376 377void 378ElasticTrace::addDepTraceRecord(const DynInstPtr &head_inst, 379 InstExecInfo* exec_info_ptr, bool commit) 380{ 381 // Create a record to assign dynamic intruction related fields. 382 TraceInfo* new_record = new TraceInfo; 383 // Add to map for sequence number look up to retrieve the TraceInfo pointer 384 traceInfoMap[head_inst->seqNum] = new_record; 385 386 // Assign fields from the instruction 387 new_record->instNum = head_inst->seqNum; 388 new_record->commit = commit; 389 new_record->type = head_inst->isLoad() ? Record::LOAD : 390 (head_inst->isStore() ? Record::STORE : 391 Record::COMP); 392 393 // Assign fields for creating a request in case of a load/store 394 new_record->reqFlags = head_inst->memReqFlags; 395 new_record->virtAddr = head_inst->effAddr; 396 new_record->asid = head_inst->asid; 397 new_record->physAddr = head_inst->physEffAddrLow; 398 // Currently the tracing does not support split requests. 399 new_record->size = head_inst->effSize; 400 new_record->pc = head_inst->instAddr(); 401 402 // Assign the timing information stored in the execution info object 403 new_record->executeTick = exec_info_ptr->executeTick; 404 new_record->toCommitTick = exec_info_ptr->toCommitTick; 405 new_record->commitTick = curTick(); 406 407 // Assign initial values for number of dependents and computational delay 408 new_record->numDepts = 0; 409 new_record->compDelay = -1; 410 411 // The physical register dependency set of the first instruction is 412 // empty. Since there are no records in the depTrace at this point, the 413 // case of adding an ROB dependency by using a reverse iterator is not 414 // applicable. Thus, populate the fields of the record corresponding to the 415 // first instruction and return. 416 if (depTrace.empty()) { 417 // Store the record in depTrace. 418 depTrace.push_back(new_record); 419 DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n", 420 new_record->instNum); 421 return; 422 } 423 424 // Clear register dependencies for squashed loads as they may be dependent 425 // on squashed instructions and we do not add those to the trace. 426 if (head_inst->isLoad() && !commit) { 427 (exec_info_ptr->physRegDepSet).clear(); 428 } 429 430 // Assign the register dependencies stored in the execution info object 431 std::set<InstSeqNum>::const_iterator dep_set_it; 432 for (dep_set_it = (exec_info_ptr->physRegDepSet).begin(); 433 dep_set_it != (exec_info_ptr->physRegDepSet).end(); 434 ++dep_set_it) { 435 auto trace_info_itr = traceInfoMap.find(*dep_set_it); 436 if (trace_info_itr != traceInfoMap.end()) { 437 // The register dependency is valid. Assign it and calculate 438 // computational delay 439 new_record->physRegDepList.push_back(*dep_set_it); 440 DPRINTF(ElasticTrace, "Inst %lli has register dependency on " 441 "%lli\n", new_record->instNum, *dep_set_it); 442 TraceInfo* reg_dep = trace_info_itr->second; 443 reg_dep->numDepts++; 444 compDelayPhysRegDep(reg_dep, new_record); 445 ++numRegDep; 446 } else { 447 // The instruction that this has a register dependency on was 448 // not added to the trace because of one of the following 449 // 1. it was an instruction that had a fault 450 // 2. it was an instruction that was predicated false and 451 // previous register values were restored 452 // 3. it was load/store that did not have a request (e.g. when 453 // the size of the request is zero but this may not be a fault) 454 // In all these cases the instruction is set as executed and is 455 // picked up by the commit probe listener. But a request is not 456 // issued and registers are not written to in these cases. 457 DPRINTF(ElasticTrace, "Inst %lli has register dependency on " 458 "%lli is skipped\n",new_record->instNum, *dep_set_it); 459 } 460 } 461 462 // Check for and assign an ROB dependency in addition to register 463 // dependency before adding the record to the trace. 464 // As stores have to commit in order a store is dependent on the last 465 // committed load/store. This is recorded in the ROB dependency. 466 if (head_inst->isStore()) { 467 // Look up store-after-store order dependency 468 updateCommitOrderDep(new_record, false); 469 // Look up store-after-load order dependency 470 updateCommitOrderDep(new_record, true); 471 } 472 473 // In case a node is dependency-free or its dependency got discarded 474 // because it was outside the window, it is marked ready in the ROB at the 475 // time of issue. A request is sent as soon as possible. To model this, a 476 // node is assigned an issue order dependency on a committed instruction 477 // that completed earlier than it. This is done to avoid the problem of 478 // determining the issue times of such dependency-free nodes during replay 479 // which could lead to too much parallelism, thinking conservatively. 480 if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) { 481 updateIssueOrderDep(new_record); 482 } 483 484 // Store the record in depTrace. 485 depTrace.push_back(new_record); 486 DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n", 487 (commit ? "committed" : "squashed"), new_record->instNum); 488 489 // To process the number of records specified by depWindowSize in the 490 // forward direction, the depTrace must have twice as many records 491 // to check for dependencies. 492 if (depTrace.size() == 2 * depWindowSize) { 493 494 DPRINTF(ElasticTrace, "Writing out trace...\n"); 495 496 // Write out the records which have been processed to the trace 497 // and remove them from the depTrace. 498 writeDepTrace(depWindowSize); 499 500 // After the first window, writeDepTrace() must check for valid 501 // compDelay. 502 firstWin = false; 503 } 504} 505 506void 507ElasticTrace::updateCommitOrderDep(TraceInfo* new_record, 508 bool find_load_not_store) 509{ 510 assert(new_record->isStore()); 511 // Iterate in reverse direction to search for the last committed 512 // load/store that completed earlier than the new record 513 depTraceRevItr from_itr(depTrace.end()); 514 depTraceRevItr until_itr(depTrace.begin()); 515 TraceInfo* past_record = *from_itr; 516 uint32_t num_go_back = 0; 517 518 // The execution time of this store is when it is sent, that is committed 519 Tick execute_tick = curTick(); 520 // Search for store-after-load or store-after-store order dependency 521 while (num_go_back < depWindowSize && from_itr != until_itr) { 522 if (find_load_not_store) { 523 // Check if previous inst is a load completed earlier by comparing 524 // with execute tick 525 if (hasLoadCompleted(past_record, execute_tick)) { 526 // Assign rob dependency and calculate the computational delay 527 assignRobDep(past_record, new_record); 528 ++numOrderDepStores; 529 return; 530 } 531 } else { 532 // Check if previous inst is a store sent earlier by comparing with 533 // execute tick 534 if (hasStoreCommitted(past_record, execute_tick)) { 535 // Assign rob dependency and calculate the computational delay 536 assignRobDep(past_record, new_record); 537 ++numOrderDepStores; 538 return; 539 } 540 } 541 ++from_itr; 542 past_record = *from_itr; 543 ++num_go_back; 544 } 545} 546 547void 548ElasticTrace::updateIssueOrderDep(TraceInfo* new_record) 549{ 550 // Interate in reverse direction to search for the last committed 551 // record that completed earlier than the new record 552 depTraceRevItr from_itr(depTrace.end()); 553 depTraceRevItr until_itr(depTrace.begin()); 554 TraceInfo* past_record = *from_itr; 555 556 uint32_t num_go_back = 0; 557 Tick execute_tick = 0; 558 559 if (new_record->isLoad()) { 560 // The execution time of a load is when a request is sent 561 execute_tick = new_record->executeTick; 562 ++numIssueOrderDepLoads; 563 } else if (new_record->isStore()) { 564 // The execution time of a store is when it is sent, i.e. committed 565 execute_tick = curTick(); 566 ++numIssueOrderDepStores; 567 } else { 568 // The execution time of a non load/store is when it completes 569 execute_tick = new_record->toCommitTick; 570 ++numIssueOrderDepOther; 571 } 572 573 // We search if this record has an issue order dependency on a past record. 574 // Once we find it, we update both the new record and the record it depends 575 // on and return. 576 while (num_go_back < depWindowSize && from_itr != until_itr) { 577 // Check if a previous inst is a load sent earlier, or a store sent 578 // earlier, or a comp inst completed earlier by comparing with execute 579 // tick 580 if (hasLoadBeenSent(past_record, execute_tick) || 581 hasStoreCommitted(past_record, execute_tick) || 582 hasCompCompleted(past_record, execute_tick)) { 583 // Assign rob dependency and calculate the computational delay 584 assignRobDep(past_record, new_record); 585 return; 586 } 587 ++from_itr; 588 past_record = *from_itr; 589 ++num_go_back; 590 } 591} 592 593void 594ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) { 595 DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n", 596 new_record->typeToStr(), new_record->instNum, 597 past_record->instNum); 598 // Add dependency on past record 599 new_record->robDepList.push_back(past_record->instNum); 600 // Update new_record's compute delay with respect to the past record 601 compDelayRob(past_record, new_record); 602 // Increment number of dependents of the past record 603 ++(past_record->numDepts); 604 // Update stat to log max number of dependents 605 maxNumDependents = std::max(past_record->numDepts, 606 (uint32_t)maxNumDependents.value()); 607} 608 609bool 610ElasticTrace::hasStoreCommitted(TraceInfo* past_record, 611 Tick execute_tick) const 612{ 613 return (past_record->isStore() && past_record->commitTick <= execute_tick); 614} 615 616bool 617ElasticTrace::hasLoadCompleted(TraceInfo* past_record, 618 Tick execute_tick) const 619{ 620 return(past_record->isLoad() && past_record->commit && 621 past_record->toCommitTick <= execute_tick); 622} 623 624bool 625ElasticTrace::hasLoadBeenSent(TraceInfo* past_record, 626 Tick execute_tick) const 627{ 628 // Check if previous inst is a load sent earlier than this 629 return (past_record->isLoad() && past_record->commit && 630 past_record->executeTick <= execute_tick); 631} 632 633bool 634ElasticTrace::hasCompCompleted(TraceInfo* past_record, 635 Tick execute_tick) const 636{ 637 return(past_record->isComp() && past_record->toCommitTick <= execute_tick); 638} 639 640void 641ElasticTrace::clearTempStoreUntil(const DynInstPtr head_inst) 642{ 643 // Clear from temp store starting with the execution info object 644 // corresponding the head_inst and continue clearing by decrementing the 645 // sequence number until the last cleared sequence number. 646 InstSeqNum temp_sn = (head_inst->seqNum); 647 while (temp_sn > lastClearedSeqNum) { 648 auto itr_exec_info = tempStore.find(temp_sn); 649 if (itr_exec_info != tempStore.end()) { 650 InstExecInfo* exec_info_ptr = itr_exec_info->second; 651 // Free allocated memory for the info object 652 delete exec_info_ptr; 653 // Remove entry from temporary store 654 tempStore.erase(itr_exec_info); 655 } 656 temp_sn--; 657 } 658 // Update the last cleared sequence number to that of the head_inst 659 lastClearedSeqNum = head_inst->seqNum; 660} 661 662void 663ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record) 664{ 665 // The computation delay is the delay between the completion tick of the 666 // inst. pointed to by past_record and the execution tick of its dependent 667 // inst. pointed to by new_record. 668 int64_t comp_delay = -1; 669 Tick execution_tick = 0, completion_tick = 0; 670 671 DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n", 672 new_record->instNum, past_record->instNum); 673 674 // Get the tick when the node is executed as per the modelling of 675 // computation delay 676 execution_tick = new_record->getExecuteTick(); 677 678 if (past_record->isLoad()) { 679 if (new_record->isStore()) { 680 completion_tick = past_record->toCommitTick; 681 } else { 682 completion_tick = past_record->executeTick; 683 } 684 } else if (past_record->isStore()) { 685 completion_tick = past_record->commitTick; 686 } else if (past_record->isComp()){ 687 completion_tick = past_record->toCommitTick; 688 } 689 assert(execution_tick >= completion_tick); 690 comp_delay = execution_tick - completion_tick; 691 692 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n", 693 execution_tick, completion_tick, comp_delay); 694 695 // Assign the computational delay with respect to the dependency which 696 // completes the latest. 697 if (new_record->compDelay == -1) 698 new_record->compDelay = comp_delay; 699 else 700 new_record->compDelay = std::min(comp_delay, new_record->compDelay); 701 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n", 702 new_record->compDelay); 703} 704 705void 706ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record, 707 TraceInfo* new_record) 708{ 709 // The computation delay is the delay between the completion tick of the 710 // inst. pointed to by past_record and the execution tick of its dependent 711 // inst. pointed to by new_record. 712 int64_t comp_delay = -1; 713 Tick execution_tick = 0, completion_tick = 0; 714 715 DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num" 716 " %lli.\n", new_record->instNum, past_record->instNum); 717 718 // Get the tick when the node is executed as per the modelling of 719 // computation delay 720 execution_tick = new_record->getExecuteTick(); 721 722 // When there is a physical register dependency on an instruction, the 723 // completion tick of that instruction is when it wrote to the register, 724 // that is toCommitTick. In case, of a store updating a destination 725 // register, this is approximated to commitTick instead 726 if (past_record->isStore()) { 727 completion_tick = past_record->commitTick; 728 } else { 729 completion_tick = past_record->toCommitTick; 730 } 731 assert(execution_tick >= completion_tick); 732 comp_delay = execution_tick - completion_tick; 733 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n", 734 execution_tick, completion_tick, comp_delay); 735 736 // Assign the computational delay with respect to the dependency which 737 // completes the latest. 738 if (new_record->compDelay == -1) 739 new_record->compDelay = comp_delay; 740 else 741 new_record->compDelay = std::min(comp_delay, new_record->compDelay); 742 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n", 743 new_record->compDelay); 744} 745 746Tick 747ElasticTrace::TraceInfo::getExecuteTick() const 748{ 749 if (isLoad()) { 750 // Execution tick for a load instruction is when the request was sent, 751 // that is executeTick. 752 return executeTick; 753 } else if (isStore()) { 754 // Execution tick for a store instruction is when the request was sent, 755 // that is commitTick. 756 return commitTick; 757 } else { 758 // Execution tick for a non load/store instruction is when the register 759 // value was written to, that is commitTick. 760 return toCommitTick; 761 } 762} 763 764void 765ElasticTrace::writeDepTrace(uint32_t num_to_write) 766{ 767 // Write the trace with fields as follows: 768 // Instruction sequence number 769 // If instruction was a load 770 // If instruction was a store 771 // If instruction has addr 772 // If instruction has size 773 // If instruction has flags 774 // List of order dependencies - optional, repeated 775 // Computational delay with respect to last completed dependency 776 // List of physical register RAW dependencies - optional, repeated 777 // Weight of a node equal to no. of filtered nodes before it - optional 778 uint16_t num_filtered_nodes = 0; 779 depTraceItr dep_trace_itr(depTrace.begin()); 780 depTraceItr dep_trace_itr_start = dep_trace_itr; 781 while (num_to_write > 0) { 782 TraceInfo* temp_ptr = *dep_trace_itr; 783 assert(temp_ptr->type != Record::INVALID); 784 // If no node dependends on a comp node then there is no reason to 785 // track the comp node in the dependency graph. We filter out such 786 // nodes but count them and add a weight field to the subsequent node 787 // that we do include in the trace. 788 if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) { 789 DPRINTFR(ElasticTrace, "Instruction with seq. num %lli " 790 "is as follows:\n", temp_ptr->instNum); 791 if (temp_ptr->isLoad() || temp_ptr->isStore()) { 792 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr()); 793 DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, " 794 "size %i, flags %i\n", temp_ptr->physAddr, 795 temp_ptr->size, temp_ptr->reqFlags); 796 } else { 797 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr()); 798 } 799 if (firstWin && temp_ptr->compDelay == -1) { 800 if (temp_ptr->isLoad()) { 801 temp_ptr->compDelay = temp_ptr->executeTick; 802 } else if (temp_ptr->isStore()) { 803 temp_ptr->compDelay = temp_ptr->commitTick; 804 } else { 805 temp_ptr->compDelay = temp_ptr->toCommitTick; 806 } 807 } 808 assert(temp_ptr->compDelay != -1); 809 DPRINTFR(ElasticTrace, "\thas computational delay %lli\n", 810 temp_ptr->compDelay); 811 812 // Create a protobuf message for the dependency record 813 ProtoMessage::InstDepRecord dep_pkt; 814 dep_pkt.set_seq_num(temp_ptr->instNum); 815 dep_pkt.set_type(temp_ptr->type); 816 dep_pkt.set_pc(temp_ptr->pc); 817 if (temp_ptr->isLoad() || temp_ptr->isStore()) { 818 dep_pkt.set_flags(temp_ptr->reqFlags); 819 dep_pkt.set_p_addr(temp_ptr->physAddr); 820 // If tracing of virtual addresses is enabled, set the optional 821 // field for it 822 if (traceVirtAddr) { 823 dep_pkt.set_v_addr(temp_ptr->virtAddr); 824 dep_pkt.set_asid(temp_ptr->asid); 825 } 826 dep_pkt.set_size(temp_ptr->size); 827 } 828 dep_pkt.set_comp_delay(temp_ptr->compDelay); 829 if (temp_ptr->robDepList.empty()) { 830 DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n"); 831 } 832 while (!temp_ptr->robDepList.empty()) { 833 DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n", 834 temp_ptr->robDepList.front()); 835 dep_pkt.add_rob_dep(temp_ptr->robDepList.front()); 836 temp_ptr->robDepList.pop_front(); 837 } 838 if (temp_ptr->physRegDepList.empty()) { 839 DPRINTFR(ElasticTrace, "\thas no register dependencies\n"); 840 } 841 while (!temp_ptr->physRegDepList.empty()) { 842 DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n", 843 temp_ptr->physRegDepList.front()); 844 dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front()); 845 temp_ptr->physRegDepList.pop_front(); 846 } 847 if (num_filtered_nodes != 0) { 848 // Set the weight of this node as the no. of filtered nodes 849 // between this node and the last node that we wrote to output 850 // stream. The weight will be used during replay to model ROB 851 // occupancy of filtered nodes. 852 dep_pkt.set_weight(num_filtered_nodes); 853 num_filtered_nodes = 0; 854 } 855 // Write the message to the protobuf output stream 856 dataTraceStream->write(dep_pkt); 857 } else { 858 // Don't write the node to the trace but note that we have filtered 859 // out a node. 860 ++numFilteredNodes; 861 ++num_filtered_nodes; 862 } 863 dep_trace_itr++; 864 traceInfoMap.erase(temp_ptr->instNum); 865 delete temp_ptr; 866 num_to_write--; 867 } 868 depTrace.erase(dep_trace_itr_start, dep_trace_itr); 869} 870 871void 872ElasticTrace::regStats() { 873 ProbeListenerObject::regStats(); 874 875 using namespace Stats; 876 numRegDep 877 .name(name() + ".numRegDep") 878 .desc("Number of register dependencies recorded during tracing") 879 ; 880 881 numOrderDepStores 882 .name(name() + ".numOrderDepStores") 883 .desc("Number of commit order (rob) dependencies for a store recorded" 884 " on a past load/store during tracing") 885 ; 886 887 numIssueOrderDepLoads 888 .name(name() + ".numIssueOrderDepLoads") 889 .desc("Number of loads that got assigned issue order dependency" 890 " because they were dependency-free") 891 ; 892 893 numIssueOrderDepStores 894 .name(name() + ".numIssueOrderDepStores") 895 .desc("Number of stores that got assigned issue order dependency" 896 " because they were dependency-free") 897 ; 898 899 numIssueOrderDepOther 900 .name(name() + ".numIssueOrderDepOther") 901 .desc("Number of non load/store insts that got assigned issue order" 902 " dependency because they were dependency-free") 903 ; 904 905 numFilteredNodes 906 .name(name() + ".numFilteredNodes") 907 .desc("No. of nodes filtered out before writing the output trace") 908 ; 909 910 maxNumDependents 911 .name(name() + ".maxNumDependents") 912 .desc("Maximum number or dependents on any instruction") 913 ; 914 915 maxTempStoreSize 916 .name(name() + ".maxTempStoreSize") 917 .desc("Maximum size of the temporary store during the run") 918 ; 919 920 maxPhysRegDepMapSize 921 .name(name() + ".maxPhysRegDepMapSize") 922 .desc("Maximum size of register dependency map") 923 ; 924} 925 926const std::string& 927ElasticTrace::TraceInfo::typeToStr() const 928{ 929 return Record::RecordType_Name(type); 930} 931 932const std::string 933ElasticTrace::name() const 934{ 935 return ProbeListenerObject::name(); 936} 937 938void 939ElasticTrace::flushTraces() 940{ 941 // Write to trace all records in the depTrace. 942 writeDepTrace(depTrace.size()); 943 // Delete the stream objects 944 delete dataTraceStream; 945 delete instTraceStream; 946} 947 948ElasticTrace* 949ElasticTraceParams::create() 950{ 951 return new ElasticTrace(this); 952} 953