// elastic_trace.cc revision 11252
1/* 2 * Copyright (c) 2013 - 2015 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Radhika Jagtap 38 * Andreas Hansson 39 * Thomas Grass 40 */ 41 42#include "cpu/o3/probe/elastic_trace.hh" 43 44#include "base/callback.hh" 45#include "base/output.hh" 46#include "base/trace.hh" 47#include "cpu/reg_class.hh" 48#include "debug/ElasticTrace.hh" 49#include "mem/packet.hh" 50 51ElasticTrace::ElasticTrace(const ElasticTraceParams* params) 52 : ProbeListenerObject(params), 53 regEtraceListenersEvent(this), 54 firstWin(true), 55 lastClearedSeqNum(0), 56 depWindowSize(params->depWindowSize), 57 dataTraceStream(nullptr), 58 instTraceStream(nullptr), 59 startTraceInst(params->startTraceInst), 60 allProbesReg(false) 61{ 62 cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager); 63 fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\ 64 "support dependency tracing.\n", name()); 65 66 fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\ 67 "Recommended size is 3x ROB size in the O3CPU.\n"); 68 69 fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\ 70 "single-threaded workload only", cpu->numThreads, name()); 71 // Initialize the protobuf output stream 72 fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\ 73 "trace file path to instFetchTraceFile"); 74 fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\ 75 "trace file path to dataDepTraceFile"); 76 std::string filename = simout.resolve(name() + "." 
+ 77 params->instFetchTraceFile); 78 instTraceStream = new ProtoOutputStream(filename); 79 filename = simout.resolve(name() + "." + params->dataDepTraceFile); 80 dataTraceStream = new ProtoOutputStream(filename); 81 // Create a protobuf message for the header and write it to the stream 82 ProtoMessage::PacketHeader inst_pkt_header; 83 inst_pkt_header.set_obj_id(name()); 84 inst_pkt_header.set_tick_freq(SimClock::Frequency); 85 instTraceStream->write(inst_pkt_header); 86 // Create a protobuf message for the header and write it to 87 // the stream 88 ProtoMessage::InstDepRecordHeader data_rec_header; 89 data_rec_header.set_obj_id(name()); 90 data_rec_header.set_tick_freq(SimClock::Frequency); 91 data_rec_header.set_window_size(depWindowSize); 92 dataTraceStream->write(data_rec_header); 93 // Register a callback to flush trace records and close the output streams. 94 Callback* cb = new MakeCallback<ElasticTrace, 95 &ElasticTrace::flushTraces>(this); 96 registerExitCallback(cb); 97} 98 99void 100ElasticTrace::regProbeListeners() 101{ 102 inform("@%llu: regProbeListeners() called, startTraceInst = %llu", 103 curTick(), startTraceInst); 104 if (startTraceInst == 0) { 105 // If we want to start tracing from the start of the simulation, 106 // register all elastic trace probes now. 107 regEtraceListeners(); 108 } else { 109 // Schedule an event to register all elastic trace probes when 110 // specified no. of instructions are committed. 111 cpu->comInstEventQueue[(ThreadID)0]->schedule(®EtraceListenersEvent, 112 startTraceInst); 113 } 114} 115 116void 117ElasticTrace::regEtraceListeners() 118{ 119 assert(!allProbesReg); 120 inform("@%llu: No. of instructions committed = %llu, registering elastic" 121 " probe listeners", curTick(), cpu->numSimulatedInsts()); 122 // Create new listeners: provide method to be called upon a notify() for 123 // each probe point. 
124 listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this, 125 "FetchRequest", &ElasticTrace::fetchReqTrace)); 126 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 127 "Execute", &ElasticTrace::recordExecTick)); 128 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 129 "ToCommit", &ElasticTrace::recordToCommTick)); 130 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 131 "Rename", &ElasticTrace::updateRegDep)); 132 listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this, 133 "SquashInRename", &ElasticTrace::removeRegDepMapEntry)); 134 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 135 "Squash", &ElasticTrace::addSquashedInst)); 136 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 137 "Commit", &ElasticTrace::addCommittedInst)); 138 allProbesReg = true; 139} 140 141void 142ElasticTrace::fetchReqTrace(const RequestPtr &req) 143{ 144 145 DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n", 146 (MemCmd::ReadReq), 147 req->getPC(), req->getVaddr(), req->getPaddr(), 148 req->getFlags(), req->getSize(), curTick()); 149 150 // Create a protobuf message including the request fields necessary to 151 // recreate the request in the TraceCPU. 152 ProtoMessage::Packet inst_fetch_pkt; 153 inst_fetch_pkt.set_tick(curTick()); 154 inst_fetch_pkt.set_cmd(MemCmd::ReadReq); 155 inst_fetch_pkt.set_pc(req->getPC()); 156 inst_fetch_pkt.set_flags(req->getFlags()); 157 inst_fetch_pkt.set_addr(req->getPaddr()); 158 inst_fetch_pkt.set_size(req->getSize()); 159 // Write the message to the stream. 160 instTraceStream->write(inst_fetch_pkt); 161} 162 163void 164ElasticTrace::recordExecTick(const DynInstPtr &dyn_inst) 165{ 166 167 // In a corner case, a retired instruction is propagated backward to the 168 // IEW instruction queue to handle some side-channel information. But we 169 // must not process an instruction again. 
So we test the sequence number 170 // against the lastClearedSeqNum and skip adding the instruction for such 171 // corner cases. 172 if (dyn_inst->seqNum <= lastClearedSeqNum) { 173 DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \ 174 has already retired (mostly squashed)", dyn_inst->seqNum); 175 // Do nothing as program has proceeded and this inst has been 176 // propagated backwards to handle something. 177 return; 178 } 179 180 DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum, 181 curTick()); 182 // Either the execution info object will already exist if this 183 // instruction had a register dependency recorded in the rename probe 184 // listener before entering execute stage or it will not exist and will 185 // need to be created here. 186 InstExecInfo* exec_info_ptr; 187 auto itr_exec_info = tempStore.find(dyn_inst->seqNum); 188 if (itr_exec_info != tempStore.end()) { 189 exec_info_ptr = itr_exec_info->second; 190 } else { 191 exec_info_ptr = new InstExecInfo; 192 tempStore[dyn_inst->seqNum] = exec_info_ptr; 193 } 194 195 exec_info_ptr->executeTick = curTick(); 196 maxTempStoreSize = std::max(tempStore.size(), 197 (std::size_t)maxTempStoreSize.value()); 198} 199 200void 201ElasticTrace::recordToCommTick(const DynInstPtr &dyn_inst) 202{ 203 // If tracing has just been enabled then the instruction at this stage of 204 // execution is far enough that we cannot gather info about its past like 205 // the tick it started execution. Simply return until we see an instruction 206 // that is found in the tempStore. 
207 auto itr_exec_info = tempStore.find(dyn_inst->seqNum); 208 if (itr_exec_info == tempStore.end()) { 209 DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store," 210 " skipping.\n", dyn_inst->seqNum); 211 return; 212 } 213 214 DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum, 215 curTick()); 216 InstExecInfo* exec_info_ptr = itr_exec_info->second; 217 exec_info_ptr->toCommitTick = curTick(); 218 219} 220 221void 222ElasticTrace::updateRegDep(const DynInstPtr &dyn_inst) 223{ 224 // Get the sequence number of the instruction 225 InstSeqNum seq_num = dyn_inst->seqNum; 226 227 assert(dyn_inst->seqNum > lastClearedSeqNum); 228 229 // Since this is the first probe activated in the pipeline, create 230 // a new execution info object to track this instruction as it 231 // progresses through the pipeline. 232 InstExecInfo* exec_info_ptr = new InstExecInfo; 233 tempStore[seq_num] = exec_info_ptr; 234 235 // Loop through the source registers and look up the dependency map. If 236 // the source register entry is found in the dependency map, add a 237 // dependency on the last writer. 238 int8_t max_regs = dyn_inst->numSrcRegs(); 239 for (int src_idx = 0; src_idx < max_regs; src_idx++) { 240 // Get the physical register index of the i'th source register. 241 PhysRegIndex src_reg = dyn_inst->renamedSrcRegIdx(src_idx); 242 DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg %i\n", seq_num, 243 src_reg); 244 auto itr_last_writer = physRegDepMap.find(src_reg); 245 if (itr_last_writer != physRegDepMap.end()) { 246 InstSeqNum last_writer = itr_last_writer->second; 247 // Additionally the dependency distance is kept less than the window 248 // size parameter to limit the memory allocation to nodes in the 249 // graph. If the window were tending to infinite we would have to 250 // load a large number of node objects during replay. 251 if (seq_num - last_writer < depWindowSize) { 252 // Record a physical register dependency. 
253 exec_info_ptr->physRegDepSet.insert(last_writer); 254 } 255 } 256 } 257 258 // Loop through the destination registers of this instruction and update 259 // the physical register dependency map for last writers to registers. 260 max_regs = dyn_inst->numDestRegs(); 261 for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) { 262 // For data dependency tracking the register must be an int, float or 263 // CC register and not a Misc register. 264 TheISA::RegIndex dest_reg = dyn_inst->destRegIdx(dest_idx); 265 if (regIdxToClass(dest_reg) != MiscRegClass) { 266 // Get the physical register index of the i'th destination register. 267 dest_reg = dyn_inst->renamedDestRegIdx(dest_idx); 268 if (dest_reg != TheISA::ZeroReg) { 269 DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg %i\n", 270 seq_num, dest_reg); 271 physRegDepMap[dest_reg] = seq_num; 272 } 273 } 274 } 275 maxPhysRegDepMapSize = std::max(physRegDepMap.size(), 276 (std::size_t)maxPhysRegDepMapSize.value()); 277} 278 279void 280ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair) 281{ 282 DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n", 283 inst_reg_pair.second); 284 auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second); 285 if (itr_regdep_map != physRegDepMap.end()) 286 physRegDepMap.erase(itr_regdep_map); 287} 288 289void 290ElasticTrace::addSquashedInst(const DynInstPtr &head_inst) 291{ 292 // If the squashed instruction was squashed before being processed by 293 // execute stage then it will not be in the temporary store. In this case 294 // do nothing and return. 295 auto itr_exec_info = tempStore.find(head_inst->seqNum); 296 if (itr_exec_info == tempStore.end()) 297 return; 298 299 // If there is a squashed load for which a read request was 300 // sent before it got squashed then add it to the trace. 
301 DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n", 302 head_inst->seqNum); 303 // Get pointer to the execution info object corresponding to the inst. 304 InstExecInfo* exec_info_ptr = itr_exec_info->second; 305 if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick && 306 exec_info_ptr->toCommitTick != MaxTick && 307 head_inst->hasRequest() && 308 head_inst->getFault() == NoFault) { 309 // Add record to depTrace with commit parameter as false. 310 addDepTraceRecord(head_inst, exec_info_ptr, false); 311 } 312 // As the information contained is no longer needed, remove the execution 313 // info object from the temporary store. 314 clearTempStoreUntil(head_inst); 315} 316 317void 318ElasticTrace::addCommittedInst(const DynInstPtr &head_inst) 319{ 320 DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n", 321 head_inst->seqNum); 322 323 // Add the instruction to the depTrace. 324 if (!head_inst->isNop()) { 325 326 // If tracing has just been enabled then the instruction at this stage 327 // of execution is far enough that we cannot gather info about its past 328 // like the tick it started execution. Simply return until we see an 329 // instruction that is found in the tempStore. 330 auto itr_temp_store = tempStore.find(head_inst->seqNum); 331 if (itr_temp_store == tempStore.end()) { 332 DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp " 333 "store, skipping.\n", head_inst->seqNum); 334 return; 335 } 336 337 // Get pointer to the execution info object corresponding to the inst. 338 InstExecInfo* exec_info_ptr = itr_temp_store->second; 339 assert(exec_info_ptr->executeTick != MaxTick); 340 assert(exec_info_ptr->toCommitTick != MaxTick); 341 342 // Check if the instruction had a fault, if it predicated false and 343 // thus previous register values were restored or if it was a 344 // load/store that did not have a request (e.g. when the size of the 345 // request is zero). 
In all these cases the instruction is set as 346 // executed and is picked up by the commit probe listener. But a 347 // request is not issued and registers are not written. So practically, 348 // skipping these should not hurt as execution would not stall on them. 349 // Alternatively, these could be included merely as a compute node in 350 // the graph. Removing these for now. If correlation accuracy needs to 351 // be improved in future these can be turned into comp nodes at the 352 // cost of bigger traces. 353 if (head_inst->getFault() != NoFault) { 354 DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so " 355 "skip adding it to the trace\n", 356 (head_inst->isMemRef() ? "Load/store" : "Comp inst."), 357 head_inst->seqNum); 358 } else if (head_inst->isMemRef() && !head_inst->hasRequest()) { 359 DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so " 360 "skip adding it to the trace\n", head_inst->seqNum); 361 } else if (!head_inst->readPredicate()) { 362 DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so " 363 "skip adding it to the trace\n", 364 (head_inst->isMemRef() ? "Load/store" : "Comp inst."), 365 head_inst->seqNum); 366 } else { 367 // Add record to depTrace with commit parameter as true. 368 addDepTraceRecord(head_inst, exec_info_ptr, true); 369 } 370 } 371 // As the information contained is no longer needed, remove the execution 372 // info object from the temporary store. 373 clearTempStoreUntil(head_inst); 374} 375 376void 377ElasticTrace::addDepTraceRecord(const DynInstPtr &head_inst, 378 InstExecInfo* exec_info_ptr, bool commit) 379{ 380 // Create a record to assign dynamic intruction related fields. 
381 TraceInfo* new_record = new TraceInfo; 382 // Add to map for sequence number look up to retrieve the TraceInfo pointer 383 traceInfoMap[head_inst->seqNum] = new_record; 384 385 // Assign fields from the instruction 386 new_record->instNum = head_inst->seqNum; 387 new_record->commit = commit; 388 new_record->type = head_inst->isLoad() ? Record::LOAD : 389 (head_inst->isStore() ? Record::STORE : 390 Record::COMP); 391 392 // Assign fields for creating a request in case of a load/store 393 new_record->reqFlags = head_inst->memReqFlags; 394 new_record->addr = head_inst->physEffAddrLow; 395 // Currently the tracing does not support split requests. 396 new_record->size = head_inst->effSize; 397 new_record->pc = head_inst->instAddr(); 398 399 // Assign the timing information stored in the execution info object 400 new_record->executeTick = exec_info_ptr->executeTick; 401 new_record->toCommitTick = exec_info_ptr->toCommitTick; 402 new_record->commitTick = curTick(); 403 404 // Assign initial values for number of dependents and computational delay 405 new_record->numDepts = 0; 406 new_record->compDelay = -1; 407 408 // The physical register dependency set of the first instruction is 409 // empty. Since there are no records in the depTrace at this point, the 410 // case of adding an ROB dependency by using a reverse iterator is not 411 // applicable. Thus, populate the fields of the record corresponding to the 412 // first instruction and return. 413 if (depTrace.empty()) { 414 // Store the record in depTrace. 415 depTrace.push_back(new_record); 416 DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n", 417 new_record->instNum); 418 return; 419 } 420 421 // Clear register dependencies for squashed loads as they may be dependent 422 // on squashed instructions and we do not add those to the trace. 
423 if (head_inst->isLoad() && !commit) { 424 (exec_info_ptr->physRegDepSet).clear(); 425 } 426 427 // Assign the register dependencies stored in the execution info object 428 std::set<InstSeqNum>::const_iterator dep_set_it; 429 for (dep_set_it = (exec_info_ptr->physRegDepSet).begin(); 430 dep_set_it != (exec_info_ptr->physRegDepSet).end(); 431 ++dep_set_it) { 432 auto trace_info_itr = traceInfoMap.find(*dep_set_it); 433 if (trace_info_itr != traceInfoMap.end()) { 434 // The register dependency is valid. Assign it and calculate 435 // computational delay 436 new_record->physRegDepList.push_back(*dep_set_it); 437 DPRINTF(ElasticTrace, "Inst %lli has register dependency on " 438 "%lli\n", new_record->instNum, *dep_set_it); 439 TraceInfo* reg_dep = trace_info_itr->second; 440 reg_dep->numDepts++; 441 compDelayPhysRegDep(reg_dep, new_record); 442 ++numRegDep; 443 } else { 444 // The instruction that this has a register dependency on was 445 // not added to the trace because of one of the following 446 // 1. it was an instruction that had a fault 447 // 2. it was an instruction that was predicated false and 448 // previous register values were restored 449 // 3. it was load/store that did not have a request (e.g. when 450 // the size of the request is zero but this may not be a fault) 451 // In all these cases the instruction is set as executed and is 452 // picked up by the commit probe listener. But a request is not 453 // issued and registers are not written to in these cases. 454 DPRINTF(ElasticTrace, "Inst %lli has register dependency on " 455 "%lli is skipped\n",new_record->instNum, *dep_set_it); 456 } 457 } 458 459 // Check for and assign an ROB dependency in addition to register 460 // dependency before adding the record to the trace. 461 // As stores have to commit in order a store is dependent on the last 462 // committed load/store. This is recorded in the ROB dependency. 
463 if (head_inst->isStore()) { 464 // Look up store-after-store order dependency 465 updateCommitOrderDep(new_record, false); 466 // Look up store-after-load order dependency 467 updateCommitOrderDep(new_record, true); 468 } 469 470 // In case a node is dependency-free or its dependency got discarded 471 // because it was outside the window, it is marked ready in the ROB at the 472 // time of issue. A request is sent as soon as possible. To model this, a 473 // node is assigned an issue order dependency on a committed instruction 474 // that completed earlier than it. This is done to avoid the problem of 475 // determining the issue times of such dependency-free nodes during replay 476 // which could lead to too much parallelism, thinking conservatively. 477 if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) { 478 updateIssueOrderDep(new_record); 479 } 480 481 // Store the record in depTrace. 482 depTrace.push_back(new_record); 483 DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n", 484 (commit ? "committed" : "squashed"), new_record->instNum); 485 486 // To process the number of records specified by depWindowSize in the 487 // forward direction, the depTrace must have twice as many records 488 // to check for dependencies. 489 if (depTrace.size() == 2 * depWindowSize) { 490 491 DPRINTF(ElasticTrace, "Writing out trace...\n"); 492 493 // Write out the records which have been processed to the trace 494 // and remove them from the depTrace. 495 writeDepTrace(depWindowSize); 496 497 // After the first window, writeDepTrace() must check for valid 498 // compDelay. 
499 firstWin = false; 500 } 501} 502 503void 504ElasticTrace::updateCommitOrderDep(TraceInfo* new_record, 505 bool find_load_not_store) 506{ 507 assert(new_record->isStore()); 508 // Iterate in reverse direction to search for the last committed 509 // load/store that completed earlier than the new record 510 depTraceRevItr from_itr(depTrace.end()); 511 depTraceRevItr until_itr(depTrace.begin()); 512 TraceInfo* past_record = *from_itr; 513 uint32_t num_go_back = 0; 514 515 // The execution time of this store is when it is sent, that is committed 516 Tick execute_tick = curTick(); 517 // Search for store-after-load or store-after-store order dependency 518 while (num_go_back < depWindowSize && from_itr != until_itr) { 519 if (find_load_not_store) { 520 // Check if previous inst is a load completed earlier by comparing 521 // with execute tick 522 if (hasLoadCompleted(past_record, execute_tick)) { 523 // Assign rob dependency and calculate the computational delay 524 assignRobDep(past_record, new_record); 525 ++numOrderDepStores; 526 return; 527 } 528 } else { 529 // Check if previous inst is a store sent earlier by comparing with 530 // execute tick 531 if (hasStoreCommitted(past_record, execute_tick)) { 532 // Assign rob dependency and calculate the computational delay 533 assignRobDep(past_record, new_record); 534 ++numOrderDepStores; 535 return; 536 } 537 } 538 ++from_itr; 539 past_record = *from_itr; 540 ++num_go_back; 541 } 542} 543 544void 545ElasticTrace::updateIssueOrderDep(TraceInfo* new_record) 546{ 547 // Interate in reverse direction to search for the last committed 548 // record that completed earlier than the new record 549 depTraceRevItr from_itr(depTrace.end()); 550 depTraceRevItr until_itr(depTrace.begin()); 551 TraceInfo* past_record = *from_itr; 552 553 uint32_t num_go_back = 0; 554 Tick execute_tick = 0; 555 556 if (new_record->isLoad()) { 557 // The execution time of a load is when a request is sent 558 execute_tick = new_record->executeTick; 559 
++numIssueOrderDepLoads; 560 } else if (new_record->isStore()) { 561 // The execution time of a store is when it is sent, i.e. committed 562 execute_tick = curTick(); 563 ++numIssueOrderDepStores; 564 } else { 565 // The execution time of a non load/store is when it completes 566 execute_tick = new_record->toCommitTick; 567 ++numIssueOrderDepOther; 568 } 569 570 // We search if this record has an issue order dependency on a past record. 571 // Once we find it, we update both the new record and the record it depends 572 // on and return. 573 while (num_go_back < depWindowSize && from_itr != until_itr) { 574 // Check if a previous inst is a load sent earlier, or a store sent 575 // earlier, or a comp inst completed earlier by comparing with execute 576 // tick 577 if (hasLoadBeenSent(past_record, execute_tick) || 578 hasStoreCommitted(past_record, execute_tick) || 579 hasCompCompleted(past_record, execute_tick)) { 580 // Assign rob dependency and calculate the computational delay 581 assignRobDep(past_record, new_record); 582 return; 583 } 584 ++from_itr; 585 past_record = *from_itr; 586 ++num_go_back; 587 } 588} 589 590void 591ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) { 592 DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n", 593 new_record->typeToStr(), new_record->instNum, 594 past_record->instNum); 595 // Add dependency on past record 596 new_record->robDepList.push_back(past_record->instNum); 597 // Update new_record's compute delay with respect to the past record 598 compDelayRob(past_record, new_record); 599 // Increment number of dependents of the past record 600 ++(past_record->numDepts); 601 // Update stat to log max number of dependents 602 maxNumDependents = std::max(past_record->numDepts, 603 (uint32_t)maxNumDependents.value()); 604} 605 606bool 607ElasticTrace::hasStoreCommitted(TraceInfo* past_record, 608 Tick execute_tick) const 609{ 610 return (past_record->isStore() && past_record->commitTick <= 
execute_tick); 611} 612 613bool 614ElasticTrace::hasLoadCompleted(TraceInfo* past_record, 615 Tick execute_tick) const 616{ 617 return(past_record->isLoad() && past_record->commit && 618 past_record->toCommitTick <= execute_tick); 619} 620 621bool 622ElasticTrace::hasLoadBeenSent(TraceInfo* past_record, 623 Tick execute_tick) const 624{ 625 // Check if previous inst is a load sent earlier than this 626 return (past_record->isLoad() && past_record->commit && 627 past_record->executeTick <= execute_tick); 628} 629 630bool 631ElasticTrace::hasCompCompleted(TraceInfo* past_record, 632 Tick execute_tick) const 633{ 634 return(past_record->isComp() && past_record->toCommitTick <= execute_tick); 635} 636 637void 638ElasticTrace::clearTempStoreUntil(const DynInstPtr head_inst) 639{ 640 // Clear from temp store starting with the execution info object 641 // corresponding the head_inst and continue clearing by decrementing the 642 // sequence number until the last cleared sequence number. 643 InstSeqNum temp_sn = (head_inst->seqNum); 644 while (temp_sn > lastClearedSeqNum) { 645 auto itr_exec_info = tempStore.find(temp_sn); 646 if (itr_exec_info != tempStore.end()) { 647 InstExecInfo* exec_info_ptr = itr_exec_info->second; 648 // Free allocated memory for the info object 649 delete exec_info_ptr; 650 // Remove entry from temporary store 651 tempStore.erase(itr_exec_info); 652 } 653 temp_sn--; 654 } 655 // Update the last cleared sequence number to that of the head_inst 656 lastClearedSeqNum = head_inst->seqNum; 657} 658 659void 660ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record) 661{ 662 // The computation delay is the delay between the completion tick of the 663 // inst. pointed to by past_record and the execution tick of its dependent 664 // inst. pointed to by new_record. 
665 int64_t comp_delay = -1; 666 Tick execution_tick = 0, completion_tick = 0; 667 668 DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n", 669 new_record->instNum, past_record->instNum); 670 671 // Get the tick when the node is executed as per the modelling of 672 // computation delay 673 execution_tick = new_record->getExecuteTick(); 674 675 if (past_record->isLoad()) { 676 if (new_record->isStore()) { 677 completion_tick = past_record->toCommitTick; 678 } else { 679 completion_tick = past_record->executeTick; 680 } 681 } else if (past_record->isStore()) { 682 completion_tick = past_record->commitTick; 683 } else if (past_record->isComp()){ 684 completion_tick = past_record->toCommitTick; 685 } 686 assert(execution_tick >= completion_tick); 687 comp_delay = execution_tick - completion_tick; 688 689 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n", 690 execution_tick, completion_tick, comp_delay); 691 692 // Assign the computational delay with respect to the dependency which 693 // completes the latest. 694 if (new_record->compDelay == -1) 695 new_record->compDelay = comp_delay; 696 else 697 new_record->compDelay = std::min(comp_delay, new_record->compDelay); 698 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n", 699 new_record->compDelay); 700} 701 702void 703ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record, 704 TraceInfo* new_record) 705{ 706 // The computation delay is the delay between the completion tick of the 707 // inst. pointed to by past_record and the execution tick of its dependent 708 // inst. pointed to by new_record. 709 int64_t comp_delay = -1; 710 Tick execution_tick = 0, completion_tick = 0; 711 712 DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. 
num" 713 " %lli.\n", new_record->instNum, past_record->instNum); 714 715 // Get the tick when the node is executed as per the modelling of 716 // computation delay 717 execution_tick = new_record->getExecuteTick(); 718 719 // When there is a physical register dependency on an instruction, the 720 // completion tick of that instruction is when it wrote to the register, 721 // that is toCommitTick. In case, of a store updating a destination 722 // register, this is approximated to commitTick instead 723 if (past_record->isStore()) { 724 completion_tick = past_record->commitTick; 725 } else { 726 completion_tick = past_record->toCommitTick; 727 } 728 assert(execution_tick >= completion_tick); 729 comp_delay = execution_tick - completion_tick; 730 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n", 731 execution_tick, completion_tick, comp_delay); 732 733 // Assign the computational delay with respect to the dependency which 734 // completes the latest. 735 if (new_record->compDelay == -1) 736 new_record->compDelay = comp_delay; 737 else 738 new_record->compDelay = std::min(comp_delay, new_record->compDelay); 739 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n", 740 new_record->compDelay); 741} 742 743Tick 744ElasticTrace::TraceInfo::getExecuteTick() const 745{ 746 if (isLoad()) { 747 // Execution tick for a load instruction is when the request was sent, 748 // that is executeTick. 749 return executeTick; 750 } else if (isStore()) { 751 // Execution tick for a store instruction is when the request was sent, 752 // that is commitTick. 753 return commitTick; 754 } else { 755 // Execution tick for a non load/store instruction is when the register 756 // value was written to, that is commitTick. 
757 return toCommitTick; 758 } 759} 760 761void 762ElasticTrace::writeDepTrace(uint32_t num_to_write) 763{ 764 // Write the trace with fields as follows: 765 // Instruction sequence number 766 // If instruction was a load 767 // If instruction was a store 768 // If instruction has addr 769 // If instruction has size 770 // If instruction has flags 771 // List of order dependencies - optional, repeated 772 // Computational delay with respect to last completed dependency 773 // List of physical register RAW dependencies - optional, repeated 774 // Weight of a node equal to no. of filtered nodes before it - optional 775 uint16_t num_filtered_nodes = 0; 776 depTraceItr dep_trace_itr(depTrace.begin()); 777 depTraceItr dep_trace_itr_start = dep_trace_itr; 778 while (num_to_write > 0) { 779 TraceInfo* temp_ptr = *dep_trace_itr; 780 assert(temp_ptr->type != Record::INVALID); 781 // If no node dependends on a comp node then there is no reason to 782 // track the comp node in the dependency graph. We filter out such 783 // nodes but count them and add a weight field to the subsequent node 784 // that we do include in the trace. 785 if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) { 786 DPRINTFR(ElasticTrace, "Instruction with seq. 
num %lli " 787 "is as follows:\n", temp_ptr->instNum); 788 if (temp_ptr->isLoad() || temp_ptr->isStore()) { 789 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr()); 790 DPRINTFR(ElasticTrace, "\thas a request with addr %i, size %i," 791 " flags %i\n", temp_ptr->addr, temp_ptr->size, 792 temp_ptr->reqFlags); 793 } else { 794 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr()); 795 } 796 if (firstWin && temp_ptr->compDelay == -1) { 797 if (temp_ptr->isLoad()) { 798 temp_ptr->compDelay = temp_ptr->executeTick; 799 } else if (temp_ptr->isStore()) { 800 temp_ptr->compDelay = temp_ptr->commitTick; 801 } else { 802 temp_ptr->compDelay = temp_ptr->toCommitTick; 803 } 804 } 805 assert(temp_ptr->compDelay != -1); 806 DPRINTFR(ElasticTrace, "\thas computational delay %lli\n", 807 temp_ptr->compDelay); 808 809 // Create a protobuf message for the dependency record 810 ProtoMessage::InstDepRecord dep_pkt; 811 dep_pkt.set_seq_num(temp_ptr->instNum); 812 dep_pkt.set_type(temp_ptr->type); 813 dep_pkt.set_pc(temp_ptr->pc); 814 if (temp_ptr->isLoad() || temp_ptr->isStore()) { 815 dep_pkt.set_flags(temp_ptr->reqFlags); 816 dep_pkt.set_addr(temp_ptr->addr); 817 dep_pkt.set_size(temp_ptr->size); 818 } 819 dep_pkt.set_comp_delay(temp_ptr->compDelay); 820 if (temp_ptr->robDepList.empty()) { 821 DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n"); 822 } 823 while (!temp_ptr->robDepList.empty()) { 824 DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n", 825 temp_ptr->robDepList.front()); 826 dep_pkt.add_rob_dep(temp_ptr->robDepList.front()); 827 temp_ptr->robDepList.pop_front(); 828 } 829 if (temp_ptr->physRegDepList.empty()) { 830 DPRINTFR(ElasticTrace, "\thas no register dependencies\n"); 831 } 832 while (!temp_ptr->physRegDepList.empty()) { 833 DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n", 834 temp_ptr->physRegDepList.front()); 835 dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front()); 836 temp_ptr->physRegDepList.pop_front(); 837 
} 838 if (num_filtered_nodes != 0) { 839 // Set the weight of this node as the no. of filtered nodes 840 // between this node and the last node that we wrote to output 841 // stream. The weight will be used during replay to model ROB 842 // occupancy of filtered nodes. 843 dep_pkt.set_weight(num_filtered_nodes); 844 num_filtered_nodes = 0; 845 } 846 // Write the message to the protobuf output stream 847 dataTraceStream->write(dep_pkt); 848 } else { 849 // Don't write the node to the trace but note that we have filtered 850 // out a node. 851 ++numFilteredNodes; 852 ++num_filtered_nodes; 853 } 854 dep_trace_itr++; 855 traceInfoMap.erase(temp_ptr->instNum); 856 delete temp_ptr; 857 num_to_write--; 858 } 859 depTrace.erase(dep_trace_itr_start, dep_trace_itr); 860} 861 862void 863ElasticTrace::regStats() { 864 using namespace Stats; 865 numRegDep 866 .name(name() + ".numRegDep") 867 .desc("Number of register dependencies recorded during tracing") 868 ; 869 870 numOrderDepStores 871 .name(name() + ".numOrderDepStores") 872 .desc("Number of commit order (rob) dependencies for a store recorded" 873 " on a past load/store during tracing") 874 ; 875 876 numIssueOrderDepLoads 877 .name(name() + ".numIssueOrderDepLoads") 878 .desc("Number of loads that got assigned issue order dependency" 879 " because they were dependency-free") 880 ; 881 882 numIssueOrderDepStores 883 .name(name() + ".numIssueOrderDepStores") 884 .desc("Number of stores that got assigned issue order dependency" 885 " because they were dependency-free") 886 ; 887 888 numIssueOrderDepOther 889 .name(name() + ".numIssueOrderDepOther") 890 .desc("Number of non load/store insts that got assigned issue order" 891 " dependency because they were dependency-free") 892 ; 893 894 numFilteredNodes 895 .name(name() + ".numFilteredNodes") 896 .desc("No. 
of nodes filtered out before writing the output trace") 897 ; 898 899 maxNumDependents 900 .name(name() + ".maxNumDependents") 901 .desc("Maximum number or dependents on any instruction") 902 ; 903 904 maxTempStoreSize 905 .name(name() + ".maxTempStoreSize") 906 .desc("Maximum size of the temporary store during the run") 907 ; 908 909 maxPhysRegDepMapSize 910 .name(name() + ".maxPhysRegDepMapSize") 911 .desc("Maximum size of register dependency map") 912 ; 913} 914 915const std::string& 916ElasticTrace::TraceInfo::typeToStr() const 917{ 918 return Record::RecordType_Name(type); 919} 920 921const std::string 922ElasticTrace::name() const 923{ 924 return ProbeListenerObject::name(); 925} 926 927void 928ElasticTrace::flushTraces() 929{ 930 // Write to trace all records in the depTrace. 931 writeDepTrace(depTrace.size()); 932 // Delete the stream objects 933 delete dataTraceStream; 934 delete instTraceStream; 935} 936 937ElasticTrace* 938ElasticTraceParams::create() 939{ 940 return new ElasticTrace(this); 941} 942