elastic_trace.cc revision 11247
1/* 2 * Copyright (c) 2013 - 2015 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Radhika Jagtap 38 * Andreas Hansson 39 * Thomas Grass 40 */ 41 42#include "cpu/o3/probe/elastic_trace.hh" 43 44#include "base/callback.hh" 45#include "base/output.hh" 46#include "base/trace.hh" 47#include "cpu/reg_class.hh" 48#include "debug/ElasticTrace.hh" 49#include "mem/packet.hh" 50 51ElasticTrace::ElasticTrace(const ElasticTraceParams* params) 52 : ProbeListenerObject(params), 53 regEtraceListenersEvent(this), 54 firstWin(true), 55 lastClearedSeqNum(0), 56 depWindowSize(params->depWindowSize), 57 dataTraceStream(nullptr), 58 instTraceStream(nullptr), 59 startTraceInst(params->startTraceInst), 60 allProbesReg(false) 61{ 62 cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager); 63 fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\ 64 "support dependency tracing.\n", name()); 65 66 fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\ 67 "Recommended size is 3x ROB size in the O3CPU.\n"); 68 69 fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\ 70 "single-threaded workload only", cpu->numThreads, name()); 71 // Initialize the protobuf output stream 72 fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\ 73 "trace file path to instFetchTraceFile"); 74 fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\ 75 "trace file path to dataDepTraceFile"); 76 std::string filename = simout.resolve(name() + "." + 77 params->instFetchTraceFile); 78 instTraceStream = new ProtoOutputStream(filename); 79 filename = simout.resolve(name() + "." + params->dataDepTraceFile); 80 dataTraceStream = new ProtoOutputStream(filename); 81 // Create a protobuf message for the header and write it to the stream 82 ProtoMessage::PacketHeader inst_pkt_header; 83 inst_pkt_header.set_obj_id(name()); 84 inst_pkt_header.set_tick_freq(SimClock::Frequency); 85 instTraceStream->write(inst_pkt_header); 86 // Create a protobuf message for the header and write it to 87 // the stream 88 ProtoMessage::InstDepRecordHeader data_rec_header; 89 data_rec_header.set_obj_id(name()); 90 data_rec_header.set_tick_freq(SimClock::Frequency); 91 data_rec_header.set_window_size(depWindowSize); 92 dataTraceStream->write(data_rec_header); 93 // Register a callback to flush trace records and close the output streams. 94 Callback* cb = new MakeCallback<ElasticTrace, 95 &ElasticTrace::flushTraces>(this); 96 registerExitCallback(cb); 97} 98 99void 100ElasticTrace::regProbeListeners() 101{ 102 inform("@%llu: regProbeListeners() called, startTraceInst = %llu", 103 curTick(), startTraceInst); 104 if (startTraceInst == 0) { 105 // If we want to start tracing from the start of the simulation, 106 // register all elastic trace probes now. 107 regEtraceListeners(); 108 } else { 109 // Schedule an event to register all elastic trace probes when 110 // specified no. of instructions are committed. 111 cpu->comInstEventQueue[(ThreadID)0]->schedule(®EtraceListenersEvent, 112 startTraceInst); 113 } 114} 115 116void 117ElasticTrace::regEtraceListeners() 118{ 119 assert(!allProbesReg); 120 inform("@%llu: No. of instructions committed = %llu, registering elastic" 121 " probe listeners", curTick(), cpu->numSimulatedInsts()); 122 // Create new listeners: provide method to be called upon a notify() for 123 // each probe point. 124 listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this, 125 "FetchRequest", &ElasticTrace::fetchReqTrace)); 126 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 127 "Execute", &ElasticTrace::recordExecTick)); 128 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 129 "ToCommit", &ElasticTrace::recordToCommTick)); 130 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 131 "Rename", &ElasticTrace::updateRegDep)); 132 listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this, 133 "SquashInRename", &ElasticTrace::removeRegDepMapEntry)); 134 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 135 "Squash", &ElasticTrace::addSquashedInst)); 136 listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, 137 "Commit", &ElasticTrace::addCommittedInst)); 138 allProbesReg = true; 139} 140 141void 142ElasticTrace::fetchReqTrace(const RequestPtr &req) 143{ 144 145 DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n", 146 (MemCmd::ReadReq), 147 req->getPC(), req->getVaddr(), req->getPaddr(), 148 req->getFlags(), req->getSize(), curTick()); 149 150 // Create a protobuf message including the request fields necessary to 151 // recreate the request in the TraceCPU. 152 ProtoMessage::Packet inst_fetch_pkt; 153 inst_fetch_pkt.set_tick(curTick()); 154 inst_fetch_pkt.set_cmd(MemCmd::ReadReq); 155 inst_fetch_pkt.set_pc(req->getPC()); 156 inst_fetch_pkt.set_flags(req->getFlags()); 157 inst_fetch_pkt.set_addr(req->getPaddr()); 158 inst_fetch_pkt.set_size(req->getSize()); 159 // Write the message to the stream. 160 instTraceStream->write(inst_fetch_pkt); 161} 162 163void 164ElasticTrace::recordExecTick(const DynInstPtr &dyn_inst) 165{ 166 167 // In a corner case, a retired instruction is propagated backward to the 168 // IEW instruction queue to handle some side-channel information. But we 169 // must not process an instruction again. So we test the sequence number 170 // against the lastClearedSeqNum and skip adding the instruction for such 171 // corner cases. 172 if (dyn_inst->seqNum <= lastClearedSeqNum) { 173 DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \ 174 has already retired (mostly squashed)", dyn_inst->seqNum); 175 // Do nothing as program has proceeded and this inst has been 176 // propagated backwards to handle something. 177 return; 178 } 179 180 DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum, 181 curTick()); 182 // Either the execution info object will already exist if this 183 // instruction had a register dependency recorded in the rename probe 184 // listener before entering execute stage or it will not exist and will 185 // need to be created here. 186 InstExecInfo* exec_info_ptr; 187 auto itr_exec_info = tempStore.find(dyn_inst->seqNum); 188 if (itr_exec_info != tempStore.end()) { 189 exec_info_ptr = itr_exec_info->second; 190 } else { 191 exec_info_ptr = new InstExecInfo; 192 tempStore[dyn_inst->seqNum] = exec_info_ptr; 193 } 194 195 exec_info_ptr->executeTick = curTick(); 196 maxTempStoreSize = std::max(tempStore.size(), 197 (std::size_t)maxTempStoreSize.value()); 198} 199 200void 201ElasticTrace::recordToCommTick(const DynInstPtr &dyn_inst) 202{ 203 // If tracing has just been enabled then the instruction at this stage of 204 // execution is far enough that we cannot gather info about its past like 205 // the tick it started execution. Simply return until we see an instruction 206 // that is found in the tempStore. 207 auto itr_exec_info = tempStore.find(dyn_inst->seqNum); 208 if (itr_exec_info == tempStore.end()) { 209 DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store," 210 " skipping.\n", dyn_inst->seqNum); 211 return; 212 } 213 214 DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum, 215 curTick()); 216 InstExecInfo* exec_info_ptr = itr_exec_info->second; 217 exec_info_ptr->toCommitTick = curTick(); 218 219} 220 221void 222ElasticTrace::updateRegDep(const DynInstPtr &dyn_inst) 223{ 224 // Get the sequence number of the instruction 225 InstSeqNum seq_num = dyn_inst->seqNum; 226 227 assert(dyn_inst->seqNum > lastClearedSeqNum); 228 229 // Since this is the first probe activated in the pipeline, create 230 // a new execution info object to track this instruction as it 231 // progresses through the pipeline. 232 InstExecInfo* exec_info_ptr = new InstExecInfo; 233 tempStore[seq_num] = exec_info_ptr; 234 235 // Loop through the source registers and look up the dependency map. If 236 // the source register entry is found in the dependency map, add a 237 // dependency on the last writer. 238 int8_t max_regs = dyn_inst->numSrcRegs(); 239 for (int src_idx = 0; src_idx < max_regs; src_idx++) { 240 // Get the physical register index of the i'th source register. 241 PhysRegIndex src_reg = dyn_inst->renamedSrcRegIdx(src_idx); 242 DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg %i\n", seq_num, 243 src_reg); 244 auto itr_last_writer = physRegDepMap.find(src_reg); 245 if (itr_last_writer != physRegDepMap.end()) { 246 InstSeqNum last_writer = itr_last_writer->second; 247 // Additionally the dependency distance is kept less than the window 248 // size parameter to limit the memory allocation to nodes in the 249 // graph. If the window were tending to infinite we would have to 250 // load a large number of node objects during replay. 251 if (seq_num - last_writer < depWindowSize) { 252 // Record a physical register dependency. 253 exec_info_ptr->physRegDepSet.insert(last_writer); 254 } 255 } 256 } 257 258 // Loop through the destination registers of this instruction and update 259 // the physical register dependency map for last writers to registers. 260 max_regs = dyn_inst->numDestRegs(); 261 for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) { 262 // For data dependency tracking the register must be an int, float or 263 // CC register and not a Misc register. 264 TheISA::RegIndex dest_reg = dyn_inst->destRegIdx(dest_idx); 265 if (regIdxToClass(dest_reg) != MiscRegClass) { 266 // Get the physical register index of the i'th destination register. 267 dest_reg = dyn_inst->renamedDestRegIdx(dest_idx); 268 if (dest_reg != TheISA::ZeroReg) { 269 DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg %i\n", 270 seq_num, dest_reg); 271 physRegDepMap[dest_reg] = seq_num; 272 } 273 } 274 } 275 maxPhysRegDepMapSize = std::max(physRegDepMap.size(), 276 (std::size_t)maxPhysRegDepMapSize.value()); 277} 278 279void 280ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair) 281{ 282 DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n", 283 inst_reg_pair.second); 284 auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second); 285 if (itr_regdep_map != physRegDepMap.end()) 286 physRegDepMap.erase(itr_regdep_map); 287} 288 289void 290ElasticTrace::addSquashedInst(const DynInstPtr &head_inst) 291{ 292 // If the squashed instruction was squashed before being processed by 293 // execute stage then it will not be in the temporary store. In this case 294 // do nothing and return. 295 auto itr_exec_info = tempStore.find(head_inst->seqNum); 296 if (itr_exec_info == tempStore.end()) 297 return; 298 299 // If there is a squashed load for which a read request was 300 // sent before it got squashed then add it to the trace. 301 DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n", 302 head_inst->seqNum); 303 // Get pointer to the execution info object corresponding to the inst. 304 InstExecInfo* exec_info_ptr = itr_exec_info->second; 305 if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick && 306 exec_info_ptr->toCommitTick != MaxTick && 307 head_inst->hasRequest() && 308 head_inst->getFault() == NoFault) { 309 // Add record to depTrace with commit parameter as false. 310 addDepTraceRecord(head_inst, exec_info_ptr, false); 311 } 312 // As the information contained is no longer needed, remove the execution 313 // info object from the temporary store. 314 clearTempStoreUntil(head_inst); 315} 316 317void 318ElasticTrace::addCommittedInst(const DynInstPtr &head_inst) 319{ 320 DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n", 321 head_inst->seqNum); 322 323 // Add the instruction to the depTrace. 324 if (!head_inst->isNop()) { 325 326 // If tracing has just been enabled then the instruction at this stage 327 // of execution is far enough that we cannot gather info about its past 328 // like the tick it started execution. Simply return until we see an 329 // instruction that is found in the tempStore. 330 auto itr_temp_store = tempStore.find(head_inst->seqNum); 331 if (itr_temp_store == tempStore.end()) { 332 DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp " 333 "store, skipping.\n", head_inst->seqNum); 334 return; 335 } 336 337 // Get pointer to the execution info object corresponding to the inst. 338 InstExecInfo* exec_info_ptr = itr_temp_store->second; 339 assert(exec_info_ptr->executeTick != MaxTick); 340 assert(exec_info_ptr->toCommitTick != MaxTick); 341 342 // Check if the instruction had a fault, if it predicated false and 343 // thus previous register values were restored or if it was a 344 // load/store that did not have a request (e.g. when the size of the 345 // request is zero). In all these cases the instruction is set as 346 // executed and is picked up by the commit probe listener. But a 347 // request is not issued and registers are not written. So practically, 348 // skipping these should not hurt as execution would not stall on them. 349 // Alternatively, these could be included merely as a compute node in 350 // the graph. Removing these for now. If correlation accuracy needs to 351 // be improved in future these can be turned into comp nodes at the 352 // cost of bigger traces. 353 if (head_inst->getFault() != NoFault) { 354 DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so " 355 "skip adding it to the trace\n", 356 (head_inst->isMemRef() ? "Load/store" : "Comp inst."), 357 head_inst->seqNum); 358 } else if (head_inst->isMemRef() && !head_inst->hasRequest()) { 359 DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so " 360 "skip adding it to the trace\n", head_inst->seqNum); 361 } else if (!head_inst->readPredicate()) { 362 DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so " 363 "skip adding it to the trace\n", 364 (head_inst->isMemRef() ? "Load/store" : "Comp inst."), 365 head_inst->seqNum); 366 } else { 367 // Add record to depTrace with commit parameter as true. 368 addDepTraceRecord(head_inst, exec_info_ptr, true); 369 } 370 } 371 // As the information contained is no longer needed, remove the execution 372 // info object from the temporary store. 373 clearTempStoreUntil(head_inst); 374} 375 376void 377ElasticTrace::addDepTraceRecord(const DynInstPtr &head_inst, 378 InstExecInfo* exec_info_ptr, bool commit) 379{ 380 // Create a record to assign dynamic intruction related fields. 381 TraceInfo* new_record = new TraceInfo; 382 // Add to map for sequence number look up to retrieve the TraceInfo pointer 383 traceInfoMap[head_inst->seqNum] = new_record; 384 385 // Assign fields from the instruction 386 new_record->instNum = head_inst->seqNum; 387 new_record->load = head_inst->isLoad(); 388 new_record->store = head_inst->isStore(); 389 new_record->commit = commit; 390 391 // Assign fields for creating a request in case of a load/store 392 new_record->reqFlags = head_inst->memReqFlags; 393 new_record->addr = head_inst->physEffAddrLow; 394 // Currently the tracing does not support split requests. 395 new_record->size = head_inst->effSize; 396 new_record->pc = head_inst->instAddr(); 397 398 // Assign the timing information stored in the execution info object 399 new_record->executeTick = exec_info_ptr->executeTick; 400 new_record->toCommitTick = exec_info_ptr->toCommitTick; 401 new_record->commitTick = curTick(); 402 403 // Assign initial values for number of dependents and computational delay 404 new_record->numDepts = 0; 405 new_record->compDelay = -1; 406 407 // The physical register dependency set of the first instruction is 408 // empty. Since there are no records in the depTrace at this point, the 409 // case of adding an ROB dependency by using a reverse iterator is not 410 // applicable. Thus, populate the fields of the record corresponding to the 411 // first instruction and return. 412 if (depTrace.empty()) { 413 // Store the record in depTrace. 414 depTrace.push_back(new_record); 415 DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n", 416 new_record->instNum); 417 return; 418 } 419 420 // Clear register dependencies for squashed loads as they may be dependent 421 // on squashed instructions and we do not add those to the trace. 422 if (head_inst->isLoad() && !commit) { 423 (exec_info_ptr->physRegDepSet).clear(); 424 } 425 426 // Assign the register dependencies stored in the execution info object 427 std::set<InstSeqNum>::const_iterator dep_set_it; 428 for (dep_set_it = (exec_info_ptr->physRegDepSet).begin(); 429 dep_set_it != (exec_info_ptr->physRegDepSet).end(); 430 ++dep_set_it) { 431 auto trace_info_itr = traceInfoMap.find(*dep_set_it); 432 if (trace_info_itr != traceInfoMap.end()) { 433 // The register dependency is valid. Assign it and calculate 434 // computational delay 435 new_record->physRegDepList.push_back(*dep_set_it); 436 DPRINTF(ElasticTrace, "Inst %lli has register dependency on " 437 "%lli\n", new_record->instNum, *dep_set_it); 438 TraceInfo* reg_dep = trace_info_itr->second; 439 reg_dep->numDepts++; 440 compDelayPhysRegDep(reg_dep, new_record); 441 ++numRegDep; 442 } else { 443 // The instruction that this has a register dependency on was 444 // not added to the trace because of one of the following 445 // 1. it was an instruction that had a fault 446 // 2. it was an instruction that was predicated false and 447 // previous register values were restored 448 // 3. it was load/store that did not have a request (e.g. when 449 // the size of the request is zero but this may not be a fault) 450 // In all these cases the instruction is set as executed and is 451 // picked up by the commit probe listener. But a request is not 452 // issued and registers are not written to in these cases. 453 DPRINTF(ElasticTrace, "Inst %lli has register dependency on " 454 "%lli is skipped\n",new_record->instNum, *dep_set_it); 455 } 456 } 457 458 // Check for and assign an ROB dependency in addition to register 459 // dependency before adding the record to the trace. 460 // As stores have to commit in order a store is dependent on the last 461 // committed load/store. This is recorded in the ROB dependency. 462 if (head_inst->isStore()) { 463 // Look up store-after-store order dependency 464 updateCommitOrderDep(new_record, false); 465 // Look up store-after-load order dependency 466 updateCommitOrderDep(new_record, true); 467 } 468 469 // In case a node is dependency-free or its dependency got discarded 470 // because it was outside the window, it is marked ready in the ROB at the 471 // time of issue. A request is sent as soon as possible. To model this, a 472 // node is assigned an issue order dependency on a committed instruction 473 // that completed earlier than it. This is done to avoid the problem of 474 // determining the issue times of such dependency-free nodes during replay 475 // which could lead to too much parallelism, thinking conservatively. 476 if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) { 477 updateIssueOrderDep(new_record); 478 } 479 480 // Store the record in depTrace. 481 depTrace.push_back(new_record); 482 DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n", 483 (commit ? "committed" : "squashed"), new_record->instNum); 484 485 // To process the number of records specified by depWindowSize in the 486 // forward direction, the depTrace must have twice as many records 487 // to check for dependencies. 488 if (depTrace.size() == 2 * depWindowSize) { 489 490 DPRINTF(ElasticTrace, "Writing out trace...\n"); 491 492 // Write out the records which have been processed to the trace 493 // and remove them from the depTrace. 494 writeDepTrace(depWindowSize); 495 496 // After the first window, writeDepTrace() must check for valid 497 // compDelay. 498 firstWin = false; 499 } 500} 501 502void 503ElasticTrace::updateCommitOrderDep(TraceInfo* new_record, 504 bool find_load_not_store) 505{ 506 assert(new_record->store); 507 // Iterate in reverse direction to search for the last committed 508 // load/store that completed earlier than the new record 509 depTraceRevItr from_itr(depTrace.end()); 510 depTraceRevItr until_itr(depTrace.begin()); 511 TraceInfo* past_record = *from_itr; 512 uint32_t num_go_back = 0; 513 514 // The execution time of this store is when it is sent, that is committed 515 Tick execute_tick = curTick(); 516 // Search for store-after-load or store-after-store order dependency 517 while (num_go_back < depWindowSize && from_itr != until_itr) { 518 if (find_load_not_store) { 519 // Check if previous inst is a load completed earlier by comparing 520 // with execute tick 521 if (hasLoadCompleted(past_record, execute_tick)) { 522 // Assign rob dependency and calculate the computational delay 523 assignRobDep(past_record, new_record); 524 ++numOrderDepStores; 525 return; 526 } 527 } else { 528 // Check if previous inst is a store sent earlier by comparing with 529 // execute tick 530 if (hasStoreCommitted(past_record, execute_tick)) { 531 // Assign rob dependency and calculate the computational delay 532 assignRobDep(past_record, new_record); 533 ++numOrderDepStores; 534 return; 535 } 536 } 537 ++from_itr; 538 past_record = *from_itr; 539 ++num_go_back; 540 } 541} 542 543void 544ElasticTrace::updateIssueOrderDep(TraceInfo* new_record) 545{ 546 // Interate in reverse direction to search for the last committed 547 // record that completed earlier than the new record 548 depTraceRevItr from_itr(depTrace.end()); 549 depTraceRevItr until_itr(depTrace.begin()); 550 TraceInfo* past_record = *from_itr; 551 552 uint32_t num_go_back = 0; 553 Tick execute_tick = 0; 554 555 if (new_record->load) { 556 // The execution time of a load is when a request is sent 557 execute_tick = new_record->executeTick; 558 ++numIssueOrderDepLoads; 559 } else if (new_record->store) { 560 // The execution time of a store is when it is sent, i.e. committed 561 execute_tick = curTick(); 562 ++numIssueOrderDepStores; 563 } else { 564 // The execution time of a non load/store is when it completes 565 execute_tick = new_record->toCommitTick; 566 ++numIssueOrderDepOther; 567 } 568 569 // We search if this record has an issue order dependency on a past record. 570 // Once we find it, we update both the new record and the record it depends 571 // on and return. 572 while (num_go_back < depWindowSize && from_itr != until_itr) { 573 // Check if a previous inst is a load sent earlier, or a store sent 574 // earlier, or a comp inst completed earlier by comparing with execute 575 // tick 576 if (hasLoadBeenSent(past_record, execute_tick) || 577 hasStoreCommitted(past_record, execute_tick) || 578 hasCompCompleted(past_record, execute_tick)) { 579 // Assign rob dependency and calculate the computational delay 580 assignRobDep(past_record, new_record); 581 return; 582 } 583 ++from_itr; 584 past_record = *from_itr; 585 ++num_go_back; 586 } 587} 588 589void 590ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) { 591 DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n", 592 new_record->load ? "Load" : (new_record->store ? "Store" : 593 "Non load/store"), 594 new_record->instNum, past_record->instNum); 595 596 // Add dependency on past record 597 new_record->robDepList.push_back(past_record->instNum); 598 // Update new_record's compute delay with respect to the past record 599 compDelayRob(past_record, new_record); 600 // Increment number of dependents of the past record 601 ++(past_record->numDepts); 602 // Update stat to log max number of dependents 603 maxNumDependents = std::max(past_record->numDepts, 604 (uint32_t)maxNumDependents.value()); 605} 606 607bool 608ElasticTrace::hasStoreCommitted(TraceInfo* past_record, 609 Tick execute_tick) const 610{ 611 return (past_record->store && past_record->commitTick <= execute_tick); 612} 613 614bool 615ElasticTrace::hasLoadCompleted(TraceInfo* past_record, 616 Tick execute_tick) const 617{ 618 return(past_record->load && past_record->commit && 619 past_record->toCommitTick <= execute_tick); 620} 621 622bool 623ElasticTrace::hasLoadBeenSent(TraceInfo* past_record, 624 Tick execute_tick) const 625{ 626 // Check if previous inst is a load sent earlier than this 627 return (past_record->load && past_record->commit && 628 past_record->executeTick <= execute_tick); 629} 630 631bool 632ElasticTrace::hasCompCompleted(TraceInfo* past_record, 633 Tick execute_tick) const 634{ 635 return(!past_record->store && !past_record->load && 636 past_record->toCommitTick <= execute_tick); 637} 638 639void 640ElasticTrace::clearTempStoreUntil(const DynInstPtr head_inst) 641{ 642 // Clear from temp store starting with the execution info object 643 // corresponding the head_inst and continue clearing by decrementing the 644 // sequence number until the last cleared sequence number. 645 InstSeqNum temp_sn = (head_inst->seqNum); 646 while (temp_sn > lastClearedSeqNum) { 647 auto itr_exec_info = tempStore.find(temp_sn); 648 if (itr_exec_info != tempStore.end()) { 649 InstExecInfo* exec_info_ptr = itr_exec_info->second; 650 // Free allocated memory for the info object 651 delete exec_info_ptr; 652 // Remove entry from temporary store 653 tempStore.erase(itr_exec_info); 654 } 655 temp_sn--; 656 } 657 // Update the last cleared sequence number to that of the head_inst 658 lastClearedSeqNum = head_inst->seqNum; 659} 660 661void 662ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record) 663{ 664 // The computation delay is the delay between the completion tick of the 665 // inst. pointed to by past_record and the execution tick of its dependent 666 // inst. pointed to by new_record. 667 int64_t comp_delay = -1; 668 Tick execution_tick = 0, completion_tick = 0; 669 670 DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n", 671 new_record->instNum, past_record->instNum); 672 673 // Get the tick when the node is executed as per the modelling of 674 // computation delay 675 execution_tick = new_record->getExecuteTick(); 676 677 if (past_record->load) { 678 if (new_record->store) { 679 completion_tick = past_record->toCommitTick; 680 } else { 681 completion_tick = past_record->executeTick; 682 } 683 } else if (past_record->store) { 684 completion_tick = past_record->commitTick; 685 } else { 686 completion_tick = past_record->toCommitTick; 687 } 688 assert(execution_tick >= completion_tick); 689 comp_delay = execution_tick - completion_tick; 690 691 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n", 692 execution_tick, completion_tick, comp_delay); 693 694 // Assign the computational delay with respect to the dependency which 695 // completes the latest. 696 if (new_record->compDelay == -1) 697 new_record->compDelay = comp_delay; 698 else 699 new_record->compDelay = std::min(comp_delay, new_record->compDelay); 700 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n", 701 new_record->compDelay); 702} 703 704void 705ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record, 706 TraceInfo* new_record) 707{ 708 // The computation delay is the delay between the completion tick of the 709 // inst. pointed to by past_record and the execution tick of its dependent 710 // inst. pointed to by new_record. 711 int64_t comp_delay = -1; 712 Tick execution_tick = 0, completion_tick = 0; 713 714 DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num" 715 " %lli.\n", new_record->instNum, past_record->instNum); 716 717 // Get the tick when the node is executed as per the modelling of 718 // computation delay 719 execution_tick = new_record->getExecuteTick(); 720 721 // When there is a physical register dependency on an instruction, the 722 // completion tick of that instruction is when it wrote to the register, 723 // that is toCommitTick. In case, of a store updating a destination 724 // register, this is approximated to commitTick instead 725 if (past_record->store) { 726 completion_tick = past_record->commitTick; 727 } else { 728 completion_tick = past_record->toCommitTick; 729 } 730 assert(execution_tick >= completion_tick); 731 comp_delay = execution_tick - completion_tick; 732 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n", 733 execution_tick, completion_tick, comp_delay); 734 735 // Assign the computational delay with respect to the dependency which 736 // completes the latest. 737 if (new_record->compDelay == -1) 738 new_record->compDelay = comp_delay; 739 else 740 new_record->compDelay = std::min(comp_delay, new_record->compDelay); 741 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n", 742 new_record->compDelay); 743} 744 745Tick 746ElasticTrace::TraceInfo::getExecuteTick() const 747{ 748 if (load) { 749 // Execution tick for a load instruction is when the request was sent, 750 // that is executeTick. 751 return executeTick; 752 } else if (store) { 753 // Execution tick for a store instruction is when the request was sent, 754 // that is commitTick. 755 return commitTick; 756 } else { 757 // Execution tick for a non load/store instruction is when the register 758 // value was written to, that is commitTick. 759 return toCommitTick; 760 } 761} 762 763void 764ElasticTrace::writeDepTrace(uint32_t num_to_write) 765{ 766 // Write the trace with fields as follows: 767 // Instruction sequence number 768 // If instruction was a load 769 // If instruction was a store 770 // If instruction has addr 771 // If instruction has size 772 // If instruction has flags 773 // List of order dependencies - optional, repeated 774 // Computational delay with respect to last completed dependency 775 // List of physical register RAW dependencies - optional, repeated 776 // Weight of a node equal to no. of filtered nodes before it - optional 777 uint16_t num_filtered_nodes = 0; 778 depTraceItr dep_trace_itr(depTrace.begin()); 779 depTraceItr dep_trace_itr_start = dep_trace_itr; 780 while (num_to_write > 0) { 781 TraceInfo* temp_ptr = *dep_trace_itr; 782 // If no node dependends on a non load/store node then there is 783 // no reason to track it in the dependency graph. We filter out such 784 // nodes but count them and add a weight field to the subsequent node 785 // that we do include in the trace. 786 if (temp_ptr->numDepts != 0 || temp_ptr->load || temp_ptr->store) { 787 788 DPRINTFR(ElasticTrace, "Instruction with seq. num %lli " 789 "is as follows:\n", temp_ptr->instNum); 790 if (temp_ptr->load || temp_ptr->store) { 791 DPRINTFR(ElasticTrace, "\tis a %s\n", 792 (temp_ptr->load ? "Load" : "Store")); 793 DPRINTFR(ElasticTrace, "\thas a request with addr %i, size %i," 794 " flags %i\n", temp_ptr->addr, temp_ptr->size, 795 temp_ptr->reqFlags); 796 } else { 797 DPRINTFR(ElasticTrace, "\tis not a load or store\n"); 798 } 799 if (firstWin && temp_ptr->compDelay == -1) { 800 if (temp_ptr->load) { 801 temp_ptr->compDelay = temp_ptr->executeTick; 802 } else if (temp_ptr->store) { 803 temp_ptr->compDelay = temp_ptr->commitTick; 804 } else { 805 temp_ptr->compDelay = temp_ptr->toCommitTick; 806 } 807 } 808 assert(temp_ptr->compDelay != -1); 809 DPRINTFR(ElasticTrace, "\thas computational delay %lli\n", 810 temp_ptr->compDelay); 811 812 // Create a protobuf message for the dependency record 813 ProtoMessage::InstDepRecord dep_pkt; 814 dep_pkt.set_seq_num(temp_ptr->instNum); 815 dep_pkt.set_load(temp_ptr->load); 816 dep_pkt.set_store(temp_ptr->store); 817 dep_pkt.set_pc(temp_ptr->pc); 818 if (temp_ptr->load || temp_ptr->store) { 819 dep_pkt.set_flags(temp_ptr->reqFlags); 820 dep_pkt.set_addr(temp_ptr->addr); 821 dep_pkt.set_size(temp_ptr->size); 822 } 823 dep_pkt.set_comp_delay(temp_ptr->compDelay); 824 if (temp_ptr->robDepList.empty()) { 825 DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n"); 826 } 827 while (!temp_ptr->robDepList.empty()) { 828 DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n", 829 temp_ptr->robDepList.front()); 830 dep_pkt.add_rob_dep(temp_ptr->robDepList.front()); 831 temp_ptr->robDepList.pop_front(); 832 } 833 if (temp_ptr->physRegDepList.empty()) { 834 DPRINTFR(ElasticTrace, "\thas no register dependencies\n"); 835 } 836 while (!temp_ptr->physRegDepList.empty()) { 837 DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n", 838 temp_ptr->physRegDepList.front()); 839 dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front()); 840 temp_ptr->physRegDepList.pop_front(); 841 } 842 if (num_filtered_nodes != 0) { 843 // Set the weight of this node as the no. of filtered nodes 844 // between this node and the last node that we wrote to output 845 // stream. The weight will be used during replay to model ROB 846 // occupancy of filtered nodes. 847 dep_pkt.set_weight(num_filtered_nodes); 848 num_filtered_nodes = 0; 849 } 850 // Write the message to the protobuf output stream 851 dataTraceStream->write(dep_pkt); 852 } else { 853 // Don't write the node to the trace but note that we have filtered 854 // out a node. 855 ++numFilteredNodes; 856 ++num_filtered_nodes; 857 } 858 dep_trace_itr++; 859 traceInfoMap.erase(temp_ptr->instNum); 860 delete temp_ptr; 861 num_to_write--; 862 } 863 depTrace.erase(dep_trace_itr_start, dep_trace_itr); 864} 865 866void 867ElasticTrace::regStats() { 868 using namespace Stats; 869 numRegDep 870 .name(name() + ".numRegDep") 871 .desc("Number of register dependencies recorded during tracing") 872 ; 873 874 numOrderDepStores 875 .name(name() + ".numOrderDepStores") 876 .desc("Number of commit order (rob) dependencies for a store recorded" 877 " on a past load/store during tracing") 878 ; 879 880 numIssueOrderDepLoads 881 .name(name() + ".numIssueOrderDepLoads") 882 .desc("Number of loads that got assigned issue order dependency" 883 " because they were dependency-free") 884 ; 885 886 numIssueOrderDepStores 887 .name(name() + ".numIssueOrderDepStores") 888 .desc("Number of stores that got assigned issue order dependency" 889 " because they were dependency-free") 890 ; 891 892 numIssueOrderDepOther 893 .name(name() + ".numIssueOrderDepOther") 894 .desc("Number of non load/store insts that got assigned issue order" 895 " dependency because they were dependency-free") 896 ; 897 898 numFilteredNodes 899 .name(name() + ".numFilteredNodes") 900 .desc("No. of nodes filtered out before writing the output trace") 901 ; 902 903 maxNumDependents 904 .name(name() + ".maxNumDependents") 905 .desc("Maximum number or dependents on any instruction") 906 ; 907 908 maxTempStoreSize 909 .name(name() + ".maxTempStoreSize") 910 .desc("Maximum size of the temporary store during the run") 911 ; 912 913 maxPhysRegDepMapSize 914 .name(name() + ".maxPhysRegDepMapSize") 915 .desc("Maximum size of register dependency map") 916 ; 917} 918 919const std::string 920ElasticTrace::name() const 921{ 922 return ProbeListenerObject::name(); 923} 924 925void 926ElasticTrace::flushTraces() 927{ 928 // Write to trace all records in the depTrace. 929 writeDepTrace(depTrace.size()); 930 // Delete the stream objects 931 delete dataTraceStream; 932 delete instTraceStream; 933} 934 935ElasticTrace* 936ElasticTraceParams::create() 937{ 938 return new ElasticTrace(this); 939} 940