elastic_trace.cc revision 13590
/*
 * Copyright (c) 2013 - 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/o3/probe/elastic_trace.hh"

#include "base/callback.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/reg_class.hh"
#include "debug/ElasticTrace.hh"
#include "mem/packet.hh"

ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
    :  ProbeListenerObject(params),
       regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
       firstWin(true),
       lastClearedSeqNum(0),
       depWindowSize(params->depWindowSize),
       dataTraceStream(nullptr),
       instTraceStream(nullptr),
       startTraceInst(params->startTraceInst),
       allProbesReg(false),
       traceVirtAddr(params->traceVirtAddr)
{
    cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
    fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "
             "support dependency tracing.\n", name());

    fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "
             "Recommended size is 3x ROB size in the O3CPU.\n");

    fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for "
             "single-threaded workload only", cpu->numThreads, name());
    // Initialize the protobuf output streams
    fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "
             "trace file path to instFetchTraceFile");
    fatal_if(params->dataDepTraceFile == "", "Assign data dependency "
             "trace file path to dataDepTraceFile");
    std::string filename = simout.resolve(name() + "." +
                                          params->instFetchTraceFile);
    instTraceStream = new ProtoOutputStream(filename);
    filename = simout.resolve(name() + "." + params->dataDepTraceFile);
    dataTraceStream = new ProtoOutputStream(filename);
    // Create a protobuf message for the header and write it to the stream
    ProtoMessage::PacketHeader inst_pkt_header;
    inst_pkt_header.set_obj_id(name());
    inst_pkt_header.set_tick_freq(SimClock::Frequency);
    instTraceStream->write(inst_pkt_header);
    // Create a protobuf message for the header and write it to
    // the stream
    ProtoMessage::InstDepRecordHeader data_rec_header;
    data_rec_header.set_obj_id(name());
    data_rec_header.set_tick_freq(SimClock::Frequency);
    data_rec_header.set_window_size(depWindowSize);
    dataTraceStream->write(data_rec_header);
    // Register a callback to flush trace records and close the output
    // streams.
    Callback* cb = new MakeCallback<ElasticTrace,
        &ElasticTrace::flushTraces>(this);
    registerExitCallback(cb);
}
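// For reference, each stream created above therefore holds exactly one
// header message followed by repeated record messages. An illustrative
// (not traced) dump of the data dependency stream, with made-up values:
//
//     InstDepRecordHeader { obj_id: "system.cpu.traceListener",
//                           tick_freq: 1000000000000, window_size: 120 }
//     InstDepRecord { ... }    // one record per traced instruction
//     InstDepRecord { ... }
//
// The actual obj_id, tick_freq and window_size come from name(),
// SimClock::Frequency and the depWindowSize parameter respectively.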
"\ 68 "Recommended size is 3x ROB size in the O3CPU.\n"); 69 70 fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\ 71 "single-threaded workload only", cpu->numThreads, name()); 72 // Initialize the protobuf output stream 73 fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\ 74 "trace file path to instFetchTraceFile"); 75 fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\ 76 "trace file path to dataDepTraceFile"); 77 std::string filename = simout.resolve(name() + "." + 78 params->instFetchTraceFile); 79 instTraceStream = new ProtoOutputStream(filename); 80 filename = simout.resolve(name() + "." + params->dataDepTraceFile); 81 dataTraceStream = new ProtoOutputStream(filename); 82 // Create a protobuf message for the header and write it to the stream 83 ProtoMessage::PacketHeader inst_pkt_header; 84 inst_pkt_header.set_obj_id(name()); 85 inst_pkt_header.set_tick_freq(SimClock::Frequency); 86 instTraceStream->write(inst_pkt_header); 87 // Create a protobuf message for the header and write it to 88 // the stream 89 ProtoMessage::InstDepRecordHeader data_rec_header; 90 data_rec_header.set_obj_id(name()); 91 data_rec_header.set_tick_freq(SimClock::Frequency); 92 data_rec_header.set_window_size(depWindowSize); 93 dataTraceStream->write(data_rec_header); 94 // Register a callback to flush trace records and close the output streams. 95 Callback* cb = new MakeCallback<ElasticTrace, 96 &ElasticTrace::flushTraces>(this); 97 registerExitCallback(cb); 98} 99 100void 101ElasticTrace::regProbeListeners() 102{ 103 inform("@%llu: regProbeListeners() called, startTraceInst = %llu", 104 curTick(), startTraceInst); 105 if (startTraceInst == 0) { 106 // If we want to start tracing from the start of the simulation, 107 // register all elastic trace probes now. 108 regEtraceListeners(); 109 } else { 110 // Schedule an event to register all elastic trace probes when 111 // specified no. of instructions are committed. 112 cpu->comInstEventQueue[(ThreadID)0]->schedule(®EtraceListenersEvent, 113 startTraceInst); 114 } 115} 116 117void 118ElasticTrace::regEtraceListeners() 119{ 120 assert(!allProbesReg); 121 inform("@%llu: No. of instructions committed = %llu, registering elastic" 122 " probe listeners", curTick(), cpu->numSimulatedInsts()); 123 // Create new listeners: provide method to be called upon a notify() for 124 // each probe point. 
void
ElasticTrace::regEtraceListeners()
{
    assert(!allProbesReg);
    inform("@%llu: No. of instructions committed = %llu, registering elastic"
           " probe listeners", curTick(), cpu->numSimulatedInsts());
    // Create new listeners: provide method to be called upon a notify() for
    // each probe point.
    listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
        "FetchRequest", &ElasticTrace::fetchReqTrace));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
        DynInstConstPtr>(this, "Execute", &ElasticTrace::recordExecTick));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
        DynInstConstPtr>(this, "ToCommit", &ElasticTrace::recordToCommTick));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
        DynInstConstPtr>(this, "Rename", &ElasticTrace::updateRegDep));
    listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(
        this, "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
        DynInstConstPtr>(this, "Squash", &ElasticTrace::addSquashedInst));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
        DynInstConstPtr>(this, "Commit", &ElasticTrace::addCommittedInst));
    allProbesReg = true;
}

void
ElasticTrace::fetchReqTrace(const RequestPtr &req)
{
    DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
             (MemCmd::ReadReq), req->getPC(), req->getVaddr(),
             req->getPaddr(), req->getFlags(), req->getSize(), curTick());

    // Create a protobuf message including the request fields necessary to
    // recreate the request in the TraceCPU.
    ProtoMessage::Packet inst_fetch_pkt;
    inst_fetch_pkt.set_tick(curTick());
    inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
    inst_fetch_pkt.set_pc(req->getPC());
    inst_fetch_pkt.set_flags(req->getFlags());
    inst_fetch_pkt.set_addr(req->getPaddr());
    inst_fetch_pkt.set_size(req->getSize());
    // Write the message to the stream.
    instTraceStream->write(inst_fetch_pkt);
}
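// An illustrative record (all values made up): a 4-byte fetch of the
// instruction at PC 0x8000, physical address 0x8000, issued at tick 1000
// would be written out roughly as
//     Packet { tick: 1000, cmd: ReadReq, pc: 0x8000, flags: 0,
//              addr: 0x8000, size: 4 }
// which is sufficient for the TraceCPU to recreate the fetch request.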
void
ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
{
    // In a corner case, a retired instruction is propagated backward to the
    // IEW instruction queue to handle some side-channel information. But we
    // must not process an instruction again. So we test the sequence number
    // against the lastClearedSeqNum and skip adding the instruction for such
    // corner cases.
    if (dyn_inst->seqNum <= lastClearedSeqNum) {
        DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction "
                 "has already retired (mostly squashed)\n", dyn_inst->seqNum);
        // Do nothing as the program has proceeded and this inst has been
        // propagated backwards to handle something.
        return;
    }

    DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
             curTick());
    // The execution info object will already exist if this instruction had
    // a register dependency recorded in the rename probe listener before
    // entering the execute stage; otherwise it is created here.
    InstExecInfo* exec_info_ptr;
    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
    if (itr_exec_info != tempStore.end()) {
        exec_info_ptr = itr_exec_info->second;
    } else {
        exec_info_ptr = new InstExecInfo;
        tempStore[dyn_inst->seqNum] = exec_info_ptr;
    }

    exec_info_ptr->executeTick = curTick();
    maxTempStoreSize = std::max(tempStore.size(),
                                (std::size_t)maxTempStoreSize.value());
}

void
ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
{
    // If tracing has just been enabled then the instruction at this stage of
    // execution is far enough along that we cannot gather info about its
    // past, like the tick it started execution. Simply return until we see
    // an instruction that is found in the tempStore.
    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
    if (itr_exec_info == tempStore.end()) {
        DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp "
                 "store, skipping.\n", dyn_inst->seqNum);
        return;
    }

    DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
             curTick());
    InstExecInfo* exec_info_ptr = itr_exec_info->second;
    exec_info_ptr->toCommitTick = curTick();
}
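// To summarise the tempStore lifecycle: an InstExecInfo entry is created at
// rename (updateRegDep) or, if tracing started mid-flight, at execute
// (recordExecTick); executeTick is stamped at execute and toCommitTick at
// writeback (above); the entry is finally consumed and freed when the
// instruction commits or is squashed (clearTempStoreUntil).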
void
ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
{
    // Get the sequence number of the instruction
    InstSeqNum seq_num = dyn_inst->seqNum;

    assert(dyn_inst->seqNum > lastClearedSeqNum);

    // Since this is the first probe activated in the pipeline, create
    // a new execution info object to track this instruction as it
    // progresses through the pipeline.
    InstExecInfo* exec_info_ptr = new InstExecInfo;
    tempStore[seq_num] = exec_info_ptr;

    // Loop through the source registers and look up the dependency map. If
    // the source register entry is found in the dependency map, add a
    // dependency on the last writer.
    int8_t max_regs = dyn_inst->numSrcRegs();
    for (int src_idx = 0; src_idx < max_regs; src_idx++) {

        const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
        if (!src_reg.isMiscReg() &&
            !src_reg.isZeroReg()) {
            // Get the physical register index of the i'th source register.
            PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
            DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
                     " %i (%s)\n", seq_num,
                     phys_src_reg->flatIndex(), phys_src_reg->className());
            auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
            if (itr_writer != physRegDepMap.end()) {
                InstSeqNum last_writer = itr_writer->second;
                // Additionally, the dependency distance is kept below the
                // window size parameter to limit the memory allocated to
                // nodes in the graph. If the window tended to infinity we
                // would have to load a large number of node objects during
                // replay.
                if (seq_num - last_writer < depWindowSize) {
                    // Record a physical register dependency.
                    exec_info_ptr->physRegDepSet.insert(last_writer);
                }
            }
        }
    }

    // Loop through the destination registers of this instruction and update
    // the physical register dependency map for last writers to registers.
    max_regs = dyn_inst->numDestRegs();
    for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
        // For data dependency tracking the register must be an int, float or
        // CC register and not a Misc register.
        const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
        if (!dest_reg.isMiscReg() &&
            !dest_reg.isZeroReg()) {
            // Get the physical register index of the i'th destination
            // register.
            PhysRegIdPtr phys_dest_reg =
                dyn_inst->renamedDestRegIdx(dest_idx);
            DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
                     " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
                     dest_reg.className());
            physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
        }
    }
    maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
                                    (std::size_t)maxPhysRegDepMapSize.value());
}
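// A worked example of the window check above (illustrative numbers): with
// depWindowSize = 576, i.e. 3x a 192-entry ROB, an instruction with
// seq. num 10000 whose source register was last written by seq. num 9500
// records that dependency (distance 500 < 576), while a last writer at
// seq. num 9000 is dropped (distance 1000 >= 576). This bounds how many
// graph nodes the replay must keep live at any time.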
void
ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
{
    DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
             inst_reg_pair.second);
    auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
    if (itr_regdep_map != physRegDepMap.end())
        physRegDepMap.erase(itr_regdep_map);
}

void
ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
{
    // If the squashed instruction was squashed before being processed by
    // the execute stage then it will not be in the temporary store. In this
    // case do nothing and return.
    auto itr_exec_info = tempStore.find(head_inst->seqNum);
    if (itr_exec_info == tempStore.end())
        return;

    // If there is a squashed load for which a read request was
    // sent before it got squashed then add it to the trace.
    DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
             head_inst->seqNum);
    // Get pointer to the execution info object corresponding to the inst.
    InstExecInfo* exec_info_ptr = itr_exec_info->second;
    if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
        exec_info_ptr->toCommitTick != MaxTick &&
        head_inst->hasRequest() &&
        head_inst->getFault() == NoFault) {
        // Add record to depTrace with commit parameter as false.
        addDepTraceRecord(head_inst, exec_info_ptr, false);
    }
    // As the information contained is no longer needed, remove the execution
    // info object from the temporary store.
    clearTempStoreUntil(head_inst);
}

void
ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
{
    DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
             head_inst->seqNum);

    // Add the instruction to the depTrace.
    if (!head_inst->isNop()) {

        // If tracing has just been enabled then the instruction at this
        // stage of execution is far enough along that we cannot gather info
        // about its past, like the tick it started execution. Simply return
        // until we see an instruction that is found in the tempStore.
        auto itr_temp_store = tempStore.find(head_inst->seqNum);
        if (itr_temp_store == tempStore.end()) {
            DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
                     "store, skipping.\n", head_inst->seqNum);
            return;
        }

        // Get pointer to the execution info object corresponding to the
        // inst.
        InstExecInfo* exec_info_ptr = itr_temp_store->second;
        assert(exec_info_ptr->executeTick != MaxTick);
        assert(exec_info_ptr->toCommitTick != MaxTick);

        // Check if the instruction had a fault, if it was predicated false
        // and thus previous register values were restored, or if it was a
        // load/store that did not have a request (e.g. when the size of the
        // request is zero). In all these cases the instruction is set as
        // executed and is picked up by the commit probe listener. But a
        // request is not issued and registers are not written. So
        // practically, skipping these should not hurt as execution would
        // not stall on them. Alternatively, these could be included merely
        // as a compute node in the graph. Removing these for now. If
        // correlation accuracy needs to be improved in future these can be
        // turned into comp nodes at the cost of bigger traces.
        if (head_inst->getFault() != NoFault) {
            DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
                    "skip adding it to the trace\n",
                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
                    head_inst->seqNum);
        } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
            DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
                    "skip adding it to the trace\n", head_inst->seqNum);
        } else if (!head_inst->readPredicate()) {
            DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
                    "skip adding it to the trace\n",
                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
                    head_inst->seqNum);
        } else {
            // Add record to depTrace with commit parameter as true.
            addDepTraceRecord(head_inst, exec_info_ptr, true);
        }
    }
    // As the information contained is no longer needed, remove the execution
    // info object from the temporary store.
    clearTempStoreUntil(head_inst);
}
void
ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
                                InstExecInfo* exec_info_ptr, bool commit)
{
    // Create a record to assign dynamic instruction related fields.
    TraceInfo* new_record = new TraceInfo;
    // Add to map for sequence number look up to retrieve the TraceInfo
    // pointer.
    traceInfoMap[head_inst->seqNum] = new_record;

    // Assign fields from the instruction
    new_record->instNum = head_inst->seqNum;
    new_record->commit = commit;
    new_record->type = head_inst->isLoad() ? Record::LOAD :
                        (head_inst->isStore() ? Record::STORE :
                        Record::COMP);

    // Assign fields for creating a request in case of a load/store
    new_record->reqFlags = head_inst->memReqFlags;
    new_record->virtAddr = head_inst->effAddr;
    new_record->asid = head_inst->asid;
    new_record->physAddr = head_inst->physEffAddr;
    // Currently the tracing does not support split requests.
    new_record->size = head_inst->effSize;
    new_record->pc = head_inst->instAddr();

    // Assign the timing information stored in the execution info object
    new_record->executeTick = exec_info_ptr->executeTick;
    new_record->toCommitTick = exec_info_ptr->toCommitTick;
    new_record->commitTick = curTick();

    // Assign initial values for number of dependents and computational delay
    new_record->numDepts = 0;
    new_record->compDelay = -1;

    // The physical register dependency set of the first instruction is
    // empty. Since there are no records in the depTrace at this point, the
    // case of adding an ROB dependency by using a reverse iterator is not
    // applicable. Thus, populate the fields of the record corresponding to
    // the first instruction and return.
    if (depTrace.empty()) {
        // Store the record in depTrace.
        depTrace.push_back(new_record);
        DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
                new_record->instNum);
        return;
    }

    // Clear register dependencies for squashed loads as they may be
    // dependent on squashed instructions and we do not add those to the
    // trace.
    if (head_inst->isLoad() && !commit) {
        (exec_info_ptr->physRegDepSet).clear();
    }

    // Assign the register dependencies stored in the execution info object
    std::set<InstSeqNum>::const_iterator dep_set_it;
    for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
         dep_set_it != (exec_info_ptr->physRegDepSet).end();
         ++dep_set_it) {
        auto trace_info_itr = traceInfoMap.find(*dep_set_it);
        if (trace_info_itr != traceInfoMap.end()) {
            // The register dependency is valid. Assign it and calculate
            // the computational delay.
            new_record->physRegDepList.push_back(*dep_set_it);
            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
                    "%lli\n", new_record->instNum, *dep_set_it);
            TraceInfo* reg_dep = trace_info_itr->second;
            reg_dep->numDepts++;
            compDelayPhysRegDep(reg_dep, new_record);
            ++numRegDep;
        } else {
            // The instruction that this has a register dependency on was
            // not added to the trace because of one of the following:
            // 1. it was an instruction that had a fault
            // 2. it was an instruction that was predicated false and
            //    previous register values were restored
            // 3. it was a load/store that did not have a request (e.g. when
            //    the size of the request is zero but this may not be a
            //    fault)
            // In all these cases the instruction is set as executed and is
            // picked up by the commit probe listener. But a request is not
            // issued and registers are not written to in these cases.
            DPRINTF(ElasticTrace, "Inst %lli's register dependency on "
                    "%lli is skipped\n", new_record->instNum, *dep_set_it);
        }
    }

    // Check for and assign an ROB dependency in addition to register
    // dependency before adding the record to the trace.
    // As stores have to commit in order, a store is dependent on the last
    // committed load/store. This is recorded in the ROB dependency.
    if (head_inst->isStore()) {
        // Look up store-after-store order dependency
        updateCommitOrderDep(new_record, false);
        // Look up store-after-load order dependency
        updateCommitOrderDep(new_record, true);
    }

    // In case a node is dependency-free or its dependency got discarded
    // because it was outside the window, it is marked ready in the ROB at
    // the time of issue. A request is sent as soon as possible. To model
    // this, a node is assigned an issue order dependency on a committed
    // instruction that completed earlier than it. This avoids the problem
    // of determining the issue times of such dependency-free nodes during
    // replay, which could otherwise allow too much parallelism; it errs on
    // the conservative side.
    if (new_record->robDepList.empty() &&
        new_record->physRegDepList.empty()) {
        updateIssueOrderDep(new_record);
    }

    // Store the record in depTrace.
    depTrace.push_back(new_record);
    DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
            (commit ? "committed" : "squashed"), new_record->instNum);

    // To process the number of records specified by depWindowSize in the
    // forward direction, the depTrace must have twice as many records
    // to check for dependencies.
    if (depTrace.size() == 2 * depWindowSize) {

        DPRINTF(ElasticTrace, "Writing out trace...\n");

        // Write out the records which have been processed to the trace
        // and remove them from the depTrace.
        writeDepTrace(depWindowSize);

        // After the first window, writeDepTrace() must check for a valid
        // compDelay.
        firstWin = false;
    }
}
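// A worked example of the buffering above (illustrative numbers): with
// depWindowSize = 100, depTrace is allowed to grow to 200 records; the
// oldest 100 are then written out and erased, while the newest 100 stay
// behind so that later instructions can still register themselves as
// dependents on them, e.g. a comp node is only filtered out once it is
// known to have no dependents.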
void
ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
                                   bool find_load_not_store)
{
    assert(new_record->isStore());
    // Iterate in the reverse direction to search for the last committed
    // load/store that completed earlier than the new record.
    depTraceRevItr from_itr(depTrace.end());
    depTraceRevItr until_itr(depTrace.begin());
    uint32_t num_go_back = 0;

    // The execution time of this store is when it is sent, that is when it
    // is committed.
    Tick execute_tick = curTick();
    // Search for a store-after-load or store-after-store order dependency.
    // Note that past_record is only dereferenced while the iterator is
    // known to be valid.
    while (num_go_back < depWindowSize && from_itr != until_itr) {
        TraceInfo* past_record = *from_itr;
        if (find_load_not_store) {
            // Check if the previous inst is a load that completed earlier
            // by comparing with the execute tick.
            if (hasLoadCompleted(past_record, execute_tick)) {
                // Assign the rob dependency and calculate the computational
                // delay.
                assignRobDep(past_record, new_record);
                ++numOrderDepStores;
                return;
            }
        } else {
            // Check if the previous inst is a store sent earlier by
            // comparing with the execute tick.
            if (hasStoreCommitted(past_record, execute_tick)) {
                // Assign the rob dependency and calculate the computational
                // delay.
                assignRobDep(past_record, new_record);
                ++numOrderDepStores;
                return;
            }
        }
        ++from_itr;
        ++num_go_back;
    }
}
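// An illustrative timeline for the search above (made-up ticks): a store
// committing at tick 5000 picks up a store-after-store dependency on a
// prior store with commitTick 4600, and a store-after-load dependency on a
// prior committed load with toCommitTick 4800; a load completing only at
// tick 5200 is ignored since it does not complete by tick 5000.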
void
ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
{
    // Iterate in the reverse direction to search for the last committed
    // record that completed earlier than the new record.
    depTraceRevItr from_itr(depTrace.end());
    depTraceRevItr until_itr(depTrace.begin());

    uint32_t num_go_back = 0;
    Tick execute_tick = 0;

    if (new_record->isLoad()) {
        // The execution time of a load is when a request is sent
        execute_tick = new_record->executeTick;
        ++numIssueOrderDepLoads;
    } else if (new_record->isStore()) {
        // The execution time of a store is when it is sent, i.e. committed
        execute_tick = curTick();
        ++numIssueOrderDepStores;
    } else {
        // The execution time of a non load/store is when it completes
        execute_tick = new_record->toCommitTick;
        ++numIssueOrderDepOther;
    }

    // We search if this record has an issue order dependency on a past
    // record. Once we find it, we update both the new record and the record
    // it depends on, and return.
    while (num_go_back < depWindowSize && from_itr != until_itr) {
        TraceInfo* past_record = *from_itr;
        // Check if a previous inst is a load sent earlier, or a store sent
        // earlier, or a comp inst completed earlier by comparing with the
        // execute tick.
        if (hasLoadBeenSent(past_record, execute_tick) ||
            hasStoreCommitted(past_record, execute_tick) ||
            hasCompCompleted(past_record, execute_tick)) {
            // Assign the rob dependency and calculate the computational
            // delay.
            assignRobDep(past_record, new_record);
            return;
        }
        ++from_itr;
        ++num_go_back;
    }
}

void
ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record)
{
    DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
            new_record->typeToStr(), new_record->instNum,
            past_record->instNum);
    // Add dependency on past record
    new_record->robDepList.push_back(past_record->instNum);
    // Update new_record's compute delay with respect to the past record
    compDelayRob(past_record, new_record);
    // Increment number of dependents of the past record
    ++(past_record->numDepts);
    // Update stat to log max number of dependents
    maxNumDependents = std::max(past_record->numDepts,
                                (uint32_t)maxNumDependents.value());
}

bool
ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
                                Tick execute_tick) const
{
    return (past_record->isStore() &&
            past_record->commitTick <= execute_tick);
}

bool
ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
                               Tick execute_tick) const
{
    return (past_record->isLoad() && past_record->commit &&
            past_record->toCommitTick <= execute_tick);
}

bool
ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
                              Tick execute_tick) const
{
    // Check if the previous inst is a load sent earlier than this
    return (past_record->isLoad() && past_record->commit &&
            past_record->executeTick <= execute_tick);
}

bool
ElasticTrace::hasCompCompleted(TraceInfo* past_record,
                               Tick execute_tick) const
{
    return (past_record->isComp() &&
            past_record->toCommitTick <= execute_tick);
}
void
ElasticTrace::clearTempStoreUntil(const DynInstConstPtr& head_inst)
{
    // Clear from the temp store starting with the execution info object
    // corresponding to the head_inst and continue clearing by decrementing
    // the sequence number until the last cleared sequence number.
    InstSeqNum temp_sn = (head_inst->seqNum);
    while (temp_sn > lastClearedSeqNum) {
        auto itr_exec_info = tempStore.find(temp_sn);
        if (itr_exec_info != tempStore.end()) {
            InstExecInfo* exec_info_ptr = itr_exec_info->second;
            // Free allocated memory for the info object
            delete exec_info_ptr;
            // Remove entry from temporary store
            tempStore.erase(itr_exec_info);
        }
        temp_sn--;
    }
    // Update the last cleared sequence number to that of the head_inst
    lastClearedSeqNum = head_inst->seqNum;
}

void
ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
{
    // The computation delay is the delay between the completion tick of the
    // inst. pointed to by past_record and the execution tick of its
    // dependent inst. pointed to by new_record.
    int64_t comp_delay = -1;
    Tick execution_tick = 0, completion_tick = 0;

    DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num "
            "%lli.\n", new_record->instNum, past_record->instNum);

    // Get the tick when the node is executed as per the modelling of
    // computation delay
    execution_tick = new_record->getExecuteTick();

    if (past_record->isLoad()) {
        if (new_record->isStore()) {
            completion_tick = past_record->toCommitTick;
        } else {
            completion_tick = past_record->executeTick;
        }
    } else if (past_record->isStore()) {
        completion_tick = past_record->commitTick;
    } else if (past_record->isComp()) {
        completion_tick = past_record->toCommitTick;
    }
    assert(execution_tick >= completion_tick);
    comp_delay = execution_tick - completion_tick;

    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
            execution_tick, completion_tick, comp_delay);

    // Assign the computational delay with respect to the dependency which
    // completes the latest.
    if (new_record->compDelay == -1)
        new_record->compDelay = comp_delay;
    else
        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
            new_record->compDelay);
}

void
ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
                                  TraceInfo* new_record)
{
    // The computation delay is the delay between the completion tick of the
    // inst. pointed to by past_record and the execution tick of its
    // dependent inst. pointed to by new_record.
    int64_t comp_delay = -1;
    Tick execution_tick = 0, completion_tick = 0;

    DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
            " %lli.\n", new_record->instNum, past_record->instNum);

    // Get the tick when the node is executed as per the modelling of
    // computation delay
    execution_tick = new_record->getExecuteTick();

    // When there is a physical register dependency on an instruction, the
    // completion tick of that instruction is when it wrote to the register,
    // that is toCommitTick. In case of a store updating a destination
    // register, this is approximated to commitTick instead.
    if (past_record->isStore()) {
        completion_tick = past_record->commitTick;
    } else {
        completion_tick = past_record->toCommitTick;
    }
    assert(execution_tick >= completion_tick);
    comp_delay = execution_tick - completion_tick;
    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
            execution_tick, completion_tick, comp_delay);

    // Assign the computational delay with respect to the dependency which
    // completes the latest.
    if (new_record->compDelay == -1)
        new_record->compDelay = comp_delay;
    else
        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
            new_record->compDelay);
}
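// A worked example of the min() rule above (illustrative ticks): if a
// record executing at tick 2000 depends on a comp inst that wrote back at
// toCommitTick 1400 and on a load that wrote back at toCommitTick 1800, the
// candidate delays are 600 and 200, and compDelay becomes min(600, 200) =
// 200, i.e. the delay measured from the dependency completing last, which
// is the one that actually gates execution during replay.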
Tick
ElasticTrace::TraceInfo::getExecuteTick() const
{
    if (isLoad()) {
        // Execution tick for a load instruction is when the request was
        // sent, that is executeTick.
        return executeTick;
    } else if (isStore()) {
        // Execution tick for a store instruction is when the request was
        // sent, that is commitTick.
        return commitTick;
    } else {
        // Execution tick for a non load/store instruction is when the
        // register value was written to, that is toCommitTick.
        return toCommitTick;
    }
}

void
ElasticTrace::writeDepTrace(uint32_t num_to_write)
{
    // Write the trace with fields as follows:
    // Instruction sequence number
    // If instruction was a load
    // If instruction was a store
    // If instruction has addr
    // If instruction has size
    // If instruction has flags
    // List of order dependencies - optional, repeated
    // Computational delay with respect to last completed dependency
    // List of physical register RAW dependencies - optional, repeated
    // Weight of a node equal to no. of filtered nodes before it - optional
    uint16_t num_filtered_nodes = 0;
    depTraceItr dep_trace_itr(depTrace.begin());
    depTraceItr dep_trace_itr_start = dep_trace_itr;
    while (num_to_write > 0) {
        TraceInfo* temp_ptr = *dep_trace_itr;
        assert(temp_ptr->type != Record::INVALID);
        // If no node depends on a comp node then there is no reason to
        // track the comp node in the dependency graph. We filter out such
        // nodes but count them and add a weight field to the subsequent
        // node that we do include in the trace.
        if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
            DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
                     "is as follows:\n", temp_ptr->instNum);
            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
                DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
                DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
                         "size %i, flags %i\n", temp_ptr->physAddr,
                         temp_ptr->size, temp_ptr->reqFlags);
            } else {
                DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
            }
            if (firstWin && temp_ptr->compDelay == -1) {
                if (temp_ptr->isLoad()) {
                    temp_ptr->compDelay = temp_ptr->executeTick;
                } else if (temp_ptr->isStore()) {
                    temp_ptr->compDelay = temp_ptr->commitTick;
                } else {
                    temp_ptr->compDelay = temp_ptr->toCommitTick;
                }
            }
            assert(temp_ptr->compDelay != -1);
            DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
                     temp_ptr->compDelay);

            // Create a protobuf message for the dependency record
            ProtoMessage::InstDepRecord dep_pkt;
            dep_pkt.set_seq_num(temp_ptr->instNum);
            dep_pkt.set_type(temp_ptr->type);
            dep_pkt.set_pc(temp_ptr->pc);
            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
                dep_pkt.set_flags(temp_ptr->reqFlags);
                dep_pkt.set_p_addr(temp_ptr->physAddr);
                // If tracing of virtual addresses is enabled, set the
                // optional fields for it.
                if (traceVirtAddr) {
                    dep_pkt.set_v_addr(temp_ptr->virtAddr);
                    dep_pkt.set_asid(temp_ptr->asid);
                }
                dep_pkt.set_size(temp_ptr->size);
            }
            dep_pkt.set_comp_delay(temp_ptr->compDelay);
            if (temp_ptr->robDepList.empty()) {
                DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
            }
            while (!temp_ptr->robDepList.empty()) {
                DPRINTFR(ElasticTrace, "\thas order (rob) dependency on "
                         "%lli\n", temp_ptr->robDepList.front());
                dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
                temp_ptr->robDepList.pop_front();
            }
            if (temp_ptr->physRegDepList.empty()) {
                DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
            }
            while (!temp_ptr->physRegDepList.empty()) {
                DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
                         temp_ptr->physRegDepList.front());
                dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
                temp_ptr->physRegDepList.pop_front();
            }
            if (num_filtered_nodes != 0) {
                // Set the weight of this node as the no. of filtered nodes
                // between this node and the last node that we wrote to the
                // output stream. The weight will be used during replay to
                // model ROB occupancy of filtered nodes.
                dep_pkt.set_weight(num_filtered_nodes);
                num_filtered_nodes = 0;
            }
            // Write the message to the protobuf output stream
            dataTraceStream->write(dep_pkt);
        } else {
            // Don't write the node to the trace but note that we have
            // filtered out a node.
            ++numFilteredNodes;
            ++num_filtered_nodes;
        }
        dep_trace_itr++;
        traceInfoMap.erase(temp_ptr->instNum);
        delete temp_ptr;
        num_to_write--;
    }
    depTrace.erase(dep_trace_itr_start, dep_trace_itr);
}
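// An illustrative filtering sequence for writeDepTrace() (made-up records):
// if records A, B, C, D are processed in order and B and C are comp nodes
// with no dependents, only A and D are written out; D then carries
// weight = 2 so the replay can still model B and C occupying ROB slots.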
void
ElasticTrace::regStats()
{
    ProbeListenerObject::regStats();

    using namespace Stats;
    numRegDep
        .name(name() + ".numRegDep")
        .desc("Number of register dependencies recorded during tracing")
        ;

    numOrderDepStores
        .name(name() + ".numOrderDepStores")
        .desc("Number of commit order (rob) dependencies for a store "
              "recorded on a past load/store during tracing")
        ;

    numIssueOrderDepLoads
        .name(name() + ".numIssueOrderDepLoads")
        .desc("Number of loads that got assigned issue order dependency"
              " because they were dependency-free")
        ;

    numIssueOrderDepStores
        .name(name() + ".numIssueOrderDepStores")
        .desc("Number of stores that got assigned issue order dependency"
              " because they were dependency-free")
        ;

    numIssueOrderDepOther
        .name(name() + ".numIssueOrderDepOther")
        .desc("Number of non load/store insts that got assigned issue order"
              " dependency because they were dependency-free")
        ;

    numFilteredNodes
        .name(name() + ".numFilteredNodes")
        .desc("No. of nodes filtered out before writing the output trace")
        ;

    maxNumDependents
        .name(name() + ".maxNumDependents")
        .desc("Maximum number of dependents on any instruction")
        ;

    maxTempStoreSize
        .name(name() + ".maxTempStoreSize")
        .desc("Maximum size of the temporary store during the run")
        ;

    maxPhysRegDepMapSize
        .name(name() + ".maxPhysRegDepMapSize")
        .desc("Maximum size of register dependency map")
        ;
}

const std::string&
ElasticTrace::TraceInfo::typeToStr() const
{
    return Record::RecordType_Name(type);
}

const std::string
ElasticTrace::name() const
{
    return ProbeListenerObject::name();
}

void
ElasticTrace::flushTraces()
{
    // Write all remaining records in the depTrace to the trace file.
    writeDepTrace(depTrace.size());
    // Delete the stream objects
    delete dataTraceStream;
    delete instTraceStream;
}

ElasticTrace*
ElasticTraceParams::create()
{
    return new ElasticTrace(this);
}
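// A minimal consumer sketch, included for illustration only (the real
// consumer is the TraceCPU): gem5's ProtoInputStream from
// "proto/protoio.hh" reads the messages back in the order written above.
// The file name is a made-up example.
//
//     ProtoInputStream depTrace("system.cpu.traceListener.deptrace.proto.gz");
//     ProtoMessage::InstDepRecordHeader header;
//     if (!depTrace.read(header))
//         panic("Failed to read data dependency trace header");
//     ProtoMessage::InstDepRecord record;
//     while (depTrace.read(record)) {
//         // record.seq_num(), record.type(), record.comp_delay(), and the
//         // repeated rob_dep()/reg_dep() fields reconstruct the graph.
//     }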