1/*
2 * Copyright (c) 2013 - 2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 * Andreas Hansson
39 * Thomas Grass
40 */
41
42#include "cpu/o3/probe/elastic_trace.hh"
43
44#include "base/callback.hh"
45#include "base/output.hh"
46#include "base/trace.hh"
47#include "cpu/reg_class.hh"
48#include "debug/ElasticTrace.hh"
49#include "mem/packet.hh"
50
51ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
52 : ProbeListenerObject(params),
53 regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
54 firstWin(true),
55 lastClearedSeqNum(0),
56 depWindowSize(params->depWindowSize),
57 dataTraceStream(nullptr),
58 instTraceStream(nullptr),
59 startTraceInst(params->startTraceInst),
60 allProbesReg(false),
61 traceVirtAddr(params->traceVirtAddr)
62{
63 cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
64 fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
65 "support dependency tracing.\n", name());
66
67 fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
68 "Recommended size is 3x ROB size in the O3CPU.\n");
69
70 fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\
71 "single-threaded workload only", cpu->numThreads, name());
72 // Initialize the protobuf output stream
73 fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\
74 "trace file path to instFetchTraceFile");
75 fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\
76 "trace file path to dataDepTraceFile");
77 std::string filename = simout.resolve(name() + "." +
78 params->instFetchTraceFile);
79 instTraceStream = new ProtoOutputStream(filename);
80 filename = simout.resolve(name() + "." + params->dataDepTraceFile);
81 dataTraceStream = new ProtoOutputStream(filename);
82 // Create a protobuf message for the header and write it to the stream
83 ProtoMessage::PacketHeader inst_pkt_header;
84 inst_pkt_header.set_obj_id(name());
85 inst_pkt_header.set_tick_freq(SimClock::Frequency);
86 instTraceStream->write(inst_pkt_header);
87 // Create a protobuf message for the header and write it to
88 // the stream
89 ProtoMessage::InstDepRecordHeader data_rec_header;
90 data_rec_header.set_obj_id(name());
91 data_rec_header.set_tick_freq(SimClock::Frequency);
92 data_rec_header.set_window_size(depWindowSize);
93 dataTraceStream->write(data_rec_header);
94 // Register a callback to flush trace records and close the output streams.
95 Callback* cb = new MakeCallback<ElasticTrace,
96 &ElasticTrace::flushTraces>(this);
97 registerExitCallback(cb);
98}
99
100void
101ElasticTrace::regProbeListeners()
102{
103 inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
104 curTick(), startTraceInst);
105 if (startTraceInst == 0) {
106 // If we want to start tracing from the start of the simulation,
107 // register all elastic trace probes now.
108 regEtraceListeners();
109 } else {
110 // Schedule an event to register all elastic trace probes when
111 // specified no. of instructions are committed.
112 cpu->comInstEventQueue[(ThreadID)0]->schedule(&regEtraceListenersEvent,
113 startTraceInst);
114 }
115}
116
117void
118ElasticTrace::regEtraceListeners()
119{
120 assert(!allProbesReg);
121 inform("@%llu: No. of instructions committed = %llu, registering elastic"
122 " probe listeners", curTick(), cpu->numSimulatedInsts());
123 // Create new listeners: provide method to be called upon a notify() for
124 // each probe point.
125 listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
126 "FetchRequest", &ElasticTrace::fetchReqTrace));
127 listeners.push_back(new ProbeListenerArg<ElasticTrace,
128 DynInstConstPtr>(this, "Execute",
129 &ElasticTrace::recordExecTick));
130 listeners.push_back(new ProbeListenerArg<ElasticTrace,
131 DynInstConstPtr>(this, "ToCommit",
132 &ElasticTrace::recordToCommTick));
133 listeners.push_back(new ProbeListenerArg<ElasticTrace,
134 DynInstConstPtr>(this, "Rename",
135 &ElasticTrace::updateRegDep));
136 listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
137 "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
138 listeners.push_back(new ProbeListenerArg<ElasticTrace,
139 DynInstConstPtr>(this, "Squash",
140 &ElasticTrace::addSquashedInst));
141 listeners.push_back(new ProbeListenerArg<ElasticTrace,
142 DynInstConstPtr>(this, "Commit",
143 &ElasticTrace::addCommittedInst));
144 allProbesReg = true;
145}
146
147void
148ElasticTrace::fetchReqTrace(const RequestPtr &req)
149{
150
151 DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
152 (MemCmd::ReadReq),
153 req->getPC(), req->getVaddr(), req->getPaddr(),
154 req->getFlags(), req->getSize(), curTick());
155
156 // Create a protobuf message including the request fields necessary to
157 // recreate the request in the TraceCPU.
158 ProtoMessage::Packet inst_fetch_pkt;
159 inst_fetch_pkt.set_tick(curTick());
160 inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
161 inst_fetch_pkt.set_pc(req->getPC());
162 inst_fetch_pkt.set_flags(req->getFlags());
163 inst_fetch_pkt.set_addr(req->getPaddr());
164 inst_fetch_pkt.set_size(req->getSize());
165 // Write the message to the stream.
166 instTraceStream->write(inst_fetch_pkt);
167}
168
169void
170ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
171{
172
173 // In a corner case, a retired instruction is propagated backward to the
174 // IEW instruction queue to handle some side-channel information. But we
175 // must not process an instruction again. So we test the sequence number
176 // against the lastClearedSeqNum and skip adding the instruction for such
177 // corner cases.
178 if (dyn_inst->seqNum <= lastClearedSeqNum) {
179 DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
180 has already retired (mostly squashed)", dyn_inst->seqNum);
181 // Do nothing as program has proceeded and this inst has been
182 // propagated backwards to handle something.
183 return;
184 }
185
186 DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
187 curTick());
188 // Either the execution info object will already exist if this
189 // instruction had a register dependency recorded in the rename probe
190 // listener before entering execute stage or it will not exist and will
191 // need to be created here.
192 InstExecInfo* exec_info_ptr;
193 auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
194 if (itr_exec_info != tempStore.end()) {
195 exec_info_ptr = itr_exec_info->second;
196 } else {
197 exec_info_ptr = new InstExecInfo;
198 tempStore[dyn_inst->seqNum] = exec_info_ptr;
199 }
200
201 exec_info_ptr->executeTick = curTick();
202 maxTempStoreSize = std::max(tempStore.size(),
203 (std::size_t)maxTempStoreSize.value());
204}
205
206void
207ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
208{
209 // If tracing has just been enabled then the instruction at this stage of
210 // execution is far enough that we cannot gather info about its past like
211 // the tick it started execution. Simply return until we see an instruction
212 // that is found in the tempStore.
213 auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
214 if (itr_exec_info == tempStore.end()) {
215 DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
216 " skipping.\n", dyn_inst->seqNum);
217 return;
218 }
219
220 DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
221 curTick());
222 InstExecInfo* exec_info_ptr = itr_exec_info->second;
223 exec_info_ptr->toCommitTick = curTick();
224
225}
226
227void
228ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
229{
230 // Get the sequence number of the instruction
231 InstSeqNum seq_num = dyn_inst->seqNum;
232
233 assert(dyn_inst->seqNum > lastClearedSeqNum);
234
235 // Since this is the first probe activated in the pipeline, create
236 // a new execution info object to track this instruction as it
237 // progresses through the pipeline.
238 InstExecInfo* exec_info_ptr = new InstExecInfo;
239 tempStore[seq_num] = exec_info_ptr;
240
241 // Loop through the source registers and look up the dependency map. If
242 // the source register entry is found in the dependency map, add a
243 // dependency on the last writer.
244 int8_t max_regs = dyn_inst->numSrcRegs();
245 for (int src_idx = 0; src_idx < max_regs; src_idx++) {
246
247 const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
248 if (!src_reg.isMiscReg() &&
249 !src_reg.isZeroReg()) {
250 // Get the physical register index of the i'th source register.
251 PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
252 DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
253 " %i (%s)\n", seq_num,
254 phys_src_reg->flatIndex(), phys_src_reg->className());
255 auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
256 if (itr_writer != physRegDepMap.end()) {
257 InstSeqNum last_writer = itr_writer->second;
258 // Additionally the dependency distance is kept less than the
259 // window size parameter to limit the memory allocation to
260 // nodes in the graph. If the window were tending to infinite
261 // we would have to load a large number of node objects during
262 // replay.
263 if (seq_num - last_writer < depWindowSize) {
264 // Record a physical register dependency.
265 exec_info_ptr->physRegDepSet.insert(last_writer);
266 }
267 }
268
269 }
270
271 }
272
273 // Loop through the destination registers of this instruction and update
274 // the physical register dependency map for last writers to registers.
275 max_regs = dyn_inst->numDestRegs();
276 for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
277 // For data dependency tracking the register must be an int, float or
278 // CC register and not a Misc register.
279 const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
280 if (!dest_reg.isMiscReg() &&
281 !dest_reg.isZeroReg()) {
282 // Get the physical register index of the i'th destination
283 // register.
284 PhysRegIdPtr phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
285 DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
286 " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
287 dest_reg.className());
288 physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
289 }
290 }
291 maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
292 (std::size_t)maxPhysRegDepMapSize.value());
293}
294
295void
296ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
297{
298 DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
299 inst_reg_pair.second);
300 auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
301 if (itr_regdep_map != physRegDepMap.end())
302 physRegDepMap.erase(itr_regdep_map);
303}
304
305void
306ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
307{
308 // If the squashed instruction was squashed before being processed by
309 // execute stage then it will not be in the temporary store. In this case
310 // do nothing and return.
311 auto itr_exec_info = tempStore.find(head_inst->seqNum);
312 if (itr_exec_info == tempStore.end())
313 return;
314
315 // If there is a squashed load for which a read request was
316 // sent before it got squashed then add it to the trace.
317 DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
318 head_inst->seqNum);
319 // Get pointer to the execution info object corresponding to the inst.
320 InstExecInfo* exec_info_ptr = itr_exec_info->second;
321 if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
322 exec_info_ptr->toCommitTick != MaxTick &&
323 head_inst->hasRequest() &&
324 head_inst->getFault() == NoFault) {
325 // Add record to depTrace with commit parameter as false.
326 addDepTraceRecord(head_inst, exec_info_ptr, false);
327 }
328 // As the information contained is no longer needed, remove the execution
329 // info object from the temporary store.
330 clearTempStoreUntil(head_inst);
331}
332
333void
334ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
335{
336 DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
337 head_inst->seqNum);
338
339 // Add the instruction to the depTrace.
340 if (!head_inst->isNop()) {
341
342 // If tracing has just been enabled then the instruction at this stage
343 // of execution is far enough that we cannot gather info about its past
344 // like the tick it started execution. Simply return until we see an
345 // instruction that is found in the tempStore.
346 auto itr_temp_store = tempStore.find(head_inst->seqNum);
347 if (itr_temp_store == tempStore.end()) {
348 DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
349 "store, skipping.\n", head_inst->seqNum);
350 return;
351 }
352
353 // Get pointer to the execution info object corresponding to the inst.
354 InstExecInfo* exec_info_ptr = itr_temp_store->second;
355 assert(exec_info_ptr->executeTick != MaxTick);
356 assert(exec_info_ptr->toCommitTick != MaxTick);
357
358 // Check if the instruction had a fault, if it predicated false and
359 // thus previous register values were restored or if it was a
360 // load/store that did not have a request (e.g. when the size of the
361 // request is zero). In all these cases the instruction is set as
362 // executed and is picked up by the commit probe listener. But a
363 // request is not issued and registers are not written. So practically,
364 // skipping these should not hurt as execution would not stall on them.
365 // Alternatively, these could be included merely as a compute node in
366 // the graph. Removing these for now. If correlation accuracy needs to
367 // be improved in future these can be turned into comp nodes at the
368 // cost of bigger traces.
369 if (head_inst->getFault() != NoFault) {
370 DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
371 "skip adding it to the trace\n",
372 (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
373 head_inst->seqNum);
374 } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
375 DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
376 "skip adding it to the trace\n", head_inst->seqNum);
377 } else if (!head_inst->readPredicate()) {
378 DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
379 "skip adding it to the trace\n",
380 (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
381 head_inst->seqNum);
382 } else {
383 // Add record to depTrace with commit parameter as true.
384 addDepTraceRecord(head_inst, exec_info_ptr, true);
385 }
386 }
387 // As the information contained is no longer needed, remove the execution
388 // info object from the temporary store.
389 clearTempStoreUntil(head_inst);
390}
391
392void
393ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
394 InstExecInfo* exec_info_ptr, bool commit)
395{
396 // Create a record to assign dynamic intruction related fields.
397 TraceInfo* new_record = new TraceInfo;
398 // Add to map for sequence number look up to retrieve the TraceInfo pointer
399 traceInfoMap[head_inst->seqNum] = new_record;
400
401 // Assign fields from the instruction
402 new_record->instNum = head_inst->seqNum;
403 new_record->commit = commit;
404 new_record->type = head_inst->isLoad() ? Record::LOAD :
405 (head_inst->isStore() ? Record::STORE :
406 Record::COMP);
407
408 // Assign fields for creating a request in case of a load/store
409 new_record->reqFlags = head_inst->memReqFlags;
410 new_record->virtAddr = head_inst->effAddr;
411 new_record->asid = head_inst->asid;
412 new_record->physAddr = head_inst->physEffAddrLow;
412 new_record->physAddr = head_inst->physEffAddr;
413 // Currently the tracing does not support split requests.
414 new_record->size = head_inst->effSize;
415 new_record->pc = head_inst->instAddr();
416
417 // Assign the timing information stored in the execution info object
418 new_record->executeTick = exec_info_ptr->executeTick;
419 new_record->toCommitTick = exec_info_ptr->toCommitTick;
420 new_record->commitTick = curTick();
421
422 // Assign initial values for number of dependents and computational delay
423 new_record->numDepts = 0;
424 new_record->compDelay = -1;
425
426 // The physical register dependency set of the first instruction is
427 // empty. Since there are no records in the depTrace at this point, the
428 // case of adding an ROB dependency by using a reverse iterator is not
429 // applicable. Thus, populate the fields of the record corresponding to the
430 // first instruction and return.
431 if (depTrace.empty()) {
432 // Store the record in depTrace.
433 depTrace.push_back(new_record);
434 DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
435 new_record->instNum);
436 return;
437 }
438
439 // Clear register dependencies for squashed loads as they may be dependent
440 // on squashed instructions and we do not add those to the trace.
441 if (head_inst->isLoad() && !commit) {
442 (exec_info_ptr->physRegDepSet).clear();
443 }
444
445 // Assign the register dependencies stored in the execution info object
446 std::set<InstSeqNum>::const_iterator dep_set_it;
447 for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
448 dep_set_it != (exec_info_ptr->physRegDepSet).end();
449 ++dep_set_it) {
450 auto trace_info_itr = traceInfoMap.find(*dep_set_it);
451 if (trace_info_itr != traceInfoMap.end()) {
452 // The register dependency is valid. Assign it and calculate
453 // computational delay
454 new_record->physRegDepList.push_back(*dep_set_it);
455 DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
456 "%lli\n", new_record->instNum, *dep_set_it);
457 TraceInfo* reg_dep = trace_info_itr->second;
458 reg_dep->numDepts++;
459 compDelayPhysRegDep(reg_dep, new_record);
460 ++numRegDep;
461 } else {
462 // The instruction that this has a register dependency on was
463 // not added to the trace because of one of the following
464 // 1. it was an instruction that had a fault
465 // 2. it was an instruction that was predicated false and
466 // previous register values were restored
467 // 3. it was load/store that did not have a request (e.g. when
468 // the size of the request is zero but this may not be a fault)
469 // In all these cases the instruction is set as executed and is
470 // picked up by the commit probe listener. But a request is not
471 // issued and registers are not written to in these cases.
472 DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
473 "%lli is skipped\n",new_record->instNum, *dep_set_it);
474 }
475 }
476
477 // Check for and assign an ROB dependency in addition to register
478 // dependency before adding the record to the trace.
479 // As stores have to commit in order a store is dependent on the last
480 // committed load/store. This is recorded in the ROB dependency.
481 if (head_inst->isStore()) {
482 // Look up store-after-store order dependency
483 updateCommitOrderDep(new_record, false);
484 // Look up store-after-load order dependency
485 updateCommitOrderDep(new_record, true);
486 }
487
488 // In case a node is dependency-free or its dependency got discarded
489 // because it was outside the window, it is marked ready in the ROB at the
490 // time of issue. A request is sent as soon as possible. To model this, a
491 // node is assigned an issue order dependency on a committed instruction
492 // that completed earlier than it. This is done to avoid the problem of
493 // determining the issue times of such dependency-free nodes during replay
494 // which could lead to too much parallelism, thinking conservatively.
495 if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
496 updateIssueOrderDep(new_record);
497 }
498
499 // Store the record in depTrace.
500 depTrace.push_back(new_record);
501 DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
502 (commit ? "committed" : "squashed"), new_record->instNum);
503
504 // To process the number of records specified by depWindowSize in the
505 // forward direction, the depTrace must have twice as many records
506 // to check for dependencies.
507 if (depTrace.size() == 2 * depWindowSize) {
508
509 DPRINTF(ElasticTrace, "Writing out trace...\n");
510
511 // Write out the records which have been processed to the trace
512 // and remove them from the depTrace.
513 writeDepTrace(depWindowSize);
514
515 // After the first window, writeDepTrace() must check for valid
516 // compDelay.
517 firstWin = false;
518 }
519}
520
521void
522ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
523 bool find_load_not_store)
524{
525 assert(new_record->isStore());
526 // Iterate in reverse direction to search for the last committed
527 // load/store that completed earlier than the new record
528 depTraceRevItr from_itr(depTrace.end());
529 depTraceRevItr until_itr(depTrace.begin());
530 TraceInfo* past_record = *from_itr;
531 uint32_t num_go_back = 0;
532
533 // The execution time of this store is when it is sent, that is committed
534 Tick execute_tick = curTick();
535 // Search for store-after-load or store-after-store order dependency
536 while (num_go_back < depWindowSize && from_itr != until_itr) {
537 if (find_load_not_store) {
538 // Check if previous inst is a load completed earlier by comparing
539 // with execute tick
540 if (hasLoadCompleted(past_record, execute_tick)) {
541 // Assign rob dependency and calculate the computational delay
542 assignRobDep(past_record, new_record);
543 ++numOrderDepStores;
544 return;
545 }
546 } else {
547 // Check if previous inst is a store sent earlier by comparing with
548 // execute tick
549 if (hasStoreCommitted(past_record, execute_tick)) {
550 // Assign rob dependency and calculate the computational delay
551 assignRobDep(past_record, new_record);
552 ++numOrderDepStores;
553 return;
554 }
555 }
556 ++from_itr;
557 past_record = *from_itr;
558 ++num_go_back;
559 }
560}
561
562void
563ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
564{
565 // Interate in reverse direction to search for the last committed
566 // record that completed earlier than the new record
567 depTraceRevItr from_itr(depTrace.end());
568 depTraceRevItr until_itr(depTrace.begin());
569 TraceInfo* past_record = *from_itr;
570
571 uint32_t num_go_back = 0;
572 Tick execute_tick = 0;
573
574 if (new_record->isLoad()) {
575 // The execution time of a load is when a request is sent
576 execute_tick = new_record->executeTick;
577 ++numIssueOrderDepLoads;
578 } else if (new_record->isStore()) {
579 // The execution time of a store is when it is sent, i.e. committed
580 execute_tick = curTick();
581 ++numIssueOrderDepStores;
582 } else {
583 // The execution time of a non load/store is when it completes
584 execute_tick = new_record->toCommitTick;
585 ++numIssueOrderDepOther;
586 }
587
588 // We search if this record has an issue order dependency on a past record.
589 // Once we find it, we update both the new record and the record it depends
590 // on and return.
591 while (num_go_back < depWindowSize && from_itr != until_itr) {
592 // Check if a previous inst is a load sent earlier, or a store sent
593 // earlier, or a comp inst completed earlier by comparing with execute
594 // tick
595 if (hasLoadBeenSent(past_record, execute_tick) ||
596 hasStoreCommitted(past_record, execute_tick) ||
597 hasCompCompleted(past_record, execute_tick)) {
598 // Assign rob dependency and calculate the computational delay
599 assignRobDep(past_record, new_record);
600 return;
601 }
602 ++from_itr;
603 past_record = *from_itr;
604 ++num_go_back;
605 }
606}
607
608void
609ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
610 DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
611 new_record->typeToStr(), new_record->instNum,
612 past_record->instNum);
613 // Add dependency on past record
614 new_record->robDepList.push_back(past_record->instNum);
615 // Update new_record's compute delay with respect to the past record
616 compDelayRob(past_record, new_record);
617 // Increment number of dependents of the past record
618 ++(past_record->numDepts);
619 // Update stat to log max number of dependents
620 maxNumDependents = std::max(past_record->numDepts,
621 (uint32_t)maxNumDependents.value());
622}
623
624bool
625ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
626 Tick execute_tick) const
627{
628 return (past_record->isStore() && past_record->commitTick <= execute_tick);
629}
630
631bool
632ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
633 Tick execute_tick) const
634{
635 return(past_record->isLoad() && past_record->commit &&
636 past_record->toCommitTick <= execute_tick);
637}
638
639bool
640ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
641 Tick execute_tick) const
642{
643 // Check if previous inst is a load sent earlier than this
644 return (past_record->isLoad() && past_record->commit &&
645 past_record->executeTick <= execute_tick);
646}
647
648bool
649ElasticTrace::hasCompCompleted(TraceInfo* past_record,
650 Tick execute_tick) const
651{
652 return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
653}
654
655void
656ElasticTrace::clearTempStoreUntil(const DynInstConstPtr& head_inst)
657{
658 // Clear from temp store starting with the execution info object
659 // corresponding the head_inst and continue clearing by decrementing the
660 // sequence number until the last cleared sequence number.
661 InstSeqNum temp_sn = (head_inst->seqNum);
662 while (temp_sn > lastClearedSeqNum) {
663 auto itr_exec_info = tempStore.find(temp_sn);
664 if (itr_exec_info != tempStore.end()) {
665 InstExecInfo* exec_info_ptr = itr_exec_info->second;
666 // Free allocated memory for the info object
667 delete exec_info_ptr;
668 // Remove entry from temporary store
669 tempStore.erase(itr_exec_info);
670 }
671 temp_sn--;
672 }
673 // Update the last cleared sequence number to that of the head_inst
674 lastClearedSeqNum = head_inst->seqNum;
675}
676
677void
678ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
679{
680 // The computation delay is the delay between the completion tick of the
681 // inst. pointed to by past_record and the execution tick of its dependent
682 // inst. pointed to by new_record.
683 int64_t comp_delay = -1;
684 Tick execution_tick = 0, completion_tick = 0;
685
686 DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
687 new_record->instNum, past_record->instNum);
688
689 // Get the tick when the node is executed as per the modelling of
690 // computation delay
691 execution_tick = new_record->getExecuteTick();
692
693 if (past_record->isLoad()) {
694 if (new_record->isStore()) {
695 completion_tick = past_record->toCommitTick;
696 } else {
697 completion_tick = past_record->executeTick;
698 }
699 } else if (past_record->isStore()) {
700 completion_tick = past_record->commitTick;
701 } else if (past_record->isComp()){
702 completion_tick = past_record->toCommitTick;
703 }
704 assert(execution_tick >= completion_tick);
705 comp_delay = execution_tick - completion_tick;
706
707 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
708 execution_tick, completion_tick, comp_delay);
709
710 // Assign the computational delay with respect to the dependency which
711 // completes the latest.
712 if (new_record->compDelay == -1)
713 new_record->compDelay = comp_delay;
714 else
715 new_record->compDelay = std::min(comp_delay, new_record->compDelay);
716 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
717 new_record->compDelay);
718}
719
720void
721ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
722 TraceInfo* new_record)
723{
724 // The computation delay is the delay between the completion tick of the
725 // inst. pointed to by past_record and the execution tick of its dependent
726 // inst. pointed to by new_record.
727 int64_t comp_delay = -1;
728 Tick execution_tick = 0, completion_tick = 0;
729
730 DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
731 " %lli.\n", new_record->instNum, past_record->instNum);
732
733 // Get the tick when the node is executed as per the modelling of
734 // computation delay
735 execution_tick = new_record->getExecuteTick();
736
737 // When there is a physical register dependency on an instruction, the
738 // completion tick of that instruction is when it wrote to the register,
739 // that is toCommitTick. In case, of a store updating a destination
740 // register, this is approximated to commitTick instead
741 if (past_record->isStore()) {
742 completion_tick = past_record->commitTick;
743 } else {
744 completion_tick = past_record->toCommitTick;
745 }
746 assert(execution_tick >= completion_tick);
747 comp_delay = execution_tick - completion_tick;
748 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
749 execution_tick, completion_tick, comp_delay);
750
751 // Assign the computational delay with respect to the dependency which
752 // completes the latest.
753 if (new_record->compDelay == -1)
754 new_record->compDelay = comp_delay;
755 else
756 new_record->compDelay = std::min(comp_delay, new_record->compDelay);
757 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
758 new_record->compDelay);
759}
760
761Tick
762ElasticTrace::TraceInfo::getExecuteTick() const
763{
764 if (isLoad()) {
765 // Execution tick for a load instruction is when the request was sent,
766 // that is executeTick.
767 return executeTick;
768 } else if (isStore()) {
769 // Execution tick for a store instruction is when the request was sent,
770 // that is commitTick.
771 return commitTick;
772 } else {
773 // Execution tick for a non load/store instruction is when the register
774 // value was written to, that is commitTick.
775 return toCommitTick;
776 }
777}
778
779void
780ElasticTrace::writeDepTrace(uint32_t num_to_write)
781{
782 // Write the trace with fields as follows:
783 // Instruction sequence number
784 // If instruction was a load
785 // If instruction was a store
786 // If instruction has addr
787 // If instruction has size
788 // If instruction has flags
789 // List of order dependencies - optional, repeated
790 // Computational delay with respect to last completed dependency
791 // List of physical register RAW dependencies - optional, repeated
792 // Weight of a node equal to no. of filtered nodes before it - optional
793 uint16_t num_filtered_nodes = 0;
794 depTraceItr dep_trace_itr(depTrace.begin());
795 depTraceItr dep_trace_itr_start = dep_trace_itr;
796 while (num_to_write > 0) {
797 TraceInfo* temp_ptr = *dep_trace_itr;
798 assert(temp_ptr->type != Record::INVALID);
799 // If no node dependends on a comp node then there is no reason to
800 // track the comp node in the dependency graph. We filter out such
801 // nodes but count them and add a weight field to the subsequent node
802 // that we do include in the trace.
803 if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
804 DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
805 "is as follows:\n", temp_ptr->instNum);
806 if (temp_ptr->isLoad() || temp_ptr->isStore()) {
807 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
808 DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
809 "size %i, flags %i\n", temp_ptr->physAddr,
810 temp_ptr->size, temp_ptr->reqFlags);
811 } else {
812 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
813 }
814 if (firstWin && temp_ptr->compDelay == -1) {
815 if (temp_ptr->isLoad()) {
816 temp_ptr->compDelay = temp_ptr->executeTick;
817 } else if (temp_ptr->isStore()) {
818 temp_ptr->compDelay = temp_ptr->commitTick;
819 } else {
820 temp_ptr->compDelay = temp_ptr->toCommitTick;
821 }
822 }
823 assert(temp_ptr->compDelay != -1);
824 DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
825 temp_ptr->compDelay);
826
827 // Create a protobuf message for the dependency record
828 ProtoMessage::InstDepRecord dep_pkt;
829 dep_pkt.set_seq_num(temp_ptr->instNum);
830 dep_pkt.set_type(temp_ptr->type);
831 dep_pkt.set_pc(temp_ptr->pc);
832 if (temp_ptr->isLoad() || temp_ptr->isStore()) {
833 dep_pkt.set_flags(temp_ptr->reqFlags);
834 dep_pkt.set_p_addr(temp_ptr->physAddr);
835 // If tracing of virtual addresses is enabled, set the optional
836 // field for it
837 if (traceVirtAddr) {
838 dep_pkt.set_v_addr(temp_ptr->virtAddr);
839 dep_pkt.set_asid(temp_ptr->asid);
840 }
841 dep_pkt.set_size(temp_ptr->size);
842 }
843 dep_pkt.set_comp_delay(temp_ptr->compDelay);
844 if (temp_ptr->robDepList.empty()) {
845 DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
846 }
847 while (!temp_ptr->robDepList.empty()) {
848 DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
849 temp_ptr->robDepList.front());
850 dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
851 temp_ptr->robDepList.pop_front();
852 }
853 if (temp_ptr->physRegDepList.empty()) {
854 DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
855 }
856 while (!temp_ptr->physRegDepList.empty()) {
857 DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
858 temp_ptr->physRegDepList.front());
859 dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
860 temp_ptr->physRegDepList.pop_front();
861 }
862 if (num_filtered_nodes != 0) {
863 // Set the weight of this node as the no. of filtered nodes
864 // between this node and the last node that we wrote to output
865 // stream. The weight will be used during replay to model ROB
866 // occupancy of filtered nodes.
867 dep_pkt.set_weight(num_filtered_nodes);
868 num_filtered_nodes = 0;
869 }
870 // Write the message to the protobuf output stream
871 dataTraceStream->write(dep_pkt);
872 } else {
873 // Don't write the node to the trace but note that we have filtered
874 // out a node.
875 ++numFilteredNodes;
876 ++num_filtered_nodes;
877 }
878 dep_trace_itr++;
879 traceInfoMap.erase(temp_ptr->instNum);
880 delete temp_ptr;
881 num_to_write--;
882 }
883 depTrace.erase(dep_trace_itr_start, dep_trace_itr);
884}
885
886void
887ElasticTrace::regStats() {
888 ProbeListenerObject::regStats();
889
890 using namespace Stats;
891 numRegDep
892 .name(name() + ".numRegDep")
893 .desc("Number of register dependencies recorded during tracing")
894 ;
895
896 numOrderDepStores
897 .name(name() + ".numOrderDepStores")
898 .desc("Number of commit order (rob) dependencies for a store recorded"
899 " on a past load/store during tracing")
900 ;
901
902 numIssueOrderDepLoads
903 .name(name() + ".numIssueOrderDepLoads")
904 .desc("Number of loads that got assigned issue order dependency"
905 " because they were dependency-free")
906 ;
907
908 numIssueOrderDepStores
909 .name(name() + ".numIssueOrderDepStores")
910 .desc("Number of stores that got assigned issue order dependency"
911 " because they were dependency-free")
912 ;
913
914 numIssueOrderDepOther
915 .name(name() + ".numIssueOrderDepOther")
916 .desc("Number of non load/store insts that got assigned issue order"
917 " dependency because they were dependency-free")
918 ;
919
920 numFilteredNodes
921 .name(name() + ".numFilteredNodes")
922 .desc("No. of nodes filtered out before writing the output trace")
923 ;
924
925 maxNumDependents
926 .name(name() + ".maxNumDependents")
927 .desc("Maximum number or dependents on any instruction")
928 ;
929
930 maxTempStoreSize
931 .name(name() + ".maxTempStoreSize")
932 .desc("Maximum size of the temporary store during the run")
933 ;
934
935 maxPhysRegDepMapSize
936 .name(name() + ".maxPhysRegDepMapSize")
937 .desc("Maximum size of register dependency map")
938 ;
939}
940
941const std::string&
942ElasticTrace::TraceInfo::typeToStr() const
943{
944 return Record::RecordType_Name(type);
945}
946
947const std::string
948ElasticTrace::name() const
949{
950 return ProbeListenerObject::name();
951}
952
953void
954ElasticTrace::flushTraces()
955{
956 // Write to trace all records in the depTrace.
957 writeDepTrace(depTrace.size());
958 // Delete the stream objects
959 delete dataTraceStream;
960 delete instTraceStream;
961}
962
963ElasticTrace*
964ElasticTraceParams::create()
965{
966 return new ElasticTrace(this);
967}