elastic_trace.cc revision 12106
1/*
2 * Copyright (c) 2013 - 2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 *          Andreas Hansson
39 *          Thomas Grass
40 */
41
42#include "cpu/o3/probe/elastic_trace.hh"
43
44#include "base/callback.hh"
45#include "base/output.hh"
46#include "base/trace.hh"
47#include "cpu/reg_class.hh"
48#include "debug/ElasticTrace.hh"
49#include "mem/packet.hh"
50
51ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
52    :  ProbeListenerObject(params),
53       regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
54       firstWin(true),
55       lastClearedSeqNum(0),
56       depWindowSize(params->depWindowSize),
57       dataTraceStream(nullptr),
58       instTraceStream(nullptr),
59       startTraceInst(params->startTraceInst),
60       allProbesReg(false),
61       traceVirtAddr(params->traceVirtAddr)
62{
63    cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
64    fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
65                "support dependency tracing.\n", name());
66
67    fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
68                "Recommended size is 3x ROB size in the O3CPU.\n");
69
70    fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\
71                "single-threaded workload only", cpu->numThreads, name());
72    // Initialize the protobuf output stream
73    fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\
74                "trace file path to instFetchTraceFile");
75    fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\
76                "trace file path to dataDepTraceFile");
77    std::string filename = simout.resolve(name() + "." +
78                                            params->instFetchTraceFile);
79    instTraceStream = new ProtoOutputStream(filename);
80    filename = simout.resolve(name() + "." + params->dataDepTraceFile);
81    dataTraceStream = new ProtoOutputStream(filename);
82    // Create a protobuf message for the header and write it to the stream
83    ProtoMessage::PacketHeader inst_pkt_header;
84    inst_pkt_header.set_obj_id(name());
85    inst_pkt_header.set_tick_freq(SimClock::Frequency);
86    instTraceStream->write(inst_pkt_header);
87    // Create a protobuf message for the header and write it to
88    // the stream
89    ProtoMessage::InstDepRecordHeader data_rec_header;
90    data_rec_header.set_obj_id(name());
91    data_rec_header.set_tick_freq(SimClock::Frequency);
92    data_rec_header.set_window_size(depWindowSize);
93    dataTraceStream->write(data_rec_header);
94    // Register a callback to flush trace records and close the output streams.
95    Callback* cb = new MakeCallback<ElasticTrace,
96        &ElasticTrace::flushTraces>(this);
97    registerExitCallback(cb);
98}
99
100void
101ElasticTrace::regProbeListeners()
102{
103    inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
104        curTick(), startTraceInst);
105    if (startTraceInst == 0) {
106        // If we want to start tracing from the start of the simulation,
107        // register all elastic trace probes now.
108        regEtraceListeners();
109    } else {
110        // Schedule an event to register all elastic trace probes when
111        // specified no. of instructions are committed.
112        cpu->comInstEventQueue[(ThreadID)0]->schedule(&regEtraceListenersEvent,
113                                                      startTraceInst);
114    }
115}
116
117void
118ElasticTrace::regEtraceListeners()
119{
120    assert(!allProbesReg);
121    inform("@%llu: No. of instructions committed = %llu, registering elastic"
122        " probe listeners", curTick(), cpu->numSimulatedInsts());
123    // Create new listeners: provide method to be called upon a notify() for
124    // each probe point.
125    listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
126                        "FetchRequest", &ElasticTrace::fetchReqTrace));
127    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
128                        "Execute", &ElasticTrace::recordExecTick));
129    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
130                        "ToCommit", &ElasticTrace::recordToCommTick));
131    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
132                        "Rename", &ElasticTrace::updateRegDep));
133    listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
134                        "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
135    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
136                        "Squash", &ElasticTrace::addSquashedInst));
137    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
138                        "Commit", &ElasticTrace::addCommittedInst));
139    allProbesReg = true;
140}
141
142void
143ElasticTrace::fetchReqTrace(const RequestPtr &req)
144{
145
146    DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
147             (MemCmd::ReadReq),
148             req->getPC(), req->getVaddr(), req->getPaddr(),
149             req->getFlags(), req->getSize(), curTick());
150
151    // Create a protobuf message including the request fields necessary to
152    // recreate the request in the TraceCPU.
153    ProtoMessage::Packet inst_fetch_pkt;
154    inst_fetch_pkt.set_tick(curTick());
155    inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
156    inst_fetch_pkt.set_pc(req->getPC());
157    inst_fetch_pkt.set_flags(req->getFlags());
158    inst_fetch_pkt.set_addr(req->getPaddr());
159    inst_fetch_pkt.set_size(req->getSize());
160    // Write the message to the stream.
161    instTraceStream->write(inst_fetch_pkt);
162}
163
164void
165ElasticTrace::recordExecTick(const DynInstPtr &dyn_inst)
166{
167
168    // In a corner case, a retired instruction is propagated backward to the
169    // IEW instruction queue to handle some side-channel information. But we
170    // must not process an instruction again. So we test the sequence number
171    // against the lastClearedSeqNum and skip adding the instruction for such
172    // corner cases.
173    if (dyn_inst->seqNum <= lastClearedSeqNum) {
174        DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
175        has already retired (mostly squashed)", dyn_inst->seqNum);
176        // Do nothing as program has proceeded and this inst has been
177        // propagated backwards to handle something.
178        return;
179    }
180
181    DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
182                curTick());
183    // Either the execution info object will already exist if this
184    // instruction had a register dependency recorded in the rename probe
185    // listener before entering execute stage or it will not exist and will
186    // need to be created here.
187    InstExecInfo* exec_info_ptr;
188    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
189    if (itr_exec_info != tempStore.end()) {
190        exec_info_ptr = itr_exec_info->second;
191    } else {
192        exec_info_ptr = new InstExecInfo;
193        tempStore[dyn_inst->seqNum] = exec_info_ptr;
194    }
195
196    exec_info_ptr->executeTick = curTick();
197    maxTempStoreSize = std::max(tempStore.size(),
198                                (std::size_t)maxTempStoreSize.value());
199}
200
201void
202ElasticTrace::recordToCommTick(const DynInstPtr &dyn_inst)
203{
204    // If tracing has just been enabled then the instruction at this stage of
205    // execution is far enough that we cannot gather info about its past like
206    // the tick it started execution. Simply return until we see an instruction
207    // that is found in the tempStore.
208    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
209    if (itr_exec_info == tempStore.end()) {
210        DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
211                    " skipping.\n", dyn_inst->seqNum);
212        return;
213    }
214
215    DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
216                curTick());
217    InstExecInfo* exec_info_ptr = itr_exec_info->second;
218    exec_info_ptr->toCommitTick = curTick();
219
220}
221
222void
223ElasticTrace::updateRegDep(const DynInstPtr &dyn_inst)
224{
225    // Get the sequence number of the instruction
226    InstSeqNum seq_num = dyn_inst->seqNum;
227
228    assert(dyn_inst->seqNum > lastClearedSeqNum);
229
230    // Since this is the first probe activated in the pipeline, create
231    // a new execution info object to track this instruction as it
232    // progresses through the pipeline.
233    InstExecInfo* exec_info_ptr = new InstExecInfo;
234    tempStore[seq_num] = exec_info_ptr;
235
236    // Loop through the source registers and look up the dependency map. If
237    // the source register entry is found in the dependency map, add a
238    // dependency on the last writer.
239    int8_t max_regs = dyn_inst->numSrcRegs();
240    for (int src_idx = 0; src_idx < max_regs; src_idx++) {
241        // Get the physical register index of the i'th source register.
242        PhysRegIdPtr src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
243        DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
244                 " %i (%s)\n", seq_num,
245                 src_reg->index(), src_reg->className());
246        auto itr_last_writer = physRegDepMap.find(src_reg->flatIndex());
247        if (itr_last_writer != physRegDepMap.end()) {
248            InstSeqNum last_writer = itr_last_writer->second;
249            // Additionally the dependency distance is kept less than the window
250            // size parameter to limit the memory allocation to nodes in the
251            // graph. If the window were tending to infinite we would have to
252            // load a large number of node objects during replay.
253            if (seq_num - last_writer < depWindowSize) {
254                // Record a physical register dependency.
255                exec_info_ptr->physRegDepSet.insert(last_writer);
256            }
257        }
258    }
259
260    // Loop through the destination registers of this instruction and update
261    // the physical register dependency map for last writers to registers.
262    max_regs = dyn_inst->numDestRegs();
263    for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
264        // For data dependency tracking the register must be an int, float or
265        // CC register and not a Misc register.
266        const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
267        if (!dest_reg.isMiscReg() &&
268            !dest_reg.isZeroReg()) {
269            // Get the physical register index of the i'th destination
270            // register.
271            PhysRegIdPtr phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
272            DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
273                     " %i (%s)\n", seq_num, dest_reg.index(),
274                     dest_reg.className());
275            physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
276        }
277    }
278    maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
279                                    (std::size_t)maxPhysRegDepMapSize.value());
280}
281
282void
283ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
284{
285    DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
286            inst_reg_pair.second);
287    auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
288    if (itr_regdep_map != physRegDepMap.end())
289        physRegDepMap.erase(itr_regdep_map);
290}
291
292void
293ElasticTrace::addSquashedInst(const DynInstPtr &head_inst)
294{
295    // If the squashed instruction was squashed before being processed by
296    // execute stage then it will not be in the temporary store. In this case
297    // do nothing and return.
298    auto itr_exec_info = tempStore.find(head_inst->seqNum);
299    if (itr_exec_info == tempStore.end())
300        return;
301
302    // If there is a squashed load for which a read request was
303    // sent before it got squashed then add it to the trace.
304    DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
305                head_inst->seqNum);
306    // Get pointer to the execution info object corresponding to the inst.
307    InstExecInfo* exec_info_ptr = itr_exec_info->second;
308    if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
309        exec_info_ptr->toCommitTick != MaxTick &&
310        head_inst->hasRequest() &&
311        head_inst->getFault() == NoFault) {
312        // Add record to depTrace with commit parameter as false.
313        addDepTraceRecord(head_inst, exec_info_ptr, false);
314    }
315    // As the information contained is no longer needed, remove the execution
316    // info object from the temporary store.
317    clearTempStoreUntil(head_inst);
318}
319
320void
321ElasticTrace::addCommittedInst(const DynInstPtr &head_inst)
322{
323    DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
324                head_inst->seqNum);
325
326    // Add the instruction to the depTrace.
327    if (!head_inst->isNop()) {
328
329        // If tracing has just been enabled then the instruction at this stage
330        // of execution is far enough that we cannot gather info about its past
331        // like the tick it started execution. Simply return until we see an
332        // instruction that is found in the tempStore.
333        auto itr_temp_store = tempStore.find(head_inst->seqNum);
334        if (itr_temp_store == tempStore.end()) {
335            DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
336                "store, skipping.\n", head_inst->seqNum);
337            return;
338        }
339
340        // Get pointer to the execution info object corresponding to the inst.
341        InstExecInfo* exec_info_ptr = itr_temp_store->second;
342        assert(exec_info_ptr->executeTick != MaxTick);
343        assert(exec_info_ptr->toCommitTick != MaxTick);
344
345        // Check if the instruction had a fault, if it predicated false and
346        // thus previous register values were restored or if it was a
347        // load/store that did not have a request (e.g. when the size of the
348        // request is zero). In all these cases the instruction is set as
349        // executed and is picked up by the commit probe listener. But a
350        // request is not issued and registers are not written. So practically,
351        // skipping these should not hurt as execution would not stall on them.
352        // Alternatively, these could be included merely as a compute node in
353        // the graph. Removing these for now. If correlation accuracy needs to
354        // be improved in future these can be turned into comp nodes at the
355        // cost of bigger traces.
356        if (head_inst->getFault() != NoFault) {
357            DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
358                    "skip adding it to the trace\n",
359                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
360                    head_inst->seqNum);
361        } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
362            DPRINTF(ElasticTrace, "Load/store [sn:%lli]  has no request so "
363                    "skip adding it to the trace\n", head_inst->seqNum);
364        } else if (!head_inst->readPredicate()) {
365            DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
366                    "skip adding it to the trace\n",
367                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
368                    head_inst->seqNum);
369        } else {
370            // Add record to depTrace with commit parameter as true.
371            addDepTraceRecord(head_inst, exec_info_ptr, true);
372        }
373    }
374    // As the information contained is no longer needed, remove the execution
375    // info object from the temporary store.
376    clearTempStoreUntil(head_inst);
377}
378
379void
380ElasticTrace::addDepTraceRecord(const DynInstPtr &head_inst,
381                                InstExecInfo* exec_info_ptr, bool commit)
382{
383    // Create a record to assign dynamic intruction related fields.
384    TraceInfo* new_record = new TraceInfo;
385    // Add to map for sequence number look up to retrieve the TraceInfo pointer
386    traceInfoMap[head_inst->seqNum] = new_record;
387
388    // Assign fields from the instruction
389    new_record->instNum = head_inst->seqNum;
390    new_record->commit = commit;
391    new_record->type = head_inst->isLoad() ? Record::LOAD :
392                        (head_inst->isStore() ? Record::STORE :
393                        Record::COMP);
394
395    // Assign fields for creating a request in case of a load/store
396    new_record->reqFlags = head_inst->memReqFlags;
397    new_record->virtAddr = head_inst->effAddr;
398    new_record->asid = head_inst->asid;
399    new_record->physAddr = head_inst->physEffAddrLow;
400    // Currently the tracing does not support split requests.
401    new_record->size = head_inst->effSize;
402    new_record->pc = head_inst->instAddr();
403
404    // Assign the timing information stored in the execution info object
405    new_record->executeTick = exec_info_ptr->executeTick;
406    new_record->toCommitTick = exec_info_ptr->toCommitTick;
407    new_record->commitTick = curTick();
408
409    // Assign initial values for number of dependents and computational delay
410    new_record->numDepts = 0;
411    new_record->compDelay = -1;
412
413    // The physical register dependency set of the first instruction is
414    // empty. Since there are no records in the depTrace at this point, the
415    // case of adding an ROB dependency by using a reverse iterator is not
416    // applicable. Thus, populate the fields of the record corresponding to the
417    // first instruction and return.
418    if (depTrace.empty()) {
419        // Store the record in depTrace.
420        depTrace.push_back(new_record);
421        DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
422                new_record->instNum);
423        return;
424    }
425
426    // Clear register dependencies for squashed loads as they may be dependent
427    // on squashed instructions and we do not add those to the trace.
428    if (head_inst->isLoad() && !commit) {
429         (exec_info_ptr->physRegDepSet).clear();
430    }
431
432    // Assign the register dependencies stored in the execution info object
433    std::set<InstSeqNum>::const_iterator dep_set_it;
434    for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
435         dep_set_it != (exec_info_ptr->physRegDepSet).end();
436         ++dep_set_it) {
437        auto trace_info_itr = traceInfoMap.find(*dep_set_it);
438        if (trace_info_itr != traceInfoMap.end()) {
439            // The register dependency is valid. Assign it and calculate
440            // computational delay
441            new_record->physRegDepList.push_back(*dep_set_it);
442            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
443                    "%lli\n", new_record->instNum, *dep_set_it);
444            TraceInfo* reg_dep = trace_info_itr->second;
445            reg_dep->numDepts++;
446            compDelayPhysRegDep(reg_dep, new_record);
447            ++numRegDep;
448        } else {
449            // The instruction that this has a register dependency on was
450            // not added to the trace because of one of the following
451            // 1. it was an instruction that had a fault
452            // 2. it was an instruction that was predicated false and
453            // previous register values were restored
454            // 3. it was load/store that did not have a request (e.g. when
455            // the size of the request is zero but this may not be a fault)
456            // In all these cases the instruction is set as executed and is
457            // picked up by the commit probe listener. But a request is not
458            // issued and registers are not written to in these cases.
459            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
460                    "%lli is skipped\n",new_record->instNum, *dep_set_it);
461        }
462    }
463
464    // Check for and assign an ROB dependency in addition to register
465    // dependency before adding the record to the trace.
466    // As stores have to commit in order a store is dependent on the last
467    // committed load/store. This is recorded in the ROB dependency.
468    if (head_inst->isStore()) {
469        // Look up store-after-store order dependency
470        updateCommitOrderDep(new_record, false);
471        // Look up store-after-load order dependency
472        updateCommitOrderDep(new_record, true);
473    }
474
475    // In case a node is dependency-free or its dependency got discarded
476    // because it was outside the window, it is marked ready in the ROB at the
477    // time of issue. A request is sent as soon as possible. To model this, a
478    // node is assigned an issue order dependency on a committed instruction
479    // that completed earlier than it. This is done to avoid the problem of
480    // determining the issue times of such dependency-free nodes during replay
481    // which could lead to too much parallelism, thinking conservatively.
482    if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
483        updateIssueOrderDep(new_record);
484    }
485
486    // Store the record in depTrace.
487    depTrace.push_back(new_record);
488    DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
489            (commit ? "committed" : "squashed"), new_record->instNum);
490
491    // To process the number of records specified by depWindowSize in the
492    // forward direction, the depTrace must have twice as many records
493    // to check for dependencies.
494    if (depTrace.size() == 2 * depWindowSize) {
495
496        DPRINTF(ElasticTrace, "Writing out trace...\n");
497
498        // Write out the records which have been processed to the trace
499        // and remove them from the depTrace.
500        writeDepTrace(depWindowSize);
501
502        // After the first window, writeDepTrace() must check for valid
503        // compDelay.
504        firstWin = false;
505    }
506}
507
508void
509ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
510                                    bool find_load_not_store)
511{
512    assert(new_record->isStore());
513    // Iterate in reverse direction to search for the last committed
514    // load/store that completed earlier than the new record
515    depTraceRevItr from_itr(depTrace.end());
516    depTraceRevItr until_itr(depTrace.begin());
517    TraceInfo* past_record = *from_itr;
518    uint32_t num_go_back = 0;
519
520    // The execution time of this store is when it is sent, that is committed
521    Tick execute_tick = curTick();
522    // Search for store-after-load or store-after-store order dependency
523    while (num_go_back < depWindowSize && from_itr != until_itr) {
524        if (find_load_not_store) {
525            // Check if previous inst is a load completed earlier by comparing
526            // with execute tick
527            if (hasLoadCompleted(past_record, execute_tick)) {
528                // Assign rob dependency and calculate the computational delay
529                assignRobDep(past_record, new_record);
530                ++numOrderDepStores;
531                return;
532            }
533        } else {
534            // Check if previous inst is a store sent earlier by comparing with
535            // execute tick
536            if (hasStoreCommitted(past_record, execute_tick)) {
537                // Assign rob dependency and calculate the computational delay
538                assignRobDep(past_record, new_record);
539                ++numOrderDepStores;
540                return;
541            }
542        }
543        ++from_itr;
544        past_record = *from_itr;
545        ++num_go_back;
546    }
547}
548
549void
550ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
551{
552    // Interate in reverse direction to search for the last committed
553    // record that completed earlier than the new record
554    depTraceRevItr from_itr(depTrace.end());
555    depTraceRevItr until_itr(depTrace.begin());
556    TraceInfo* past_record = *from_itr;
557
558    uint32_t num_go_back = 0;
559    Tick execute_tick = 0;
560
561    if (new_record->isLoad()) {
562        // The execution time of a load is when a request is sent
563        execute_tick = new_record->executeTick;
564        ++numIssueOrderDepLoads;
565    } else if (new_record->isStore()) {
566        // The execution time of a store is when it is sent, i.e. committed
567        execute_tick = curTick();
568        ++numIssueOrderDepStores;
569    } else {
570        // The execution time of a non load/store is when it completes
571        execute_tick = new_record->toCommitTick;
572        ++numIssueOrderDepOther;
573    }
574
575    // We search if this record has an issue order dependency on a past record.
576    // Once we find it, we update both the new record and the record it depends
577    // on and return.
578    while (num_go_back < depWindowSize && from_itr != until_itr) {
579        // Check if a previous inst is a load sent earlier, or a store sent
580        // earlier, or a comp inst completed earlier by comparing with execute
581        // tick
582        if (hasLoadBeenSent(past_record, execute_tick) ||
583            hasStoreCommitted(past_record, execute_tick) ||
584            hasCompCompleted(past_record, execute_tick)) {
585            // Assign rob dependency and calculate the computational delay
586            assignRobDep(past_record, new_record);
587            return;
588        }
589        ++from_itr;
590        past_record = *from_itr;
591        ++num_go_back;
592    }
593}
594
595void
596ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
597    DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
598            new_record->typeToStr(), new_record->instNum,
599            past_record->instNum);
600    // Add dependency on past record
601    new_record->robDepList.push_back(past_record->instNum);
602    // Update new_record's compute delay with respect to the past record
603    compDelayRob(past_record, new_record);
604    // Increment number of dependents of the past record
605    ++(past_record->numDepts);
606    // Update stat to log max number of dependents
607    maxNumDependents = std::max(past_record->numDepts,
608                                (uint32_t)maxNumDependents.value());
609}
610
611bool
612ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
613                                    Tick execute_tick) const
614{
615    return (past_record->isStore() && past_record->commitTick <= execute_tick);
616}
617
618bool
619ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
620                                    Tick execute_tick) const
621{
622    return(past_record->isLoad() && past_record->commit &&
623                past_record->toCommitTick <= execute_tick);
624}
625
626bool
627ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
628                                Tick execute_tick) const
629{
630    // Check if previous inst is a load sent earlier than this
631    return (past_record->isLoad() && past_record->commit &&
632        past_record->executeTick <= execute_tick);
633}
634
635bool
636ElasticTrace::hasCompCompleted(TraceInfo* past_record,
637                                    Tick execute_tick) const
638{
639    return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
640}
641
642void
643ElasticTrace::clearTempStoreUntil(const DynInstPtr head_inst)
644{
645    // Clear from temp store starting with the execution info object
646    // corresponding the head_inst and continue clearing by decrementing the
647    // sequence number until the last cleared sequence number.
648    InstSeqNum temp_sn = (head_inst->seqNum);
649    while (temp_sn > lastClearedSeqNum) {
650        auto itr_exec_info = tempStore.find(temp_sn);
651        if (itr_exec_info != tempStore.end()) {
652            InstExecInfo* exec_info_ptr = itr_exec_info->second;
653            // Free allocated memory for the info object
654            delete exec_info_ptr;
655            // Remove entry from temporary store
656            tempStore.erase(itr_exec_info);
657        }
658        temp_sn--;
659    }
660    // Update the last cleared sequence number to that of the head_inst
661    lastClearedSeqNum = head_inst->seqNum;
662}
663
664void
665ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
666{
667    // The computation delay is the delay between the completion tick of the
668    // inst. pointed to by past_record and the execution tick of its dependent
669    // inst. pointed to by new_record.
670    int64_t comp_delay = -1;
671    Tick execution_tick = 0, completion_tick = 0;
672
673    DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
674            new_record->instNum, past_record->instNum);
675
676    // Get the tick when the node is executed as per the modelling of
677    // computation delay
678    execution_tick = new_record->getExecuteTick();
679
680    if (past_record->isLoad()) {
681        if (new_record->isStore()) {
682            completion_tick = past_record->toCommitTick;
683        } else {
684            completion_tick = past_record->executeTick;
685        }
686    } else if (past_record->isStore()) {
687        completion_tick = past_record->commitTick;
688    } else if (past_record->isComp()){
689        completion_tick = past_record->toCommitTick;
690    }
691    assert(execution_tick >= completion_tick);
692    comp_delay = execution_tick - completion_tick;
693
694    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
695            execution_tick, completion_tick, comp_delay);
696
697    // Assign the computational delay with respect to the dependency which
698    // completes the latest.
699    if (new_record->compDelay == -1)
700        new_record->compDelay = comp_delay;
701    else
702        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
703    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
704            new_record->compDelay);
705}
706
707void
708ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
709                                    TraceInfo* new_record)
710{
711    // The computation delay is the delay between the completion tick of the
712    // inst. pointed to by past_record and the execution tick of its dependent
713    // inst. pointed to by new_record.
714    int64_t comp_delay = -1;
715    Tick execution_tick = 0, completion_tick = 0;
716
717    DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
718            " %lli.\n", new_record->instNum, past_record->instNum);
719
720    // Get the tick when the node is executed as per the modelling of
721    // computation delay
722    execution_tick = new_record->getExecuteTick();
723
724    // When there is a physical register dependency on an instruction, the
725    // completion tick of that instruction is when it wrote to the register,
726    // that is toCommitTick. In case, of a store updating a destination
727    // register, this is approximated to commitTick instead
728    if (past_record->isStore()) {
729        completion_tick = past_record->commitTick;
730    } else {
731        completion_tick = past_record->toCommitTick;
732    }
733    assert(execution_tick >= completion_tick);
734    comp_delay = execution_tick - completion_tick;
735    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
736            execution_tick, completion_tick, comp_delay);
737
738    // Assign the computational delay with respect to the dependency which
739    // completes the latest.
740    if (new_record->compDelay == -1)
741        new_record->compDelay = comp_delay;
742    else
743        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
744    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
745            new_record->compDelay);
746}
747
748Tick
749ElasticTrace::TraceInfo::getExecuteTick() const
750{
751    if (isLoad()) {
752        // Execution tick for a load instruction is when the request was sent,
753        // that is executeTick.
754        return executeTick;
755    } else if (isStore()) {
756        // Execution tick for a store instruction is when the request was sent,
757        // that is commitTick.
758        return commitTick;
759    } else {
760        // Execution tick for a non load/store instruction is when the register
761        // value was written to, that is commitTick.
762        return toCommitTick;
763    }
764}
765
766void
767ElasticTrace::writeDepTrace(uint32_t num_to_write)
768{
769    // Write the trace with fields as follows:
770    // Instruction sequence number
771    // If instruction was a load
772    // If instruction was a store
773    // If instruction has addr
774    // If instruction has size
775    // If instruction has flags
776    // List of order dependencies - optional, repeated
777    // Computational delay with respect to last completed dependency
778    // List of physical register RAW dependencies - optional, repeated
779    // Weight of a node equal to no. of filtered nodes before it - optional
780    uint16_t num_filtered_nodes = 0;
781    depTraceItr dep_trace_itr(depTrace.begin());
782    depTraceItr dep_trace_itr_start = dep_trace_itr;
783    while (num_to_write > 0) {
784        TraceInfo* temp_ptr = *dep_trace_itr;
785        assert(temp_ptr->type != Record::INVALID);
786        // If no node dependends on a comp node then there is no reason to
787        // track the comp node in the dependency graph. We filter out such
788        // nodes but count them and add a weight field to the subsequent node
789        // that we do include in the trace.
790        if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
791            DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
792                     "is as follows:\n", temp_ptr->instNum);
793            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
794                DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
795                DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
796                         "size %i, flags %i\n", temp_ptr->physAddr,
797                         temp_ptr->size, temp_ptr->reqFlags);
798            } else {
799                 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
800            }
801            if (firstWin && temp_ptr->compDelay == -1) {
802                if (temp_ptr->isLoad()) {
803                    temp_ptr->compDelay = temp_ptr->executeTick;
804                } else if (temp_ptr->isStore()) {
805                    temp_ptr->compDelay = temp_ptr->commitTick;
806                } else {
807                    temp_ptr->compDelay = temp_ptr->toCommitTick;
808                }
809            }
810            assert(temp_ptr->compDelay != -1);
811            DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
812                     temp_ptr->compDelay);
813
814            // Create a protobuf message for the dependency record
815            ProtoMessage::InstDepRecord dep_pkt;
816            dep_pkt.set_seq_num(temp_ptr->instNum);
817            dep_pkt.set_type(temp_ptr->type);
818            dep_pkt.set_pc(temp_ptr->pc);
819            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
820                dep_pkt.set_flags(temp_ptr->reqFlags);
821                dep_pkt.set_p_addr(temp_ptr->physAddr);
822                // If tracing of virtual addresses is enabled, set the optional
823                // field for it
824                if (traceVirtAddr) {
825                    dep_pkt.set_v_addr(temp_ptr->virtAddr);
826                    dep_pkt.set_asid(temp_ptr->asid);
827                }
828                dep_pkt.set_size(temp_ptr->size);
829            }
830            dep_pkt.set_comp_delay(temp_ptr->compDelay);
831            if (temp_ptr->robDepList.empty()) {
832                DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
833            }
834            while (!temp_ptr->robDepList.empty()) {
835                DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
836                         temp_ptr->robDepList.front());
837                dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
838                temp_ptr->robDepList.pop_front();
839            }
840            if (temp_ptr->physRegDepList.empty()) {
841                DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
842            }
843            while (!temp_ptr->physRegDepList.empty()) {
844                DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
845                         temp_ptr->physRegDepList.front());
846                dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
847                temp_ptr->physRegDepList.pop_front();
848            }
849            if (num_filtered_nodes != 0) {
850                // Set the weight of this node as the no. of filtered nodes
851                // between this node and the last node that we wrote to output
852                // stream. The weight will be used during replay to model ROB
853                // occupancy of filtered nodes.
854                dep_pkt.set_weight(num_filtered_nodes);
855                num_filtered_nodes = 0;
856            }
857            // Write the message to the protobuf output stream
858            dataTraceStream->write(dep_pkt);
859        } else {
860            // Don't write the node to the trace but note that we have filtered
861            // out a node.
862            ++numFilteredNodes;
863            ++num_filtered_nodes;
864        }
865        dep_trace_itr++;
866        traceInfoMap.erase(temp_ptr->instNum);
867        delete temp_ptr;
868        num_to_write--;
869    }
870    depTrace.erase(dep_trace_itr_start, dep_trace_itr);
871}
872
873void
874ElasticTrace::regStats() {
875    ProbeListenerObject::regStats();
876
877    using namespace Stats;
878    numRegDep
879        .name(name() + ".numRegDep")
880        .desc("Number of register dependencies recorded during tracing")
881        ;
882
883    numOrderDepStores
884        .name(name() + ".numOrderDepStores")
885        .desc("Number of commit order (rob) dependencies for a store recorded"
886              " on a past load/store during tracing")
887        ;
888
889    numIssueOrderDepLoads
890        .name(name() + ".numIssueOrderDepLoads")
891        .desc("Number of loads that got assigned issue order dependency"
892              " because they were dependency-free")
893        ;
894
895    numIssueOrderDepStores
896        .name(name() + ".numIssueOrderDepStores")
897        .desc("Number of stores that got assigned issue order dependency"
898              " because they were dependency-free")
899        ;
900
901    numIssueOrderDepOther
902        .name(name() + ".numIssueOrderDepOther")
903        .desc("Number of non load/store insts that got assigned issue order"
904              " dependency because they were dependency-free")
905        ;
906
907    numFilteredNodes
908        .name(name() + ".numFilteredNodes")
909        .desc("No. of nodes filtered out before writing the output trace")
910        ;
911
912    maxNumDependents
913        .name(name() + ".maxNumDependents")
914        .desc("Maximum number or dependents on any instruction")
915        ;
916
917    maxTempStoreSize
918        .name(name() + ".maxTempStoreSize")
919        .desc("Maximum size of the temporary store during the run")
920        ;
921
922    maxPhysRegDepMapSize
923        .name(name() + ".maxPhysRegDepMapSize")
924        .desc("Maximum size of register dependency map")
925        ;
926}
927
928const std::string&
929ElasticTrace::TraceInfo::typeToStr() const
930{
931    return Record::RecordType_Name(type);
932}
933
934const std::string
935ElasticTrace::name() const
936{
937    return ProbeListenerObject::name();
938}
939
940void
941ElasticTrace::flushTraces()
942{
943    // Write to trace all records in the depTrace.
944    writeDepTrace(depTrace.size());
945    // Delete the stream objects
946    delete dataTraceStream;
947    delete instTraceStream;
948}
949
950ElasticTrace*
951ElasticTraceParams::create()
952{
953    return new ElasticTrace(this);
954}
955