elastic_trace.cc revision 11252
1/*
2 * Copyright (c) 2013 - 2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 *          Andreas Hansson
39 *          Thomas Grass
40 */
41
42#include "cpu/o3/probe/elastic_trace.hh"
43
44#include "base/callback.hh"
45#include "base/output.hh"
46#include "base/trace.hh"
47#include "cpu/reg_class.hh"
48#include "debug/ElasticTrace.hh"
49#include "mem/packet.hh"
50
51ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
52    :  ProbeListenerObject(params),
53       regEtraceListenersEvent(this),
54       firstWin(true),
55       lastClearedSeqNum(0),
56       depWindowSize(params->depWindowSize),
57       dataTraceStream(nullptr),
58       instTraceStream(nullptr),
59       startTraceInst(params->startTraceInst),
60       allProbesReg(false)
61{
62    cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
63    fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
64                "support dependency tracing.\n", name());
65
66    fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
67                "Recommended size is 3x ROB size in the O3CPU.\n");
68
69    fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\
70                "single-threaded workload only", cpu->numThreads, name());
71    // Initialize the protobuf output stream
72    fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\
73                "trace file path to instFetchTraceFile");
74    fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\
75                "trace file path to dataDepTraceFile");
76    std::string filename = simout.resolve(name() + "." +
77                                            params->instFetchTraceFile);
78    instTraceStream = new ProtoOutputStream(filename);
79    filename = simout.resolve(name() + "." + params->dataDepTraceFile);
80    dataTraceStream = new ProtoOutputStream(filename);
81    // Create a protobuf message for the header and write it to the stream
82    ProtoMessage::PacketHeader inst_pkt_header;
83    inst_pkt_header.set_obj_id(name());
84    inst_pkt_header.set_tick_freq(SimClock::Frequency);
85    instTraceStream->write(inst_pkt_header);
86    // Create a protobuf message for the header and write it to
87    // the stream
88    ProtoMessage::InstDepRecordHeader data_rec_header;
89    data_rec_header.set_obj_id(name());
90    data_rec_header.set_tick_freq(SimClock::Frequency);
91    data_rec_header.set_window_size(depWindowSize);
92    dataTraceStream->write(data_rec_header);
93    // Register a callback to flush trace records and close the output streams.
94    Callback* cb = new MakeCallback<ElasticTrace,
95        &ElasticTrace::flushTraces>(this);
96    registerExitCallback(cb);
97}
98
99void
100ElasticTrace::regProbeListeners()
101{
102    inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
103        curTick(), startTraceInst);
104    if (startTraceInst == 0) {
105        // If we want to start tracing from the start of the simulation,
106        // register all elastic trace probes now.
107        regEtraceListeners();
108    } else {
109        // Schedule an event to register all elastic trace probes when
110        // specified no. of instructions are committed.
111        cpu->comInstEventQueue[(ThreadID)0]->schedule(&regEtraceListenersEvent,
112                                                      startTraceInst);
113    }
114}
115
116void
117ElasticTrace::regEtraceListeners()
118{
119    assert(!allProbesReg);
120    inform("@%llu: No. of instructions committed = %llu, registering elastic"
121        " probe listeners", curTick(), cpu->numSimulatedInsts());
122    // Create new listeners: provide method to be called upon a notify() for
123    // each probe point.
124    listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
125                        "FetchRequest", &ElasticTrace::fetchReqTrace));
126    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
127                        "Execute", &ElasticTrace::recordExecTick));
128    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
129                        "ToCommit", &ElasticTrace::recordToCommTick));
130    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
131                        "Rename", &ElasticTrace::updateRegDep));
132    listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
133                        "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
134    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
135                        "Squash", &ElasticTrace::addSquashedInst));
136    listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
137                        "Commit", &ElasticTrace::addCommittedInst));
138    allProbesReg = true;
139}
140
141void
142ElasticTrace::fetchReqTrace(const RequestPtr &req)
143{
144
145    DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
146             (MemCmd::ReadReq),
147             req->getPC(), req->getVaddr(), req->getPaddr(),
148             req->getFlags(), req->getSize(), curTick());
149
150    // Create a protobuf message including the request fields necessary to
151    // recreate the request in the TraceCPU.
152    ProtoMessage::Packet inst_fetch_pkt;
153    inst_fetch_pkt.set_tick(curTick());
154    inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
155    inst_fetch_pkt.set_pc(req->getPC());
156    inst_fetch_pkt.set_flags(req->getFlags());
157    inst_fetch_pkt.set_addr(req->getPaddr());
158    inst_fetch_pkt.set_size(req->getSize());
159    // Write the message to the stream.
160    instTraceStream->write(inst_fetch_pkt);
161}
162
163void
164ElasticTrace::recordExecTick(const DynInstPtr &dyn_inst)
165{
166
167    // In a corner case, a retired instruction is propagated backward to the
168    // IEW instruction queue to handle some side-channel information. But we
169    // must not process an instruction again. So we test the sequence number
170    // against the lastClearedSeqNum and skip adding the instruction for such
171    // corner cases.
172    if (dyn_inst->seqNum <= lastClearedSeqNum) {
173        DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
174        has already retired (mostly squashed)", dyn_inst->seqNum);
175        // Do nothing as program has proceeded and this inst has been
176        // propagated backwards to handle something.
177        return;
178    }
179
180    DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
181                curTick());
182    // Either the execution info object will already exist if this
183    // instruction had a register dependency recorded in the rename probe
184    // listener before entering execute stage or it will not exist and will
185    // need to be created here.
186    InstExecInfo* exec_info_ptr;
187    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
188    if (itr_exec_info != tempStore.end()) {
189        exec_info_ptr = itr_exec_info->second;
190    } else {
191        exec_info_ptr = new InstExecInfo;
192        tempStore[dyn_inst->seqNum] = exec_info_ptr;
193    }
194
195    exec_info_ptr->executeTick = curTick();
196    maxTempStoreSize = std::max(tempStore.size(),
197                                (std::size_t)maxTempStoreSize.value());
198}
199
200void
201ElasticTrace::recordToCommTick(const DynInstPtr &dyn_inst)
202{
203    // If tracing has just been enabled then the instruction at this stage of
204    // execution is far enough that we cannot gather info about its past like
205    // the tick it started execution. Simply return until we see an instruction
206    // that is found in the tempStore.
207    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
208    if (itr_exec_info == tempStore.end()) {
209        DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
210                    " skipping.\n", dyn_inst->seqNum);
211        return;
212    }
213
214    DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
215                curTick());
216    InstExecInfo* exec_info_ptr = itr_exec_info->second;
217    exec_info_ptr->toCommitTick = curTick();
218
219}
220
221void
222ElasticTrace::updateRegDep(const DynInstPtr &dyn_inst)
223{
224    // Get the sequence number of the instruction
225    InstSeqNum seq_num = dyn_inst->seqNum;
226
227    assert(dyn_inst->seqNum > lastClearedSeqNum);
228
229    // Since this is the first probe activated in the pipeline, create
230    // a new execution info object to track this instruction as it
231    // progresses through the pipeline.
232    InstExecInfo* exec_info_ptr = new InstExecInfo;
233    tempStore[seq_num] = exec_info_ptr;
234
235    // Loop through the source registers and look up the dependency map. If
236    // the source register entry is found in the dependency map, add a
237    // dependency on the last writer.
238    int8_t max_regs = dyn_inst->numSrcRegs();
239    for (int src_idx = 0; src_idx < max_regs; src_idx++) {
240        // Get the physical register index of the i'th source register.
241        PhysRegIndex src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
242        DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg %i\n", seq_num,
243                    src_reg);
244        auto itr_last_writer = physRegDepMap.find(src_reg);
245        if (itr_last_writer != physRegDepMap.end()) {
246            InstSeqNum last_writer = itr_last_writer->second;
247            // Additionally the dependency distance is kept less than the window
248            // size parameter to limit the memory allocation to nodes in the
249            // graph. If the window were tending to infinite we would have to
250            // load a large number of node objects during replay.
251            if (seq_num - last_writer < depWindowSize) {
252                // Record a physical register dependency.
253                exec_info_ptr->physRegDepSet.insert(last_writer);
254            }
255        }
256    }
257
258    // Loop through the destination registers of this instruction and update
259    // the physical register dependency map for last writers to registers.
260    max_regs = dyn_inst->numDestRegs();
261    for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
262        // For data dependency tracking the register must be an int, float or
263        // CC register and not a Misc register.
264        TheISA::RegIndex dest_reg = dyn_inst->destRegIdx(dest_idx);
265        if (regIdxToClass(dest_reg) != MiscRegClass) {
266            // Get the physical register index of the i'th destination register.
267            dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
268            if (dest_reg != TheISA::ZeroReg) {
269                DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg %i\n",
270                            seq_num, dest_reg);
271                physRegDepMap[dest_reg] = seq_num;
272            }
273        }
274    }
275    maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
276                                    (std::size_t)maxPhysRegDepMapSize.value());
277}
278
279void
280ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
281{
282    DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
283                inst_reg_pair.second);
284    auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
285    if (itr_regdep_map != physRegDepMap.end())
286        physRegDepMap.erase(itr_regdep_map);
287}
288
289void
290ElasticTrace::addSquashedInst(const DynInstPtr &head_inst)
291{
292    // If the squashed instruction was squashed before being processed by
293    // execute stage then it will not be in the temporary store. In this case
294    // do nothing and return.
295    auto itr_exec_info = tempStore.find(head_inst->seqNum);
296    if (itr_exec_info == tempStore.end())
297        return;
298
299    // If there is a squashed load for which a read request was
300    // sent before it got squashed then add it to the trace.
301    DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
302                head_inst->seqNum);
303    // Get pointer to the execution info object corresponding to the inst.
304    InstExecInfo* exec_info_ptr = itr_exec_info->second;
305    if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
306        exec_info_ptr->toCommitTick != MaxTick &&
307        head_inst->hasRequest() &&
308        head_inst->getFault() == NoFault) {
309        // Add record to depTrace with commit parameter as false.
310        addDepTraceRecord(head_inst, exec_info_ptr, false);
311    }
312    // As the information contained is no longer needed, remove the execution
313    // info object from the temporary store.
314    clearTempStoreUntil(head_inst);
315}
316
317void
318ElasticTrace::addCommittedInst(const DynInstPtr &head_inst)
319{
320    DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
321                head_inst->seqNum);
322
323    // Add the instruction to the depTrace.
324    if (!head_inst->isNop()) {
325
326        // If tracing has just been enabled then the instruction at this stage
327        // of execution is far enough that we cannot gather info about its past
328        // like the tick it started execution. Simply return until we see an
329        // instruction that is found in the tempStore.
330        auto itr_temp_store = tempStore.find(head_inst->seqNum);
331        if (itr_temp_store == tempStore.end()) {
332            DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
333                "store, skipping.\n", head_inst->seqNum);
334            return;
335        }
336
337        // Get pointer to the execution info object corresponding to the inst.
338        InstExecInfo* exec_info_ptr = itr_temp_store->second;
339        assert(exec_info_ptr->executeTick != MaxTick);
340        assert(exec_info_ptr->toCommitTick != MaxTick);
341
342        // Check if the instruction had a fault, if it predicated false and
343        // thus previous register values were restored or if it was a
344        // load/store that did not have a request (e.g. when the size of the
345        // request is zero). In all these cases the instruction is set as
346        // executed and is picked up by the commit probe listener. But a
347        // request is not issued and registers are not written. So practically,
348        // skipping these should not hurt as execution would not stall on them.
349        // Alternatively, these could be included merely as a compute node in
350        // the graph. Removing these for now. If correlation accuracy needs to
351        // be improved in future these can be turned into comp nodes at the
352        // cost of bigger traces.
353        if (head_inst->getFault() != NoFault) {
354            DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
355                    "skip adding it to the trace\n",
356                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
357                    head_inst->seqNum);
358        } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
359            DPRINTF(ElasticTrace, "Load/store [sn:%lli]  has no request so "
360                    "skip adding it to the trace\n", head_inst->seqNum);
361        } else if (!head_inst->readPredicate()) {
362            DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
363                    "skip adding it to the trace\n",
364                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
365                    head_inst->seqNum);
366        } else {
367            // Add record to depTrace with commit parameter as true.
368            addDepTraceRecord(head_inst, exec_info_ptr, true);
369        }
370    }
371    // As the information contained is no longer needed, remove the execution
372    // info object from the temporary store.
373    clearTempStoreUntil(head_inst);
374}
375
376void
377ElasticTrace::addDepTraceRecord(const DynInstPtr &head_inst,
378                                InstExecInfo* exec_info_ptr, bool commit)
379{
380    // Create a record to assign dynamic intruction related fields.
381    TraceInfo* new_record = new TraceInfo;
382    // Add to map for sequence number look up to retrieve the TraceInfo pointer
383    traceInfoMap[head_inst->seqNum] = new_record;
384
385    // Assign fields from the instruction
386    new_record->instNum = head_inst->seqNum;
387    new_record->commit = commit;
388    new_record->type = head_inst->isLoad() ? Record::LOAD :
389                        (head_inst->isStore() ? Record::STORE :
390                        Record::COMP);
391
392    // Assign fields for creating a request in case of a load/store
393    new_record->reqFlags = head_inst->memReqFlags;
394    new_record->addr = head_inst->physEffAddrLow;
395    // Currently the tracing does not support split requests.
396    new_record->size = head_inst->effSize;
397    new_record->pc = head_inst->instAddr();
398
399    // Assign the timing information stored in the execution info object
400    new_record->executeTick = exec_info_ptr->executeTick;
401    new_record->toCommitTick = exec_info_ptr->toCommitTick;
402    new_record->commitTick = curTick();
403
404    // Assign initial values for number of dependents and computational delay
405    new_record->numDepts = 0;
406    new_record->compDelay = -1;
407
408    // The physical register dependency set of the first instruction is
409    // empty. Since there are no records in the depTrace at this point, the
410    // case of adding an ROB dependency by using a reverse iterator is not
411    // applicable. Thus, populate the fields of the record corresponding to the
412    // first instruction and return.
413    if (depTrace.empty()) {
414        // Store the record in depTrace.
415        depTrace.push_back(new_record);
416        DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
417                new_record->instNum);
418        return;
419    }
420
421    // Clear register dependencies for squashed loads as they may be dependent
422    // on squashed instructions and we do not add those to the trace.
423    if (head_inst->isLoad() && !commit) {
424         (exec_info_ptr->physRegDepSet).clear();
425    }
426
427    // Assign the register dependencies stored in the execution info object
428    std::set<InstSeqNum>::const_iterator dep_set_it;
429    for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
430         dep_set_it != (exec_info_ptr->physRegDepSet).end();
431         ++dep_set_it) {
432        auto trace_info_itr = traceInfoMap.find(*dep_set_it);
433        if (trace_info_itr != traceInfoMap.end()) {
434            // The register dependency is valid. Assign it and calculate
435            // computational delay
436            new_record->physRegDepList.push_back(*dep_set_it);
437            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
438                    "%lli\n", new_record->instNum, *dep_set_it);
439            TraceInfo* reg_dep = trace_info_itr->second;
440            reg_dep->numDepts++;
441            compDelayPhysRegDep(reg_dep, new_record);
442            ++numRegDep;
443        } else {
444            // The instruction that this has a register dependency on was
445            // not added to the trace because of one of the following
446            // 1. it was an instruction that had a fault
447            // 2. it was an instruction that was predicated false and
448            // previous register values were restored
449            // 3. it was load/store that did not have a request (e.g. when
450            // the size of the request is zero but this may not be a fault)
451            // In all these cases the instruction is set as executed and is
452            // picked up by the commit probe listener. But a request is not
453            // issued and registers are not written to in these cases.
454            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
455                    "%lli is skipped\n",new_record->instNum, *dep_set_it);
456        }
457    }
458
459    // Check for and assign an ROB dependency in addition to register
460    // dependency before adding the record to the trace.
461    // As stores have to commit in order a store is dependent on the last
462    // committed load/store. This is recorded in the ROB dependency.
463    if (head_inst->isStore()) {
464        // Look up store-after-store order dependency
465        updateCommitOrderDep(new_record, false);
466        // Look up store-after-load order dependency
467        updateCommitOrderDep(new_record, true);
468    }
469
470    // In case a node is dependency-free or its dependency got discarded
471    // because it was outside the window, it is marked ready in the ROB at the
472    // time of issue. A request is sent as soon as possible. To model this, a
473    // node is assigned an issue order dependency on a committed instruction
474    // that completed earlier than it. This is done to avoid the problem of
475    // determining the issue times of such dependency-free nodes during replay
476    // which could lead to too much parallelism, thinking conservatively.
477    if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
478        updateIssueOrderDep(new_record);
479    }
480
481    // Store the record in depTrace.
482    depTrace.push_back(new_record);
483    DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
484            (commit ? "committed" : "squashed"), new_record->instNum);
485
486    // To process the number of records specified by depWindowSize in the
487    // forward direction, the depTrace must have twice as many records
488    // to check for dependencies.
489    if (depTrace.size() == 2 * depWindowSize) {
490
491        DPRINTF(ElasticTrace, "Writing out trace...\n");
492
493        // Write out the records which have been processed to the trace
494        // and remove them from the depTrace.
495        writeDepTrace(depWindowSize);
496
497        // After the first window, writeDepTrace() must check for valid
498        // compDelay.
499        firstWin = false;
500    }
501}
502
503void
504ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
505                                    bool find_load_not_store)
506{
507    assert(new_record->isStore());
508    // Iterate in reverse direction to search for the last committed
509    // load/store that completed earlier than the new record
510    depTraceRevItr from_itr(depTrace.end());
511    depTraceRevItr until_itr(depTrace.begin());
512    TraceInfo* past_record = *from_itr;
513    uint32_t num_go_back = 0;
514
515    // The execution time of this store is when it is sent, that is committed
516    Tick execute_tick = curTick();
517    // Search for store-after-load or store-after-store order dependency
518    while (num_go_back < depWindowSize && from_itr != until_itr) {
519        if (find_load_not_store) {
520            // Check if previous inst is a load completed earlier by comparing
521            // with execute tick
522            if (hasLoadCompleted(past_record, execute_tick)) {
523                // Assign rob dependency and calculate the computational delay
524                assignRobDep(past_record, new_record);
525                ++numOrderDepStores;
526                return;
527            }
528        } else {
529            // Check if previous inst is a store sent earlier by comparing with
530            // execute tick
531            if (hasStoreCommitted(past_record, execute_tick)) {
532                // Assign rob dependency and calculate the computational delay
533                assignRobDep(past_record, new_record);
534                ++numOrderDepStores;
535                return;
536            }
537        }
538        ++from_itr;
539        past_record = *from_itr;
540        ++num_go_back;
541    }
542}
543
544void
545ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
546{
547    // Interate in reverse direction to search for the last committed
548    // record that completed earlier than the new record
549    depTraceRevItr from_itr(depTrace.end());
550    depTraceRevItr until_itr(depTrace.begin());
551    TraceInfo* past_record = *from_itr;
552
553    uint32_t num_go_back = 0;
554    Tick execute_tick = 0;
555
556    if (new_record->isLoad()) {
557        // The execution time of a load is when a request is sent
558        execute_tick = new_record->executeTick;
559        ++numIssueOrderDepLoads;
560    } else if (new_record->isStore()) {
561        // The execution time of a store is when it is sent, i.e. committed
562        execute_tick = curTick();
563        ++numIssueOrderDepStores;
564    } else {
565        // The execution time of a non load/store is when it completes
566        execute_tick = new_record->toCommitTick;
567        ++numIssueOrderDepOther;
568    }
569
570    // We search if this record has an issue order dependency on a past record.
571    // Once we find it, we update both the new record and the record it depends
572    // on and return.
573    while (num_go_back < depWindowSize && from_itr != until_itr) {
574        // Check if a previous inst is a load sent earlier, or a store sent
575        // earlier, or a comp inst completed earlier by comparing with execute
576        // tick
577        if (hasLoadBeenSent(past_record, execute_tick) ||
578            hasStoreCommitted(past_record, execute_tick) ||
579            hasCompCompleted(past_record, execute_tick)) {
580            // Assign rob dependency and calculate the computational delay
581            assignRobDep(past_record, new_record);
582            return;
583        }
584        ++from_itr;
585        past_record = *from_itr;
586        ++num_go_back;
587    }
588}
589
590void
591ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
592    DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
593            new_record->typeToStr(), new_record->instNum,
594            past_record->instNum);
595    // Add dependency on past record
596    new_record->robDepList.push_back(past_record->instNum);
597    // Update new_record's compute delay with respect to the past record
598    compDelayRob(past_record, new_record);
599    // Increment number of dependents of the past record
600    ++(past_record->numDepts);
601    // Update stat to log max number of dependents
602    maxNumDependents = std::max(past_record->numDepts,
603                                (uint32_t)maxNumDependents.value());
604}
605
606bool
607ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
608                                    Tick execute_tick) const
609{
610    return (past_record->isStore() && past_record->commitTick <= execute_tick);
611}
612
613bool
614ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
615                                    Tick execute_tick) const
616{
617    return(past_record->isLoad() && past_record->commit &&
618                past_record->toCommitTick <= execute_tick);
619}
620
621bool
622ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
623                                Tick execute_tick) const
624{
625    // Check if previous inst is a load sent earlier than this
626    return (past_record->isLoad() && past_record->commit &&
627        past_record->executeTick <= execute_tick);
628}
629
630bool
631ElasticTrace::hasCompCompleted(TraceInfo* past_record,
632                                    Tick execute_tick) const
633{
634    return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
635}
636
637void
638ElasticTrace::clearTempStoreUntil(const DynInstPtr head_inst)
639{
640    // Clear from temp store starting with the execution info object
641    // corresponding the head_inst and continue clearing by decrementing the
642    // sequence number until the last cleared sequence number.
643    InstSeqNum temp_sn = (head_inst->seqNum);
644    while (temp_sn > lastClearedSeqNum) {
645        auto itr_exec_info = tempStore.find(temp_sn);
646        if (itr_exec_info != tempStore.end()) {
647            InstExecInfo* exec_info_ptr = itr_exec_info->second;
648            // Free allocated memory for the info object
649            delete exec_info_ptr;
650            // Remove entry from temporary store
651            tempStore.erase(itr_exec_info);
652        }
653        temp_sn--;
654    }
655    // Update the last cleared sequence number to that of the head_inst
656    lastClearedSeqNum = head_inst->seqNum;
657}
658
659void
660ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
661{
662    // The computation delay is the delay between the completion tick of the
663    // inst. pointed to by past_record and the execution tick of its dependent
664    // inst. pointed to by new_record.
665    int64_t comp_delay = -1;
666    Tick execution_tick = 0, completion_tick = 0;
667
668    DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
669            new_record->instNum, past_record->instNum);
670
671    // Get the tick when the node is executed as per the modelling of
672    // computation delay
673    execution_tick = new_record->getExecuteTick();
674
675    if (past_record->isLoad()) {
676        if (new_record->isStore()) {
677            completion_tick = past_record->toCommitTick;
678        } else {
679            completion_tick = past_record->executeTick;
680        }
681    } else if (past_record->isStore()) {
682        completion_tick = past_record->commitTick;
683    } else if (past_record->isComp()){
684        completion_tick = past_record->toCommitTick;
685    }
686    assert(execution_tick >= completion_tick);
687    comp_delay = execution_tick - completion_tick;
688
689    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
690            execution_tick, completion_tick, comp_delay);
691
692    // Assign the computational delay with respect to the dependency which
693    // completes the latest.
694    if (new_record->compDelay == -1)
695        new_record->compDelay = comp_delay;
696    else
697        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
698    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
699            new_record->compDelay);
700}
701
702void
703ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
704                                    TraceInfo* new_record)
705{
706    // The computation delay is the delay between the completion tick of the
707    // inst. pointed to by past_record and the execution tick of its dependent
708    // inst. pointed to by new_record.
709    int64_t comp_delay = -1;
710    Tick execution_tick = 0, completion_tick = 0;
711
712    DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
713            " %lli.\n", new_record->instNum, past_record->instNum);
714
715    // Get the tick when the node is executed as per the modelling of
716    // computation delay
717    execution_tick = new_record->getExecuteTick();
718
719    // When there is a physical register dependency on an instruction, the
720    // completion tick of that instruction is when it wrote to the register,
721    // that is toCommitTick. In case, of a store updating a destination
722    // register, this is approximated to commitTick instead
723    if (past_record->isStore()) {
724        completion_tick = past_record->commitTick;
725    } else {
726        completion_tick = past_record->toCommitTick;
727    }
728    assert(execution_tick >= completion_tick);
729    comp_delay = execution_tick - completion_tick;
730    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
731            execution_tick, completion_tick, comp_delay);
732
733    // Assign the computational delay with respect to the dependency which
734    // completes the latest.
735    if (new_record->compDelay == -1)
736        new_record->compDelay = comp_delay;
737    else
738        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
739    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
740            new_record->compDelay);
741}
742
743Tick
744ElasticTrace::TraceInfo::getExecuteTick() const
745{
746    if (isLoad()) {
747        // Execution tick for a load instruction is when the request was sent,
748        // that is executeTick.
749        return executeTick;
750    } else if (isStore()) {
751        // Execution tick for a store instruction is when the request was sent,
752        // that is commitTick.
753        return commitTick;
754    } else {
755        // Execution tick for a non load/store instruction is when the register
756        // value was written to, that is commitTick.
757        return toCommitTick;
758    }
759}
760
761void
762ElasticTrace::writeDepTrace(uint32_t num_to_write)
763{
764    // Write the trace with fields as follows:
765    // Instruction sequence number
766    // If instruction was a load
767    // If instruction was a store
768    // If instruction has addr
769    // If instruction has size
770    // If instruction has flags
771    // List of order dependencies - optional, repeated
772    // Computational delay with respect to last completed dependency
773    // List of physical register RAW dependencies - optional, repeated
774    // Weight of a node equal to no. of filtered nodes before it - optional
775    uint16_t num_filtered_nodes = 0;
776    depTraceItr dep_trace_itr(depTrace.begin());
777    depTraceItr dep_trace_itr_start = dep_trace_itr;
778    while (num_to_write > 0) {
779        TraceInfo* temp_ptr = *dep_trace_itr;
780        assert(temp_ptr->type != Record::INVALID);
781        // If no node dependends on a comp node then there is no reason to
782        // track the comp node in the dependency graph. We filter out such
783        // nodes but count them and add a weight field to the subsequent node
784        // that we do include in the trace.
785        if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
786            DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
787                     "is as follows:\n", temp_ptr->instNum);
788            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
789                DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
790                DPRINTFR(ElasticTrace, "\thas a request with addr %i, size %i,"
791                         " flags %i\n", temp_ptr->addr, temp_ptr->size,
792                         temp_ptr->reqFlags);
793            } else {
794                 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
795            }
796            if (firstWin && temp_ptr->compDelay == -1) {
797                if (temp_ptr->isLoad()) {
798                    temp_ptr->compDelay = temp_ptr->executeTick;
799                } else if (temp_ptr->isStore()) {
800                    temp_ptr->compDelay = temp_ptr->commitTick;
801                } else {
802                    temp_ptr->compDelay = temp_ptr->toCommitTick;
803                }
804            }
805            assert(temp_ptr->compDelay != -1);
806            DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
807                     temp_ptr->compDelay);
808
809            // Create a protobuf message for the dependency record
810            ProtoMessage::InstDepRecord dep_pkt;
811            dep_pkt.set_seq_num(temp_ptr->instNum);
812            dep_pkt.set_type(temp_ptr->type);
813            dep_pkt.set_pc(temp_ptr->pc);
814            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
815                dep_pkt.set_flags(temp_ptr->reqFlags);
816                dep_pkt.set_addr(temp_ptr->addr);
817                dep_pkt.set_size(temp_ptr->size);
818            }
819            dep_pkt.set_comp_delay(temp_ptr->compDelay);
820            if (temp_ptr->robDepList.empty()) {
821                DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
822            }
823            while (!temp_ptr->robDepList.empty()) {
824                DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
825                         temp_ptr->robDepList.front());
826                dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
827                temp_ptr->robDepList.pop_front();
828            }
829            if (temp_ptr->physRegDepList.empty()) {
830                DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
831            }
832            while (!temp_ptr->physRegDepList.empty()) {
833                DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
834                         temp_ptr->physRegDepList.front());
835                dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
836                temp_ptr->physRegDepList.pop_front();
837            }
838            if (num_filtered_nodes != 0) {
839                // Set the weight of this node as the no. of filtered nodes
840                // between this node and the last node that we wrote to output
841                // stream. The weight will be used during replay to model ROB
842                // occupancy of filtered nodes.
843                dep_pkt.set_weight(num_filtered_nodes);
844                num_filtered_nodes = 0;
845            }
846            // Write the message to the protobuf output stream
847            dataTraceStream->write(dep_pkt);
848        } else {
849            // Don't write the node to the trace but note that we have filtered
850            // out a node.
851            ++numFilteredNodes;
852            ++num_filtered_nodes;
853        }
854        dep_trace_itr++;
855        traceInfoMap.erase(temp_ptr->instNum);
856        delete temp_ptr;
857        num_to_write--;
858    }
859    depTrace.erase(dep_trace_itr_start, dep_trace_itr);
860}
861
862void
863ElasticTrace::regStats() {
864    using namespace Stats;
865    numRegDep
866        .name(name() + ".numRegDep")
867        .desc("Number of register dependencies recorded during tracing")
868        ;
869
870    numOrderDepStores
871        .name(name() + ".numOrderDepStores")
872        .desc("Number of commit order (rob) dependencies for a store recorded"
873              " on a past load/store during tracing")
874        ;
875
876    numIssueOrderDepLoads
877        .name(name() + ".numIssueOrderDepLoads")
878        .desc("Number of loads that got assigned issue order dependency"
879              " because they were dependency-free")
880        ;
881
882    numIssueOrderDepStores
883        .name(name() + ".numIssueOrderDepStores")
884        .desc("Number of stores that got assigned issue order dependency"
885              " because they were dependency-free")
886        ;
887
888    numIssueOrderDepOther
889        .name(name() + ".numIssueOrderDepOther")
890        .desc("Number of non load/store insts that got assigned issue order"
891              " dependency because they were dependency-free")
892        ;
893
894    numFilteredNodes
895        .name(name() + ".numFilteredNodes")
896        .desc("No. of nodes filtered out before writing the output trace")
897        ;
898
899    maxNumDependents
900        .name(name() + ".maxNumDependents")
901        .desc("Maximum number or dependents on any instruction")
902        ;
903
904    maxTempStoreSize
905        .name(name() + ".maxTempStoreSize")
906        .desc("Maximum size of the temporary store during the run")
907        ;
908
909    maxPhysRegDepMapSize
910        .name(name() + ".maxPhysRegDepMapSize")
911        .desc("Maximum size of register dependency map")
912        ;
913}
914
915const std::string&
916ElasticTrace::TraceInfo::typeToStr() const
917{
918    return Record::RecordType_Name(type);
919}
920
921const std::string
922ElasticTrace::name() const
923{
924    return ProbeListenerObject::name();
925}
926
927void
928ElasticTrace::flushTraces()
929{
930    // Write to trace all records in the depTrace.
931    writeDepTrace(depTrace.size());
932    // Delete the stream objects
933    delete dataTraceStream;
934    delete instTraceStream;
935}
936
937ElasticTrace*
938ElasticTraceParams::create()
939{
940    return new ElasticTrace(this);
941}
942