/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(this, "inst")),
        dataMasterID(params->system->getMasterId(this, "data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params),
        icacheNextEvent([this]{ schedIcacheNext(); }, name()),
        dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
        oneTraceComplete(false),
        traceOffset(0),
        execCompleteEvent(nullptr),
        enableEarlyExit(params->enableEarlyExit),
        progressMsgInterval(params->progressMsgInterval),
        progressMsgThreshold(params->progressMsgInterval)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
                "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
                "to %d exceeds the max. value of %d.\n",
                params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
                " %d exceeds the max. value of %d.\n",
                params->sizeLoadBuffer, UINT16_MAX);
}
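
// For context: a minimal sketch of how a TraceCPU might be instantiated from
// a gem5 Python config (hypothetical script; the trace file names are
// placeholders and the port/cache names depend on the rest of the config):
//
//   cpu = TraceCPU()
//   cpu.instTraceFile = "system.cpu.inst.trc.gz"
//   cpu.dataTraceFile = "system.cpu.data.trc.gz"
//   cpu.icache_port = system.cpu.icache.cpu_side
//   cpu.dcache_port = system.cpu.dcache.cpu_side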

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::updateNumOps(uint64_t rob_num)
{
    numOps = rob_num;
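    // As an illustration: with progressMsgInterval = 10000, a message is
    // printed when the committed micro-op count first reaches 10000, then
    // 20000, and so on, since the threshold below advances by one interval
    // per message.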
    if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
        inform("%s: %i insts committed\n", name(), progressMsgThreshold);
        progressMsgThreshold += progressMsgInterval;
    }
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    getInstPort().takeOverFrom(&oldCPU->getInstPort());
    getDataPort().takeOverFrom(&oldCPU->getDataPort());
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request
    Tick first_icache_tick = icacheGen.init();

    // Get the send tick of the first data read/write request
    Tick first_dcache_tick = dcacheGen.init();

    // Set the trace offset as the minimum of that in both traces
    traceOffset = std::min(first_icache_tick, first_dcache_tick);
    inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
            name(), traceOffset);

    // Schedule the next icache and dcache events by subtracting the offset
    schedule(icacheNextEvent, first_icache_tick - traceOffset);
    schedule(dcacheNextEvent, first_dcache_tick - traceOffset);

    // Adjust the trace offset for the dcache generator's ready nodes. We
    // don't need to do this for the icache generator as it will send its
    // first request at the first event and schedule subsequent events using
    // a relative tick delta.
    dcacheGen.adjustInitTraceOffset(traceOffset);
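
    // For example, if the first icache request in the trace is at tick
    // 50000000 and the first dcache request at tick 50000500, the offset is
    // 50000000; the icache event is then scheduled at tick 0 and the dcache
    // event at tick 500 relative to the start of the simulation.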

    // If the Trace CPU simulation is configured to exit on any one trace
    // completion then we don't need a counted event to count down all Trace
    // CPUs in the system. If not, then instantiate a counted event.
    if (!enableEarlyExit) {
        // The static counter for number of Trace CPUs is correctly set at
        // this point so create an event and pass it.
        execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                 numTraceCPUs);
    }
}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because sending
        // failed and the next event will be scheduled via recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete and we can exit
            // the simulation.
            checkAndSchedExitEvent();
        }
    }
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    // Update stat for numCycles
    numCycles = clockEdge() / clockPeriod();

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule an event to indicate that execution is complete as both
        // the instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());
        // If the replay is configured to exit early, that is, when any one
        // trace completes, then exit immediately and return. Otherwise,
        // schedule the counted exit that counts down completion of each
        // Trace CPU.
        if (enableEarlyExit) {
            exitSimLoop("End of trace reached");
        } else {
            schedule(*execCompleteEvent, curTick());
        }
    }
}

void
TraceCPU::regStats()
{
    BaseCPU::regStats();

    numSchedDcacheEvent
    .name(name() + ".numSchedDcacheEvent")
    .desc("Number of events scheduled to trigger data request generator")
    ;

    numSchedIcacheEvent
    .name(name() + ".numSchedIcacheEvent")
    .desc("Number of events scheduled to trigger instruction request generator")
    ;

    numOps
    .name(name() + ".numOps")
    .desc("Number of micro-ops simulated by the Trace CPU")
    ;

    cpi
    .name(name() + ".cpi")
    .desc("Cycles per micro-op used as a proxy for CPI")
    .precision(6)
    ;
    cpi = numCycles / numOps;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
    .name(name() + ".maxDependents")
    .desc("Max number of dependents observed on a node")
    ;

    maxReadyListSize
    .name(name() + ".maxReadyListSize")
    .desc("Max size of the ready list observed")
    ;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    numSplitReqs
    .name(name() + ".numSplitReqs")
    .desc("Number of split requests")
    ;

    numSOLoads
    .name(name() + ".numSOLoads")
    .desc("Number of strictly ordered loads")
    ;

    numSOStores
    .name(name() + ".numSOStores")
    .desc("Number of strictly ordered stores")
    ;

    dataLastTick
    .name(name() + ".dataLastTick")
    .desc("Last tick simulated from the elastic data trace")
    ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency-free node %lli"
            " is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
{
    for (auto& free_node : readyList) {
        free_node.execTick -= offset;
    }
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{
    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then the
        // end of the trace has been reached and traceComplete needs to be
        // set in addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                            T& dep_array, uint8_t& num_dep)
{
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts, maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider that the
            // execution of the parent is complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
            "depFreeQueue = %d, ", depGraph.size(), readyList.size(),
            depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the end of readyList is reached or the
    // next free node has its execute tick later than curTick
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that, else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from the cache will bring the control back to execute().
        // The first node in readyList will then be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // after the load is sent but after its response is received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to the
                    // new location of the element following the erased one
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as complete
            // as we do not send a request for this case. If it is a store or a
            // comp node we also mark all its dependents complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove it from readyList and delete it.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // it just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // Update the stat for numOps simulated
            owner.updateNumOps(node_ptr->robNum);
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to the first node to continue to the next iteration of
        // the while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                "depFreeQueue = %d, ", depGraph.size(), readyList.size(),
                depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If the cache is not blocked, schedule an event for the first execTick
    // in readyList, else a retry from the cache will schedule the event. If
    // the ready list is empty then check if the next pending node has
    // resources available to issue. If yes, then schedule an event for the
    // next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
                hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If the trace is completely read, readyList is empty and depGraph is
    // empty, set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{
    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
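    //
    // For example, with a 64-byte cache line, a 16-byte access at physical
    // address 0x1038 has a block offset of 0x38 (56); since 56 + 16 > 64,
    // the access below is truncated to 64 - 56 = 8 bytes so that it ends at
    // the line boundary.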
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }

    // Create a request and the packet containing the request
    auto req = std::make_shared<Request>(
        node_ptr->physAddr, node_ptr->size,
        node_ptr->flags, masterID, node_ptr->seqNum,
        ContextID(0));

    req->setPC(node_ptr->pc);
    // If the virtual address is valid, set the asid and virtual address
    // fields of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                        node_ptr->flags, masterID, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call the MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
            " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
            node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // Only if resources are free, add the node to the readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
            "Adding to readyList, occupying resources.\n", node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                                owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;
    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider the store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to schedule
        // an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get the pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // Update the stat for numOps completed
        owner.updateNumOps(node_ptr->robNum);
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for a retry, attempt to schedule the next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have execute
        // tick greater than or equal to curTick. But a new dep-free node might
        // have its execute tick earlier. Therefore, attempt to reschedule. It
        // could happen that the readyList is empty and we got here via a
        // last remaining response. So, either the trace is complete or there
        // are pending nodes in the depFreeQueue. The checking is done in the
        // execute() control flow, so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

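// The readyList is kept sorted primarily by execute tick and secondarily by
// sequence number. As a worked example, inserting (seq 7, tick 500) into a
// list holding (3, 400), (5, 500), (9, 600) yields (3, 400), (5, 500),
// (7, 500), (9, 600): equal ticks are ordered by ascending sequence number.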
void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                                    Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                              maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one that failed to execute, i.e.
    // the retry packet belongs to it, skip past it so that it keeps its
    // position at the head of the list.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to the next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node then
        // go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                          maxReadyListSize.value());
}

void
TraceCPU::ElasticDataGen::printReadyList()
{
    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
            node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
  : sizeROB(max_rob),
    sizeStoreBuffer(max_stores),
    sizeLoadBuffer(max_loads),
    oldestInFlightRobNum(UINT64_MAX),
    numInFlightLoads(0),
    numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy a ROB entry for the issued node. Merely maintain the oldest
    // node, i.e. the numerically least robNum, by saving it in the variable
    // oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy a Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
        done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we delete the only in-flight node then oldestInFlightRobNum is
        // reset to its initial (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
        "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
        oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus the ROB
    // entry is freed. But it occupies an entry in the Store Buffer until its
    // response is received. A load is considered complete when a response is
    // received, thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release(), which is called when
    // the node is executed and taken off the readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

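// ROB occupancy below is approximated as the distance between the oldest
// in-flight node and the candidate node. For instance, with sizeROB = 40 and
// the oldest in-flight robNum at 100, a node with robNum 135 may issue
// (135 - 100 = 35 entries in flight), whereas one with robNum 145 is held
// back (45 >= 40).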
bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a younger
        // instruction than the oldest instruction in flight. Thus we make
        // sure num_in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = %d - %d = %d", new_node->seqNum,
            new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in the ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum
        // will be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
            new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
        numInFlightLoads, sizeLoadBuffer,
        numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
            "LQ = %d/%d, SQ = %d/%d.\n",
            oldestInFlightRobNum,
            numInFlightLoads, sizeLoadBuffer,
            numInFlightStores, sizeStoreBuffer);
}

void
TraceCPU::FixedRetryGen::regStats()
{
    using namespace Stats;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    instLastTick
    .name(name() + ".instLastTick")
    .desc("Last tick simulated from the fixed inst trace")
    ;
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {
        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++numRetrySucceeded;
    } else {
        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                    currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++numSendSucceeded;
        }
    }
    // The packet, either retryPkt or currElement, was sent successfully.
    // Read the next element into currElement; return true to schedule an
    // event at the current tick plus the delta, or false if the trace is
    // complete and there is no next packet to send.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
        "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement; currElement gets cleared, so save
    // the tick to calculate the delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then the end
    // of the trace has been reached and traceComplete needs to be set in
    // addition to returning false. If successful then the next message is in
    // currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
              Request::FlagsType flags, Addr pc)
{
    // Create a new request
    auto req = std::make_shared<Request>(addr, size, flags, masterID);
    req->setPC(pc);

    // If this is not done it triggers an assert in the L1 cache for an
    // invalid contextId
    req->setContext(ContextID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call the MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // the retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // the retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }
}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the packet to free the allocated memory
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests, which is done inside
    // the elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response, delete the packet to free the memory
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename,
    const double time_multiplier)
    : trace(filename),
      timeMultiplier(time_multiplier),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }

    // Assign the window size equal to the field in the trace that was
    // recorded when the data dependency trace was captured in the o3cpu model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

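// Each record read below is an InstDepRecord protobuf message. For
// illustration only, a load record could look roughly like this in protobuf
// text form (field values are made up):
//
//   seq_num: 12
//   type: LOAD
//   comp_delay: 500
//   rob_dep: 10
//   reg_dep: 11
//   p_addr: 532480
//   size: 4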
bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU
        // frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field regDepList
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both a register
            // and an order dependency on another instruction. In such a case,
            // the register dependency is omitted.
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_asid())
            element->asid = pkt_msg.asid();
        else
            element->asid = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If the register dependency is found, make it zero and return
            // true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                    "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the ROB dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep()
{
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep()
{
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is a ROB dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not a ROB dependency then it must be a register
        // dependency. If the register dependency is not found, it violates
        // an assumption and must be caught by the assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency-free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}