trace_cpu.cc revision 11631:6d147afa8fc6
/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(name() + ".inst")),
        dataMasterID(params->system->getMasterId(name() + ".data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params),
        icacheNextEvent(this),
        dcacheNextEvent(this),
        oneTraceComplete(false),
        firstFetchTick(0),
        execCompleteEvent(nullptr)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and load
    // buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
                "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set to "
                "%d exceeds the max. value of %d.\n", params->sizeStoreBuffer,
                UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
                " %d exceeds the max. value of %d.\n",
                params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request and schedule
    // icacheNextEvent at that tick.
    Tick first_icache_tick = icacheGen.init();
    schedule(icacheNextEvent, first_icache_tick);

    // Get the send tick of the first data read/write request and schedule
    // dcacheNextEvent at that tick.
    Tick first_dcache_tick = dcacheGen.init();
    schedule(dcacheNextEvent, first_dcache_tick);

    // The static counter for number of Trace CPUs is correctly set at this
    // point so create an event and pass it.
    execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                numTraceCPUs);
    // Save the first fetch request tick to dump it as tickOffset
    firstFetchTick = first_icache_tick;
}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because sending
        // failed and the next event will be scheduled via recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete and we can exit.
            checkAndSchedExitEvent();
        }
    }
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

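// Called by each of the two generators when it finishes replaying its trace.
// The first call only records that one trace is done; the second call, seeing
// oneTraceComplete already set, knows both traces have been played back and
// schedules the counted exit event.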
void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());

        // Record stats which are computed at the end of simulation
        tickOffset = firstFetchTick;
        numCycles = (clockEdge() - firstFetchTick) / clockPeriod();
        numOps = dcacheGen.getMicroOpCount();
        schedule(*execCompleteEvent, curTick());
    }
}

void
TraceCPU::regStats()
{

    BaseCPU::regStats();

    numSchedDcacheEvent
    .name(name() + ".numSchedDcacheEvent")
    .desc("Number of events scheduled to trigger data request generator")
    ;

    numSchedIcacheEvent
    .name(name() + ".numSchedIcacheEvent")
    .desc("Number of events scheduled to trigger instruction request generator")
    ;

    numOps
    .name(name() + ".numOps")
    .desc("Number of micro-ops simulated by the Trace CPU")
    ;

    cpi
    .name(name() + ".cpi")
    .desc("Cycles per micro-op used as a proxy for CPI")
    .precision(6)
    ;
    cpi = numCycles/numOps;

    tickOffset
    .name(name() + ".tickOffset")
    .desc("The first execution tick for the root node of elastic traces")
    ;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
    .name(name() + ".maxDependents")
    .desc("Max number of dependents observed on a node")
    ;

    maxReadyListSize
    .name(name() + ".maxReadyListSize")
    .desc("Max size of the ready list observed")
    ;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    numSplitReqs
    .name(name() + ".numSplitReqs")
    .desc("Number of split requests")
    ;

    numSOLoads
    .name(name() + ".numSOLoads")
    .desc("Number of strictly ordered loads")
    ;

    numSOStores
    .name(name() + ".numSOStores")
    .desc("Number of strictly ordered stores")
    ;

    dataLastTick
    .name(name() + ".dataLastTick")
    .desc("Last tick simulated from the elastic data trace")
    ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
            " is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

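// Read up to windowSize records from the trace into the dependency graph.
// Each new node is linked to its still-pending parents; nodes that arrive
// with no outstanding dependencies are issued immediately via
// checkAndIssue(). Returns false once the end of the trace is reached.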
bool
TraceCPU::ElasticDataGen::readNextWindow()
{

    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then end of
        // trace has been reached and traceComplete needs to be set in addition
        // to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                            T& dep_array, uint8_t& num_dep)
{
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we reach
        // a dependency equal to the initialisation value of zero, we know we
        // have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts, maxDependents.value());
        } else {
            // The dependency is not found in the graph, so consider that the
            // execution of the parent is complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

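// One pass of the elastic engine. In order: top up the dependency graph
// window if needed, retry nodes parked in depFreeQueue, then drain the
// readyList issuing memory requests until a send fails or the next node's
// execute tick lies in the future. Finally, schedule the next event (or set
// execComplete once everything has drained).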
void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
            "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
            depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the end of readyList is reached or the
    // next free node has its execute tick later than curTick
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList will then be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // after load is sent but after response is received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as complete
            // as we do not send a request for this case. If it is a store or a
            // comp node we also mark all its dependents complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet.  Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If the cache is not blocked, schedule an event for the first execTick
    // in readyList, else the retry from the cache will schedule the event. If
    // the ready list is empty then check if the next pending node has
    // resources available to issue. If yes, then schedule an event for the
    // next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
                hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If the trace is completely read, readyList is empty and depGraph is
    // empty, set execComplete to true.
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

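// Build and send the timing request for a load/store node. Strictly ordered
// requests are not sent at all; requests that would cross a cache line are
// truncated at the line boundary. Returns the packet on a failed send (to be
// retried later) and nullptr on success.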
PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{

    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }

    // Create a request and the packet containing request
    Request* req = new Request(node_ptr->physAddr, node_ptr->size,
                               node_ptr->flags, masterID, node_ptr->seqNum,
                               ContextID(0));
    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the asid and virtual address fields
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                        node_ptr->flags, masterID, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
            " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
            node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
            " to readyList, occupying resources.\n", node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                                owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;

    } else {
        if (first) {
            // Although dependencies are complete, resources are not available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
                " Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
                "Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

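// Called on a timing response from the dcache. Store responses only free the
// store buffer entry; load responses additionally wake up the dependents of
// the completed load and delete its node from the dependency graph.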
void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to schedule
        // an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have execute
        // tick greater than or equal to curTick. But a new dep-free node might
        // have its execute tick earlier. Therefore, attempt to reschedule. It
        // could happen that the readyList is empty and we got here via a
        // last remaining response. So, either the trace is complete or there
        // are pending nodes in the depFreeQueue. The checking is done in the
        // execute() control flow, so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

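// Insert a node into readyList, which is kept sorted by execute tick and, for
// equal ticks, by ascending sequence number. The one exception is a node
// whose packet is awaiting a retry: it stays pinned at the head of the list
// regardless of tick ordering.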
void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                                    Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                              maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one whose packet is awaiting a
    // retry, skip past it so that the failed node keeps its position at the
    // head of the list.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node then
        // go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                          maxReadyListSize.value());
}

void
TraceCPU::ElasticDataGen::printReadyList() {

    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
            node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

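// Simple occupancy model of the back-end resources: a ROB of sizeROB entries
// plus separate load and store buffers. A node may only issue when all three
// have room; see isAvailable() below.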
TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
  : sizeROB(max_rob),
    sizeStoreBuffer(max_stores),
    sizeLoadBuffer(max_loads),
    oldestInFlightRobNum(UINT64_MAX),
    numInFlightLoads(0),
    numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy a ROB entry for the issued node. We merely maintain the oldest
    // node, i.e. the numerically least robNum, by saving it in the variable
    // oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
        done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we deleted the only in-flight node, then oldestInFlightRobNum
        // is reset to its initialised (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
        "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
        oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus ROB entry is
    // freed. But it occupies an entry in the Store Buffer until its response
    // is received. A load is considered complete when a response is received,
    // thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release(), which is called when
    // the node is executed and taken off the readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

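// The ROB occupancy is approximated as the distance between the ROB entry
// number of the node being issued and that of the oldest node in flight,
// rather than by tracking individual entries. For example, with
// oldestInFlightRobNum = 100 and a new node at robNum = 140, the check treats
// 40 ROB entries as occupied.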
bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a younger
        // instruction than the oldest instruction in flight. Thus we make
        // sure num_in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = %d - %d =  %d", new_node->seqNum,
             new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum will
        // be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
            new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ  = %d/%d.\n",
        numInFlightLoads, sizeLoadBuffer,
        numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
            "LQ = %d/%d, SQ  = %d/%d.\n",
            oldestInFlightRobNum,
            numInFlightLoads, sizeLoadBuffer,
            numInFlightStores, sizeStoreBuffer);
}

void
TraceCPU::FixedRetryGen::regStats()
{
    using namespace Stats;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    instLastTick
    .name(name() + ".instLastTick")
    .desc("Last tick simulated from the fixed inst trace")
    ;
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

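// Attempt to send either the pending retry packet or the current trace
// element. Returns true if the caller should schedule the next IcacheGen
// event, i.e. a packet was sent and the trace is not yet complete.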
bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {

        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++numRetrySucceeded;
    } else {

        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                    currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++numSendSucceeded;
        }
    }
    // If a packet was sent successfully, either retryPkt or currElement,
    // return true to indicate that an event should be scheduled at the
    // current tick plus delta. If there is no next packet to send, return
    // false.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
        "element.\n");
    retryPkt = nullptr;
    // Read the next element into currElement; currElement gets cleared, so
    // save the tick to calculate delta.
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

1114
1115bool
1116TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1117              Request::FlagsType flags, Addr pc)
1118{
1119
1120    // Create new request
1121    Request* req = new Request(addr, size, flags, masterID);
1122    req->setPC(pc);
1123
1124    // If this is not done it triggers assert in L1 cache for invalid contextId
1125    req->setContext(ContextID(0));
1126
1127    // Embed it in a packet
1128    PacketPtr pkt = new Packet(req, cmd);
1129
1130    uint8_t* pkt_data = new uint8_t[req->getSize()];
1131    pkt->dataDynamic(pkt_data);
1132
1133    if (cmd.isWrite()) {
1134        memset(pkt_data, 0xA, req->getSize());
1135    }
1136
1137    // Call MasterPort method to send a timing request for this packet
1138    bool success = port.sendTimingReq(pkt);
1139    if (!success) {
1140        // If it fails, save the packet to retry when a retry is signalled by
1141        // the cache
1142        retryPkt = pkt;
1143    }
1144    return success;
1145}
1146
void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }
}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the request and packet to free allocated memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests which is done inside the
    // elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the request and packet to free
    // memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename,
    const double time_multiplier)
    : trace(filename),
      timeMultiplier(time_multiplier),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }

    // Assign window size equal to the field in the trace that was recorded
    // when the data dependency trace was captured in the o3cpu model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

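// Populate a GraphNode from the next InstDepRecord protobuf message: the
// required fields (sequence number, type, scaled compute delay), the repeated
// ROB and register dependency lists (register dependencies that duplicate a
// ROB dependency are dropped), the optional memory request fields, and the
// weighted micro-op count used as the ROB occupancy number.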
bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both a register
            // and an order dependency on another instruction. In such a case,
            // the register dependency is omitted.
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_asid())
            element->asid = pkt_msg.asid();
        else
            element->asid = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If register dependency is found, make it zero and return true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
                    "done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the rob dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is a ROB dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not a ROB dependency then it must be a register dependency.
        // If the register dependency is not found, it violates an assumption
        // and must be caught by the assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}
