trace_cpu.cc revision 11632:a96d6787b385
/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(name() + ".inst")),
        dataMasterID(params->system->getMasterId(name() + ".data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params),
        icacheNextEvent(this),
        dcacheNextEvent(this),
        oneTraceComplete(false),
        traceOffset(0),
        execCompleteEvent(nullptr)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
                "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
                "to %d exceeds the max. value of %d.\n",
                params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
                " %d exceeds the max. value of %d.\n",
                params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{
}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

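// A minimal sketch of instantiating this model from a Python config, kept
// here as a comment for reference. It assumes the parameter names used by
// the constructor above (instTraceFile, dataTraceFile, sizeROB,
// sizeStoreBuffer, sizeLoadBuffer); the trace paths are placeholders:
//
//   cpu = TraceCPU(instTraceFile="system.cpu.inst.trace.gz",
//                  dataTraceFile="system.cpu.data.trace.gz",
//                  sizeROB=512, sizeStoreBuffer=64, sizeLoadBuffer=64)
//
// The two traces are expected to be captured together by the o3 CPU's
// elastic trace probe, so that their tick offsets are consistent.
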
void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request
    Tick first_icache_tick = icacheGen.init();

    // Get the send tick of the first data read/write request
    Tick first_dcache_tick = dcacheGen.init();

    // Set the trace offset as the minimum of that in both traces
    traceOffset = std::min(first_icache_tick, first_dcache_tick);
    inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
            name(), traceOffset);

    // Schedule next icache and dcache event by subtracting the offset
    schedule(icacheNextEvent, first_icache_tick - traceOffset);
    schedule(dcacheNextEvent, first_dcache_tick - traceOffset);

    // Adjust the trace offset for the dcache generator's ready nodes
    // We don't need to do this for the icache generator as it will
    // send its first request at the first event and schedule subsequent
    // events using a relative tick delta
    dcacheGen.adjustInitTraceOffset(traceOffset);

    // The static counter for number of Trace CPUs is correctly set at this
    // point so create an event and pass it.
    execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                             numTraceCPUs);
}

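// Worked example of the offset arithmetic above (illustrative numbers
// only): if the first icache request in the trace is at tick 1000 and the
// first dcache request is at tick 1600, traceOffset becomes 1000, so
// icacheNextEvent is scheduled at tick 0 and dcacheNextEvent at tick 600.
// Playback thus starts immediately at simulation start while preserving
// the relative timing recorded in the traces.
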
void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because the
        // send failed and the next event will be scheduled via
        // recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the flag. If the
            // flag is already set, both traces are complete and the exit
            // event can be scheduled.
            checkAndSchedExitEvent();
        }
    }
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    // Update stat for numCycles
    numCycles = clockEdge() / clockPeriod();

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());
        schedule(*execCompleteEvent, curTick());
    }
}

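// Note on the exit condition: execCompleteEvent is a CountedExitEvent
// initialised with numTraceCPUs, and (to the best of our reading of
// sim/sim_events) it only exits the simulation once it has been scheduled
// that many times. So with, say, two Trace CPUs, each CPU schedules the
// event once after both of its traces finish, and the simulation exits
// only when the second CPU does so.
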
void
TraceCPU::regStats()
{
    BaseCPU::regStats();

    numSchedDcacheEvent
    .name(name() + ".numSchedDcacheEvent")
    .desc("Number of events scheduled to trigger data request generator")
    ;

    numSchedIcacheEvent
    .name(name() + ".numSchedIcacheEvent")
    .desc("Number of events scheduled to trigger instruction request "
          "generator")
    ;

    numOps
    .name(name() + ".numOps")
    .desc("Number of micro-ops simulated by the Trace CPU")
    ;

    cpi
    .name(name() + ".cpi")
    .desc("Cycles per micro-op used as a proxy for CPI")
    .precision(6)
    ;
    cpi = numCycles/numOps;

    icacheGen.regStats();
    dcacheGen.regStats();
}

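// A brief note on the cpi assignment above: cpi is a Stats::Formula, so
// the expression numCycles/numOps is not evaluated here; it is captured
// and recomputed whenever statistics are dumped. The quotient is a proxy
// for CPI because numOps counts micro-ops rather than architectural
// instructions.
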
void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
    .name(name() + ".maxDependents")
    .desc("Max number of dependents observed on a node")
    ;

    maxReadyListSize
    .name(name() + ".maxReadyListSize")
    .desc("Max size of the ready list observed")
    ;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    numSplitReqs
    .name(name() + ".numSplitReqs")
    .desc("Number of split requests")
    ;

    numSOLoads
    .name(name() + ".numSOLoads")
    .desc("Number of strictly ordered loads")
    ;

    numSOStores
    .name(name() + ".numSOStores")
    .desc("Number of strictly ordered stores")
    ;

    dataLastTick
    .name(name() + ".dataLastTick")
    .desc("Last tick simulated from the elastic data trace")
    ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency-free node "
            "%lli is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
{
    for (auto& free_node : readyList) {
        free_node.execTick -= offset;
    }
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{
    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then end
        // of trace has been reached and traceComplete needs to be set in
        // addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                          T& dep_array, uint8_t& num_dep)
{
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts, maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider
            // the execution of the parent is complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

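// Illustrative example of the annotation above (hypothetical sequence
// numbers): suppose a newly read node 7 lists a ROB dependency on node 5
// and a register dependency on node 3. If node 5 is still in depGraph,
// node 7 is appended to node 5's dependents list; if node 3 has already
// executed and left the graph, its entry in node 7's dependency array is
// zeroed and num_dep is decremented, so node 7 no longer waits on it.
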
void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
            "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
            depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the next free node has its execute
    // tick later than curTick or the end of readyList is reached
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt, send it; otherwise execute the node
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList will then be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // when the load is sent but only after its response is
                // received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as
            // complete as we do not send a request for this case. If it is
            // a store or a comp node we also mark all its dependents
            // complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet.  Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // Update the stat for numOps simulated
            owner.updateNumOps(node_ptr->robNum);
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If the cache is not blocked, schedule an event for the first execTick
    // in readyList; otherwise the retry from the cache will schedule the
    // event. If the ready list is empty, check if the next pending node has
    // resources available to issue. If yes, then schedule an event for the
    // next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
                hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If the trace is completely read, the readyList is empty and the
    // depGraph is empty, set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{
    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }

    // Create a request and the packet containing request
    Request* req = new Request(node_ptr->physAddr, node_ptr->size,
                               node_ptr->flags, masterID, node_ptr->seqNum,
                               ContextID(0));
    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the asid and virtual address fields
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                        node_ptr->flags, masterID, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}

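// Worked example of the line-crossing truncation above (illustrative
// values): with a 64-byte cache line, a request at physAddr 0x107C of
// size 16 has blk_offset 0x3C (60), and 60 + 16 > 64, so the size is
// truncated to 64 - 60 = 4 bytes and numSplitReqs is incremented. Only
// the first-line portion of the access is simulated.
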
bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is "
            "now dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
            node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
            "Adding to readyList, occupying resources.\n", node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;
    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to
        // schedule an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking"
                " up dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // Update the stat for numOps completed
        owner.updateNumOps(node_ptr->robNum);
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have execute
        // tick greater than or equal to curTick. But a new dep-free node might
        // have its execute tick earlier. Therefore, attempt to reschedule. It
        // could happen that the readyList is empty and we got here via a
        // last remaining response. So, either the trace is complete or there
        // are pending nodes in the depFreeQueue. The checking is done in the
        // execute() control flow, so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                            maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one whose packet is awaiting a
    // retry, skip past it so that the failed node keeps its position at the
    // head of the list.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node then
        // go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                        maxReadyListSize.value());
}

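// Illustrative example of the resulting order (hypothetical entries shown
// as seqNum@execTick): inserting 12@500 into a readyList holding 9@400,
// 15@500, 11@600 yields 9@400, 12@500, 15@500, 11@600. Entries are sorted
// primarily by execTick with ties broken by ascending seqNum, except that
// a node awaiting a retry always stays at the head.
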
void
TraceCPU::ElasticDataGen::printReadyList()
{
    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
            node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
  : sizeROB(max_rob),
    sizeStoreBuffer(max_stores),
    sizeLoadBuffer(max_loads),
    oldestInFlightRobNum(UINT64_MAX),
    numInFlightLoads(0),
    numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy ROB entry for the issued node
    // Merely maintain the oldest node, i.e. the numerically least robNum, by
    // saving it in the variable oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from "
        "inFlightNodes..\n", done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we deleted the only in-flight node, oldestInFlightRobNum is
        // reset to its initialised (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
        "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
        oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus the ROB
    // entry is freed. But it occupies an entry in the Store Buffer until its
    // response is received. A load is considered complete when a response is
    // received, thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, for e.g.
    // writes to device registers, we do that within release() which is called
    // when the node is executed and taken off the readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a younger
        // instruction than the oldest instruction in-flight. Thus we make
        // sure num_in_flight_nodes does not underflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = %d - %d =  %d", new_node->seqNum,
             new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum will
        // be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
            new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
        numInFlightLoads, sizeLoadBuffer,
        numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}

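// Worked example of the ROB occupancy check above (illustrative numbers):
// if the oldest in-flight node has robNum 100 and a new dep-free node has
// robNum 140, then 40 ROB entries are considered occupied; with sizeROB of
// 32 the node would be held back in depFreeQueue, while with sizeROB of
// 128 it could issue, subject to the load/store buffer checks.
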
bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
            "LQ = %d/%d, SQ = %d/%d.\n",
            oldestInFlightRobNum,
            numInFlightLoads, sizeLoadBuffer,
            numInFlightStores, sizeStoreBuffer);
}

void
TraceCPU::FixedRetryGen::regStats()
{
    using namespace Stats;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    instLastTick
    .name(name() + ".instLastTick")
    .desc("Last tick simulated from the fixed inst trace")
    ;
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {
        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++numRetrySucceeded;
    } else {
        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                    currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++numSendSucceeded;
        }
    }
    // If packet was sent successfully, either retryPkt or currElement, return
    // true to indicate to schedule event at current Tick plus delta. If packet
    // was sent successfully and there is no next packet to send, return false.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
        "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement, currElement gets cleared so save the
    // tick to calculate delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

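// Illustrative example of the delta computation above: if the element just
// sent was recorded at tick 1000 and the next element in the trace is at
// tick 1600, delta becomes 600 and schedIcacheNext() schedules the next
// IcacheGen event at curTick() + 600 via tickDelta(). Replay therefore
// preserves the inter-request gaps of the fixed-timing trace.
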
void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
              Request::FlagsType flags, Addr pc)
{
    // Create new request
    Request* req = new Request(addr, size, flags, masterID);
    req->setPC(pc);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setContext(ContextID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }
}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the request and packet to free allocated memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests which is done inside the
    // elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the request and packet to free
    // memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename,
    const double time_multiplier)
    : trace(filename),
      timeMultiplier(time_multiplier),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }

    // Assign window size equal to the field in the trace that was recorded
    // when the data dependency trace was captured in the o3cpu model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both, a register
            // and order dependency on an instruction. In such a case, the
            // register dependency is omitted
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_asid())
            element->asid = pkt_msg.asid();
        else
            element->asid = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

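// A note on the robNum computation above: microOpCount advances by one per
// record, plus the record's optional weight. Our understanding is that the
// weight stands in for instructions that the elastic trace probe filtered
// out of the trace, so robNum tracks the ROB occupancy the original o3cpu
// run would have seen rather than just the count of recorded nodes.
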
bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If register dependency is found, make it zero and return true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                    "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the rob dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep()
{
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep()
{
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is a ROB dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not a ROB dependency then it must be a register dependency.
        // If the register dependency is not found, it violates an assumption
        // and must be caught by assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}
1493