// trace_cpu.cc revision 11633:40c951e58c2b
/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(name() + ".inst")),
        dataMasterID(params->system->getMasterId(name() + ".data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params),
        icacheNextEvent(this),
        dcacheNextEvent(this),
        oneTraceComplete(false),
        traceOffset(0),
        execCompleteEvent(nullptr),
        enableEarlyExit(params->enableEarlyExit)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
                "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
                "to %d exceeds the max. value of %d.\n",
                params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
                " %d exceeds the max. value of %d.\n",
                params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{
}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}
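
// Note: takeOverFrom() is used when the TraceCPU is switched in for another
// CPU model during simulation, e.g. after the detailed CPU that recorded the
// traces has been drained; the port peers are handed over so the memory
// system connections stay intact.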

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request
    Tick first_icache_tick = icacheGen.init();

    // Get the send tick of the first data read/write request
    Tick first_dcache_tick = dcacheGen.init();

    // Set the trace offset as the minimum of that in both traces
    traceOffset = std::min(first_icache_tick, first_dcache_tick);
    inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
            name(), traceOffset);

    // Schedule next icache and dcache event by subtracting the offset
    schedule(icacheNextEvent, first_icache_tick - traceOffset);
    schedule(dcacheNextEvent, first_dcache_tick - traceOffset);

    // Adjust the trace offset for the dcache generator's ready nodes.
    // We don't need to do this for the icache generator as it will
    // send its first request at the first event and schedule subsequent
    // events using a relative tick delta.
    dcacheGen.adjustInitTraceOffset(traceOffset);
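
    // Illustration with made-up ticks: if the first icache request is at
    // tick 1000 and the first dcache request at tick 1500, traceOffset
    // becomes 1000, icacheNextEvent is scheduled at tick 0, dcacheNextEvent
    // at tick 500, and the dcache generator's ready nodes have their
    // execute ticks shifted back by 1000 as well.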

    // If the Trace CPU simulation is configured to exit on any one trace
    // completion then we don't need a counted event to count down all Trace
    // CPUs in the system. If not then instantiate a counted event.
    if (!enableEarlyExit) {
        // The static counter for number of Trace CPUs is correctly set at
        // this point so create an event and pass it.
        execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                 numTraceCPUs);
    }
}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because sending
        // failed and the next event will be scheduled via recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete and we can exit.
            checkAndSchedExitEvent();
        }
    }
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    // Update stat for numCycles
    numCycles = clockEdge() / clockPeriod();
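    // clockEdge() with no argument gives the tick of the current clock
    // edge, so dividing by clockPeriod() converts it into an absolute cycle
    // count; e.g. with a (hypothetical) 500-tick period, an edge at tick
    // 2000 corresponds to numCycles = 4.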

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());
        // If the replay is configured to exit early, i.e. as soon as any one
        // Trace CPU completes, then exit the simulation immediately.
        // Otherwise, schedule the counted exit event that counts down
        // completion of each Trace CPU.
        if (enableEarlyExit) {
            exitSimLoop("End of trace reached");
        } else {
            schedule(*execCompleteEvent, curTick());
        }
    }
}

void
TraceCPU::regStats()
{
    BaseCPU::regStats();

    numSchedDcacheEvent
    .name(name() + ".numSchedDcacheEvent")
    .desc("Number of events scheduled to trigger data request generator")
    ;

    numSchedIcacheEvent
    .name(name() + ".numSchedIcacheEvent")
    .desc("Number of events scheduled to trigger instruction request "
          "generator")
    ;

    numOps
    .name(name() + ".numOps")
    .desc("Number of micro-ops simulated by the Trace CPU")
    ;

    cpi
    .name(name() + ".cpi")
    .desc("Cycles per micro-op used as a proxy for CPI")
    .precision(6)
    ;
    cpi = numCycles / numOps;
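    // cpi is a statistics formula, so the expression above is evaluated
    // when the statistics are dumped rather than once here.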

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
    .name(name() + ".maxDependents")
    .desc("Max number of dependents observed on a node")
    ;

    maxReadyListSize
    .name(name() + ".maxReadyListSize")
    .desc("Max size of the ready list observed")
    ;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    numSplitReqs
    .name(name() + ".numSplitReqs")
    .desc("Number of split requests")
    ;

    numSOLoads
    .name(name() + ".numSOLoads")
    .desc("Number of strictly ordered loads")
    ;

    numSOStores
    .name(name() + ".numSOStores")
    .desc("Number of strictly ordered stores")
    ;

    dataLastTick
    .name(name() + ".dataLastTick")
    .desc("Last tick simulated from the elastic data trace")
    ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
            " is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
{
    for (auto& free_node : readyList) {
        free_node.execTick -= offset;
    }
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{
    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then end
        // of trace has been reached and traceComplete needs to be set in
        // addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            // Free the node allocated for the failed read before bailing out
            delete new_node;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}
396
397template<typename T> void
398TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
399                                            T& dep_array, uint8_t& num_dep)
400{
401    for (auto& a_dep : dep_array) {
402        // The convention is to set the dependencies starting with the first
403        // index in the ROB and register dependency arrays. Thus, when we reach
404        // a dependency equal to the initialisation value of zero, we know have
405        // iterated over all dependencies and can break.
406        if (a_dep == 0)
407            break;
408        // We look up the valid dependency, i.e. the parent of this node
409        auto parent_itr = depGraph.find(a_dep);
410        if (parent_itr != depGraph.end()) {
411            // If the parent is found, it is yet to be executed. Append a
412            // pointer to the new node to the dependents list of the parent
413            // node.
414            parent_itr->second->dependents.push_back(new_node);
415            auto num_depts = parent_itr->second->dependents.size();
416            maxDependents = std::max<double>(num_depts, maxDependents.value());
417        } else {
418            // The dependency is not found in the graph. So consider
419            // the execution of the parent is complete, i.e. remove this
420            // dependency.
421            a_dep = 0;
422            num_dep--;
423        }
424    }
425}
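
// Worked example with hypothetical sequence numbers: suppose a new node with
// seqNum 20 arrives with robDep = {18, 0, ...}. If node 18 is still in
// depGraph, node 20 is appended to node 18's dependents and can only issue
// once removeRobDep(18) fires. If node 18 has already completed and left the
// graph, the dependency is zeroed here and numRobDep is decremented, possibly
// leaving node 20 immediately dependency-free.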

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
            "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
            depFreeQueue.size());
    hwResource.printOccupancy();
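
    // Overview of the steps below: (1) if flagged, read the next window of
    // trace records into depGraph; (2) retry issuing nodes stalled in
    // depFreeQueue for lack of hardware resources; (3) walk readyList in
    // execute-tick order, sending memory requests and waking dependents;
    // (4) schedule the next event or mark execution complete.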

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the end is reached or the next free
    // node has its execute tick later than curTick. The end-of-list check
    // must come first so that the iterator is never dereferenced at end().
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt, send it, else execute the node
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from the cache will bring control back to execute().
        // The first node in readyList will then be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // when the load is sent but only when its response is
                // received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as
            // complete as we do not send a request for this case. If it is a
            // store or a comp node we also mark all its dependents complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet.  Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // Update the stat for numOps simulated
            owner.updateNumOps(node_ptr->robNum);
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If the cache is not blocked, schedule an event for the first execTick
    // in readyList, else the retry from the cache will schedule the event.
    // If the ready list is empty then check if the next pending node has
    // resources available to issue. If yes, then schedule an event for the
    // next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
                hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If the trace is completely read, the readyList is empty and the
    // depGraph is empty, set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{
    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }
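
    // Worked example of the truncation above (illustrative numbers): with a
    // 64-byte cache line, a 16-byte access at physAddr 0x7C has blk_offset
    // 0x3C (60); since 60 + 16 > 64, the size is truncated to 64 - 60 = 4
    // bytes and numSplitReqs is incremented.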

    // Create a request and the packet containing request
    Request* req = new Request(node_ptr->physAddr, node_ptr->size,
                               node_ptr->flags, masterID, node_ptr->seqNum,
                               ContextID(0));
    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the asid and virtual address fields
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                        node_ptr->flags, masterID, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is "
            "now dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
            node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
            "Adding to readyList, occupying resources.\n", node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;
    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider the store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to
        // schedule an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // Update the stat for numOps completed
        owner.updateNumOps(node_ptr->robNum);
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have
        // execute tick greater than or equal to curTick. But a new dep-free
        // node might have its execute tick earlier. Therefore, attempt to
        // reschedule. It could happen that the readyList is empty and we
        // got here via a last remaining response. So, either the trace is
        // complete or there are pending nodes in the depFreeQueue. The
        // checking is done in the execute() control flow, so schedule an
        // event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                            maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one that failed to execute, i.e.
    // it created the pending retryPkt, then skip past it so that its
    // position as the first node is maintained.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node then
        // go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                        maxReadyListSize.value());
}
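
// Ordering note for addToSortedReadyList() (hypothetical values): readyList
// is kept sorted by execTick, with ties broken by ascending seqNum.
// Inserting (seqNum 12, tick 700) into [(10, 700), (11, 800)] yields
// [(10, 700), (12, 700), (11, 800)]: the equal tick keeps seqNum order and
// 700 < 800 places the new node before seqNum 11.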

void
TraceCPU::ElasticDataGen::printReadyList()
{
    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
            node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}
874
875TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
876    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
877  : sizeROB(max_rob),
878    sizeStoreBuffer(max_stores),
879    sizeLoadBuffer(max_loads),
880    oldestInFlightRobNum(UINT64_MAX),
881    numInFlightLoads(0),
882    numInFlightStores(0)
883{}
884
885void
886TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
887{
888    // Occupy ROB entry for the issued node
889    // Merely maintain the oldest node, i.e. numerically least robNum by saving
890    // it in the variable oldestInFLightRobNum.
891    inFlightNodes[new_node->seqNum] = new_node->robNum;
892    oldestInFlightRobNum = inFlightNodes.begin()->second;
893
894    // Occupy Load/Store Buffer entry for the issued node if applicable
895    if (new_node->isLoad()) {
896        ++numInFlightLoads;
897    } else if (new_node->isStore()) {
898        ++numInFlightStores;
899    } // else if it is a non load/store node, no buffer entry is occupied
900
901    printOccupancy();
902}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from "
        "inFlightNodes..\n", done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we deleted the only in-flight node then oldestInFlightRobNum
        // is reset to its initialised (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
        "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
        oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus the ROB
    // entry is freed. But it occupies an entry in the Store Buffer until its
    // response is received. A load is considered complete when a response is
    // received, thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release(), which is called when
    // the node is executed and taken off the readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a younger
        // instruction than the oldest instruction in-flight. Thus we make
        // sure num_in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = %d - %d = %d", new_node->seqNum,
            new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum
        // will be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
            new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
        numInFlightLoads, sizeLoadBuffer,
        numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}
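
// ROB occupancy example for isAvailable() (illustrative numbers): with
// sizeROB = 8, if the oldest in-flight node has robNum 100 and a dep-free
// node with robNum 110 is checked, num_in_flight_nodes = 110 - 100 = 10,
// which is >= 8, so the node waits in depFreeQueue until older nodes retire
// and the occupancy estimate drops.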

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
            "LQ = %d/%d, SQ = %d/%d.\n",
            oldestInFlightRobNum,
            numInFlightLoads, sizeLoadBuffer,
            numInFlightStores, sizeStoreBuffer);
}

void
TraceCPU::FixedRetryGen::regStats()
{
    using namespace Stats;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    instLastTick
    .name(name() + ".instLastTick")
    .desc("Last tick simulated from the fixed inst trace")
    ;
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {
        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++numRetrySucceeded;
    } else {
        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                    currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++numSendSucceeded;
        }
    }
    // A packet, either retryPkt or currElement, was sent successfully.
    // Return true to indicate that an event should be scheduled at the
    // current tick plus delta; if there is no next packet to send, return
    // false instead.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
        "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement; currElement gets cleared, so save
    // the tick to calculate the delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
              Request::FlagsType flags, Addr pc)
{
    // Create new request
    Request* req = new Request(addr, size, flags, masterID);
    req->setPC(pc);

    // If this is not done it triggers an assert in the L1 cache for an
    // invalid contextId
    req->setContext(ContextID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // the retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // the retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }
}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the request and packet to free allocated memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests, which is done inside
    // the elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the request and packet to free
    // memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename,
    const double time_multiplier)
    : trace(filename),
      timeMultiplier(time_multiplier),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream.
    // The tick frequency check must be outside the failure branch, otherwise
    // it would never execute.
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    } else if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    } else {
        // Assign window size equal to the field in the trace that was
        // recorded when the data dependency trace was captured in the o3cpu
        // model
        windowSize = header_msg.window_size();
    }
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU
        // frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field regDepList
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both a register
            // and an order dependency on another instruction. In such a
            // case, the register dependency is omitted.
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_asid())
            element->asid = pkt_msg.asid();
        else
            element->asid = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
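        // The optional weight field, as used below, appears to account for
        // trace records filtered out at capture time; adding it to
        // microOpCount keeps robNum consistent with the original program's
        // ROB occupancy.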
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If register dependency is found, make it zero and return true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                    "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the rob dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep()
{
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep()
{
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is an ROB dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not an ROB dependency then it must be a register
        // dependency. If the register dependency is not found, it violates
        // an assumption and must be caught by the assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream.
    // The tick frequency check must be outside the failure branch, otherwise
    // it would never execute.
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    } else if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}