trace_cpu.cc revision 11253
1/*
2 * Copyright (c) 2013 - 2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 *          Andreas Hansson
39 *          Thomas Grass
40 */
41
42#include "cpu/trace/trace_cpu.hh"
43
44#include "sim/sim_exit.hh"
45
46// Declare and initialize the static counter for number of trace CPUs.
47int TraceCPU::numTraceCPUs = 0;
48
49TraceCPU::TraceCPU(TraceCPUParams *params)
50    :   BaseCPU(params),
51        icachePort(this),
52        dcachePort(this),
53        instMasterID(params->system->getMasterId(name() + ".inst")),
54        dataMasterID(params->system->getMasterId(name() + ".data")),
55        instTraceFile(params->instTraceFile),
56        dataTraceFile(params->dataTraceFile),
57        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59                    params->sizeROB, params->sizeStoreBuffer,
60                    params->sizeLoadBuffer),
61        icacheNextEvent(this),
62        dcacheNextEvent(this),
63        oneTraceComplete(false),
64        firstFetchTick(0),
65        execCompleteEvent(nullptr)
66{
67    // Increment static counter for number of Trace CPUs.
68    ++TraceCPU::numTraceCPUs;
69
70    // Check that the python parameters for sizes of ROB, store buffer and load
71    // buffer do not overflow the corresponding C++ variables.
72    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
73                "max. value of %d.\n", params->sizeROB, UINT16_MAX);
74    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d "
75                "exceeds the max. value of %d.\n", params->sizeROB,
76                UINT16_MAX);
77    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
78                " %d exceeds the max. value of %d.\n",
79                params->sizeLoadBuffer, UINT16_MAX);
80}
81
82TraceCPU::~TraceCPU()
83{
84
85}
86
87TraceCPU*
88TraceCPUParams::create()
89{
90    return new TraceCPU(this);
91}
92
93void
94TraceCPU::takeOverFrom(BaseCPU *oldCPU)
95{
96    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
97    assert(!getInstPort().isConnected());
98    assert(oldCPU->getInstPort().isConnected());
99    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
100    oldCPU->getInstPort().unbind();
101    getInstPort().bind(inst_peer_port);
102
103    assert(!getDataPort().isConnected());
104    assert(oldCPU->getDataPort().isConnected());
105    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
106    oldCPU->getDataPort().unbind();
107    getDataPort().bind(data_peer_port);
108}
109
110void
111TraceCPU::init()
112{
113    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
114            "\n", instTraceFile);
115    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
116            dataTraceFile);
117
118    BaseCPU::init();
119
120    // Get the send tick of the first instruction read request and schedule
121    // icacheNextEvent at that tick.
122    Tick first_icache_tick = icacheGen.init();
123    schedule(icacheNextEvent, first_icache_tick);
124
125    // Get the send tick of the first data read/write request and schedule
126    // dcacheNextEvent at that tick.
127    Tick first_dcache_tick = dcacheGen.init();
128    schedule(dcacheNextEvent, first_dcache_tick);
129
130    // The static counter for number of Trace CPUs is correctly set at this
131    // point so create an event and pass it.
132    execCompleteEvent = new CountedExitEvent("end of all traces reached.",
133                                                numTraceCPUs);
134    // Save the first fetch request tick to dump it as tickOffset
135    firstFetchTick = first_icache_tick;
136}
137
138void
139TraceCPU::schedIcacheNext()
140{
141    DPRINTF(TraceCPUInst, "IcacheGen event.\n");
142
143    // Try to send the current packet or a retry packet if there is one
144    bool sched_next = icacheGen.tryNext();
145    // If packet sent successfully, schedule next event
146    if (sched_next) {
147        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
148                "at %d.\n", curTick() + icacheGen.tickDelta());
149        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
150        ++numSchedIcacheEvent;
151    } else {
152        // check if traceComplete. If not, do nothing because sending failed
153        // and next event will be scheduled via RecvRetry()
154        if (icacheGen.isTraceComplete()) {
155            // If this is the first trace to complete, set the variable. If it
156            // is already set then both traces are complete to exit sim.
157            checkAndSchedExitEvent();
158        }
159    }
160    return;
161}
162
163void
164TraceCPU::schedDcacheNext()
165{
166    DPRINTF(TraceCPUData, "DcacheGen event.\n");
167
168    dcacheGen.execute();
169    if (dcacheGen.isExecComplete()) {
170        checkAndSchedExitEvent();
171    }
172}
173
174void
175TraceCPU::checkAndSchedExitEvent()
176{
177    if (!oneTraceComplete) {
178        oneTraceComplete = true;
179    } else {
180        // Schedule event to indicate execution is complete as both
181        // instruction and data access traces have been played back.
182        inform("%s: Execution complete.\n", name());
183
184        // Record stats which are computed at the end of simulation
185        tickOffset = firstFetchTick;
186        numCycles = (clockEdge() - firstFetchTick) / clockPeriod();
187        numOps = dcacheGen.getMicroOpCount();
188        schedule(*execCompleteEvent, curTick());
189    }
190}
191
192void
193TraceCPU::regStats()
194{
195
196    BaseCPU::regStats();
197
198    numSchedDcacheEvent
199    .name(name() + ".numSchedDcacheEvent")
200    .desc("Number of events scheduled to trigger data request generator")
201    ;
202
203    numSchedIcacheEvent
204    .name(name() + ".numSchedIcacheEvent")
205    .desc("Number of events scheduled to trigger instruction request generator")
206    ;
207
208    numOps
209    .name(name() + ".numOps")
210    .desc("Number of micro-ops simulated by the Trace CPU")
211    ;
212
213    cpi
214    .name(name() + ".cpi")
215    .desc("Cycles per micro-op used as a proxy for CPI")
216    .precision(6)
217    ;
218    cpi = numCycles/numOps;
219
220    tickOffset
221    .name(name() + ".tickOffset")
222    .desc("The first execution tick for the root node of elastic traces")
223    ;
224
225    icacheGen.regStats();
226    dcacheGen.regStats();
227}
228
229void
230TraceCPU::ElasticDataGen::regStats()
231{
232    using namespace Stats;
233
234    maxDependents
235    .name(name() + ".maxDependents")
236    .desc("Max number of dependents observed on a node")
237    ;
238
239    maxReadyListSize
240    .name(name() + ".maxReadyListSize")
241    .desc("Max size of the ready list observed")
242    ;
243
244    numSendAttempted
245    .name(name() + ".numSendAttempted")
246    .desc("Number of first attempts to send a request")
247    ;
248
249    numSendSucceeded
250    .name(name() + ".numSendSucceeded")
251    .desc("Number of successful first attempts")
252    ;
253
254    numSendFailed
255    .name(name() + ".numSendFailed")
256    .desc("Number of failed first attempts")
257    ;
258
259    numRetrySucceeded
260    .name(name() + ".numRetrySucceeded")
261    .desc("Number of successful retries")
262    ;
263
264    numSplitReqs
265    .name(name() + ".numSplitReqs")
266    .desc("Number of split requests")
267    ;
268
269    numSOLoads
270    .name(name() + ".numSOLoads")
271    .desc("Number of strictly ordered loads")
272    ;
273
274    numSOStores
275    .name(name() + ".numSOStores")
276    .desc("Number of strictly ordered stores")
277    ;
278
279    dataLastTick
280    .name(name() + ".dataLastTick")
281    .desc("Last tick simulated from the elastic data trace")
282    ;
283}
284
285Tick
286TraceCPU::ElasticDataGen::init()
287{
288    DPRINTF(TraceCPUData, "Initializing data memory request generator "
289            "DcacheGen: elastic issue with retry.\n");
290
291    if (!readNextWindow())
292        panic("Trace has %d elements. It must have at least %d elements.\n",
293              depGraph.size(), 2 * windowSize);
294    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
295            depGraph.size());
296
297    if (!readNextWindow())
298        panic("Trace has %d elements. It must have at least %d elements.\n",
299              depGraph.size(), 2 * windowSize);
300    DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
301            depGraph.size());
302
303    // Print readyList
304    if (DTRACE(TraceCPUData)) {
305        printReadyList();
306    }
307    auto free_itr = readyList.begin();
308    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
309            " is %d.\n", free_itr->seqNum, free_itr->execTick);
310    // Return the execute tick of the earliest ready node so that an event
311    // can be scheduled to call execute()
312    return (free_itr->execTick);
313}
314
315void
316TraceCPU::ElasticDataGen::exit()
317{
318    trace.reset();
319}
320
321bool
322TraceCPU::ElasticDataGen::readNextWindow()
323{
324
325    // Read and add next window
326    DPRINTF(TraceCPUData, "Reading next window from file.\n");
327
328    if (traceComplete) {
329        // We are at the end of the file, thus we have no more records.
330        // Return false.
331        return false;
332    }
333
334    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
335            depGraph.size());
336
337    uint32_t num_read = 0;
338    while (num_read != windowSize) {
339
340        // Create a new graph node
341        GraphNode* new_node = new GraphNode;
342
343        // Read the next line to get the next record. If that fails then end of
344        // trace has been reached and traceComplete needs to be set in addition
345        // to returning false.
346        if (!trace.read(new_node)) {
347            DPRINTF(TraceCPUData, "\tTrace complete!\n");
348            traceComplete = true;
349            return false;
350        }
351
352        // Annotate the ROB dependencies of the new node onto the parent nodes.
353        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
354        // Annotate the register dependencies of the new node onto the parent
355        // nodes.
356        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
357
358        num_read++;
359        // Add to map
360        depGraph[new_node->seqNum] = new_node;
361        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
362            // Source dependencies are already complete, check if resources
363            // are available and issue. The execution time is approximated
364            // to current time plus the computational delay.
365            checkAndIssue(new_node);
366        }
367    }
368
369    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
370            depGraph.size());
371    return true;
372}
373
374template<typename T> void
375TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
376                                            T& dep_array, uint8_t& num_dep)
377{
378    for (auto& a_dep : dep_array) {
379        // The convention is to set the dependencies starting with the first
380        // index in the ROB and register dependency arrays. Thus, when we reach
381        // a dependency equal to the initialisation value of zero, we know have
382        // iterated over all dependencies and can break.
383        if (a_dep == 0)
384            break;
385        // We look up the valid dependency, i.e. the parent of this node
386        auto parent_itr = depGraph.find(a_dep);
387        if (parent_itr != depGraph.end()) {
388            // If the parent is found, it is yet to be executed. Append a
389            // pointer to the new node to the dependents list of the parent
390            // node.
391            parent_itr->second->dependents.push_back(new_node);
392            auto num_depts = parent_itr->second->dependents.size();
393            maxDependents = std::max<double>(num_depts, maxDependents.value());
394        } else {
395            // The dependency is not found in the graph. So consider
396            // the execution of the parent is complete, i.e. remove this
397            // dependency.
398            a_dep = 0;
399            num_dep--;
400        }
401    }
402}
403
404void
405TraceCPU::ElasticDataGen::execute()
406{
407    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
408    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
409            "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
410            depFreeQueue.size());
411    hwResource.printOccupancy();
412
413    // Read next window to make sure that dependents of all dep-free nodes
414    // are in the depGraph
415    if (nextRead) {
416        readNextWindow();
417        nextRead = false;
418    }
419
420    // First attempt to issue the pending dependency-free nodes held
421    // in depFreeQueue. If resources have become available for a node,
422    // then issue it, i.e. add the node to readyList.
423    while (!depFreeQueue.empty()) {
424        if (checkAndIssue(depFreeQueue.front(), false)) {
425            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
426                "%lli.\n", (depFreeQueue.front())->seqNum);
427            depFreeQueue.pop();
428        } else {
429            break;
430        }
431    }
432    // Proceed to execute from readyList
433    auto graph_itr = depGraph.begin();
434    auto free_itr = readyList.begin();
435    // Iterate through readyList until the next free node has its execute
436    // tick later than curTick or the end of readyList is reached
437    while (free_itr->execTick <= curTick() && free_itr != readyList.end()) {
438
439        // Get pointer to the node to be executed
440        graph_itr = depGraph.find(free_itr->seqNum);
441        assert(graph_itr != depGraph.end());
442        GraphNode* node_ptr = graph_itr->second;
443
444        // If there is a retryPkt send that else execute the load
445        if (retryPkt) {
446            // The retryPkt must be the request that was created by the
447            // first node in the readyList.
448            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
449                panic("Retry packet's seqence number does not match "
450                      "the first node in the readyList.\n");
451            }
452            if (port.sendTimingReq(retryPkt)) {
453                ++numRetrySucceeded;
454                retryPkt = nullptr;
455            }
456        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
457            // If there is no retryPkt, attempt to send a memory request in
458            // case of a load or store node. If the send fails, executeMemReq()
459            // returns a packet pointer, which we save in retryPkt. In case of
460            // a comp node we don't do anything and simply continue as if the
461            // execution of the comp node succedded.
462            retryPkt = executeMemReq(node_ptr);
463        }
464        // If the retryPkt or a new load/store node failed, we exit from here
465        // as a retry from cache will bring the control to execute(). The
466        // first node in readyList then, will be the failed node.
467        if (retryPkt) {
468            break;
469        }
470
471        // Proceed to remove dependencies for the successfully executed node.
472        // If it is a load which is not strictly ordered and we sent a
473        // request for it successfully, we do not yet mark any register
474        // dependencies complete. But as per dependency modelling we need
475        // to mark ROB dependencies of load and non load/store nodes which
476        // are based on successful sending of the load as complete.
477        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
478            // If execute succeeded mark its dependents as complete
479            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
480                    "dependents..\n", node_ptr->seqNum);
481
482            auto child_itr = (node_ptr->dependents).begin();
483            while (child_itr != (node_ptr->dependents).end()) {
484                // ROB dependency of a store on a load must not be removed
485                // after load is sent but after response is received
486                if (!(*child_itr)->isStore() &&
487                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {
488
489                    // Check if the child node has become dependency free
490                    if ((*child_itr)->numRobDep == 0 &&
491                        (*child_itr)->numRegDep == 0) {
492
493                        // Source dependencies are complete, check if
494                        // resources are available and issue
495                        checkAndIssue(*child_itr);
496                    }
497                    // Remove this child for the sent load and point to new
498                    // location of the element following the erased element
499                    child_itr = node_ptr->dependents.erase(child_itr);
500                } else {
501                    // This child is not dependency-free, point to the next
502                    // child
503                    child_itr++;
504                }
505            }
506        } else {
507            // If it is a strictly ordered load mark its dependents as complete
508            // as we do not send a request for this case. If it is a store or a
509            // comp node we also mark all its dependents complete.
510            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
511                    " up dependents..\n", node_ptr->seqNum);
512
513            for (auto child : node_ptr->dependents) {
514                // If the child node is dependency free removeDepOnInst()
515                // returns true.
516                if (child->removeDepOnInst(node_ptr->seqNum)) {
517                    // Source dependencies are complete, check if resources
518                    // are available and issue
519                    checkAndIssue(child);
520                }
521            }
522        }
523
524        // After executing the node, remove from readyList and delete node.
525        readyList.erase(free_itr);
526        // If it is a cacheable load which was sent, don't delete
527        // just yet.  Delete it in completeMemAccess() after the
528        // response is received. If it is an strictly ordered
529        // load, it was not sent and all dependencies were simply
530        // marked complete. Thus it is safe to delete it. For
531        // stores and non load/store nodes all dependencies were
532        // marked complete so it is safe to delete it.
533        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
534            // Release all resources occupied by the completed node
535            hwResource.release(node_ptr);
536            // clear the dynamically allocated set of dependents
537            (node_ptr->dependents).clear();
538            // delete node
539            delete node_ptr;
540            // remove from graph
541            depGraph.erase(graph_itr);
542        }
543        // Point to first node to continue to next iteration of while loop
544        free_itr = readyList.begin();
545    } // end of while loop
546
547    // Print readyList, sizes of queues and resource status after updating
548    if (DTRACE(TraceCPUData)) {
549        printReadyList();
550        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
551        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
552                "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
553                depFreeQueue.size());
554        hwResource.printOccupancy();
555    }
556
557    if (retryPkt) {
558        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
559                "event from the cache for seq. num %lli.\n",
560                retryPkt->req->getReqInstSeqNum());
561        return;
562    }
563    // If the size of the dependency graph is less than the dependency window
564    // then read from the trace file to populate the graph next time we are in
565    // execute.
566    if (depGraph.size() < windowSize && !traceComplete)
567        nextRead = true;
568
569    // If cache is not blocked, schedule an event for the first execTick in
570    // readyList else retry from cache will schedule the event. If the ready
571    // list is empty then check if the next pending node has resources
572    // available to issue. If yes, then schedule an event for the next cycle.
573    if (!readyList.empty()) {
574        Tick next_event_tick = std::max(readyList.begin()->execTick,
575                                        curTick());
576        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
577                next_event_tick);
578        owner.schedDcacheNextEvent(next_event_tick);
579    } else if (readyList.empty() && !depFreeQueue.empty() &&
580                hwResource.isAvailable(depFreeQueue.front())) {
581        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
582                owner.clockEdge(Cycles(1)));
583        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
584    }
585
586    // If trace is completely read, readyList is empty and depGraph is empty,
587    // set execComplete to true
588    if (depGraph.empty() && readyList.empty() && traceComplete &&
589        !hwResource.awaitingResponse()) {
590        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
591        execComplete = true;
592        dataLastTick = curTick();
593    }
594}
595
596PacketPtr
597TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
598{
599
600    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
601            "virt addr %d, pc %#x, size %d, flags %d).\n",
602            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
603            node_ptr->pc, node_ptr->size, node_ptr->flags);
604
605    // If the request is strictly ordered, do not send it. Just return nullptr
606    // as if it was succesfully sent.
607    if (node_ptr->isStrictlyOrdered()) {
608        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
609        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
610                node_ptr->seqNum);
611        return nullptr;
612    }
613
614    // Check if the request spans two cache lines as this condition triggers
615    // an assert fail in the L1 cache. If it does then truncate the size to
616    // access only until the end of that line and ignore the remainder. The
617    // stat counting this is useful to keep a check on how frequently this
618    // happens. If required the code could be revised to mimick splitting such
619    // a request into two.
620    unsigned blk_size = owner.cacheLineSize();
621    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
622    if (!(blk_offset + node_ptr->size <= blk_size)) {
623        node_ptr->size = blk_size - blk_offset;
624        ++numSplitReqs;
625    }
626
627    // Create a request and the packet containing request
628    Request* req = new Request(node_ptr->physAddr, node_ptr->size,
629                               node_ptr->flags, masterID, node_ptr->seqNum,
630                               ContextID(0), ThreadID(0));
631    req->setPC(node_ptr->pc);
632    // If virtual address is valid, set the asid and virtual address fields
633    // of the request.
634    if (node_ptr->virtAddr != 0) {
635        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
636                        node_ptr->flags, masterID, node_ptr->pc);
637        req->setPaddr(node_ptr->physAddr);
638        req->setReqInstSeqNum(node_ptr->seqNum);
639    }
640
641    PacketPtr pkt;
642    uint8_t* pkt_data = new uint8_t[req->getSize()];
643    if (node_ptr->isLoad()) {
644        pkt = Packet::createRead(req);
645    } else {
646        pkt = Packet::createWrite(req);
647        memset(pkt_data, 0xA, req->getSize());
648    }
649    pkt->dataDynamic(pkt_data);
650
651    // Call MasterPort method to send a timing request for this packet
652    bool success = port.sendTimingReq(pkt);
653    ++numSendAttempted;
654
655    if (!success) {
656        // If it fails, return the packet to retry when a retry is signalled by
657        // the cache
658        ++numSendFailed;
659        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
660        return pkt;
661    } else {
662        // It is succeeds, return nullptr
663        ++numSendSucceeded;
664        return nullptr;
665    }
666}
667
668bool
669TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
670{
671    // Assert the node is dependency-free
672    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
673
674    // If this is the first attempt, print a debug message to indicate this.
675    if (first) {
676        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
677            " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
678            node_ptr->robNum);
679    }
680
681    // Check if resources are available to issue the specific node
682    if (hwResource.isAvailable(node_ptr)) {
683        // If resources are free only then add to readyList
684        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
685            " to readyList, occupying resources.\n", node_ptr->seqNum);
686        // Compute the execute tick by adding the compute delay for the node
687        // and add the ready node to the ready list
688        addToSortedReadyList(node_ptr->seqNum,
689                                owner.clockEdge() + node_ptr->compDelay);
690        // Account for the resources taken up by this issued node.
691        hwResource.occupy(node_ptr);
692        return true;
693
694    } else {
695        if (first) {
696            // Although dependencies are complete, resources are not available.
697            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
698                " Adding to depFreeQueue.\n", node_ptr->seqNum);
699            depFreeQueue.push(node_ptr);
700        } else {
701            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
702                "Still pending issue.\n", node_ptr->seqNum);
703        }
704        return false;
705    }
706}
707
708void
709TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
710{
711    // Release the resources for this completed node.
712    if (pkt->isWrite()) {
713        // Consider store complete.
714        hwResource.releaseStoreBuffer();
715        // If it is a store response then do nothing since we do not model
716        // dependencies on store completion in the trace. But if we were
717        // blocking execution due to store buffer fullness, we need to schedule
718        // an event and attempt to progress.
719    } else {
720        // If it is a load response then release the dependents waiting on it.
721        // Get pointer to the completed load
722        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
723        assert(graph_itr != depGraph.end());
724        GraphNode* node_ptr = graph_itr->second;
725
726        // Release resources occupied by the load
727        hwResource.release(node_ptr);
728
729        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
730                " dependents..\n", node_ptr->seqNum);
731
732        for (auto child : node_ptr->dependents) {
733            if (child->removeDepOnInst(node_ptr->seqNum)) {
734                checkAndIssue(child);
735            }
736        }
737
738        // clear the dynamically allocated set of dependents
739        (node_ptr->dependents).clear();
740        // delete node
741        delete node_ptr;
742        // remove from graph
743        depGraph.erase(graph_itr);
744    }
745
746    if (DTRACE(TraceCPUData)) {
747        printReadyList();
748    }
749
750    // If the size of the dependency graph is less than the dependency window
751    // then read from the trace file to populate the graph next time we are in
752    // execute.
753    if (depGraph.size() < windowSize && !traceComplete)
754        nextRead = true;
755
756    // If not waiting for retry, attempt to schedule next event
757    if (!retryPkt) {
758        // We might have new dep-free nodes in the list which will have execute
759        // tick greater than or equal to curTick. But a new dep-free node might
760        // have its execute tick earlier. Therefore, attempt to reschedule. It
761        // could happen that the readyList is empty and we got here via a
762        // last remaining response. So, either the trace is complete or there
763        // are pending nodes in the depFreeQueue. The checking is done in the
764        // execute() control flow, so schedule an event to go via that flow.
765        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
766            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
767        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
768                next_event_tick);
769        owner.schedDcacheNextEvent(next_event_tick);
770    }
771}
772
773void
774TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
775                                                    Tick exec_tick)
776{
777    ReadyNode ready_node;
778    ready_node.seqNum = seq_num;
779    ready_node.execTick = exec_tick;
780
781    // Iterator to readyList
782    auto itr = readyList.begin();
783
784    // If the readyList is empty, simply insert the new node at the beginning
785    // and return
786    if (itr == readyList.end()) {
787        readyList.insert(itr, ready_node);
788        maxReadyListSize = std::max<double>(readyList.size(),
789                                              maxReadyListSize.value());
790        return;
791    }
792
793    // If the new node has its execution tick equal to the first node in the
794    // list then go to the next node. If the first node in the list failed
795    // to execute, its position as the first is thus maintained.
796    if (retryPkt)
797        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
798            itr++;
799
800    // Increment the iterator and compare the node pointed to by it to the new
801    // node till the position to insert the new node is found.
802    bool found = false;
803    while (!found && itr != readyList.end()) {
804        // If the execution tick of the new node is less than the node then
805        // this is the position to insert
806        if (exec_tick < itr->execTick)
807            found = true;
808        // If the execution tick of the new node is equal to the node then
809        // sort in ascending order of sequence numbers
810        else if (exec_tick == itr->execTick) {
811            // If the sequence number of the new node is less than the node
812            // then this is the position to insert
813            if (seq_num < itr->seqNum)
814                found = true;
815            // Else go to next node
816            else
817                itr++;
818        }
819        // If the execution tick of the new node is greater than the node then
820        // go to the next node
821        else
822            itr++;
823    }
824    readyList.insert(itr, ready_node);
825    // Update the stat for max size reached of the readyList
826    maxReadyListSize = std::max<double>(readyList.size(),
827                                          maxReadyListSize.value());
828}
829
830void
831TraceCPU::ElasticDataGen::printReadyList() {
832
833    auto itr = readyList.begin();
834    if (itr == readyList.end()) {
835        DPRINTF(TraceCPUData, "readyList is empty.\n");
836        return;
837    }
838    DPRINTF(TraceCPUData, "Printing readyList:\n");
839    while (itr != readyList.end()) {
840        auto graph_itr = depGraph.find(itr->seqNum);
841        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
842        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
843            node_ptr->typeToStr(), itr->execTick);
844        itr++;
845    }
846}
847
848TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
849    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
850  : sizeROB(max_rob),
851    sizeStoreBuffer(max_stores),
852    sizeLoadBuffer(max_loads),
853    oldestInFlightRobNum(UINT64_MAX),
854    numInFlightLoads(0),
855    numInFlightStores(0)
856{}
857
858void
859TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
860{
861    // Occupy ROB entry for the issued node
862    // Merely maintain the oldest node, i.e. numerically least robNum by saving
863    // it in the variable oldestInFLightRobNum.
864    inFlightNodes[new_node->seqNum] = new_node->robNum;
865    oldestInFlightRobNum = inFlightNodes.begin()->second;
866
867    // Occupy Load/Store Buffer entry for the issued node if applicable
868    if (new_node->isLoad()) {
869        ++numInFlightLoads;
870    } else if (new_node->isStore()) {
871        ++numInFlightStores;
872    } // else if it is a non load/store node, no buffer entry is occupied
873
874    printOccupancy();
875}
876
877void
878TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
879{
880    assert(!inFlightNodes.empty());
881    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
882        done_node->seqNum);
883
884    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
885    inFlightNodes.erase(done_node->seqNum);
886
887    if (inFlightNodes.empty()) {
888        // If we delete the only in-flight node and then the
889        // oldestInFlightRobNum is set to it's initialized (max) value.
890        oldestInFlightRobNum = UINT64_MAX;
891    } else {
892        // Set the oldest in-flight node rob number equal to the first node in
893        // the inFlightNodes since that will have the numerically least value.
894        oldestInFlightRobNum = inFlightNodes.begin()->second;
895    }
896
897    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
898        "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
899        oldestInFlightRobNum);
900
901    // A store is considered complete when a request is sent, thus ROB entry is
902    // freed. But it occupies an entry in the Store Buffer until its response
903    // is received. A load is considered complete when a response is received,
904    // thus both ROB and Load Buffer entries can be released.
905    if (done_node->isLoad()) {
906        assert(numInFlightLoads != 0);
907        --numInFlightLoads;
908    }
909    // For normal writes, we send the requests out and clear a store buffer
910    // entry on response. For writes which are strictly ordered, for e.g.
911    // writes to device registers, we do that within release() which is called
912    // when node is executed and taken off from readyList.
913    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
914        releaseStoreBuffer();
915    }
916}
917
918void
919TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
920{
921    assert(numInFlightStores != 0);
922    --numInFlightStores;
923}
924
925bool
926TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
927    const GraphNode* new_node) const
928{
929    uint16_t num_in_flight_nodes;
930    if (inFlightNodes.empty()) {
931        num_in_flight_nodes = 0;
932        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
933            " #in-flight nodes = 0", new_node->seqNum);
934    } else if (new_node->robNum > oldestInFlightRobNum) {
935        // This is the intuitive case where new dep-free node is younger
936        // instruction than the oldest instruction in-flight. Thus we make sure
937        // in_flight_nodes does not overflow.
938        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
939        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
940            " #in-flight nodes = %d - %d =  %d", new_node->seqNum,
941             new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
942    } else {
943        // This is the case where an instruction older than the oldest in-
944        // flight instruction becomes dep-free. Thus we must have already
945        // accounted for the entry in ROB for this new dep-free node.
946        // Immediately after this check returns true, oldestInFlightRobNum will
947        // be updated in occupy(). We simply let this node issue now.
948        num_in_flight_nodes = 0;
949        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
950            " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
951            new_node->seqNum, new_node->robNum);
952    }
953    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ  = %d/%d.\n",
954        numInFlightLoads, sizeLoadBuffer,
955        numInFlightStores, sizeStoreBuffer);
956    // Check if resources are available to issue the specific node
957    if (num_in_flight_nodes >= sizeROB) {
958        return false;
959    }
960    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
961        return false;
962    }
963    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
964        return false;
965    }
966    return true;
967}
968
969bool
970TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
971    // Return true if there is at least one read or write request in flight
972    return (numInFlightStores != 0 || numInFlightLoads != 0);
973}
974
975void
976TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
977    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
978            "LQ = %d/%d, SQ  = %d/%d.\n",
979            oldestInFlightRobNum,
980            numInFlightLoads, sizeLoadBuffer,
981            numInFlightStores, sizeStoreBuffer);
982}
983
984void
985TraceCPU::FixedRetryGen::regStats()
986{
987    using namespace Stats;
988
989    numSendAttempted
990    .name(name() + ".numSendAttempted")
991    .desc("Number of first attempts to send a request")
992    ;
993
994    numSendSucceeded
995    .name(name() + ".numSendSucceeded")
996    .desc("Number of successful first attempts")
997    ;
998
999    numSendFailed
1000    .name(name() + ".numSendFailed")
1001    .desc("Number of failed first attempts")
1002    ;
1003
1004    numRetrySucceeded
1005    .name(name() + ".numRetrySucceeded")
1006    .desc("Number of successful retries")
1007    ;
1008
1009    instLastTick
1010    .name(name() + ".instLastTick")
1011    .desc("Last tick simulated from the fixed inst trace")
1012    ;
1013}
1014
1015Tick
1016TraceCPU::FixedRetryGen::init()
1017{
1018    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1019            " IcacheGen: fixed issue with retry.\n");
1020
1021    if (nextExecute()) {
1022        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1023        return currElement.tick;
1024    } else {
1025        panic("Read of first message in the trace failed.\n");
1026        return MaxTick;
1027    }
1028}
1029
1030bool
1031TraceCPU::FixedRetryGen::tryNext()
1032{
1033    // If there is a retry packet, try to send it
1034    if (retryPkt) {
1035
1036        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1037
1038        if (!port.sendTimingReq(retryPkt)) {
1039            // Still blocked! This should never occur.
1040            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1041            return false;
1042        }
1043        ++numRetrySucceeded;
1044    } else {
1045
1046        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1047
1048        // try sending current element
1049        assert(currElement.isValid());
1050
1051        ++numSendAttempted;
1052
1053        if (!send(currElement.addr, currElement.blocksize,
1054                    currElement.cmd, currElement.flags, currElement.pc)) {
1055            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1056            ++numSendFailed;
1057            // return false to indicate not to schedule next event
1058            return false;
1059        } else {
1060            ++numSendSucceeded;
1061        }
1062    }
1063    // If packet was sent successfully, either retryPkt or currElement, return
1064    // true to indicate to schedule event at current Tick plus delta. If packet
1065    // was sent successfully and there is no next packet to send, return false.
1066    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1067        "element.\n");
1068    retryPkt = nullptr;
1069    // Read next element into currElement, currElement gets cleared so save the
1070    // tick to calculate delta
1071    Tick last_tick = currElement.tick;
1072    if (nextExecute()) {
1073        assert(currElement.tick >= last_tick);
1074        delta = currElement.tick - last_tick;
1075    }
1076    return !traceComplete;
1077}
1078
1079void
1080TraceCPU::FixedRetryGen::exit()
1081{
1082    trace.reset();
1083}
1084
1085bool
1086TraceCPU::FixedRetryGen::nextExecute()
1087{
1088    if (traceComplete)
1089        // We are at the end of the file, thus we have no more messages.
1090        // Return false.
1091        return false;
1092
1093
1094    //Reset the currElement to the default values
1095    currElement.clear();
1096
1097    // Read the next line to get the next message. If that fails then end of
1098    // trace has been reached and traceComplete needs to be set in addition
1099    // to returning false. If successful then next message is in currElement.
1100    if (!trace.read(&currElement)) {
1101        traceComplete = true;
1102        instLastTick = curTick();
1103        return false;
1104    }
1105
1106    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1107            currElement.cmd.isRead() ? 'r' : 'w',
1108            currElement.addr,
1109            currElement.pc,
1110            currElement.blocksize,
1111            currElement.tick);
1112
1113    return true;
1114}
1115
1116bool
1117TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1118              Request::FlagsType flags, Addr pc)
1119{
1120
1121    // Create new request
1122    Request* req = new Request(addr, size, flags, masterID);
1123    req->setPC(pc);
1124
1125    // If this is not done it triggers assert in L1 cache for invalid contextId
1126    req->setThreadContext(ContextID(0), ThreadID(0));
1127
1128    // Embed it in a packet
1129    PacketPtr pkt = new Packet(req, cmd);
1130
1131    uint8_t* pkt_data = new uint8_t[req->getSize()];
1132    pkt->dataDynamic(pkt_data);
1133
1134    if (cmd.isWrite()) {
1135        memset(pkt_data, 0xA, req->getSize());
1136    }
1137
1138    // Call MasterPort method to send a timing request for this packet
1139    bool success = port.sendTimingReq(pkt);
1140    if (!success) {
1141        // If it fails, save the packet to retry when a retry is signalled by
1142        // the cache
1143        retryPkt = pkt;
1144    }
1145    return success;
1146}
1147
1148void
1149TraceCPU::icacheRetryRecvd()
1150{
1151    // Schedule an event to go through the control flow in the same tick as
1152    // retry is received
1153    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1154            " event @%lli.\n", curTick());
1155    schedule(icacheNextEvent, curTick());
1156}
1157
1158void
1159TraceCPU::dcacheRetryRecvd()
1160{
1161    // Schedule an event to go through the execute flow in the same tick as
1162    // retry is received
1163    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1164            " event @%lli.\n", curTick());
1165    schedule(dcacheNextEvent, curTick());
1166}
1167
1168void
1169TraceCPU::schedDcacheNextEvent(Tick when)
1170{
1171    if (!dcacheNextEvent.scheduled()) {
1172        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1173                when);
1174        schedule(dcacheNextEvent, when);
1175        ++numSchedDcacheEvent;
1176    } else if (when < dcacheNextEvent.when()) {
1177        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1178                " to %lli.\n", dcacheNextEvent.when(), when);
1179        reschedule(dcacheNextEvent, when);
1180    }
1181
1182}
1183
1184bool
1185TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1186{
1187    // All responses on the instruction fetch side are ignored. Simply delete
1188    // the request and packet to free allocated memory
1189    delete pkt->req;
1190    delete pkt;
1191
1192    return true;
1193}
1194
1195void
1196TraceCPU::IcachePort::recvReqRetry()
1197{
1198    owner->icacheRetryRecvd();
1199}
1200
1201void
1202TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1203{
1204    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1205    dcacheGen.completeMemAccess(pkt);
1206}
1207
1208bool
1209TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1210{
1211    // Handle the responses for data memory requests which is done inside the
1212    // elastic data generator
1213    owner->dcacheRecvTimingResp(pkt);
1214    // After processing the response delete the request and packet to free
1215    // memory
1216    delete pkt->req;
1217    delete pkt;
1218
1219    return true;
1220}
1221
1222void
1223TraceCPU::DcachePort::recvReqRetry()
1224{
1225    owner->dcacheRetryRecvd();
1226}
1227
1228TraceCPU::ElasticDataGen::InputStream::InputStream(const std::string& filename)
1229    : trace(filename),
1230      microOpCount(0)
1231{
1232    // Create a protobuf message for the header and read it from the stream
1233    ProtoMessage::InstDepRecordHeader header_msg;
1234    if (!trace.read(header_msg)) {
1235        panic("Failed to read packet header from %s\n", filename);
1236
1237        if (header_msg.tick_freq() != SimClock::Frequency) {
1238            panic("Trace %s was recorded with a different tick frequency %d\n",
1239                  header_msg.tick_freq());
1240        }
1241    } else {
1242        // Assign window size equal to the field in the trace that was recorded
1243        // when the data dependency trace was captured in the o3cpu model
1244        windowSize = header_msg.window_size();
1245    }
1246}
1247
1248void
1249TraceCPU::ElasticDataGen::InputStream::reset()
1250{
1251    trace.reset();
1252}
1253
1254bool
1255TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1256{
1257    ProtoMessage::InstDepRecord pkt_msg;
1258    if (trace.read(pkt_msg)) {
1259        // Required fields
1260        element->seqNum = pkt_msg.seq_num();
1261        element->type = pkt_msg.type();
1262        element->compDelay = pkt_msg.comp_delay();
1263
1264        // Repeated field robDepList
1265        element->clearRobDep();
1266        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1267        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1268            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1269            element->numRobDep += 1;
1270        }
1271
1272        // Repeated field
1273        element->clearRegDep();
1274        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1275        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1276            // There is a possibility that an instruction has both, a register
1277            // and order dependency on an instruction. In such a case, the
1278            // register dependency is omitted
1279            bool duplicate = false;
1280            for (int j = 0; j < element->numRobDep; j++) {
1281                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1282            }
1283            if (!duplicate) {
1284                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1285                element->numRegDep += 1;
1286            }
1287        }
1288
1289        // Optional fields
1290        if (pkt_msg.has_p_addr())
1291            element->physAddr = pkt_msg.p_addr();
1292        else
1293            element->physAddr = 0;
1294
1295        if (pkt_msg.has_v_addr())
1296            element->virtAddr = pkt_msg.v_addr();
1297        else
1298            element->virtAddr = 0;
1299
1300        if (pkt_msg.has_asid())
1301            element->asid = pkt_msg.asid();
1302        else
1303            element->asid = 0;
1304
1305        if (pkt_msg.has_size())
1306            element->size = pkt_msg.size();
1307        else
1308            element->size = 0;
1309
1310        if (pkt_msg.has_flags())
1311            element->flags = pkt_msg.flags();
1312        else
1313            element->flags = 0;
1314
1315        if (pkt_msg.has_pc())
1316            element->pc = pkt_msg.pc();
1317        else
1318            element->pc = 0;
1319
1320        // ROB occupancy number
1321        ++microOpCount;
1322        if (pkt_msg.has_weight()) {
1323            microOpCount += pkt_msg.weight();
1324        }
1325        element->robNum = microOpCount;
1326        return true;
1327    }
1328
1329    // We have reached the end of the file
1330    return false;
1331}
1332
1333bool
1334TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1335{
1336    for (auto& own_reg_dep : regDep) {
1337        if (own_reg_dep == reg_dep) {
1338            // If register dependency is found, make it zero and return true
1339            own_reg_dep = 0;
1340            --numRegDep;
1341            assert(numRegDep >= 0);
1342            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1343                    "done.\n", seqNum, reg_dep);
1344            return true;
1345        }
1346    }
1347
1348    // Return false if the dependency is not found
1349    return false;
1350}
1351
1352bool
1353TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1354{
1355    for (auto& own_rob_dep : robDep) {
1356        if (own_rob_dep == rob_dep) {
1357            // If the rob dependency is found, make it zero and return true
1358            own_rob_dep = 0;
1359            --numRobDep;
1360            assert(numRobDep >= 0);
1361            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1362                "done.\n", seqNum, rob_dep);
1363            return true;
1364        }
1365    }
1366    return false;
1367}
1368
1369void
1370TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1371    for (auto& own_reg_dep : regDep) {
1372        own_reg_dep = 0;
1373    }
1374    numRegDep = 0;
1375}
1376
1377void
1378TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1379    for (auto& own_rob_dep : robDep) {
1380        own_rob_dep = 0;
1381    }
1382    numRobDep = 0;
1383}
1384
1385bool
1386TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1387{
1388    // If it is an rob dependency then remove it
1389    if (!removeRobDep(done_seq_num)) {
1390        // If it is not an rob dependency then it must be a register dependency
1391        // If the register dependency is not found, it violates an assumption
1392        // and must be caught by assert.
1393        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1394        assert(regdep_found);
1395    }
1396    // Return true if the node is dependency free
1397    return (numRobDep == 0 && numRegDep == 0);
1398}
1399
1400void
1401TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1402{
1403    DPRINTFR(TraceCPUData, "%lli", seqNum);
1404    DPRINTFR(TraceCPUData, ",%s", typeToStr());
1405    if (isLoad() || isStore()) {
1406        DPRINTFR(TraceCPUData, ",%i", physAddr);
1407        DPRINTFR(TraceCPUData, ",%i", size);
1408        DPRINTFR(TraceCPUData, ",%i", flags);
1409    }
1410    DPRINTFR(TraceCPUData, ",%lli", compDelay);
1411    int i = 0;
1412    DPRINTFR(TraceCPUData, "robDep:");
1413    while (robDep[i] != 0) {
1414        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1415        i++;
1416    }
1417    i = 0;
1418    DPRINTFR(TraceCPUData, "regDep:");
1419    while (regDep[i] != 0) {
1420        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1421        i++;
1422    }
1423    auto child_itr = dependents.begin();
1424    DPRINTFR(TraceCPUData, "dependents:");
1425    while (child_itr != dependents.end()) {
1426        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1427        child_itr++;
1428    }
1429
1430    DPRINTFR(TraceCPUData, "\n");
1431}
1432
1433std::string
1434TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1435{
1436    return Record::RecordType_Name(type);
1437}
1438
1439TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1440    : trace(filename)
1441{
1442    // Create a protobuf message for the header and read it from the stream
1443    ProtoMessage::PacketHeader header_msg;
1444    if (!trace.read(header_msg)) {
1445        panic("Failed to read packet header from %s\n", filename);
1446
1447        if (header_msg.tick_freq() != SimClock::Frequency) {
1448            panic("Trace %s was recorded with a different tick frequency %d\n",
1449                  header_msg.tick_freq());
1450        }
1451    }
1452}
1453
1454void
1455TraceCPU::FixedRetryGen::InputStream::reset()
1456{
1457    trace.reset();
1458}
1459
1460bool
1461TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1462{
1463    ProtoMessage::Packet pkt_msg;
1464    if (trace.read(pkt_msg)) {
1465        element->cmd = pkt_msg.cmd();
1466        element->addr = pkt_msg.addr();
1467        element->blocksize = pkt_msg.size();
1468        element->tick = pkt_msg.tick();
1469        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1470        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1471        return true;
1472    }
1473
1474    // We have reached the end of the file
1475    return false;
1476}
1477