/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(this, "inst")),
        dataMasterID(params->system->getMasterId(this, "data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params),
        icacheNextEvent([this]{ schedIcacheNext(); }, name()),
        dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
        oneTraceComplete(false),
        traceOffset(0),
        execCompleteEvent(nullptr),
        enableEarlyExit(params->enableEarlyExit),
        progressMsgInterval(params->progressMsgInterval),
        progressMsgThreshold(params->progressMsgInterval)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
                "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
                "to %d exceeds the max. value of %d.\n",
                params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
                " %d exceeds the max. value of %d.\n",
                params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::updateNumOps(uint64_t rob_num)
{
    numOps = rob_num;
    if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
        inform("%s: %i insts committed\n", name(), progressMsgThreshold);
        progressMsgThreshold += progressMsgInterval;
    }
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}
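
// Initialise both request generators from their trace files, align the two
// traces to a common time origin (the earlier of their first ticks) and
// schedule the first icache and dcache events at the offset-adjusted ticks.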
void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request
    Tick first_icache_tick = icacheGen.init();

    // Get the send tick of the first data read/write request
    Tick first_dcache_tick = dcacheGen.init();

    // Set the trace offset as the minimum of that in both traces
    traceOffset = std::min(first_icache_tick, first_dcache_tick);
    inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
            name(), traceOffset);

    // Schedule next icache and dcache event by subtracting the offset
    schedule(icacheNextEvent, first_icache_tick - traceOffset);
    schedule(dcacheNextEvent, first_dcache_tick - traceOffset);

    // Adjust the trace offset for the dcache generator's ready nodes.
    // We don't need to do this for the icache generator as it will
    // send its first request at the first event and schedule subsequent
    // events using a relative tick delta.
    dcacheGen.adjustInitTraceOffset(traceOffset);

    // If the Trace CPU simulation is configured to exit on any one trace
    // completion then we don't need a counted event to count down all Trace
    // CPUs in the system. If not then instantiate a counted event.
    if (!enableEarlyExit) {
        // The static counter for number of Trace CPUs is correctly set at
        // this point so create an event and pass it.
        execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                 numTraceCPUs);
    }
}

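// Event handler for the instruction side. It tries to send the current (or
// pending retry) packet; on success the next fetch event is scheduled at the
// tick delta read from the trace, on failure the cache's retry callback will
// reschedule this event.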
void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because sending
        // failed and the next event will be scheduled via recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete and we can exit.
            checkAndSchedExitEvent();
        }
    }
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    // Update stat for numCycles
    numCycles = clockEdge() / clockPeriod();

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

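// Called by either generator when its trace playback completes. The first
// call only records that one trace is done; the second call marks overall
// completion and either exits the simulation immediately (early-exit mode)
// or schedules the counted exit event shared by all Trace CPUs.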
void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Both instruction and data access traces have been played back, so
        // execution is complete.
        inform("%s: Execution complete.\n", name());
        // If the replay is configured to exit early, that is as soon as any
        // one trace is complete, then exit immediately. Otherwise, schedule
        // the counted exit event that counts down completion of each Trace
        // CPU.
        if (enableEarlyExit) {
            exitSimLoop("End of trace reached");
        } else {
            schedule(*execCompleteEvent, curTick());
        }
    }
}

void
TraceCPU::regStats()
{
    BaseCPU::regStats();

    numSchedDcacheEvent
    .name(name() + ".numSchedDcacheEvent")
    .desc("Number of events scheduled to trigger data request generator")
    ;

    numSchedIcacheEvent
    .name(name() + ".numSchedIcacheEvent")
    .desc("Number of events scheduled to trigger instruction request generator")
    ;

    numOps
    .name(name() + ".numOps")
    .desc("Number of micro-ops simulated by the Trace CPU")
    ;

    cpi
    .name(name() + ".cpi")
    .desc("Cycles per micro-op used as a proxy for CPI")
    .precision(6)
    ;
    cpi = numCycles/numOps;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
    .name(name() + ".maxDependents")
    .desc("Max number of dependents observed on a node")
    ;

    maxReadyListSize
    .name(name() + ".maxReadyListSize")
    .desc("Max size of the ready list observed")
    ;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    numSplitReqs
    .name(name() + ".numSplitReqs")
    .desc("Number of split requests")
    ;

    numSOLoads
    .name(name() + ".numSOLoads")
    .desc("Number of strictly ordered loads")
    ;

    numSOStores
    .name(name() + ".numSOStores")
    .desc("Number of strictly ordered stores")
    ;

    dataLastTick
    .name(name() + ".dataLastTick")
    .desc("Last tick simulated from the elastic data trace")
    ;
}

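// Prime the elastic generator: read two windows of the dependency graph so
// that the dependents of every dependency-free node are known, then return
// the execute tick of the earliest ready node so that the first dcache
// event can be scheduled.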
Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
            " is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
{
    for (auto& free_node : readyList) {
        free_node.execTick -= offset;
    }
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

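// Read up to windowSize records from the elastic trace into the dependency
// graph, registering each new node with the parents it depends on and
// issuing any node whose dependencies are already complete.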
bool
TraceCPU::ElasticDataGen::readNextWindow()
{
    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then end of
        // trace has been reached and traceComplete needs to be set in addition
        // to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

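// Register new_node as a dependent of every parent listed in dep_array
// (either the ROB or the register dependency array). A dependency whose
// parent is no longer in the graph has already completed and is pruned
// here by zeroing the entry and decrementing num_dep.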
template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                            T& dep_array, uint8_t& num_dep)
{
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts, maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider
            // the execution of the parent is complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

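// Main replay loop of the elastic generator. It tops up the dependency
// graph, retries nodes stalled on hardware resources, then walks the
// readyList issuing every node whose execute tick has been reached: memory
// nodes are sent to the dcache port and compute nodes complete immediately,
// after which their dependents are woken. Finally it schedules the next
// dcache event or, if everything has drained, marks execution complete.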
void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
            "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
            depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the end is reached or the next free
    // node has its execute tick later than curTick. Note that the end check
    // must come first so that we never dereference the end iterator.
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList then, will be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // after load is sent but after response is received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as complete
            // as we do not send a request for this case. If it is a store or a
            // comp node we also mark all its dependents complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet.  Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // Update the stat for numOps simulated
            owner.updateNumOps(node_ptr->robNum);
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If cache is not blocked, schedule an event for the first execTick in
    // readyList else retry from cache will schedule the event. If the ready
    // list is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
                hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If the trace is completely read, readyList is empty and depGraph is
    // empty, set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

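// Translate a load/store node into a timing request/packet and send it on
// the dcache port. Strictly ordered requests are skipped and treated as
// executed, and requests spanning a cache-line boundary are truncated to
// the first line. Returns nullptr on success or the packet to retry.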
PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{
    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }

    // Create a request and the packet containing request
    auto req = std::make_shared<Request>(
        node_ptr->physAddr, node_ptr->size,
        node_ptr->flags, masterID, node_ptr->seqNum,
        ContextID(0));

    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the asid and virtual address fields
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                        node_ptr->flags, masterID, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}

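// Issue a dependency-free node if the modelled ROB, load and store buffers
// have space: on success the node is placed in readyList with an execute
// tick of clockEdge() plus its compute delay. Otherwise a first-time
// failure parks the node in depFreeQueue until resources free up.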
bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
            " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
            node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
            "Adding to readyList, occupying resources.\n", node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                                owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;
    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

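// Handle a dcache response. Store responses only release the store buffer
// entry, as store completion has no dependents in the trace; load responses
// release the load's resources, wake its dependents and remove the node
// from the dependency graph. Either way, try to reschedule the next event.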
void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to schedule
        // an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // Update the stat for numOps completed
        owner.updateNumOps(node_ptr->robNum);
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have execute
        // tick greater than or equal to curTick. But a new dep-free node might
        // have its execute tick earlier. Therefore, attempt to reschedule. It
        // could happen that the readyList is empty and we got here via a
        // last remaining response. So, either the trace is complete or there
        // are pending nodes in the depFreeQueue. The checking is done in the
        // execute() control flow, so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

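// Insert a ready node into readyList, which is kept sorted by execute tick
// and, for equal ticks, by ascending sequence number. The only exception is
// a node waiting on a retry packet, which stays pinned at the head.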
void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                                    Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                              maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one that failed to execute, i.e.
    // it created the pending retryPkt, skip past it so that it keeps its
    // position at the head of the list regardless of the new node's tick.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node then
        // go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                          maxReadyListSize.value());
}

void
TraceCPU::ElasticDataGen::printReadyList()
{
    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
            node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
  : sizeROB(max_rob),
    sizeStoreBuffer(max_stores),
    sizeLoadBuffer(max_loads),
    oldestInFlightRobNum(UINT64_MAX),
    numInFlightLoads(0),
    numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy ROB entry for the issued node.
    // Merely maintain the oldest node, i.e. the numerically least robNum, by
    // saving it in the variable oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
        done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we deleted the only in-flight node, then oldestInFlightRobNum
        // is set back to its initialised (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
        "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
        oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus ROB entry is
    // freed. But it occupies an entry in the Store Buffer until its response
    // is received. A load is considered complete when a response is received,
    // thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release(), which is called
    // when the node is executed and taken off the readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

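// Check whether the modelled ROB, load buffer and store buffer can accept
// the given node. ROB occupancy is approximated as the distance between the
// node's robNum and that of the oldest node still in flight.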
bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a younger
        // instruction than the oldest instruction in-flight. Thus we make
        // sure num_in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " #in-flight nodes = %d - %d =  %d", new_node->seqNum,
             new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum will
        // be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
            " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
            new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ  = %d/%d.\n",
        numInFlightLoads, sizeLoadBuffer,
        numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
            "LQ = %d/%d, SQ  = %d/%d.\n",
            oldestInFlightRobNum,
            numInFlightLoads, sizeLoadBuffer,
            numInFlightStores, sizeStoreBuffer);
}

void
TraceCPU::FixedRetryGen::regStats()
{
    using namespace Stats;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    instLastTick
    .name(name() + ".instLastTick")
    .desc("Last tick simulated from the fixed inst trace")
    ;
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

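// Try to send the pending retry packet if there is one, otherwise the
// packet for currElement. On success, read the next trace element and
// compute the tick delta to the next fetch; returns true if another event
// should be scheduled, i.e. the trace is not yet complete.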
bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {
        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++numRetrySucceeded;
    } else {
        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                    currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++numSendSucceeded;
        }
    }
    // If packet was sent successfully, either retryPkt or currElement, return
    // true to indicate to schedule event at current Tick plus delta. If packet
    // was sent successfully and there is no next packet to send, return false.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
        "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement, currElement gets cleared so save the
    // tick to calculate delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
              Request::FlagsType flags, Addr pc)
{
    // Create new request
    auto req = std::make_shared<Request>(addr, size, flags, masterID);
    req->setPC(pc);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setContext(ContextID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }
}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the packet to free allocated memory
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests, which is done inside
    // the elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the packet to free
    // memory
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename,
    const double time_multiplier)
    : trace(filename),
      timeMultiplier(time_multiplier),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }

    // Assign window size equal to the field in the trace that was recorded
    // when the data dependency trace was captured in the o3cpu model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field regDepList
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both a register
            // and an order dependency on another instruction. In such a case,
            // the register dependency is omitted.
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_asid())
            element->asid = pkt_msg.asid();
        else
            element->asid = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If register dependency is found, make it zero and return true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                    "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the rob dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep()
{
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep()
{
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

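// Remove the dependency of this node on the given completed instruction,
// first as a ROB dependency and, failing that, as a register dependency
// (one of the two must exist). Returns true if the node is now
// dependency-free.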
bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is a ROB dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not a ROB dependency then it must be a register dependency.
        // If the register dependency is not found, it violates an assumption
        // and must be caught by assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}