trace_cpu.cc: comparison of revisions 12085:de78ea63e0ca and 12680:91f4d6668b4f
1/*
2 * Copyright (c) 2013 - 2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 * Andreas Hansson
39 * Thomas Grass
40 */
41
42#include "cpu/trace/trace_cpu.hh"
43
44#include "sim/sim_exit.hh"
45
46// Declare and initialize the static counter for number of trace CPUs.
47int TraceCPU::numTraceCPUs = 0;
48
49TraceCPU::TraceCPU(TraceCPUParams *params)
50 : BaseCPU(params),
51 icachePort(this),
52 dcachePort(this),
53 instMasterID(params->system->getMasterId(this, "inst")),
54 dataMasterID(params->system->getMasterId(this, "data")),
55 instTraceFile(params->instTraceFile),
56 dataTraceFile(params->dataTraceFile),
57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59 params),
60 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
61 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
62 oneTraceComplete(false),
63 traceOffset(0),
64 execCompleteEvent(nullptr),
65 enableEarlyExit(params->enableEarlyExit),
66 progressMsgInterval(params->progressMsgInterval),
67 progressMsgThreshold(params->progressMsgInterval)
68{
69 // Increment static counter for number of Trace CPUs.
70 ++TraceCPU::numTraceCPUs;
71
72 // Check that the python parameters for sizes of ROB, store buffer and
73 // load buffer do not overflow the corresponding C++ variables.
74 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
75 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
 76    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
 77             "to %d exceeds the max. value of %d.\n",
 78             params->sizeStoreBuffer, UINT16_MAX);
79 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
80 " %d exceeds the max. value of %d.\n",
81 params->sizeLoadBuffer, UINT16_MAX);
82}
83
84TraceCPU::~TraceCPU()
85{
86
87}
88
89TraceCPU*
90TraceCPUParams::create()
91{
92 return new TraceCPU(this);
93}
94
95void
96TraceCPU::updateNumOps(uint64_t rob_num)
97{
98 numOps = rob_num;
99 if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
100 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
101 progressMsgThreshold += progressMsgInterval;
102 }
103}
104
105void
106TraceCPU::takeOverFrom(BaseCPU *oldCPU)
107{
108 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
109 assert(!getInstPort().isConnected());
110 assert(oldCPU->getInstPort().isConnected());
111 BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
112 oldCPU->getInstPort().unbind();
113 getInstPort().bind(inst_peer_port);
114
115 assert(!getDataPort().isConnected());
116 assert(oldCPU->getDataPort().isConnected());
117 BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
118 oldCPU->getDataPort().unbind();
119 getDataPort().bind(data_peer_port);
120}
121
122void
123TraceCPU::init()
124{
125 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
126 "\n", instTraceFile);
127 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
128 dataTraceFile);
129
130 BaseCPU::init();
131
132 // Get the send tick of the first instruction read request
133 Tick first_icache_tick = icacheGen.init();
134
135 // Get the send tick of the first data read/write request
136 Tick first_dcache_tick = dcacheGen.init();
137
138 // Set the trace offset as the minimum of that in both traces
139 traceOffset = std::min(first_icache_tick, first_dcache_tick);
140 inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
141 name(), traceOffset);
142
143 // Schedule next icache and dcache event by subtracting the offset
144 schedule(icacheNextEvent, first_icache_tick - traceOffset);
145 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
146
147 // Adjust the trace offset for the dcache generator's ready nodes
148 // We don't need to do this for the icache generator as it will
149 // send its first request at the first event and schedule subsequent
150 // events using a relative tick delta
151 dcacheGen.adjustInitTraceOffset(traceOffset);
152
153 // If the Trace CPU simulation is configured to exit on any one trace
154 // completion then we don't need a counted event to count down all Trace
155 // CPUs in the system. If not then instantiate a counted event.
156 if (!enableEarlyExit) {
157 // The static counter for number of Trace CPUs is correctly set at
158 // this point so create an event and pass it.
159 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
160 numTraceCPUs);
161 }
162
163}
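
// A worked example of the offset arithmetic above, using hypothetical first
// ticks (1000000 for the instruction trace, 750000 for the data trace):
//
//     Tick first_icache_tick = 1000000;
//     Tick first_dcache_tick = 750000;
//     traceOffset = std::min(first_icache_tick, first_dcache_tick); // 750000
//     // icacheNextEvent is scheduled at 1000000 - 750000 = 250000
//     // dcacheNextEvent is scheduled at  750000 - 750000 = 0
//
// Only the min-and-subtract arithmetic mirrors the code above; the tick
// values themselves are made up for illustration.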
164
165void
166TraceCPU::schedIcacheNext()
167{
168 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
169
170 // Try to send the current packet or a retry packet if there is one
171 bool sched_next = icacheGen.tryNext();
172 // If packet sent successfully, schedule next event
173 if (sched_next) {
174 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
175 "at %d.\n", curTick() + icacheGen.tickDelta());
176 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
177 ++numSchedIcacheEvent;
178 } else {
179 // check if traceComplete. If not, do nothing because sending failed
180 // and next event will be scheduled via RecvRetry()
181 if (icacheGen.isTraceComplete()) {
 182            // If this is the first trace to complete, set the flag. If it is
 183            // already set, both traces are complete and the simulation can exit.
184 checkAndSchedExitEvent();
185 }
186 }
187 return;
188}
189
190void
191TraceCPU::schedDcacheNext()
192{
193 DPRINTF(TraceCPUData, "DcacheGen event.\n");
194
195 // Update stat for numCycles
196 numCycles = clockEdge() / clockPeriod();
197
198 dcacheGen.execute();
199 if (dcacheGen.isExecComplete()) {
200 checkAndSchedExitEvent();
201 }
202}
203
204void
205TraceCPU::checkAndSchedExitEvent()
206{
207 if (!oneTraceComplete) {
208 oneTraceComplete = true;
209 } else {
210 // Schedule event to indicate execution is complete as both
211 // instruction and data access traces have been played back.
212 inform("%s: Execution complete.\n", name());
213 // If the replay is configured to exit early, that is when any one
214 // execution is complete then exit immediately and return. Otherwise,
215 // schedule the counted exit that counts down completion of each Trace
216 // CPU.
217 if (enableEarlyExit) {
218 exitSimLoop("End of trace reached");
219 } else {
220 schedule(*execCompleteEvent, curTick());
221 }
222 }
223}
224
225void
226TraceCPU::regStats()
227{
228
229 BaseCPU::regStats();
230
231 numSchedDcacheEvent
232 .name(name() + ".numSchedDcacheEvent")
233 .desc("Number of events scheduled to trigger data request generator")
234 ;
235
236 numSchedIcacheEvent
237 .name(name() + ".numSchedIcacheEvent")
238 .desc("Number of events scheduled to trigger instruction request generator")
239 ;
240
241 numOps
242 .name(name() + ".numOps")
243 .desc("Number of micro-ops simulated by the Trace CPU")
244 ;
245
246 cpi
247 .name(name() + ".cpi")
248 .desc("Cycles per micro-op used as a proxy for CPI")
249 .precision(6)
250 ;
251 cpi = numCycles/numOps;
252
253 icacheGen.regStats();
254 dcacheGen.regStats();
255}
256
257void
258TraceCPU::ElasticDataGen::regStats()
259{
260 using namespace Stats;
261
262 maxDependents
263 .name(name() + ".maxDependents")
264 .desc("Max number of dependents observed on a node")
265 ;
266
267 maxReadyListSize
268 .name(name() + ".maxReadyListSize")
269 .desc("Max size of the ready list observed")
270 ;
271
272 numSendAttempted
273 .name(name() + ".numSendAttempted")
274 .desc("Number of first attempts to send a request")
275 ;
276
277 numSendSucceeded
278 .name(name() + ".numSendSucceeded")
279 .desc("Number of successful first attempts")
280 ;
281
282 numSendFailed
283 .name(name() + ".numSendFailed")
284 .desc("Number of failed first attempts")
285 ;
286
287 numRetrySucceeded
288 .name(name() + ".numRetrySucceeded")
289 .desc("Number of successful retries")
290 ;
291
292 numSplitReqs
293 .name(name() + ".numSplitReqs")
294 .desc("Number of split requests")
295 ;
296
297 numSOLoads
298 .name(name() + ".numSOLoads")
299 .desc("Number of strictly ordered loads")
300 ;
301
302 numSOStores
303 .name(name() + ".numSOStores")
304 .desc("Number of strictly ordered stores")
305 ;
306
307 dataLastTick
308 .name(name() + ".dataLastTick")
309 .desc("Last tick simulated from the elastic data trace")
310 ;
311}
312
313Tick
314TraceCPU::ElasticDataGen::init()
315{
316 DPRINTF(TraceCPUData, "Initializing data memory request generator "
317 "DcacheGen: elastic issue with retry.\n");
318
319 if (!readNextWindow())
320 panic("Trace has %d elements. It must have at least %d elements.\n",
321 depGraph.size(), 2 * windowSize);
322 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
323 depGraph.size());
324
325 if (!readNextWindow())
326 panic("Trace has %d elements. It must have at least %d elements.\n",
327 depGraph.size(), 2 * windowSize);
328 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
329 depGraph.size());
330
331 // Print readyList
332 if (DTRACE(TraceCPUData)) {
333 printReadyList();
334 }
335 auto free_itr = readyList.begin();
336 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
337 " is %d.\n", free_itr->seqNum, free_itr->execTick);
338 // Return the execute tick of the earliest ready node so that an event
339 // can be scheduled to call execute()
340 return (free_itr->execTick);
341}
342
343void
344TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
345 for (auto& free_node : readyList) {
346 free_node.execTick -= offset;
347 }
348}
349
350void
351TraceCPU::ElasticDataGen::exit()
352{
353 trace.reset();
354}
355
356bool
357TraceCPU::ElasticDataGen::readNextWindow()
358{
359
360 // Read and add next window
361 DPRINTF(TraceCPUData, "Reading next window from file.\n");
362
363 if (traceComplete) {
364 // We are at the end of the file, thus we have no more records.
365 // Return false.
366 return false;
367 }
368
369 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
370 depGraph.size());
371
372 uint32_t num_read = 0;
373 while (num_read != windowSize) {
374
375 // Create a new graph node
376 GraphNode* new_node = new GraphNode;
377
378 // Read the next line to get the next record. If that fails then end of
379 // trace has been reached and traceComplete needs to be set in addition
380 // to returning false.
381 if (!trace.read(new_node)) {
382 DPRINTF(TraceCPUData, "\tTrace complete!\n");
383 traceComplete = true;
384 return false;
385 }
386
387 // Annotate the ROB dependencies of the new node onto the parent nodes.
388 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
389 // Annotate the register dependencies of the new node onto the parent
390 // nodes.
391 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
392
393 num_read++;
394 // Add to map
395 depGraph[new_node->seqNum] = new_node;
396 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
397 // Source dependencies are already complete, check if resources
398 // are available and issue. The execution time is approximated
399 // to current time plus the computational delay.
400 checkAndIssue(new_node);
401 }
402 }
403
404 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
405 depGraph.size());
406 return true;
407}
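
// A sketch of the windowed reading, assuming a hypothetical windowSize of 3
// (the real value comes from the trace header read in
// ElasticDataGen::InputStream::InputStream()):
//
//     init()            : two calls to readNextWindow() load seq. nums 1..6
//     nodes retire      : depGraph shrinks below windowSize (e.g. 2 nodes left)
//     next execute()    : nextRead was set, so readNextWindow() appends 7..9
//
// Keeping roughly two windows in the graph ensures that the dependents of
// every dependency-free node are already present when it is issued.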
408
409template<typename T> void
410TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
411 T& dep_array, uint8_t& num_dep)
412{
413 for (auto& a_dep : dep_array) {
414 // The convention is to set the dependencies starting with the first
415 // index in the ROB and register dependency arrays. Thus, when we reach
 416    // a dependency equal to the initialisation value of zero, we know we
 417    // have iterated over all dependencies and can break.
418 if (a_dep == 0)
419 break;
420 // We look up the valid dependency, i.e. the parent of this node
421 auto parent_itr = depGraph.find(a_dep);
422 if (parent_itr != depGraph.end()) {
423 // If the parent is found, it is yet to be executed. Append a
424 // pointer to the new node to the dependents list of the parent
425 // node.
426 parent_itr->second->dependents.push_back(new_node);
427 auto num_depts = parent_itr->second->dependents.size();
428 maxDependents = std::max<double>(num_depts, maxDependents.value());
429 } else {
430 // The dependency is not found in the graph. So consider
431 // the execution of the parent is complete, i.e. remove this
432 // dependency.
433 a_dep = 0;
434 num_dep--;
435 }
436 }
437}
438
439void
440TraceCPU::ElasticDataGen::execute()
441{
442 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
443 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
444 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
445 depFreeQueue.size());
446 hwResource.printOccupancy();
447
448 // Read next window to make sure that dependents of all dep-free nodes
449 // are in the depGraph
450 if (nextRead) {
451 readNextWindow();
452 nextRead = false;
453 }
454
455 // First attempt to issue the pending dependency-free nodes held
456 // in depFreeQueue. If resources have become available for a node,
457 // then issue it, i.e. add the node to readyList.
458 while (!depFreeQueue.empty()) {
459 if (checkAndIssue(depFreeQueue.front(), false)) {
460 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
461 "%lli.\n", (depFreeQueue.front())->seqNum);
462 depFreeQueue.pop();
463 } else {
464 break;
465 }
466 }
467 // Proceed to execute from readyList
468 auto graph_itr = depGraph.begin();
469 auto free_itr = readyList.begin();
470 // Iterate through readyList until the next free node has its execute
471 // tick later than curTick or the end of readyList is reached
 472    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
473
474 // Get pointer to the node to be executed
475 graph_itr = depGraph.find(free_itr->seqNum);
476 assert(graph_itr != depGraph.end());
477 GraphNode* node_ptr = graph_itr->second;
478
479 // If there is a retryPkt send that else execute the load
480 if (retryPkt) {
481 // The retryPkt must be the request that was created by the
482 // first node in the readyList.
483 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
484 panic("Retry packet's seqence number does not match "
485 "the first node in the readyList.\n");
486 }
487 if (port.sendTimingReq(retryPkt)) {
488 ++numRetrySucceeded;
489 retryPkt = nullptr;
490 }
491 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
492 // If there is no retryPkt, attempt to send a memory request in
493 // case of a load or store node. If the send fails, executeMemReq()
494 // returns a packet pointer, which we save in retryPkt. In case of
495 // a comp node we don't do anything and simply continue as if the
 496            // execution of the comp node succeeded.
497 retryPkt = executeMemReq(node_ptr);
498 }
499 // If the retryPkt or a new load/store node failed, we exit from here
500 // as a retry from cache will bring the control to execute(). The
501 // first node in readyList then, will be the failed node.
502 if (retryPkt) {
503 break;
504 }
505
506 // Proceed to remove dependencies for the successfully executed node.
507 // If it is a load which is not strictly ordered and we sent a
508 // request for it successfully, we do not yet mark any register
509 // dependencies complete. But as per dependency modelling we need
510 // to mark ROB dependencies of load and non load/store nodes which
511 // are based on successful sending of the load as complete.
512 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
513 // If execute succeeded mark its dependents as complete
514 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
515 "dependents..\n", node_ptr->seqNum);
516
517 auto child_itr = (node_ptr->dependents).begin();
518 while (child_itr != (node_ptr->dependents).end()) {
519 // ROB dependency of a store on a load must not be removed
520 // after load is sent but after response is received
521 if (!(*child_itr)->isStore() &&
522 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
523
524 // Check if the child node has become dependency free
525 if ((*child_itr)->numRobDep == 0 &&
526 (*child_itr)->numRegDep == 0) {
527
528 // Source dependencies are complete, check if
529 // resources are available and issue
530 checkAndIssue(*child_itr);
531 }
532 // Remove this child for the sent load and point to new
533 // location of the element following the erased element
534 child_itr = node_ptr->dependents.erase(child_itr);
535 } else {
536 // This child is not dependency-free, point to the next
537 // child
538 child_itr++;
539 }
540 }
541 } else {
542 // If it is a strictly ordered load mark its dependents as complete
543 // as we do not send a request for this case. If it is a store or a
544 // comp node we also mark all its dependents complete.
545 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
546 " up dependents..\n", node_ptr->seqNum);
547
548 for (auto child : node_ptr->dependents) {
549 // If the child node is dependency free removeDepOnInst()
550 // returns true.
551 if (child->removeDepOnInst(node_ptr->seqNum)) {
552 // Source dependencies are complete, check if resources
553 // are available and issue
554 checkAndIssue(child);
555 }
556 }
557 }
558
559 // After executing the node, remove from readyList and delete node.
560 readyList.erase(free_itr);
561 // If it is a cacheable load which was sent, don't delete
562 // just yet. Delete it in completeMemAccess() after the
563 // response is received. If it is an strictly ordered
564 // load, it was not sent and all dependencies were simply
565 // marked complete. Thus it is safe to delete it. For
566 // stores and non load/store nodes all dependencies were
567 // marked complete so it is safe to delete it.
568 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
569 // Release all resources occupied by the completed node
570 hwResource.release(node_ptr);
571 // clear the dynamically allocated set of dependents
572 (node_ptr->dependents).clear();
573 // Update the stat for numOps simulated
574 owner.updateNumOps(node_ptr->robNum);
575 // delete node
576 delete node_ptr;
577 // remove from graph
578 depGraph.erase(graph_itr);
579 }
580 // Point to first node to continue to next iteration of while loop
581 free_itr = readyList.begin();
582 } // end of while loop
583
584 // Print readyList, sizes of queues and resource status after updating
585 if (DTRACE(TraceCPUData)) {
586 printReadyList();
587 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
588 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
589 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
590 depFreeQueue.size());
591 hwResource.printOccupancy();
592 }
593
594 if (retryPkt) {
595 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
596 "event from the cache for seq. num %lli.\n",
597 retryPkt->req->getReqInstSeqNum());
598 return;
599 }
600 // If the size of the dependency graph is less than the dependency window
601 // then read from the trace file to populate the graph next time we are in
602 // execute.
603 if (depGraph.size() < windowSize && !traceComplete)
604 nextRead = true;
605
606 // If cache is not blocked, schedule an event for the first execTick in
607 // readyList else retry from cache will schedule the event. If the ready
608 // list is empty then check if the next pending node has resources
609 // available to issue. If yes, then schedule an event for the next cycle.
610 if (!readyList.empty()) {
611 Tick next_event_tick = std::max(readyList.begin()->execTick,
612 curTick());
613 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
614 next_event_tick);
615 owner.schedDcacheNextEvent(next_event_tick);
616 } else if (readyList.empty() && !depFreeQueue.empty() &&
617 hwResource.isAvailable(depFreeQueue.front())) {
618 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
619 owner.clockEdge(Cycles(1)));
620 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
621 }
622
623 // If trace is completely read, readyList is empty and depGraph is empty,
624 // set execComplete to true
625 if (depGraph.empty() && readyList.empty() && traceComplete &&
626 !hwResource.awaitingResponse()) {
627 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
628 execComplete = true;
629 dataLastTick = curTick();
630 }
631}
632
633PacketPtr
634TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
635{
636
637 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
638 "virt addr %d, pc %#x, size %d, flags %d).\n",
639 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
640 node_ptr->pc, node_ptr->size, node_ptr->flags);
641
642 // If the request is strictly ordered, do not send it. Just return nullptr
 643    // as if it was successfully sent.
644 if (node_ptr->isStrictlyOrdered()) {
645 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
646 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
647 node_ptr->seqNum);
648 return nullptr;
649 }
650
651 // Check if the request spans two cache lines as this condition triggers
652 // an assert fail in the L1 cache. If it does then truncate the size to
653 // access only until the end of that line and ignore the remainder. The
654 // stat counting this is useful to keep a check on how frequently this
 655    // happens. If required, the code could be revised to mimic splitting such
656 // a request into two.
657 unsigned blk_size = owner.cacheLineSize();
658 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
659 if (!(blk_offset + node_ptr->size <= blk_size)) {
660 node_ptr->size = blk_size - blk_offset;
661 ++numSplitReqs;
662 }
663
664 // Create a request and the packet containing request
665 Request* req = new Request(node_ptr->physAddr, node_ptr->size,
666 node_ptr->flags, masterID, node_ptr->seqNum,
667 ContextID(0));
668 req->setPC(node_ptr->pc);
669 // If virtual address is valid, set the asid and virtual address fields
670 // of the request.
671 if (node_ptr->virtAddr != 0) {
672 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
673 node_ptr->flags, masterID, node_ptr->pc);
674 req->setPaddr(node_ptr->physAddr);
675 req->setReqInstSeqNum(node_ptr->seqNum);
676 }
677
678 PacketPtr pkt;
679 uint8_t* pkt_data = new uint8_t[req->getSize()];
680 if (node_ptr->isLoad()) {
681 pkt = Packet::createRead(req);
682 } else {
683 pkt = Packet::createWrite(req);
684 memset(pkt_data, 0xA, req->getSize());
685 }
686 pkt->dataDynamic(pkt_data);
687
688 // Call MasterPort method to send a timing request for this packet
689 bool success = port.sendTimingReq(pkt);
690 ++numSendAttempted;
691
692 if (!success) {
693 // If it fails, return the packet to retry when a retry is signalled by
694 // the cache
695 ++numSendFailed;
696 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
697 return pkt;
698 } else {
 699        // If it succeeds, return nullptr
700 ++numSendSucceeded;
701 return nullptr;
702 }
703}
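
// A worked example of the cache-line truncation above, with hypothetical
// values: a 64-byte cache line and a 16-byte access at physical address
// 0x1003C, which would otherwise straddle two lines:
//
//     unsigned blk_size   = 64;
//     Addr     phys_addr  = 0x1003C;
//     unsigned size       = 16;
//     Addr blk_offset = phys_addr & (Addr)(blk_size - 1); // 0x3C == 60
//     if (!(blk_offset + size <= blk_size))                // 60 + 16 > 64
//         size = blk_size - blk_offset;                    // size becomes 4
//
// The remaining 12 bytes are simply dropped rather than sent as a second
// request, and numSplitReqs records how often this truncation occurs.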
704
705bool
706TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
707{
708 // Assert the node is dependency-free
709 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
710
711 // If this is the first attempt, print a debug message to indicate this.
712 if (first) {
713 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
714 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
715 node_ptr->robNum);
716 }
717
718 // Check if resources are available to issue the specific node
719 if (hwResource.isAvailable(node_ptr)) {
720 // If resources are free only then add to readyList
721 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
722 " to readyList, occupying resources.\n", node_ptr->seqNum);
723 // Compute the execute tick by adding the compute delay for the node
724 // and add the ready node to the ready list
725 addToSortedReadyList(node_ptr->seqNum,
726 owner.clockEdge() + node_ptr->compDelay);
727 // Account for the resources taken up by this issued node.
728 hwResource.occupy(node_ptr);
729 return true;
730
731 } else {
732 if (first) {
733 // Although dependencies are complete, resources are not available.
734 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
735 " Adding to depFreeQueue.\n", node_ptr->seqNum);
736 depFreeQueue.push(node_ptr);
737 } else {
738 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
739 "Still pending issue.\n", node_ptr->seqNum);
740 }
741 return false;
742 }
743}
744
745void
746TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
747{
748 // Release the resources for this completed node.
749 if (pkt->isWrite()) {
750 // Consider store complete.
751 hwResource.releaseStoreBuffer();
752 // If it is a store response then do nothing since we do not model
753 // dependencies on store completion in the trace. But if we were
754 // blocking execution due to store buffer fullness, we need to schedule
755 // an event and attempt to progress.
756 } else {
757 // If it is a load response then release the dependents waiting on it.
758 // Get pointer to the completed load
759 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
760 assert(graph_itr != depGraph.end());
761 GraphNode* node_ptr = graph_itr->second;
762
763 // Release resources occupied by the load
764 hwResource.release(node_ptr);
765
766 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
767 " dependents..\n", node_ptr->seqNum);
768
769 for (auto child : node_ptr->dependents) {
770 if (child->removeDepOnInst(node_ptr->seqNum)) {
771 checkAndIssue(child);
772 }
773 }
774
775 // clear the dynamically allocated set of dependents
776 (node_ptr->dependents).clear();
777 // Update the stat for numOps completed
778 owner.updateNumOps(node_ptr->robNum);
779 // delete node
780 delete node_ptr;
781 // remove from graph
782 depGraph.erase(graph_itr);
783 }
784
785 if (DTRACE(TraceCPUData)) {
786 printReadyList();
787 }
788
789 // If the size of the dependency graph is less than the dependency window
790 // then read from the trace file to populate the graph next time we are in
791 // execute.
792 if (depGraph.size() < windowSize && !traceComplete)
793 nextRead = true;
794
795 // If not waiting for retry, attempt to schedule next event
796 if (!retryPkt) {
797 // We might have new dep-free nodes in the list which will have execute
798 // tick greater than or equal to curTick. But a new dep-free node might
799 // have its execute tick earlier. Therefore, attempt to reschedule. It
800 // could happen that the readyList is empty and we got here via a
801 // last remaining response. So, either the trace is complete or there
802 // are pending nodes in the depFreeQueue. The checking is done in the
803 // execute() control flow, so schedule an event to go via that flow.
804 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
805 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
806 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
807 next_event_tick);
808 owner.schedDcacheNextEvent(next_event_tick);
809 }
810}
811
812void
813TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
814 Tick exec_tick)
815{
816 ReadyNode ready_node;
817 ready_node.seqNum = seq_num;
818 ready_node.execTick = exec_tick;
819
820 // Iterator to readyList
821 auto itr = readyList.begin();
822
823 // If the readyList is empty, simply insert the new node at the beginning
824 // and return
825 if (itr == readyList.end()) {
826 readyList.insert(itr, ready_node);
827 maxReadyListSize = std::max<double>(readyList.size(),
828 maxReadyListSize.value());
829 return;
830 }
831
 832    // If the head of the list is the node whose packet failed to send and is
 833    // held in retryPkt, skip past it so that it keeps its position as the
 834    // first node in the readyList.
835 if (retryPkt)
836 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
837 itr++;
838
839 // Increment the iterator and compare the node pointed to by it to the new
840 // node till the position to insert the new node is found.
841 bool found = false;
842 while (!found && itr != readyList.end()) {
843 // If the execution tick of the new node is less than the node then
844 // this is the position to insert
845 if (exec_tick < itr->execTick)
846 found = true;
847 // If the execution tick of the new node is equal to the node then
848 // sort in ascending order of sequence numbers
849 else if (exec_tick == itr->execTick) {
850 // If the sequence number of the new node is less than the node
851 // then this is the position to insert
852 if (seq_num < itr->seqNum)
853 found = true;
854 // Else go to next node
855 else
856 itr++;
857 }
858 // If the execution tick of the new node is greater than the node then
859 // go to the next node
860 else
861 itr++;
862 }
863 readyList.insert(itr, ready_node);
864 // Update the stat for max size reached of the readyList
865 maxReadyListSize = std::max<double>(readyList.size(),
866 maxReadyListSize.value());
867}
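
// A worked example of the insertion order, with hypothetical entries. The
// readyList is sorted primarily by execute tick and secondarily by sequence
// number, e.g. starting from
//
//     (seqNum 5, execTick 100), (seqNum 9, execTick 100), (seqNum 2, execTick 110)
//
// inserting (seqNum 7, execTick 100) places it between 5 and 9, while
// (seqNum 1, execTick 120) goes to the tail. If the head entry corresponds to
// the packet held in retryPkt, it is skipped so the failed node keeps its
// place at the front.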
868
869void
870TraceCPU::ElasticDataGen::printReadyList() {
871
872 auto itr = readyList.begin();
873 if (itr == readyList.end()) {
874 DPRINTF(TraceCPUData, "readyList is empty.\n");
875 return;
876 }
877 DPRINTF(TraceCPUData, "Printing readyList:\n");
878 while (itr != readyList.end()) {
879 auto graph_itr = depGraph.find(itr->seqNum);
880 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
881 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
882 node_ptr->typeToStr(), itr->execTick);
883 itr++;
884 }
885}
886
887TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
888 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
889 : sizeROB(max_rob),
890 sizeStoreBuffer(max_stores),
891 sizeLoadBuffer(max_loads),
892 oldestInFlightRobNum(UINT64_MAX),
893 numInFlightLoads(0),
894 numInFlightStores(0)
895{}
896
897void
898TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
899{
900 // Occupy ROB entry for the issued node
901 // Merely maintain the oldest node, i.e. numerically least robNum by saving
 902    // it in the variable oldestInFlightRobNum.
903 inFlightNodes[new_node->seqNum] = new_node->robNum;
904 oldestInFlightRobNum = inFlightNodes.begin()->second;
905
906 // Occupy Load/Store Buffer entry for the issued node if applicable
907 if (new_node->isLoad()) {
908 ++numInFlightLoads;
909 } else if (new_node->isStore()) {
910 ++numInFlightStores;
911 } // else if it is a non load/store node, no buffer entry is occupied
912
913 printOccupancy();
914}
915
916void
917TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
918{
919 assert(!inFlightNodes.empty());
920 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
921 done_node->seqNum);
922
923 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
924 inFlightNodes.erase(done_node->seqNum);
925
926 if (inFlightNodes.empty()) {
 927        // If we just deleted the only in-flight node, reset
 928        // oldestInFlightRobNum to its initialized (max) value.
929 oldestInFlightRobNum = UINT64_MAX;
930 } else {
931 // Set the oldest in-flight node rob number equal to the first node in
932 // the inFlightNodes since that will have the numerically least value.
933 oldestInFlightRobNum = inFlightNodes.begin()->second;
934 }
935
936 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
937 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
938 oldestInFlightRobNum);
939
940 // A store is considered complete when a request is sent, thus ROB entry is
941 // freed. But it occupies an entry in the Store Buffer until its response
942 // is received. A load is considered complete when a response is received,
943 // thus both ROB and Load Buffer entries can be released.
944 if (done_node->isLoad()) {
945 assert(numInFlightLoads != 0);
946 --numInFlightLoads;
947 }
948 // For normal writes, we send the requests out and clear a store buffer
 949    // entry on response. For writes which are strictly ordered, e.g.
950 // writes to device registers, we do that within release() which is called
951 // when node is executed and taken off from readyList.
952 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
953 releaseStoreBuffer();
954 }
955}
956
957void
958TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
959{
960 assert(numInFlightStores != 0);
961 --numInFlightStores;
962}
963
964bool
965TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
966 const GraphNode* new_node) const
967{
968 uint16_t num_in_flight_nodes;
969 if (inFlightNodes.empty()) {
970 num_in_flight_nodes = 0;
971 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
972 " #in-flight nodes = 0", new_node->seqNum);
973 } else if (new_node->robNum > oldestInFlightRobNum) {
974 // This is the intuitive case where new dep-free node is younger
975 // instruction than the oldest instruction in-flight. Thus we make sure
976 // in_flight_nodes does not overflow.
977 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
978 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
979 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
980 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
981 } else {
982 // This is the case where an instruction older than the oldest in-
983 // flight instruction becomes dep-free. Thus we must have already
984 // accounted for the entry in ROB for this new dep-free node.
985 // Immediately after this check returns true, oldestInFlightRobNum will
986 // be updated in occupy(). We simply let this node issue now.
987 num_in_flight_nodes = 0;
988 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
989 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
990 new_node->seqNum, new_node->robNum);
991 }
992 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
993 numInFlightLoads, sizeLoadBuffer,
994 numInFlightStores, sizeStoreBuffer);
995 // Check if resources are available to issue the specific node
996 if (num_in_flight_nodes >= sizeROB) {
997 return false;
998 }
999 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
1000 return false;
1001 }
1002 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
1003 return false;
1004 }
1005 return true;
1006}
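
// A worked example of the ROB occupancy check, with hypothetical values
// sizeROB = 8 and oldestInFlightRobNum = 100:
//
//     new_node->robNum = 105 -> num_in_flight_nodes = 105 - 100 = 5 -> can issue
//     new_node->robNum = 109 -> num_in_flight_nodes = 109 - 100 = 9 -> blocked
//     new_node->robNum =  97 -> older than the oldest in flight     -> can issue
//
// Independently of the ROB check, a load is rejected when numInFlightLoads
// has reached sizeLoadBuffer, and a store when numInFlightStores has reached
// sizeStoreBuffer.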
1007
1008bool
1009TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
1010 // Return true if there is at least one read or write request in flight
1011 return (numInFlightStores != 0 || numInFlightLoads != 0);
1012}
1013
1014void
1015TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
1016 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1017 "LQ = %d/%d, SQ = %d/%d.\n",
1018 oldestInFlightRobNum,
1019 numInFlightLoads, sizeLoadBuffer,
1020 numInFlightStores, sizeStoreBuffer);
1021}
1022
1023void
1024TraceCPU::FixedRetryGen::regStats()
1025{
1026 using namespace Stats;
1027
1028 numSendAttempted
1029 .name(name() + ".numSendAttempted")
1030 .desc("Number of first attempts to send a request")
1031 ;
1032
1033 numSendSucceeded
1034 .name(name() + ".numSendSucceeded")
1035 .desc("Number of successful first attempts")
1036 ;
1037
1038 numSendFailed
1039 .name(name() + ".numSendFailed")
1040 .desc("Number of failed first attempts")
1041 ;
1042
1043 numRetrySucceeded
1044 .name(name() + ".numRetrySucceeded")
1045 .desc("Number of successful retries")
1046 ;
1047
1048 instLastTick
1049 .name(name() + ".instLastTick")
1050 .desc("Last tick simulated from the fixed inst trace")
1051 ;
1052}
1053
1054Tick
1055TraceCPU::FixedRetryGen::init()
1056{
1057 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1058 " IcacheGen: fixed issue with retry.\n");
1059
1060 if (nextExecute()) {
1061 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1062 return currElement.tick;
1063 } else {
1064 panic("Read of first message in the trace failed.\n");
1065 return MaxTick;
1066 }
1067}
1068
1069bool
1070TraceCPU::FixedRetryGen::tryNext()
1071{
1072 // If there is a retry packet, try to send it
1073 if (retryPkt) {
1074
1075 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1076
1077 if (!port.sendTimingReq(retryPkt)) {
1078 // Still blocked! This should never occur.
1079 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1080 return false;
1081 }
1082 ++numRetrySucceeded;
1083 } else {
1084
1085 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1086
1087 // try sending current element
1088 assert(currElement.isValid());
1089
1090 ++numSendAttempted;
1091
1092 if (!send(currElement.addr, currElement.blocksize,
1093 currElement.cmd, currElement.flags, currElement.pc)) {
1094 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1095 ++numSendFailed;
1096 // return false to indicate not to schedule next event
1097 return false;
1098 } else {
1099 ++numSendSucceeded;
1100 }
1101 }
1102 // If packet was sent successfully, either retryPkt or currElement, return
1103 // true to indicate to schedule event at current Tick plus delta. If packet
1104 // was sent successfully and there is no next packet to send, return false.
1105 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1106 "element.\n");
1107 retryPkt = nullptr;
1108 // Read next element into currElement, currElement gets cleared so save the
1109 // tick to calculate delta
1110 Tick last_tick = currElement.tick;
1111 if (nextExecute()) {
1112 assert(currElement.tick >= last_tick);
1113 delta = currElement.tick - last_tick;
1114 }
1115 return !traceComplete;
1116}
1117
1118void
1119TraceCPU::FixedRetryGen::exit()
1120{
1121 trace.reset();
1122}
1123
1124bool
1125TraceCPU::FixedRetryGen::nextExecute()
1126{
1127 if (traceComplete)
1128 // We are at the end of the file, thus we have no more messages.
1129 // Return false.
1130 return false;
1131
1132
1133 //Reset the currElement to the default values
1134 currElement.clear();
1135
1136 // Read the next line to get the next message. If that fails then end of
1137 // trace has been reached and traceComplete needs to be set in addition
1138 // to returning false. If successful then next message is in currElement.
1139 if (!trace.read(&currElement)) {
1140 traceComplete = true;
1141 instLastTick = curTick();
1142 return false;
1143 }
1144
1145 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1146 currElement.cmd.isRead() ? 'r' : 'w',
1147 currElement.addr,
1148 currElement.pc,
1149 currElement.blocksize,
1150 currElement.tick);
1151
1152 return true;
1153}
1154
1155bool
1156TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1157 Request::FlagsType flags, Addr pc)
1158{
1159
1160 // Create new request
1161 Request* req = new Request(addr, size, flags, masterID);
1162 req->setPC(pc);
1163
1164 // If this is not done it triggers assert in L1 cache for invalid contextId
1165 req->setContext(ContextID(0));
1166
1167 // Embed it in a packet
1168 PacketPtr pkt = new Packet(req, cmd);
1169
1170 uint8_t* pkt_data = new uint8_t[req->getSize()];
1171 pkt->dataDynamic(pkt_data);
1172
1173 if (cmd.isWrite()) {
1174 memset(pkt_data, 0xA, req->getSize());
1175 }
1176
1177 // Call MasterPort method to send a timing request for this packet
1178 bool success = port.sendTimingReq(pkt);
1179 if (!success) {
1180 // If it fails, save the packet to retry when a retry is signalled by
1181 // the cache
1182 retryPkt = pkt;
1183 }
1184 return success;
1185}
1186
1187void
1188TraceCPU::icacheRetryRecvd()
1189{
1190 // Schedule an event to go through the control flow in the same tick as
1191 // retry is received
1192 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1193 " event @%lli.\n", curTick());
1194 schedule(icacheNextEvent, curTick());
1195}
1196
1197void
1198TraceCPU::dcacheRetryRecvd()
1199{
1200 // Schedule an event to go through the execute flow in the same tick as
1201 // retry is received
1202 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1203 " event @%lli.\n", curTick());
1204 schedule(dcacheNextEvent, curTick());
1205}
1206
1207void
1208TraceCPU::schedDcacheNextEvent(Tick when)
1209{
1210 if (!dcacheNextEvent.scheduled()) {
1211 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1212 when);
1213 schedule(dcacheNextEvent, when);
1214 ++numSchedDcacheEvent;
1215 } else if (when < dcacheNextEvent.when()) {
1216 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1217 " to %lli.\n", dcacheNextEvent.when(), when);
1218 reschedule(dcacheNextEvent, when);
1219 }
1220
1221}
1222
1223bool
1224TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1225{
1226 // All responses on the instruction fetch side are ignored. Simply delete
1227 // the request and packet to free allocated memory
1228 delete pkt->req;
1229 delete pkt;
1230
1231 return true;
1232}
1233
1234void
1235TraceCPU::IcachePort::recvReqRetry()
1236{
1237 owner->icacheRetryRecvd();
1238}
1239
1240void
1241TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1242{
1243 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1244 dcacheGen.completeMemAccess(pkt);
1245}
1246
1247bool
1248TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1249{
1250 // Handle the responses for data memory requests which is done inside the
1251 // elastic data generator
1252 owner->dcacheRecvTimingResp(pkt);
1253 // After processing the response delete the request and packet to free
1254 // memory
1255 delete pkt->req;
1256 delete pkt;
1257
1258 return true;
1259}
1260
1261void
1262TraceCPU::DcachePort::recvReqRetry()
1263{
1264 owner->dcacheRetryRecvd();
1265}
1266
1267TraceCPU::ElasticDataGen::InputStream::InputStream(
1268 const std::string& filename,
1269 const double time_multiplier)
1270 : trace(filename),
1271 timeMultiplier(time_multiplier),
1272 microOpCount(0)
1273{
1274 // Create a protobuf message for the header and read it from the stream
1275 ProtoMessage::InstDepRecordHeader header_msg;
1276    if (!trace.read(header_msg)) {
1277        panic("Failed to read packet header from %s\n", filename);
1278    }
1279
1280    if (header_msg.tick_freq() != SimClock::Frequency) {
1281        panic("Trace %s was recorded with a different tick frequency %d\n",
1282              filename, header_msg.tick_freq());
1283    }
1284
1285    // Assign window size equal to the field in the trace that was recorded
1286    // when the data dependency trace was captured in the o3cpu model
1287    windowSize = header_msg.window_size();
1288}
1289
1290void
1291TraceCPU::ElasticDataGen::InputStream::reset()
1292{
1293 trace.reset();
1294}
1295
1296bool
1297TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1298{
1299 ProtoMessage::InstDepRecord pkt_msg;
1300 if (trace.read(pkt_msg)) {
1301 // Required fields
1302 element->seqNum = pkt_msg.seq_num();
1303 element->type = pkt_msg.type();
1304 // Scale the compute delay to effectively scale the Trace CPU frequency
1305 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1306
1307 // Repeated field robDepList
1308 element->clearRobDep();
1309 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1310 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1311 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1312 element->numRobDep += 1;
1313 }
1314
1315 // Repeated field
1316 element->clearRegDep();
1317 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1318 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1319            // There is a possibility that an instruction has both a register
1320            // and an order dependency on the same instruction. In such a case,
1321            // the register dependency is omitted.
1322 bool duplicate = false;
1323 for (int j = 0; j < element->numRobDep; j++) {
1324 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1325 }
1326 if (!duplicate) {
1327 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1328 element->numRegDep += 1;
1329 }
1330 }
1331
1332 // Optional fields
1333 if (pkt_msg.has_p_addr())
1334 element->physAddr = pkt_msg.p_addr();
1335 else
1336 element->physAddr = 0;
1337
1338 if (pkt_msg.has_v_addr())
1339 element->virtAddr = pkt_msg.v_addr();
1340 else
1341 element->virtAddr = 0;
1342
1343 if (pkt_msg.has_asid())
1344 element->asid = pkt_msg.asid();
1345 else
1346 element->asid = 0;
1347
1348 if (pkt_msg.has_size())
1349 element->size = pkt_msg.size();
1350 else
1351 element->size = 0;
1352
1353 if (pkt_msg.has_flags())
1354 element->flags = pkt_msg.flags();
1355 else
1356 element->flags = 0;
1357
1358 if (pkt_msg.has_pc())
1359 element->pc = pkt_msg.pc();
1360 else
1361 element->pc = 0;
1362
1363 // ROB occupancy number
1364 ++microOpCount;
1365 if (pkt_msg.has_weight()) {
1366 microOpCount += pkt_msg.weight();
1367 }
1368 element->robNum = microOpCount;
1369 return true;
1370 }
1371
1372 // We have reached the end of the file
1373 return false;
1374}
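
// A worked example of how microOpCount yields robNum, with hypothetical
// weights. Assuming three consecutive records where only the second carries
// a weight of 3 (standing for micro-ops elided from the trace):
//
//     record 1: ++microOpCount                  -> microOpCount = 1, robNum = 1
//     record 2: ++microOpCount; microOpCount += 3 -> microOpCount = 5, robNum = 5
//     record 3: ++microOpCount                  -> microOpCount = 6, robNum = 6
//
// The weight values come from the o3cpu that recorded the trace; the numbers
// above are made up purely to show the accumulation.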
1375
1376bool
1377TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1378{
1379 for (auto& own_reg_dep : regDep) {
1380 if (own_reg_dep == reg_dep) {
1381 // If register dependency is found, make it zero and return true
1382 own_reg_dep = 0;
1383 assert(numRegDep > 0);
1384 --numRegDep;
1385 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1386 "done.\n", seqNum, reg_dep);
1387 return true;
1388 }
1389 }
1390
1391 // Return false if the dependency is not found
1392 return false;
1393}
1394
1395bool
1396TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1397{
1398 for (auto& own_rob_dep : robDep) {
1399 if (own_rob_dep == rob_dep) {
1400 // If the rob dependency is found, make it zero and return true
1401 own_rob_dep = 0;
1402 assert(numRobDep > 0);
1403 --numRobDep;
1404 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1405 "done.\n", seqNum, rob_dep);
1406 return true;
1407 }
1408 }
1409 return false;
1410}
1411
1412void
1413TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1414 for (auto& own_reg_dep : regDep) {
1415 own_reg_dep = 0;
1416 }
1417 numRegDep = 0;
1418}
1419
1420void
1421TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1422 for (auto& own_rob_dep : robDep) {
1423 own_rob_dep = 0;
1424 }
1425 numRobDep = 0;
1426}
1427
1428bool
1429TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1430{
1431 // If it is an rob dependency then remove it
1432 if (!removeRobDep(done_seq_num)) {
1433 // If it is not an rob dependency then it must be a register dependency
1434 // If the register dependency is not found, it violates an assumption
1435 // and must be caught by assert.
1436 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1437 assert(regdep_found);
1438 }
1439 // Return true if the node is dependency free
1440 return (numRobDep == 0 && numRegDep == 0);
1441}
1442
1443void
1444TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1445{
1446 DPRINTFR(TraceCPUData, "%lli", seqNum);
1447 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1448 if (isLoad() || isStore()) {
1449 DPRINTFR(TraceCPUData, ",%i", physAddr);
1450 DPRINTFR(TraceCPUData, ",%i", size);
1451 DPRINTFR(TraceCPUData, ",%i", flags);
1452 }
1453 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1454 int i = 0;
1455 DPRINTFR(TraceCPUData, "robDep:");
1456 while (robDep[i] != 0) {
1457 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1458 i++;
1459 }
1460 i = 0;
1461 DPRINTFR(TraceCPUData, "regDep:");
1462 while (regDep[i] != 0) {
1463 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1464 i++;
1465 }
1466 auto child_itr = dependents.begin();
1467 DPRINTFR(TraceCPUData, "dependents:");
1468 while (child_itr != dependents.end()) {
1469 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1470 child_itr++;
1471 }
1472
1473 DPRINTFR(TraceCPUData, "\n");
1474}
1475
1476std::string
1477TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1478{
1479 return Record::RecordType_Name(type);
1480}
1481
1482TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1483 : trace(filename)
1484{
1485 // Create a protobuf message for the header and read it from the stream
1486 ProtoMessage::PacketHeader header_msg;
1487    if (!trace.read(header_msg)) {
1488        panic("Failed to read packet header from %s\n", filename);
1489    }
1490
1491    if (header_msg.tick_freq() != SimClock::Frequency) {
1492        panic("Trace %s was recorded with a different tick frequency %d\n",
1493              filename, header_msg.tick_freq());
1494    }
1495}
1496
1497void
1498TraceCPU::FixedRetryGen::InputStream::reset()
1499{
1500 trace.reset();
1501}
1502
1503bool
1504TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1505{
1506 ProtoMessage::Packet pkt_msg;
1507 if (trace.read(pkt_msg)) {
1508 element->cmd = pkt_msg.cmd();
1509 element->addr = pkt_msg.addr();
1510 element->blocksize = pkt_msg.size();
1511 element->tick = pkt_msg.tick();
1512 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1513 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1514 return true;
1515 }
1516
1517 // We have reached the end of the file
1518 return false;
1519}
55 instTraceFile(params->instTraceFile),
56 dataTraceFile(params->dataTraceFile),
57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59 params),
60 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
61 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
62 oneTraceComplete(false),
63 traceOffset(0),
64 execCompleteEvent(nullptr),
65 enableEarlyExit(params->enableEarlyExit),
66 progressMsgInterval(params->progressMsgInterval),
67 progressMsgThreshold(params->progressMsgInterval)
68{
69 // Increment static counter for number of Trace CPUs.
70 ++TraceCPU::numTraceCPUs;
71
72 // Check that the python parameters for sizes of ROB, store buffer and
73 // load buffer do not overflow the corresponding C++ variables.
74 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
75 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
76 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d "
77 "exceeds the max. value of %d.\n", params->sizeROB,
78 UINT16_MAX);
79 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
80 " %d exceeds the max. value of %d.\n",
81 params->sizeLoadBuffer, UINT16_MAX);
82}
83
84TraceCPU::~TraceCPU()
85{
86
87}
88
89TraceCPU*
90TraceCPUParams::create()
91{
92 return new TraceCPU(this);
93}
94
95void
96TraceCPU::updateNumOps(uint64_t rob_num)
97{
98 numOps = rob_num;
99 if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
100 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
101 progressMsgThreshold += progressMsgInterval;
102 }
103}
104
105void
106TraceCPU::takeOverFrom(BaseCPU *oldCPU)
107{
108 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
109 assert(!getInstPort().isConnected());
110 assert(oldCPU->getInstPort().isConnected());
111 BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
112 oldCPU->getInstPort().unbind();
113 getInstPort().bind(inst_peer_port);
114
115 assert(!getDataPort().isConnected());
116 assert(oldCPU->getDataPort().isConnected());
117 BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
118 oldCPU->getDataPort().unbind();
119 getDataPort().bind(data_peer_port);
120}
121
122void
123TraceCPU::init()
124{
125 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
126 "\n", instTraceFile);
127 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
128 dataTraceFile);
129
130 BaseCPU::init();
131
132 // Get the send tick of the first instruction read request
133 Tick first_icache_tick = icacheGen.init();
134
135 // Get the send tick of the first data read/write request
136 Tick first_dcache_tick = dcacheGen.init();
137
138 // Set the trace offset as the minimum of that in both traces
139 traceOffset = std::min(first_icache_tick, first_dcache_tick);
140 inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
141 name(), traceOffset);
142
143 // Schedule next icache and dcache event by subtracting the offset
144 schedule(icacheNextEvent, first_icache_tick - traceOffset);
145 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
146
147 // Adjust the trace offset for the dcache generator's ready nodes
148 // We don't need to do this for the icache generator as it will
149 // send its first request at the first event and schedule subsequent
150 // events using a relative tick delta
151 dcacheGen.adjustInitTraceOffset(traceOffset);
152
153 // If the Trace CPU simulation is configured to exit on any one trace
154 // completion then we don't need a counted event to count down all Trace
155 // CPUs in the system. If not then instantiate a counted event.
156 if (!enableEarlyExit) {
157 // The static counter for number of Trace CPUs is correctly set at
158 // this point so create an event and pass it.
159 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
160 numTraceCPUs);
161 }
162
163}
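// Worked example of the offset handling above (illustrative ticks): if the
// first icache request is at tick 1000000 and the first dcache request at
// tick 1200000, then traceOffset = 1000000, icacheNextEvent is scheduled at
// tick 0, dcacheNextEvent at tick 200000, and the dcache generator's ready
// nodes have their execute ticks shifted down by 1000000.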
164
165void
166TraceCPU::schedIcacheNext()
167{
168 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
169
170 // Try to send the current packet or a retry packet if there is one
171 bool sched_next = icacheGen.tryNext();
172 // If packet sent successfully, schedule next event
173 if (sched_next) {
174 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
175 "at %d.\n", curTick() + icacheGen.tickDelta());
176 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
177 ++numSchedIcacheEvent;
178 } else {
179 // check if the trace is complete. If not, do nothing because the send
180 // failed and the next event will be scheduled via recvReqRetry()
181 if (icacheGen.isTraceComplete()) {
182 // If this is the first trace to complete, set the flag. If it is
183 // already set then both traces are complete and we can exit sim.
184 checkAndSchedExitEvent();
185 }
186 }
187 return;
188}
189
190void
191TraceCPU::schedDcacheNext()
192{
193 DPRINTF(TraceCPUData, "DcacheGen event.\n");
194
195 // Update stat for numCycles
196 numCycles = clockEdge() / clockPeriod();
197
198 dcacheGen.execute();
199 if (dcacheGen.isExecComplete()) {
200 checkAndSchedExitEvent();
201 }
202}
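// Note on the numCycles update above: clockEdge()/clockPeriod() is the number
// of whole clock periods elapsed up to the current clock edge, e.g. with a
// 500-tick clock period a clock edge at tick 250000 gives numCycles = 500
// (illustrative values).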
203
204void
205TraceCPU::checkAndSchedExitEvent()
206{
207 if (!oneTraceComplete) {
208 oneTraceComplete = true;
209 } else {
210 // Schedule event to indicate execution is complete as both
211 // instruction and data access traces have been played back.
212 inform("%s: Execution complete.\n", name());
213 // If the replay is configured to exit early, that is when any one
214 // execution is complete then exit immediately and return. Otherwise,
215 // schedule the counted exit that counts down completion of each Trace
216 // CPU.
217 if (enableEarlyExit) {
218 exitSimLoop("End of trace reached");
219 } else {
220 schedule(*execCompleteEvent, curTick());
221 }
222 }
223}
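// The two-call pattern above means the first generator to finish only sets
// oneTraceComplete; the second call (when the other trace also finishes)
// either exits immediately in early-exit mode, or schedules execCompleteEvent,
// the counted event that counts down all Trace CPUs in the system (see the
// comment in init()).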
224
225void
226TraceCPU::regStats()
227{
228
229 BaseCPU::regStats();
230
231 numSchedDcacheEvent
232 .name(name() + ".numSchedDcacheEvent")
233 .desc("Number of events scheduled to trigger data request generator")
234 ;
235
236 numSchedIcacheEvent
237 .name(name() + ".numSchedIcacheEvent")
238 .desc("Number of events scheduled to trigger instruction request generator")
239 ;
240
241 numOps
242 .name(name() + ".numOps")
243 .desc("Number of micro-ops simulated by the Trace CPU")
244 ;
245
246 cpi
247 .name(name() + ".cpi")
248 .desc("Cycles per micro-op used as a proxy for CPI")
249 .precision(6)
250 ;
251 cpi = numCycles/numOps;
252
253 icacheGen.regStats();
254 dcacheGen.regStats();
255}
256
257void
258TraceCPU::ElasticDataGen::regStats()
259{
260 using namespace Stats;
261
262 maxDependents
263 .name(name() + ".maxDependents")
264 .desc("Max number of dependents observed on a node")
265 ;
266
267 maxReadyListSize
268 .name(name() + ".maxReadyListSize")
269 .desc("Max size of the ready list observed")
270 ;
271
272 numSendAttempted
273 .name(name() + ".numSendAttempted")
274 .desc("Number of first attempts to send a request")
275 ;
276
277 numSendSucceeded
278 .name(name() + ".numSendSucceeded")
279 .desc("Number of successful first attempts")
280 ;
281
282 numSendFailed
283 .name(name() + ".numSendFailed")
284 .desc("Number of failed first attempts")
285 ;
286
287 numRetrySucceeded
288 .name(name() + ".numRetrySucceeded")
289 .desc("Number of successful retries")
290 ;
291
292 numSplitReqs
293 .name(name() + ".numSplitReqs")
294 .desc("Number of split requests")
295 ;
296
297 numSOLoads
298 .name(name() + ".numSOLoads")
299 .desc("Number of strictly ordered loads")
300 ;
301
302 numSOStores
303 .name(name() + ".numSOStores")
304 .desc("Number of strictly ordered stores")
305 ;
306
307 dataLastTick
308 .name(name() + ".dataLastTick")
309 .desc("Last tick simulated from the elastic data trace")
310 ;
311}
312
313Tick
314TraceCPU::ElasticDataGen::init()
315{
316 DPRINTF(TraceCPUData, "Initializing data memory request generator "
317 "DcacheGen: elastic issue with retry.\n");
318
319 if (!readNextWindow())
320 panic("Trace has %d elements. It must have at least %d elements.\n",
321 depGraph.size(), 2 * windowSize);
322 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
323 depGraph.size());
324
325 if (!readNextWindow())
326 panic("Trace has %d elements. It must have at least %d elements.\n",
327 depGraph.size(), 2 * windowSize);
328 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
329 depGraph.size());
330
331 // Print readyList
332 if (DTRACE(TraceCPUData)) {
333 printReadyList();
334 }
335 auto free_itr = readyList.begin();
336 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
337 " is %d.\n", free_itr->seqNum, free_itr->execTick);
338 // Return the execute tick of the earliest ready node so that an event
339 // can be scheduled to call execute()
340 return (free_itr->execTick);
341}
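// Note: init() reads two windows up front and the panics above require the
// trace to hold at least 2 * windowSize records. This keeps a full window of
// records ahead of the nodes being executed, so dependents of dependency-free
// nodes are already present in depGraph (see the corresponding read at the
// start of execute()).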
342
343void
344TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
345 for (auto& free_node : readyList) {
346 free_node.execTick -= offset;
347 }
348}
349
350void
351TraceCPU::ElasticDataGen::exit()
352{
353 trace.reset();
354}
355
356bool
357TraceCPU::ElasticDataGen::readNextWindow()
358{
359
360 // Read and add next window
361 DPRINTF(TraceCPUData, "Reading next window from file.\n");
362
363 if (traceComplete) {
364 // We are at the end of the file, thus we have no more records.
365 // Return false.
366 return false;
367 }
368
369 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
370 depGraph.size());
371
372 uint32_t num_read = 0;
373 while (num_read != windowSize) {
374
375 // Create a new graph node
376 GraphNode* new_node = new GraphNode;
377
378 // Read the next line to get the next record. If that fails then end of
379 // trace has been reached and traceComplete needs to be set in addition
380 // to returning false.
381 if (!trace.read(new_node)) {
382 DPRINTF(TraceCPUData, "\tTrace complete!\n");
383 traceComplete = true;
384 return false;
385 }
386
387 // Annotate the ROB dependencies of the new node onto the parent nodes.
388 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
389 // Annotate the register dependencies of the new node onto the parent
390 // nodes.
391 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
392
393 num_read++;
394 // Add to map
395 depGraph[new_node->seqNum] = new_node;
396 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
397 // Source dependencies are already complete, check if resources
398 // are available and issue. The execution time is approximated
399 // to current time plus the computational delay.
400 checkAndIssue(new_node);
401 }
402 }
403
404 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
405 depGraph.size());
406 return true;
407}
408
409template<typename T> void
410TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
411 T& dep_array, uint8_t& num_dep)
412{
413 for (auto& a_dep : dep_array) {
414 // The convention is to set the dependencies starting with the first
415 // index in the ROB and register dependency arrays. Thus, when we reach
416 // a dependency equal to the initialisation value of zero, we know we
417 // have iterated over all dependencies and can break.
418 if (a_dep == 0)
419 break;
420 // We look up the valid dependency, i.e. the parent of this node
421 auto parent_itr = depGraph.find(a_dep);
422 if (parent_itr != depGraph.end()) {
423 // If the parent is found, it is yet to be executed. Append a
424 // pointer to the new node to the dependents list of the parent
425 // node.
426 parent_itr->second->dependents.push_back(new_node);
427 auto num_depts = parent_itr->second->dependents.size();
428 maxDependents = std::max<double>(num_depts, maxDependents.value());
429 } else {
430 // The dependency is not found in the graph. So consider
431 // the execution of the parent is complete, i.e. remove this
432 // dependency.
433 a_dep = 0;
434 num_dep--;
435 }
436 }
437}
438
439void
440TraceCPU::ElasticDataGen::execute()
441{
442 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
443 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
444 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
445 depFreeQueue.size());
446 hwResource.printOccupancy();
447
448 // Read next window to make sure that dependents of all dep-free nodes
449 // are in the depGraph
450 if (nextRead) {
451 readNextWindow();
452 nextRead = false;
453 }
454
455 // First attempt to issue the pending dependency-free nodes held
456 // in depFreeQueue. If resources have become available for a node,
457 // then issue it, i.e. add the node to readyList.
458 while (!depFreeQueue.empty()) {
459 if (checkAndIssue(depFreeQueue.front(), false)) {
460 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
461 "%lli.\n", (depFreeQueue.front())->seqNum);
462 depFreeQueue.pop();
463 } else {
464 break;
465 }
466 }
467 // Proceed to execute from readyList
468 auto graph_itr = depGraph.begin();
469 auto free_itr = readyList.begin();
470 // Iterate through readyList until the next free node has its execute
471 // tick later than curTick or the end of readyList is reached
472 while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
473
474 // Get pointer to the node to be executed
475 graph_itr = depGraph.find(free_itr->seqNum);
476 assert(graph_itr != depGraph.end());
477 GraphNode* node_ptr = graph_itr->second;
478
479 // If there is a retryPkt send that else execute the load
480 if (retryPkt) {
481 // The retryPkt must be the request that was created by the
482 // first node in the readyList.
483 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
484 panic("Retry packet's seqence number does not match "
485 "the first node in the readyList.\n");
486 }
487 if (port.sendTimingReq(retryPkt)) {
488 ++numRetrySucceeded;
489 retryPkt = nullptr;
490 }
491 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
492 // If there is no retryPkt, attempt to send a memory request in
493 // case of a load or store node. If the send fails, executeMemReq()
494 // returns a packet pointer, which we save in retryPkt. In case of
495 // a comp node we don't do anything and simply continue as if the
496 // execution of the comp node succeeded.
497 retryPkt = executeMemReq(node_ptr);
498 }
499 // If the retryPkt or a new load/store node failed, we exit from here
500 // as a retry from the cache will bring control back to execute(). The
501 // first node in readyList will then be the failed node.
502 if (retryPkt) {
503 break;
504 }
505
506 // Proceed to remove dependencies for the successfully executed node.
507 // If it is a load which is not strictly ordered and we sent a
508 // request for it successfully, we do not yet mark any register
509 // dependencies complete. But, as per the dependency modelling, we
510 // need to mark the ROB dependencies of load and non-load/store nodes
511 // that are based on the successful sending of the load as complete.
512 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
513 // If execute succeeded mark its dependents as complete
514 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
515 "dependents..\n", node_ptr->seqNum);
516
517 auto child_itr = (node_ptr->dependents).begin();
518 while (child_itr != (node_ptr->dependents).end()) {
519 // ROB dependency of a store on a load must not be removed
520 // after load is sent but after response is received
521 if (!(*child_itr)->isStore() &&
522 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
523
524 // Check if the child node has become dependency free
525 if ((*child_itr)->numRobDep == 0 &&
526 (*child_itr)->numRegDep == 0) {
527
528 // Source dependencies are complete, check if
529 // resources are available and issue
530 checkAndIssue(*child_itr);
531 }
532 // Remove this child from the sent load's dependents and
533 // point to the element following the erased one
534 child_itr = node_ptr->dependents.erase(child_itr);
535 } else {
536 // This child is not dependency-free, point to the next
537 // child
538 child_itr++;
539 }
540 }
541 } else {
542 // If it is a strictly ordered load mark its dependents as complete
543 // as we do not send a request for this case. If it is a store or a
544 // comp node we also mark all its dependents complete.
545 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
546 " up dependents..\n", node_ptr->seqNum);
547
548 for (auto child : node_ptr->dependents) {
549 // If the child node is dependency free removeDepOnInst()
550 // returns true.
551 if (child->removeDepOnInst(node_ptr->seqNum)) {
552 // Source dependencies are complete, check if resources
553 // are available and issue
554 checkAndIssue(child);
555 }
556 }
557 }
558
559 // After executing the node, remove from readyList and delete node.
560 readyList.erase(free_itr);
561 // If it is a cacheable load which was sent, don't delete
562 // just yet. Delete it in completeMemAccess() after the
563 // response is received. If it is a strictly ordered
564 // load, it was not sent and all dependencies were simply
565 // marked complete. Thus it is safe to delete it. For
566 // stores and non load/store nodes all dependencies were
567 // marked complete so it is safe to delete it.
568 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
569 // Release all resources occupied by the completed node
570 hwResource.release(node_ptr);
571 // clear the dynamically allocated set of dependents
572 (node_ptr->dependents).clear();
573 // Update the stat for numOps simulated
574 owner.updateNumOps(node_ptr->robNum);
575 // delete node
576 delete node_ptr;
577 // remove from graph
578 depGraph.erase(graph_itr);
579 }
580 // Point to first node to continue to next iteration of while loop
581 free_itr = readyList.begin();
582 } // end of while loop
583
584 // Print readyList, sizes of queues and resource status after updating
585 if (DTRACE(TraceCPUData)) {
586 printReadyList();
587 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
588 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
589 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
590 depFreeQueue.size());
591 hwResource.printOccupancy();
592 }
593
594 if (retryPkt) {
595 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
596 "event from the cache for seq. num %lli.\n",
597 retryPkt->req->getReqInstSeqNum());
598 return;
599 }
600 // If the size of the dependency graph is less than the dependency window
601 // then read from the trace file to populate the graph next time we are in
602 // execute.
603 if (depGraph.size() < windowSize && !traceComplete)
604 nextRead = true;
605
606 // If cache is not blocked, schedule an event for the first execTick in
607 // readyList else retry from cache will schedule the event. If the ready
608 // list is empty then check if the next pending node has resources
609 // available to issue. If yes, then schedule an event for the next cycle.
610 if (!readyList.empty()) {
611 Tick next_event_tick = std::max(readyList.begin()->execTick,
612 curTick());
613 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
614 next_event_tick);
615 owner.schedDcacheNextEvent(next_event_tick);
616 } else if (readyList.empty() && !depFreeQueue.empty() &&
617 hwResource.isAvailable(depFreeQueue.front())) {
618 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
619 owner.clockEdge(Cycles(1)));
620 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
621 }
622
623 // If trace is completely read, readyList is empty and depGraph is empty,
624 // set execComplete to true
625 if (depGraph.empty() && readyList.empty() && traceComplete &&
626 !hwResource.awaitingResponse()) {
627 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
628 execComplete = true;
629 dataLastTick = curTick();
630 }
631}
632
633PacketPtr
634TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
635{
636
637 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
638 "virt addr %d, pc %#x, size %d, flags %d).\n",
639 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
640 node_ptr->pc, node_ptr->size, node_ptr->flags);
641
642 // If the request is strictly ordered, do not send it. Just return nullptr
643 // as if it was successfully sent.
644 if (node_ptr->isStrictlyOrdered()) {
645 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
646 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
647 node_ptr->seqNum);
648 return nullptr;
649 }
650
651 // Check if the request spans two cache lines as this condition triggers
652 // an assert fail in the L1 cache. If it does then truncate the size to
653 // access only until the end of that line and ignore the remainder. The
654 // stat counting this is useful to keep a check on how frequently this
655 // happens. If required the code could be revised to mimic splitting such
656 // a request into two.
657 unsigned blk_size = owner.cacheLineSize();
658 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
659 if (!(blk_offset + node_ptr->size <= blk_size)) {
660 node_ptr->size = blk_size - blk_offset;
661 ++numSplitReqs;
662 }
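// Worked example of the truncation above (illustrative values): with a
// 64-byte cache line, a 16-byte access to physAddr 0x1038 has
// blk_offset = 0x38 (56); since 56 + 16 > 64 the size is truncated to
// 64 - 56 = 8 bytes and numSplitReqs is incremented.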
663
664 // Create a request and the packet containing request
665 Request* req = new Request(node_ptr->physAddr, node_ptr->size,
666 node_ptr->flags, masterID, node_ptr->seqNum,
667 ContextID(0));
668 req->setPC(node_ptr->pc);
669 // If virtual address is valid, set the asid and virtual address fields
670 // of the request.
671 if (node_ptr->virtAddr != 0) {
672 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
673 node_ptr->flags, masterID, node_ptr->pc);
674 req->setPaddr(node_ptr->physAddr);
675 req->setReqInstSeqNum(node_ptr->seqNum);
676 }
677
678 PacketPtr pkt;
679 uint8_t* pkt_data = new uint8_t[req->getSize()];
680 if (node_ptr->isLoad()) {
681 pkt = Packet::createRead(req);
682 } else {
683 pkt = Packet::createWrite(req);
684 memset(pkt_data, 0xA, req->getSize());
685 }
686 pkt->dataDynamic(pkt_data);
687
688 // Call MasterPort method to send a timing request for this packet
689 bool success = port.sendTimingReq(pkt);
690 ++numSendAttempted;
691
692 if (!success) {
693 // If it fails, return the packet to retry when a retry is signalled by
694 // the cache
695 ++numSendFailed;
696 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
697 return pkt;
698 } else {
699 // If it succeeds, return nullptr
700 ++numSendSucceeded;
701 return nullptr;
702 }
703}
704
705bool
706TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
707{
708 // Assert the node is dependency-free
709 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
710
711 // If this is the first attempt, print a debug message to indicate this.
712 if (first) {
713 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
714 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
715 node_ptr->robNum);
716 }
717
718 // Check if resources are available to issue the specific node
719 if (hwResource.isAvailable(node_ptr)) {
720 // If resources are free only then add to readyList
721 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
722 " to readyList, occupying resources.\n", node_ptr->seqNum);
723 // Compute the execute tick by adding the compute delay for the node
724 // and add the ready node to the ready list
725 addToSortedReadyList(node_ptr->seqNum,
726 owner.clockEdge() + node_ptr->compDelay);
727 // Account for the resources taken up by this issued node.
728 hwResource.occupy(node_ptr);
729 return true;
730
731 } else {
732 if (first) {
733 // Although dependencies are complete, resources are not available.
734 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
735 " Adding to depFreeQueue.\n", node_ptr->seqNum);
736 depFreeQueue.push(node_ptr);
737 } else {
738 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
739 "Still pending issue.\n", node_ptr->seqNum);
740 }
741 return false;
742 }
743}
744
745void
746TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
747{
748 // Release the resources for this completed node.
749 if (pkt->isWrite()) {
750 // Consider store complete.
751 hwResource.releaseStoreBuffer();
752 // If it is a store response then do nothing since we do not model
753 // dependencies on store completion in the trace. But if we were
754 // blocking execution due to store buffer fullness, we need to schedule
755 // an event and attempt to progress.
756 } else {
757 // If it is a load response then release the dependents waiting on it.
758 // Get pointer to the completed load
759 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
760 assert(graph_itr != depGraph.end());
761 GraphNode* node_ptr = graph_itr->second;
762
763 // Release resources occupied by the load
764 hwResource.release(node_ptr);
765
766 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
767 " dependents..\n", node_ptr->seqNum);
768
769 for (auto child : node_ptr->dependents) {
770 if (child->removeDepOnInst(node_ptr->seqNum)) {
771 checkAndIssue(child);
772 }
773 }
774
775 // clear the dynamically allocated set of dependents
776 (node_ptr->dependents).clear();
777 // Update the stat for numOps completed
778 owner.updateNumOps(node_ptr->robNum);
779 // delete node
780 delete node_ptr;
781 // remove from graph
782 depGraph.erase(graph_itr);
783 }
784
785 if (DTRACE(TraceCPUData)) {
786 printReadyList();
787 }
788
789 // If the size of the dependency graph is less than the dependency window
790 // then read from the trace file to populate the graph next time we are in
791 // execute.
792 if (depGraph.size() < windowSize && !traceComplete)
793 nextRead = true;
794
795 // If not waiting for retry, attempt to schedule next event
796 if (!retryPkt) {
797 // We might have new dep-free nodes in the list which will have execute
798 // tick greater than or equal to curTick. But a new dep-free node might
799 // have its execute tick earlier. Therefore, attempt to reschedule. It
800 // could happen that the readyList is empty and we got here via a
801 // last remaining response. So, either the trace is complete or there
802 // are pending nodes in the depFreeQueue. The checking is done in the
803 // execute() control flow, so schedule an event to go via that flow.
804 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
805 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
806 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
807 next_event_tick);
808 owner.schedDcacheNextEvent(next_event_tick);
809 }
810}
811
812void
813TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
814 Tick exec_tick)
815{
816 ReadyNode ready_node;
817 ready_node.seqNum = seq_num;
818 ready_node.execTick = exec_tick;
819
820 // Iterator to readyList
821 auto itr = readyList.begin();
822
823 // If the readyList is empty, simply insert the new node at the beginning
824 // and return
825 if (itr == readyList.end()) {
826 readyList.insert(itr, ready_node);
827 maxReadyListSize = std::max<double>(readyList.size(),
828 maxReadyListSize.value());
829 return;
830 }
831
832 // If the first node in the list is the one that failed to send, i.e. it
833 // matches the pending retryPkt, skip past it so that it keeps its
834 // position at the head of the list.
835 if (retryPkt)
836 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
837 itr++;
838
839 // Increment the iterator and compare the node pointed to by it to the new
840 // node till the position to insert the new node is found.
841 bool found = false;
842 while (!found && itr != readyList.end()) {
843 // If the execution tick of the new node is less than the node then
844 // this is the position to insert
845 if (exec_tick < itr->execTick)
846 found = true;
847 // If the execution tick of the new node is equal to the node then
848 // sort in ascending order of sequence numbers
849 else if (exec_tick == itr->execTick) {
850 // If the sequence number of the new node is less than the node
851 // then this is the position to insert
852 if (seq_num < itr->seqNum)
853 found = true;
854 // Else go to next node
855 else
856 itr++;
857 }
858 // If the execution tick of the new node is greater than the node then
859 // go to the next node
860 else
861 itr++;
862 }
863 readyList.insert(itr, ready_node);
864 // Update the stat for max size reached of the readyList
865 maxReadyListSize = std::max<double>(readyList.size(),
866 maxReadyListSize.value());
867}
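// Worked example of the ordering above (illustrative entries): readyList is
// kept sorted by execTick with ties broken by ascending seqNum, so inserting
// (seqNum 12, tick 1000) into [(5, 900), (7, 1000), (9, 1200)] yields
// [(5, 900), (7, 1000), (12, 1000), (9, 1200)].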
868
869void
870TraceCPU::ElasticDataGen::printReadyList() {
871
872 auto itr = readyList.begin();
873 if (itr == readyList.end()) {
874 DPRINTF(TraceCPUData, "readyList is empty.\n");
875 return;
876 }
877 DPRINTF(TraceCPUData, "Printing readyList:\n");
878 while (itr != readyList.end()) {
879 auto graph_itr = depGraph.find(itr->seqNum);
880 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
881 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
882 node_ptr->typeToStr(), itr->execTick);
883 itr++;
884 }
885}
886
887TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
888 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
889 : sizeROB(max_rob),
890 sizeStoreBuffer(max_stores),
891 sizeLoadBuffer(max_loads),
892 oldestInFlightRobNum(UINT64_MAX),
893 numInFlightLoads(0),
894 numInFlightStores(0)
895{}
896
897void
898TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
899{
900 // Occupy ROB entry for the issued node
901 // Merely maintain the oldest node, i.e. the numerically least robNum, by
902 // saving it in the variable oldestInFlightRobNum.
903 inFlightNodes[new_node->seqNum] = new_node->robNum;
904 oldestInFlightRobNum = inFlightNodes.begin()->second;
905
906 // Occupy Load/Store Buffer entry for the issued node if applicable
907 if (new_node->isLoad()) {
908 ++numInFlightLoads;
909 } else if (new_node->isStore()) {
910 ++numInFlightStores;
911 } // else if it is a non load/store node, no buffer entry is occupied
912
913 printOccupancy();
914}
915
916void
917TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
918{
919 assert(!inFlightNodes.empty());
920 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
921 done_node->seqNum);
922
923 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
924 inFlightNodes.erase(done_node->seqNum);
925
926 if (inFlightNodes.empty()) {
927 // If we deleted the only in-flight node then oldestInFlightRobNum is
928 // set back to its initialised (max) value.
929 oldestInFlightRobNum = UINT64_MAX;
930 } else {
931 // Set the oldest in-flight node rob number equal to the first node in
932 // the inFlightNodes since that will have the numerically least value.
933 oldestInFlightRobNum = inFlightNodes.begin()->second;
934 }
935
936 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
937 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
938 oldestInFlightRobNum);
939
940 // A store is considered complete when a request is sent, thus ROB entry is
941 // freed. But it occupies an entry in the Store Buffer until its response
942 // is received. A load is considered complete when a response is received,
943 // thus both ROB and Load Buffer entries can be released.
944 if (done_node->isLoad()) {
945 assert(numInFlightLoads != 0);
946 --numInFlightLoads;
947 }
948 // For normal writes, we send the requests out and clear a store buffer
949 // entry on response. For writes which are strictly ordered, for e.g.
950 // writes to device registers, we do that within release() which is called
951 // when node is executed and taken off from readyList.
952 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
953 releaseStoreBuffer();
954 }
955}
956
957void
958TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
959{
960 assert(numInFlightStores != 0);
961 --numInFlightStores;
962}
963
964bool
965TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
966 const GraphNode* new_node) const
967{
968 uint16_t num_in_flight_nodes;
969 if (inFlightNodes.empty()) {
970 num_in_flight_nodes = 0;
971 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
972 " #in-flight nodes = 0", new_node->seqNum);
973 } else if (new_node->robNum > oldestInFlightRobNum) {
974 // This is the intuitive case where the new dep-free node is a younger
975 // instruction than the oldest instruction in flight. Thus we make sure
976 // num_in_flight_nodes does not overflow.
977 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
978 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
979 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
980 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
981 } else {
982 // This is the case where an instruction older than the oldest in-
983 // flight instruction becomes dep-free. Thus we must have already
984 // accounted for the entry in ROB for this new dep-free node.
985 // Immediately after this check returns true, oldestInFlightRobNum will
986 // be updated in occupy(). We simply let this node issue now.
987 num_in_flight_nodes = 0;
988 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
989 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
990 new_node->seqNum, new_node->robNum);
991 }
992 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
993 numInFlightLoads, sizeLoadBuffer,
994 numInFlightStores, sizeStoreBuffer);
995 // Check if resources are available to issue the specific node
996 if (num_in_flight_nodes >= sizeROB) {
997 return false;
998 }
999 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
1000 return false;
1001 }
1002 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
1003 return false;
1004 }
1005 return true;
1006}
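// Worked example of the ROB check above (illustrative numbers): with
// sizeROB = 128, if the oldest in-flight node has robNum 100 and the new
// dependency-free node has robNum 228, then num_in_flight_nodes = 128, which
// is not less than sizeROB, so the node cannot issue until older nodes retire.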
1007
1008bool
1009TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
1010 // Return true if there is at least one read or write request in flight
1011 return (numInFlightStores != 0 || numInFlightLoads != 0);
1012}
1013
1014void
1015TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
1016 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1017 "LQ = %d/%d, SQ = %d/%d.\n",
1018 oldestInFlightRobNum,
1019 numInFlightLoads, sizeLoadBuffer,
1020 numInFlightStores, sizeStoreBuffer);
1021}
1022
1023void
1024TraceCPU::FixedRetryGen::regStats()
1025{
1026 using namespace Stats;
1027
1028 numSendAttempted
1029 .name(name() + ".numSendAttempted")
1030 .desc("Number of first attempts to send a request")
1031 ;
1032
1033 numSendSucceeded
1034 .name(name() + ".numSendSucceeded")
1035 .desc("Number of successful first attempts")
1036 ;
1037
1038 numSendFailed
1039 .name(name() + ".numSendFailed")
1040 .desc("Number of failed first attempts")
1041 ;
1042
1043 numRetrySucceeded
1044 .name(name() + ".numRetrySucceeded")
1045 .desc("Number of successful retries")
1046 ;
1047
1048 instLastTick
1049 .name(name() + ".instLastTick")
1050 .desc("Last tick simulated from the fixed inst trace")
1051 ;
1052}
1053
1054Tick
1055TraceCPU::FixedRetryGen::init()
1056{
1057 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1058 " IcacheGen: fixed issue with retry.\n");
1059
1060 if (nextExecute()) {
1061 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1062 return currElement.tick;
1063 } else {
1064 panic("Read of first message in the trace failed.\n");
1065 return MaxTick;
1066 }
1067}
1068
1069bool
1070TraceCPU::FixedRetryGen::tryNext()
1071{
1072 // If there is a retry packet, try to send it
1073 if (retryPkt) {
1074
1075 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1076
1077 if (!port.sendTimingReq(retryPkt)) {
1078 // Still blocked! This should never occur.
1079 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1080 return false;
1081 }
1082 ++numRetrySucceeded;
1083 } else {
1084
1085 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1086
1087 // try sending current element
1088 assert(currElement.isValid());
1089
1090 ++numSendAttempted;
1091
1092 if (!send(currElement.addr, currElement.blocksize,
1093 currElement.cmd, currElement.flags, currElement.pc)) {
1094 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1095 ++numSendFailed;
1096 // return false to indicate not to schedule next event
1097 return false;
1098 } else {
1099 ++numSendSucceeded;
1100 }
1101 }
1102 // If packet was sent successfully, either retryPkt or currElement, return
1103 // true to indicate to schedule event at current Tick plus delta. If packet
1104 // was sent successfully and there is no next packet to send, return false.
1105 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1106 "element.\n");
1107 retryPkt = nullptr;
1108 // Read the next element into currElement. Since currElement is cleared
1109 // by the read, save its tick first so that the delta can be calculated
1110 Tick last_tick = currElement.tick;
1111 if (nextExecute()) {
1112 assert(currElement.tick >= last_tick);
1113 delta = currElement.tick - last_tick;
1114 }
1115 return !traceComplete;
1116}
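// Illustration of the delta computed above (illustrative ticks): if the
// element just sent had tick 5000 and the next element read has tick 5400,
// delta = 400; the owner then uses tickDelta() to schedule the next icache
// event that many ticks after the current tick (see schedIcacheNext()).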
1117
1118void
1119TraceCPU::FixedRetryGen::exit()
1120{
1121 trace.reset();
1122}
1123
1124bool
1125TraceCPU::FixedRetryGen::nextExecute()
1126{
1127 if (traceComplete)
1128 // We are at the end of the file, thus we have no more messages.
1129 // Return false.
1130 return false;
1131
1132
1133 //Reset the currElement to the default values
1134 currElement.clear();
1135
1136 // Read the next line to get the next message. If that fails then end of
1137 // trace has been reached and traceComplete needs to be set in addition
1138 // to returning false. If successful then next message is in currElement.
1139 if (!trace.read(&currElement)) {
1140 traceComplete = true;
1141 instLastTick = curTick();
1142 return false;
1143 }
1144
1145 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1146 currElement.cmd.isRead() ? 'r' : 'w',
1147 currElement.addr,
1148 currElement.pc,
1149 currElement.blocksize,
1150 currElement.tick);
1151
1152 return true;
1153}
1154
1155bool
1156TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1157 Request::FlagsType flags, Addr pc)
1158{
1159
1160 // Create new request
1161 Request* req = new Request(addr, size, flags, masterID);
1162 req->setPC(pc);
1163
1164 // If this is not done it triggers assert in L1 cache for invalid contextId
1165 req->setContext(ContextID(0));
1166
1167 // Embed it in a packet
1168 PacketPtr pkt = new Packet(req, cmd);
1169
1170 uint8_t* pkt_data = new uint8_t[req->getSize()];
1171 pkt->dataDynamic(pkt_data);
1172
1173 if (cmd.isWrite()) {
1174 memset(pkt_data, 0xA, req->getSize());
1175 }
1176
1177 // Call MasterPort method to send a timing request for this packet
1178 bool success = port.sendTimingReq(pkt);
1179 if (!success) {
1180 // If it fails, save the packet to retry when a retry is signalled by
1181 // the cache
1182 retryPkt = pkt;
1183 }
1184 return success;
1185}
1186
1187void
1188TraceCPU::icacheRetryRecvd()
1189{
1190 // Schedule an event to go through the control flow in the same tick as
1191 // retry is received
1192 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1193 " event @%lli.\n", curTick());
1194 schedule(icacheNextEvent, curTick());
1195}
1196
1197void
1198TraceCPU::dcacheRetryRecvd()
1199{
1200 // Schedule an event to go through the execute flow in the same tick as
1201 // retry is received
1202 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1203 " event @%lli.\n", curTick());
1204 schedule(dcacheNextEvent, curTick());
1205}
1206
1207void
1208TraceCPU::schedDcacheNextEvent(Tick when)
1209{
1210 if (!dcacheNextEvent.scheduled()) {
1211 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1212 when);
1213 schedule(dcacheNextEvent, when);
1214 ++numSchedDcacheEvent;
1215 } else if (when < dcacheNextEvent.when()) {
1216 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1217 " to %lli.\n", dcacheNextEvent.when(), when);
1218 reschedule(dcacheNextEvent, when);
1219 }
1220
1221}
1222
1223bool
1224TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1225{
1226 // All responses on the instruction fetch side are ignored. Simply delete
1227 // the request and packet to free allocated memory
1228 delete pkt->req;
1229 delete pkt;
1230
1231 return true;
1232}
1233
1234void
1235TraceCPU::IcachePort::recvReqRetry()
1236{
1237 owner->icacheRetryRecvd();
1238}
1239
1240void
1241TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1242{
1243 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1244 dcacheGen.completeMemAccess(pkt);
1245}
1246
1247bool
1248TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1249{
1250 // Handle the responses for data memory requests which is done inside the
1251 // elastic data generator
1252 owner->dcacheRecvTimingResp(pkt);
1253 // After processing the response delete the request and packet to free
1254 // memory
1255 delete pkt->req;
1256 delete pkt;
1257
1258 return true;
1259}
1260
1261void
1262TraceCPU::DcachePort::recvReqRetry()
1263{
1264 owner->dcacheRetryRecvd();
1265}
1266
1267TraceCPU::ElasticDataGen::InputStream::InputStream(
1268 const std::string& filename,
1269 const double time_multiplier)
1270 : trace(filename),
1271 timeMultiplier(time_multiplier),
1272 microOpCount(0)
1273{
1274 // Create a protobuf message for the header and read it from the stream
1275 ProtoMessage::InstDepRecordHeader header_msg;
1276 if (!trace.read(header_msg)) {
1277 panic("Failed to read packet header from %s\n", filename);
1278 }
1279
1280 if (header_msg.tick_freq() != SimClock::Frequency) {
1281 panic("Trace %s was recorded with a different tick frequency %d\n",
1282 filename, header_msg.tick_freq());
1283 }
1284
1285 // Assign window size equal to the field in the trace that was recorded
1286 // when the data dependency trace was captured in the o3cpu model
1287 windowSize = header_msg.window_size();
1288}
1289
1290void
1291TraceCPU::ElasticDataGen::InputStream::reset()
1292{
1293 trace.reset();
1294}
1295
1296bool
1297TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1298{
1299 ProtoMessage::InstDepRecord pkt_msg;
1300 if (trace.read(pkt_msg)) {
1301 // Required fields
1302 element->seqNum = pkt_msg.seq_num();
1303 element->type = pkt_msg.type();
1304 // Scale the compute delay to effectively scale the Trace CPU frequency
1305 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1306
1307 // Repeated field robDepList
1308 element->clearRobDep();
1309 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1310 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1311 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1312 element->numRobDep += 1;
1313 }
1314
1315 // Repeated field
1316 element->clearRegDep();
1317 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1318 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1319 // There is a possibility that an instruction has both a register
1320 // and an order dependency on another instruction. In such a case,
1321 // the register dependency is omitted
1322 bool duplicate = false;
1323 for (int j = 0; j < element->numRobDep; j++) {
1324 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1325 }
1326 if (!duplicate) {
1327 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1328 element->numRegDep += 1;
1329 }
1330 }
1331
1332 // Optional fields
1333 if (pkt_msg.has_p_addr())
1334 element->physAddr = pkt_msg.p_addr();
1335 else
1336 element->physAddr = 0;
1337
1338 if (pkt_msg.has_v_addr())
1339 element->virtAddr = pkt_msg.v_addr();
1340 else
1341 element->virtAddr = 0;
1342
1343 if (pkt_msg.has_asid())
1344 element->asid = pkt_msg.asid();
1345 else
1346 element->asid = 0;
1347
1348 if (pkt_msg.has_size())
1349 element->size = pkt_msg.size();
1350 else
1351 element->size = 0;
1352
1353 if (pkt_msg.has_flags())
1354 element->flags = pkt_msg.flags();
1355 else
1356 element->flags = 0;
1357
1358 if (pkt_msg.has_pc())
1359 element->pc = pkt_msg.pc();
1360 else
1361 element->pc = 0;
1362
1363 // ROB occupancy number
1364 ++microOpCount;
1365 if (pkt_msg.has_weight()) {
1366 microOpCount += pkt_msg.weight();
1367 }
1368 element->robNum = microOpCount;
1369 return true;
1370 }
1371
1372 // We have reached the end of the file
1373 return false;
1374}
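// Note on the robNum accounting above: microOpCount advances by one per
// record plus the record's optional weight, and that running count becomes
// the node's robNum; e.g. a record with weight 3 moves microOpCount from
// 10 to 14 (illustrative values) and the node gets robNum 14.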
1375
1376bool
1377TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1378{
1379 for (auto& own_reg_dep : regDep) {
1380 if (own_reg_dep == reg_dep) {
1381 // If register dependency is found, make it zero and return true
1382 own_reg_dep = 0;
1383 assert(numRegDep > 0);
1384 --numRegDep;
1385 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1386 "done.\n", seqNum, reg_dep);
1387 return true;
1388 }
1389 }
1390
1391 // Return false if the dependency is not found
1392 return false;
1393}
1394
1395bool
1396TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1397{
1398 for (auto& own_rob_dep : robDep) {
1399 if (own_rob_dep == rob_dep) {
1400 // If the rob dependency is found, make it zero and return true
1401 own_rob_dep = 0;
1402 assert(numRobDep > 0);
1403 --numRobDep;
1404 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1405 "done.\n", seqNum, rob_dep);
1406 return true;
1407 }
1408 }
1409 return false;
1410}
1411
1412void
1413TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1414 for (auto& own_reg_dep : regDep) {
1415 own_reg_dep = 0;
1416 }
1417 numRegDep = 0;
1418}
1419
1420void
1421TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1422 for (auto& own_rob_dep : robDep) {
1423 own_rob_dep = 0;
1424 }
1425 numRobDep = 0;
1426}
1427
1428bool
1429TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1430{
1431 // If it is an rob dependency then remove it
1432 if (!removeRobDep(done_seq_num)) {
1433 // If it is not an rob dependency then it must be a register dependency
1434 // If the register dependency is not found, it violates an assumption
1435 // and must be caught by assert.
1436 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1437 assert(regdep_found);
1438 }
1439 // Return true if the node is dependency free
1440 return (numRobDep == 0 && numRegDep == 0);
1441}
1442
1443void
1444TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1445{
1446 DPRINTFR(TraceCPUData, "%lli", seqNum);
1447 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1448 if (isLoad() || isStore()) {
1449 DPRINTFR(TraceCPUData, ",%i", physAddr);
1450 DPRINTFR(TraceCPUData, ",%i", size);
1451 DPRINTFR(TraceCPUData, ",%i", flags);
1452 }
1453 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1454 int i = 0;
1455 DPRINTFR(TraceCPUData, "robDep:");
1456 while (robDep[i] != 0) {
1457 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1458 i++;
1459 }
1460 i = 0;
1461 DPRINTFR(TraceCPUData, "regDep:");
1462 while (regDep[i] != 0) {
1463 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1464 i++;
1465 }
1466 auto child_itr = dependents.begin();
1467 DPRINTFR(TraceCPUData, "dependents:");
1468 while (child_itr != dependents.end()) {
1469 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1470 child_itr++;
1471 }
1472
1473 DPRINTFR(TraceCPUData, "\n");
1474}
1475
1476std::string
1477TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1478{
1479 return Record::RecordType_Name(type);
1480}
1481
1482TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1483 : trace(filename)
1484{
1485 // Create a protobuf message for the header and read it from the stream
1486 ProtoMessage::PacketHeader header_msg;
1487 if (!trace.read(header_msg)) {
1488 panic("Failed to read packet header from %s\n", filename);
1489 }
1490
1491 if (header_msg.tick_freq() != SimClock::Frequency) {
1492 panic("Trace %s was recorded with a different tick frequency %d\n",
1493 filename, header_msg.tick_freq());
1494 }
1495}
1496
1497void
1498TraceCPU::FixedRetryGen::InputStream::reset()
1499{
1500 trace.reset();
1501}
1502
1503bool
1504TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1505{
1506 ProtoMessage::Packet pkt_msg;
1507 if (trace.read(pkt_msg)) {
1508 element->cmd = pkt_msg.cmd();
1509 element->addr = pkt_msg.addr();
1510 element->blocksize = pkt_msg.size();
1511 element->tick = pkt_msg.tick();
1512 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1513 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1514 return true;
1515 }
1516
1517 // We have reached the end of the file
1518 return false;
1519}