/*
 * Copyright (c) 2013 - 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(name() + ".inst")),
        dataMasterID(params->system->getMasterId(name() + ".data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params->sizeROB, params->sizeStoreBuffer,
                  params->sizeLoadBuffer),
        icacheNextEvent(this),
        dcacheNextEvent(this),
        oneTraceComplete(false),
        firstFetchTick(0),
        execCompleteEvent(nullptr)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
             "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
             "to %d exceeds the max. value of %d.\n",
             params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
             " %d exceeds the max. value of %d.\n",
             params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request and schedule
    // icacheNextEvent at that tick.
    Tick first_icache_tick = icacheGen.init();
    schedule(icacheNextEvent, first_icache_tick);

    // Get the send tick of the first data read/write request and schedule
    // dcacheNextEvent at that tick.
    Tick first_dcache_tick = dcacheGen.init();
    schedule(dcacheNextEvent, first_dcache_tick);

    // The static counter for number of Trace CPUs is correctly set at this
    // point so create an event and pass it.
    execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                             numTraceCPUs);
    // Save the first fetch request tick to dump it as tickOffset
    firstFetchTick = first_icache_tick;
}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if traceComplete. If not, do nothing because sending failed
        // and next event will be scheduled via RecvRetry()
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete to exit sim.
            checkAndSchedExitEvent();
        }
    }
    return;
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());

        // Record stats which are computed at the end of simulation
        tickOffset = firstFetchTick;
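        // numCycles below is the number of CPU clock cycles elapsed between
        // the first fetch request and the completion of both traces.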
        numCycles = (clockEdge() - firstFetchTick) / clockPeriod();
        numOps = dcacheGen.getMicroOpCount();
        schedule(*execCompleteEvent, curTick());
    }
}

void
TraceCPU::regStats()
{

    BaseCPU::regStats();

    numSchedDcacheEvent
        .name(name() + ".numSchedDcacheEvent")
        .desc("Number of events scheduled to trigger data request generator")
        ;

    numSchedIcacheEvent
        .name(name() + ".numSchedIcacheEvent")
        .desc("Number of events scheduled to trigger instruction request "
              "generator")
        ;

    numOps
        .name(name() + ".numOps")
        .desc("Number of micro-ops simulated by the Trace CPU")
        ;

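    // Note: cpi appears to be a formula stat, so the ratio assigned below is
    // evaluated when the stats are dumped rather than at this assignment.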
    cpi
        .name(name() + ".cpi")
        .desc("Cycles per micro-op used as a proxy for CPI")
        .precision(6)
        ;
    cpi = numCycles / numOps;

    tickOffset
        .name(name() + ".tickOffset")
        .desc("The first execution tick for the root node of elastic traces")
        ;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
        .name(name() + ".maxDependents")
        .desc("Max number of dependents observed on a node")
        ;

    maxReadyListSize
        .name(name() + ".maxReadyListSize")
        .desc("Max size of the ready list observed")
        ;

    numSendAttempted
        .name(name() + ".numSendAttempted")
        .desc("Number of first attempts to send a request")
        ;

    numSendSucceeded
        .name(name() + ".numSendSucceeded")
        .desc("Number of successful first attempts")
        ;

    numSendFailed
        .name(name() + ".numSendFailed")
        .desc("Number of failed first attempts")
        ;

    numRetrySucceeded
        .name(name() + ".numRetrySucceeded")
        .desc("Number of successful retries")
        ;

    numSplitReqs
        .name(name() + ".numSplitReqs")
        .desc("Number of split requests")
        ;

    numSOLoads
        .name(name() + ".numSOLoads")
        .desc("Number of strictly ordered loads")
        ;

    numSOStores
        .name(name() + ".numSOStores")
        .desc("Number of strictly ordered stores")
        ;

    dataLastTick
        .name(name() + ".dataLastTick")
        .desc("Last tick simulated from the elastic data trace")
        ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

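    // Two windows are read up front: the second ensures that the dependents
    // of the nodes in the first window are already present in the graph
    // (see the matching comment in execute()).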
    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
            " is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{

    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then end
        // of trace has been reached and traceComplete needs to be set in
        // addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                          T& dep_array, uint8_t& num_dep)
{
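    // T is the fixed-size ROB or register dependency array of the new node;
    // per the convention noted below, entries equal to zero are unused slots.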
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts, maxDependents.value());
        } else {
            // The dependency is not found in the graph, so we consider the
            // execution of the parent complete, i.e. remove this dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                    "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the next free node has its execute
    // tick later than curTick or the end of readyList is reached
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt, send that; else execute the load/store
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList will then be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // when the load is sent, but only after its response is
                // received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as
            // complete as we do not send a request for this case. If it is a
            // store or a comp node we also mark all its dependents complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If cache is not blocked, schedule an event for the first execTick in
    // readyList else retry from cache will schedule the event. If the ready
    // list is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If trace is completely read, readyList is empty and depGraph is empty,
    // set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{

    DPRINTF(TraceCPUData, "Executing memory request %lli (addr %d, pc %#x, "
            "size %d, flags %d).\n", node_ptr->seqNum, node_ptr->addr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
608 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
609 node_ptr->seqNum);
610 return nullptr;
611 }
612
613 // Check if the request spans two cache lines as this condition triggers
614 // an assert fail in the L1 cache. If it does then truncate the size to
615 // access only until the end of that line and ignore the remainder. The
616 // stat counting this is useful to keep a check on how frequently this
617 // happens. If required the code could be revised to mimick splitting such
618 // a request into two.
619 unsigned blk_size = owner.cacheLineSize();
620 Addr blk_offset = (node_ptr->addr & (Addr)(blk_size - 1));
621 if (!(blk_offset + node_ptr->size <= blk_size)) {
622 node_ptr->size = blk_size - blk_offset;
623 ++numSplitReqs;
624 }
625
    // Create a request and the packet containing request
    Request* req = new Request(node_ptr->addr, node_ptr->size, node_ptr->flags,
                               masterID, node_ptr->seqNum,
                               ContextID(0), ThreadID(0));
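    // The trace sequence number doubles as the request's instruction sequence
    // number, so the response can be matched back to its graph node via
    // getReqInstSeqNum() in completeMemAccess().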
    req->setPC(node_ptr->pc);
    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
608 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
609 node_ptr->seqNum);
610 return nullptr;
611 }
612
613 // Check if the request spans two cache lines as this condition triggers
614 // an assert fail in the L1 cache. If it does then truncate the size to
615 // access only until the end of that line and ignore the remainder. The
616 // stat counting this is useful to keep a check on how frequently this
617 // happens. If required the code could be revised to mimick splitting such
618 // a request into two.
619 unsigned blk_size = owner.cacheLineSize();
620 Addr blk_offset = (node_ptr->addr & (Addr)(blk_size - 1));
621 if (!(blk_offset + node_ptr->size <= blk_size)) {
622 node_ptr->size = blk_size - blk_offset;
623 ++numSplitReqs;
624 }
625
626 // Create a request and the packet containing request
627 Request* req = new Request(node_ptr->addr, node_ptr->size, node_ptr->flags,
628 masterID, node_ptr->seqNum,
629 ContextID(0), ThreadID(0));
630 req->setPC(node_ptr->pc);
631 PacketPtr pkt;
632 uint8_t* pkt_data = new uint8_t[req->getSize()];
633 if (node_ptr->isLoad) {
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
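    // 'first' is true on the initial issue attempt for a node. If that
    // attempt fails for lack of resources, the node is queued in depFreeQueue
    // and retried from execute() with first set to false.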
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
667 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
                node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // Only if resources are free, add the node to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli."
                 " Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;

    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to
        // schedule an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have
        // execute tick greater than or equal to curTick. But a new dep-free
        // node might have its execute tick earlier. Therefore, attempt to
        // reschedule. It could happen that the readyList is empty and we got
        // here via a last remaining response. So, either the trace is
        // complete or there are pending nodes in the depFreeQueue. The
        // checking is done in the execute() control flow, so schedule an
        // event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
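    // The list is kept sorted by execTick, with seqNum breaking ties in
    // ascending order. For example, inserting (seq 7, tick 100) into
    // [(5, 100), (6, 200)] yields [(5, 100), (7, 100), (6, 200)].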
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                            maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one whose send failed and is
    // awaiting a retry, skip past it so that it keeps its position at the
    // head of the list.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node then
        // go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                        maxReadyListSize.value());
}

void
TraceCPU::ElasticDataGen::printReadyList() {

    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
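        // M5_VAR_USED marks node_ptr as used so that builds which compile
        // out tracing do not emit an unused-variable warning.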
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
668 node_ptr->robNum);
669 }
670
671 // Check if resources are available to issue the specific node
672 if (hwResource.isAvailable(node_ptr)) {
673 // If resources are free only then add to readyList
674 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
675 " to readyList, occupying resources.\n", node_ptr->seqNum);
676 // Compute the execute tick by adding the compute delay for the node
677 // and add the ready node to the ready list
678 addToSortedReadyList(node_ptr->seqNum,
679 owner.clockEdge() + node_ptr->compDelay);
680 // Account for the resources taken up by this issued node.
681 hwResource.occupy(node_ptr);
682 return true;
683
684 } else {
685 if (first) {
686 // Although dependencies are complete, resources are not available.
687 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
688 " Adding to depFreeQueue.\n", node_ptr->seqNum);
689 depFreeQueue.push(node_ptr);
690 } else {
691 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
692 "Still pending issue.\n", node_ptr->seqNum);
693 }
694 return false;
695 }
696}
697
698void
699TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
700{
701 // Release the resources for this completed node.
702 if (pkt->isWrite()) {
703 // Consider store complete.
704 hwResource.releaseStoreBuffer();
705 // If it is a store response then do nothing since we do not model
706 // dependencies on store completion in the trace. But if we were
707 // blocking execution due to store buffer fullness, we need to schedule
708 // an event and attempt to progress.
709 } else {
710 // If it is a load response then release the dependents waiting on it.
711 // Get pointer to the completed load
712 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
713 assert(graph_itr != depGraph.end());
714 GraphNode* node_ptr = graph_itr->second;
715
716 // Release resources occupied by the load
717 hwResource.release(node_ptr);
718
719 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
720 " dependents..\n", node_ptr->seqNum);
721
722 for (auto child : node_ptr->dependents) {
723 if (child->removeDepOnInst(node_ptr->seqNum)) {
724 checkAndIssue(child);
725 }
726 }
727
728 // clear the dynamically allocated set of dependents
729 (node_ptr->dependents).clear();
730 // delete node
731 delete node_ptr;
732 // remove from graph
733 depGraph.erase(graph_itr);
734 }
735
736 if (DTRACE(TraceCPUData)) {
737 printReadyList();
738 }
739
740 // If the size of the dependency graph is less than the dependency window
741 // then read from the trace file to populate the graph next time we are in
742 // execute.
743 if (depGraph.size() < windowSize && !traceComplete)
744 nextRead = true;
745
746 // If not waiting for retry, attempt to schedule next event
747 if (!retryPkt) {
748 // We might have new dep-free nodes in the list which will have execute
749 // tick greater than or equal to curTick. But a new dep-free node might
750 // have its execute tick earlier. Therefore, attempt to reschedule. It
751 // could happen that the readyList is empty and we got here via a
752 // last remaining response. So, either the trace is complete or there
753 // are pending nodes in the depFreeQueue. The checking is done in the
754 // execute() control flow, so schedule an event to go via that flow.
755 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
756 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
757 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
758 next_event_tick);
759 owner.schedDcacheNextEvent(next_event_tick);
760 }
761}
762
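// Worked example of the rescheduling arithmetic above, with hypothetical
// numbers: assume a 500-tick clock period and curTick() == 1000, so
// owner.clockEdge(Cycles(1)) == 1500. If the earliest ready node has
// execTick == 1200, then
//
//     next_event_tick = std::max(1200, 1500) == 1500
//
// and an empty readyList also yields 1500: execute() is re-entered no
// earlier than the next clock edge in either case.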
763void
764TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
765 Tick exec_tick)
766{
767 ReadyNode ready_node;
768 ready_node.seqNum = seq_num;
769 ready_node.execTick = exec_tick;
770
771 // Iterator to readyList
772 auto itr = readyList.begin();
773
774 // If the readyList is empty, simply insert the new node at the beginning
775 // and return
776 if (itr == readyList.end()) {
777 readyList.insert(itr, ready_node);
778 maxReadyListSize = std::max<double>(readyList.size(),
779 maxReadyListSize.value());
780 return;
781 }
782
783 // If the first node in the list is the one held in retryPkt, i.e. it
784 // failed to execute, skip past it so that it keeps its position at the
785 // head of the list.
786 if (retryPkt)
787 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
788 itr++;
789
790 // Increment the iterator and compare the node pointed to by it to the new
791 // node till the position to insert the new node is found.
792 bool found = false;
793 while (!found && itr != readyList.end()) {
794 // If the execution tick of the new node is less than the node then
795 // this is the position to insert
796 if (exec_tick < itr->execTick)
797 found = true;
798 // If the execution tick of the new node is equal to the node then
799 // sort in ascending order of sequence numbers
800 else if (exec_tick == itr->execTick) {
801 // If the sequence number of the new node is less than the node
802 // then this is the position to insert
803 if (seq_num < itr->seqNum)
804 found = true;
805 // Else go to next node
806 else
807 itr++;
808 }
809 // If the execution tick of the new node is greater than the node then
810 // go to the next node
811 else
812 itr++;
813 }
814 readyList.insert(itr, ready_node);
815 // Update the stat for max size reached of the readyList
816 maxReadyListSize = std::max<double>(readyList.size(),
817 maxReadyListSize.value());
818}
819
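// Illustrative ordering, with hypothetical values: inserting nodes with
// (seqNum, execTick) pairs (7, 1200), (5, 1000) and (6, 1000) in any
// order yields a readyList sorted primarily by execTick and secondarily
// by seqNum:
//
//     (5, 1000), (6, 1000), (7, 1200)
//
// with the one exception that a head node held in retryPkt keeps its
// place at the front.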
820void
821TraceCPU::ElasticDataGen::printReadyList() {
822
823 auto itr = readyList.begin();
824 if (itr == readyList.end()) {
825 DPRINTF(TraceCPUData, "readyList is empty.\n");
826 return;
827 }
828 DPRINTF(TraceCPUData, "Printing readyList:\n");
829 while (itr != readyList.end()) {
830 auto graph_itr = depGraph.find(itr->seqNum);
831 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
832 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
833 node_ptr->typeToStr(), itr->execTick);
834 itr++;
835 }
836 }
837
838TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
839 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
840 : sizeROB(max_rob),
841 sizeStoreBuffer(max_stores),
842 sizeLoadBuffer(max_loads),
843 oldestInFlightRobNum(UINT64_MAX),
844 numInFlightLoads(0),
845 numInFlightStores(0)
846{}
847
848void
849TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
850{
851 // Occupy ROB entry for the issued node
852 // Merely maintain the oldest node, i.e. the numerically least robNum,
853 // by saving it in the variable oldestInFlightRobNum.
854 inFlightNodes[new_node->seqNum] = new_node->robNum;
855 oldestInFlightRobNum = inFlightNodes.begin()->second;
856
857 // Occupy Load/Store Buffer entry for the issued node if applicable
858 if (new_node->isLoad()) {
859 ++numInFlightLoads;
860 } else if (new_node->isStore()) {
861 ++numInFlightStores;
862 } // else it is neither a load nor a store, so no buffer entry is occupied
863
864 printOccupancy();
865 }
866
867void
868TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
869{
870 assert(!inFlightNodes.empty());
871 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
872 done_node->seqNum);
873
874 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
875 inFlightNodes.erase(done_node->seqNum);
876
877 if (inFlightNodes.empty()) {
878 // If we deleted the only in-flight node, reset oldestInFlightRobNum
879 // to its initial (max) value.
880 oldestInFlightRobNum = UINT64_MAX;
881 } else {
882 // Set the oldest in-flight node rob number equal to the first node in
883 // the inFlightNodes since that will have the numerically least value.
884 oldestInFlightRobNum = inFlightNodes.begin()->second;
885 }
886
887 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
888 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
889 oldestInFlightRobNum);
890
891 // A store is considered complete when a request is sent, thus ROB entry is
892 // freed. But it occupies an entry in the Store Buffer until its response
893 // is received. A load is considered complete when a response is received,
894 // thus both ROB and Load Buffer entries can be released.
895 if (done_node->isLoad()) {
896 assert(numInFlightLoads != 0);
897 --numInFlightLoads;
898 }
899 // For normal writes, we send the requests out and clear a store buffer
900 // entry on response. For writes which are strictly ordered, e.g. writes
901 // to device registers, we do that within release(), which is called
902 // when the node is executed and taken off the readyList.
903 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
904 releaseStoreBuffer();
905 }
906}
907
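// Illustrative timeline for the comment above (editorial): a normal store
// frees its ROB entry here when its request is sent, and its store buffer
// entry later in releaseStoreBuffer() when the response returns; a
// strictly ordered store frees both from release(). A load frees its ROB
// and load buffer entries together once its response arrives, via
// completeMemAccess().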
908void
909TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
910{
911 assert(numInFlightStores != 0);
912 --numInFlightStores;
913}
914
915bool
916TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
917 const GraphNode* new_node) const
918{
919 uint16_t num_in_flight_nodes;
920 if (inFlightNodes.empty()) {
921 num_in_flight_nodes = 0;
922 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
923 " #in-flight nodes = 0", new_node->seqNum);
924 } else if (new_node->robNum > oldestInFlightRobNum) {
925 // This is the intuitive case where the new dep-free node is a younger
926 // instruction than the oldest instruction in flight. Thus we make sure
927 // num_in_flight_nodes does not wrap around on the subtraction.
928 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
929 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
930 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
931 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
932 } else {
933 // This is the case where an instruction older than the oldest in-
934 // flight instruction becomes dep-free. Thus we must have already
935 // accounted for the entry in ROB for this new dep-free node.
936 // Immediately after this check returns true, oldestInFlightRobNum will
937 // be updated in occupy(). We simply let this node issue now.
938 num_in_flight_nodes = 0;
939 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
940 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
941 new_node->seqNum, new_node->robNum);
942 }
943 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
944 numInFlightLoads, sizeLoadBuffer,
945 numInFlightStores, sizeStoreBuffer);
946 // Check if resources are available to issue the specific node
947 if (num_in_flight_nodes >= sizeROB) {
948 return false;
949 }
950 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
951 return false;
952 }
953 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
954 return false;
955 }
956 return true;
957 }
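// Worked example of the ROB occupancy check, with hypothetical numbers:
// with sizeROB == 16 and oldestInFlightRobNum == 40, a new node with
// robNum == 58 gives
//
//     num_in_flight_nodes = 58 - 40 == 18 >= 16
//
// so it cannot issue yet, whereas robNum == 50 gives 10 and only the
// load/store buffer checks above remain.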
958
959bool
960TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
961 // Return true if there is at least one read or write request in flight
962 return (numInFlightStores != 0 || numInFlightLoads != 0);
963}
964
965void
966TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
967 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
968 "LQ = %d/%d, SQ = %d/%d.\n",
969 oldestInFlightRobNum,
970 numInFlightLoads, sizeLoadBuffer,
971 numInFlightStores, sizeStoreBuffer);
972}
973
974void
975TraceCPU::FixedRetryGen::regStats()
976{
977 using namespace Stats;
978
979 numSendAttempted
980 .name(name() + ".numSendAttempted")
981 .desc("Number of first attempts to send a request")
982 ;
983
984 numSendSucceeded
985 .name(name() + ".numSendSucceeded")
986 .desc("Number of successful first attempts")
987 ;
988
989 numSendFailed
990 .name(name() + ".numSendFailed")
991 .desc("Number of failed first attempts")
992 ;
993
994 numRetrySucceeded
995 .name(name() + ".numRetrySucceeded")
996 .desc("Number of successful retries")
997 ;
998
999 instLastTick
1000 .name(name() + ".instLastTick")
1001 .desc("Last tick simulated from the fixed inst trace")
1002 ;
1003}
1004
1005Tick
1006TraceCPU::FixedRetryGen::init()
1007{
1008 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1009 " IcacheGen: fixed issue with retry.\n");
1010
1011 if (nextExecute()) {
1012 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1013 return currElement.tick;
1014 } else {
1015 panic("Read of first message in the trace failed.\n");
1016 return MaxTick;
1017 }
1018}
1019
1020bool
1021TraceCPU::FixedRetryGen::tryNext()
1022{
1023 // If there is a retry packet, try to send it
1024 if (retryPkt) {
1025
1026 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1027
1028 if (!port.sendTimingReq(retryPkt)) {
1029 // Still blocked! This should never occur.
1030 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1031 return false;
1032 }
1033 ++numRetrySucceeded;
1034 } else {
1035
1036 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1037
1038 // try sending current element
1039 assert(currElement.isValid());
1040
1041 ++numSendAttempted;
1042
1043 if (!send(currElement.addr, currElement.blocksize,
1044 currElement.cmd, currElement.flags, currElement.pc)) {
1045 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1046 ++numSendFailed;
1047 // return false to indicate not to schedule next event
1048 return false;
1049 } else {
1050 ++numSendSucceeded;
1051 }
1052 }
1053 // The packet, whether retryPkt or currElement, was sent successfully.
1054 // Return true so that the next event is scheduled at the current tick
1055 // plus delta; if there is no next element to send, return false instead.
1056 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1057 "element.\n");
1058 retryPkt = nullptr;
1059 // Read next element into currElement, currElement gets cleared so save the
1060 // tick to calculate delta
1061 Tick last_tick = currElement.tick;
1062 if (nextExecute()) {
1063 assert(currElement.tick >= last_tick);
1064 delta = currElement.tick - last_tick;
1065 }
1066 return !traceComplete;
1067}
1068
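// Sketch of the delta computation above, with hypothetical ticks: if the
// element just sent had tick 1000 and nextExecute() reads an element with
// tick 1600, then
//
//     delta = 1600 - 1000 == 600
//
// and the owner schedules the next icacheNextEvent 600 ticks ahead,
// preserving the fetch spacing recorded in the trace.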
1069void
1070TraceCPU::FixedRetryGen::exit()
1071{
1072 trace.reset();
1073}
1074
1075bool
1076TraceCPU::FixedRetryGen::nextExecute()
1077{
1078 if (traceComplete)
1079 // We are at the end of the file, thus we have no more messages.
1080 // Return false.
1081 return false;
1082
1083
1084 // Reset the currElement to the default values
1085 currElement.clear();
1086
1087 // Read the next message from the trace. If that fails, the end of the
1088 // trace has been reached and traceComplete needs to be set in addition
1089 // to returning false. If successful, the next message is in currElement.
1090 if (!trace.read(&currElement)) {
1091 traceComplete = true;
1092 instLastTick = curTick();
1093 return false;
1094 }
1095
1096 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1097 currElement.cmd.isRead() ? 'r' : 'w',
1098 currElement.addr,
1099 currElement.pc,
1100 currElement.blocksize,
1101 currElement.tick);
1102
1103 return true;
1104}
1105
1106bool
1107TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1108 Request::FlagsType flags, Addr pc)
1109{
1110
1111 // Create new request
1112 Request* req = new Request(addr, size, flags, masterID);
1113 req->setPC(pc);
1114
1115 // If this is not done, it triggers an assert in the L1 cache for an invalid contextId
1116 req->setThreadContext(ContextID(0), ThreadID(0));
1117
1118 // Embed it in a packet
1119 PacketPtr pkt = new Packet(req, cmd);
1120
1121 uint8_t* pkt_data = new uint8_t[req->getSize()];
1122 pkt->dataDynamic(pkt_data);
1123
1124 if (cmd.isWrite()) {
1125 memset(pkt_data, 0xA, req->getSize());
1126 }
1127
1128 // Call MasterPort method to send a timing request for this packet
1129 bool success = port.sendTimingReq(pkt);
1130 if (!success) {
1131 // If it fails, save the packet to retry when a retry is signalled by
1132 // the cache
1133 retryPkt = pkt;
1134 }
1135 return success;
1136}
1137
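// Illustrative call, mirroring tryNext() above: send the current trace
// element and fall back to the retry path on failure:
//
//     if (!send(currElement.addr, currElement.blocksize, currElement.cmd,
//               currElement.flags, currElement.pc)) {
//         // retryPkt now holds the packet; wait for recvReqRetry().
//     }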
1138void
1139TraceCPU::icacheRetryRecvd()
1140{
1141 // Schedule an event to go through the control flow in the same tick as
1142 // retry is received
1143 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1144 " event @%lli.\n", curTick());
1145 schedule(icacheNextEvent, curTick());
1146}
1147
1148void
1149TraceCPU::dcacheRetryRecvd()
1150{
1151 // Schedule an event to go through the execute flow in the same tick as
1152 // retry is received
1153 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1154 " event @%lli.\n", curTick());
1155 schedule(dcacheNextEvent, curTick());
1156}
1157
1158void
1159TraceCPU::schedDcacheNextEvent(Tick when)
1160{
1161 if (!dcacheNextEvent.scheduled()) {
1162 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1163 when);
1164 schedule(dcacheNextEvent, when);
1165 ++numSchedDcacheEvent;
1166 } else if (when < dcacheNextEvent.when()) {
1167 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1168 " to %lli.\n", dcacheNextEvent.when(), when);
1169 reschedule(dcacheNextEvent, when);
1170 }
1171
1172}
1173
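// Worked example, with hypothetical ticks: if dcacheNextEvent is already
// scheduled at tick 2000, schedDcacheNextEvent(1500) pulls it back to
// 1500, while schedDcacheNextEvent(2500) leaves the earlier event alone.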
1174bool
1175TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1176{
1177 // All responses on the instruction fetch side are ignored. Simply delete
1178 // the request and packet to free allocated memory
1179 delete pkt->req;
1180 delete pkt;
1181
1182 return true;
1183}
1184
1185void
1186TraceCPU::IcachePort::recvReqRetry()
1187{
1188 owner->icacheRetryRecvd();
1189}
1190
1191void
1192TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1193{
1194 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1195 dcacheGen.completeMemAccess(pkt);
1196}
1197
1198bool
1199TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1200{
1201 // Handle the responses for data memory requests, which is done inside
1202 // the elastic data generator
1203 owner->dcacheRecvTimingResp(pkt);
1204 // After processing the response delete the request and packet to free
1205 // memory
1206 delete pkt->req;
1207 delete pkt;
1208
1209 return true;
1210}
1211
1212void
1213TraceCPU::DcachePort::recvReqRetry()
1214{
1215 owner->dcacheRetryRecvd();
1216}
1217
1218TraceCPU::ElasticDataGen::InputStream::InputStream(const std::string& filename)
1219 : trace(filename),
1220 microOpCount(0)
1221{
1222 // Create a protobuf message for the header and read it from the stream
1223 ProtoMessage::InstDepRecordHeader header_msg;
1224 if (!trace.read(header_msg)) {
1225 panic("Failed to read packet header from %s\n", filename);
1226 }
1227
1228 if (header_msg.tick_freq() != SimClock::Frequency) {
1229 panic("Trace %s was recorded with a different tick frequency %d\n",
1230 filename, header_msg.tick_freq());
1231 }
1232
1233 // Assign window size equal to the field in the trace that was recorded
1234 // when the data dependency trace was captured in the o3cpu model
1235 windowSize = header_msg.window_size();
1236}
1237
1238void
1239TraceCPU::ElasticDataGen::InputStream::reset()
1240{
1241 trace.reset();
1242}
1243
1244bool
1245TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1246{
1247 ProtoMessage::InstDepRecord pkt_msg;
1248 if (trace.read(pkt_msg)) {
1249 // Required fields
1250 element->seqNum = pkt_msg.seq_num();
1251 element->type = pkt_msg.type();
1252 element->compDelay = pkt_msg.comp_delay();
1253
1254 // Repeated field robDepList
1255 element->clearRobDep();
1256 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1257 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1258 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1259 element->numRobDep += 1;
1260 }
1261
1262 // Repeated field
1263 element->clearRegDep();
1264 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1265 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1266 // An instruction may have both a register and an order dependency
1267 // on the same instruction. In such a case, the register dependency
1268 // is omitted.
1269 bool duplicate = false;
1270 for (int j = 0; j < element->numRobDep; j++) {
1271 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1272 }
1273 if (!duplicate) {
1274 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1275 element->numRegDep += 1;
1276 }
1277 }
1278
1279 // Optional fields
1280 if (pkt_msg.has_addr())
1281 element->addr = pkt_msg.addr();
1282 else
1283 element->addr = 0;
1284
1285 if (pkt_msg.has_size())
1286 element->size = pkt_msg.size();
1287 else
1288 element->size = 0;
1289
1290 if (pkt_msg.has_flags())
1291 element->flags = pkt_msg.flags();
1292 else
1293 element->flags = 0;
1294
1295 if (pkt_msg.has_pc())
1296 element->pc = pkt_msg.pc();
1297 else
1298 element->pc = 0;
1299
1300 // ROB occupancy number
1301 ++microOpCount;
1302 if (pkt_msg.has_weight()) {
1303 microOpCount += pkt_msg.weight();
1304 }
1305 element->robNum = microOpCount;
1306 return true;
1307 }
1308
1309 // We have reached the end of the file
1310 return false;
1311}
1312
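// Worked example of the duplicate elimination above, with hypothetical
// sequence numbers: a record with rob_dep == {5} and reg_dep == {5, 3}
// produces
//
//     element->robDep == {5} and element->regDep == {3}
//
// because the register dependency on 5 duplicates the order dependency
// and is omitted.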
1313bool
1314TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1315{
1316 for (auto& own_reg_dep : regDep) {
1317 if (own_reg_dep == reg_dep) {
1318 // If register dependency is found, make it zero and return true
1319 own_reg_dep = 0;
1320 --numRegDep;
1321 assert(numRegDep >= 0);
1322 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1323 "done.\n", seqNum, reg_dep);
1324 return true;
1325 }
1326 }
1327
1328 // Return false if the dependency is not found
1329 return false;
1330}
1331
1332bool
1333TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1334{
1335 for (auto& own_rob_dep : robDep) {
1336 if (own_rob_dep == rob_dep) {
1337 // If the rob dependency is found, make it zero and return true
1338 own_rob_dep = 0;
1339 --numRobDep;
1340 assert(numRobDep >= 0);
1341 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1342 "done.\n", seqNum, rob_dep);
1343 return true;
1344 }
1345 }
1346 return false;
1347}
1348
1349void
1350TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1351 for (auto& own_reg_dep : regDep) {
1352 own_reg_dep = 0;
1353 }
1354 numRegDep = 0;
1355}
1356
1357void
1358TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1359 for (auto& own_rob_dep : robDep) {
1360 own_rob_dep = 0;
1361 }
1362 numRobDep = 0;
1363}
1364
1365bool
1366TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1367{
1368 // If it is a ROB dependency then remove it
1369 if (!removeRobDep(done_seq_num)) {
1370 // If it is not a ROB dependency then it must be a register
1371 // dependency. If the register dependency is not found, it violates
1372 // an assumption and must be caught by the assert.
1373 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1374 assert(regdep_found);
1375 }
1376 // Return true if the node is dependency free
1377 return (numRobDep == 0 && numRegDep == 0);
1378}
1379
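// Illustrative sketch, with hypothetical values: for a node with
// robDep == {10} and regDep == {8},
//
//     removeDepOnInst(10);  // clears the ROB dependency, returns false
//     removeDepOnInst(8);   // clears the register dependency, returns
//                           // true: the node is now dependency-free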
1380void
1381TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1382{
1383 DPRINTFR(TraceCPUData, "%lli", seqNum);
1387 DPRINTFR(TraceCPUData, ",%s", (isLoad ? "True" : "False"));
1388 DPRINTFR(TraceCPUData, ",%s", (isStore ? "True" : "False"));
1389 if (isLoad || isStore) {
1384 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1385 if (isLoad() || isStore()) {
1390 DPRINTFR(TraceCPUData, ",%i", addr);
1391 DPRINTFR(TraceCPUData, ",%i", size);
1392 DPRINTFR(TraceCPUData, ",%i", flags);
1393 }
1394 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1395 int i = 0;
1396 DPRINTFR(TraceCPUData, "robDep:");
1397 while (robDep[i] != 0) {
1398 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1399 i++;
1400 }
1401 i = 0;
1402 DPRINTFR(TraceCPUData, "regDep:");
1403 while (regDep[i] != 0) {
1404 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1405 i++;
1406 }
1407 auto child_itr = dependents.begin();
1408 DPRINTFR(TraceCPUData, "dependents:");
1409 while (child_itr != dependents.end()) {
1410 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1411 child_itr++;
1412 }
1413
1414 DPRINTFR(TraceCPUData, "\n");
1415}
1416
1386 DPRINTFR(TraceCPUData, ",%i", addr);
1387 DPRINTFR(TraceCPUData, ",%i", size);
1388 DPRINTFR(TraceCPUData, ",%i", flags);
1389 }
1390 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1391 int i = 0;
1392 DPRINTFR(TraceCPUData, "robDep:");
1393 while (robDep[i] != 0) {
1394 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1395 i++;
1396 }
1397 i = 0;
1398 DPRINTFR(TraceCPUData, "regDep:");
1399 while (regDep[i] != 0) {
1400 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1401 i++;
1402 }
1403 auto child_itr = dependents.begin();
1404 DPRINTFR(TraceCPUData, "dependents:");
1405 while (child_itr != dependents.end()) {
1406 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1407 child_itr++;
1408 }
1409
1410 DPRINTFR(TraceCPUData, "\n");
1411}
1412
1413std::string
1414TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1415{
1416 return Record::RecordType_Name(type);
1417}
1418
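// Illustrative output (assuming the protobuf-generated RecordType_Name
// helper): a load node would print as a string such as "LOAD" in the
// readyList and trace dumps above, replacing the old single-letter
// L/S/C encoding.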
1419TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1420 : trace(filename)
1421{
1422 // Create a protobuf message for the header and read it from the stream
1423 ProtoMessage::PacketHeader header_msg;
1424 if (!trace.read(header_msg)) {
1425 panic("Failed to read packet header from %s\n", filename);
1426 }
1427
1428 if (header_msg.tick_freq() != SimClock::Frequency) {
1429 panic("Trace %s was recorded with a different tick frequency %d\n",
1430 filename, header_msg.tick_freq());
1431 }
1432}
1433
1434void
1435TraceCPU::FixedRetryGen::InputStream::reset()
1436{
1437 trace.reset();
1438}
1439
1440bool
1441TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1442{
1443 ProtoMessage::Packet pkt_msg;
1444 if (trace.read(pkt_msg)) {
1445 element->cmd = pkt_msg.cmd();
1446 element->addr = pkt_msg.addr();
1447 element->blocksize = pkt_msg.size();
1448 element->tick = pkt_msg.tick();
1449 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1450 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1451 return true;
1452 }
1453
1454 // We have reached the end of the file
1455 return false;
1456}
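// Illustrative read loop (editorial sketch): drain a fixed-timing trace
// element by element until the end of the file, assuming a hypothetical
// InputStream instance named input:
//
//     TraceCPU::FixedRetryGen::TraceElement elem;
//     while (input.read(&elem)) {
//         // consume elem.cmd, elem.addr, elem.tick, ...
//     }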