atomic.cc (9837:13a21202375d)
/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());

    if (FullSystem && !params()->switched_out) {
        ThreadID size = threadContexts.size();
        for (ThreadID i = 0; i < size; ++i) {
            ThreadContext *tc = threadContexts[i];
            // initialize CPU, including PC
            TheISA::initCPU(tc, tc->contextId());
        }
    }

    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      drain_manager(NULL),
      icachePort(name() + ".icache_port", this),
      dcachePort(name() + ".dcache_port", this),
      fastmem(p->fastmem),
      simpoint(p->simpoint_profile),
      intervalSize(p->simpoint_interval),
      intervalCount(0),
      intervalDrift(0),
      simpointStream(NULL),
      currentBBV(0, 0),
      currentBBVInstCount(0)
{
    _status = Idle;

    if (simpoint) {
        simpointStream = simout.create(p->simpoint_profile_file, false);
    }
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
    if (simpointStream) {
        simout.close(simpointStream);
    }
}

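// Drain the CPU: if it is already between instructions it drains
// immediately (return 0); otherwise the DrainManager is remembered and 1 is
// returned so the caller waits for tryCompleteDrain() to signal completion.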
unsigned int
AtomicSimpleCPU::drain(DrainManager *dm)
{
    assert(!drain_manager);
    if (switchedOut())
        return 0;

    if (!isDrained()) {
        DPRINTF(Drain, "Requesting drain: %s\n", pcState());
        drain_manager = dm;
        return 1;
    } else {
        if (tickEvent.scheduled())
            deschedule(tickEvent);

        DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
        return 0;
    }
}

void
AtomicSimpleCPU::drainResume()
{
    assert(!tickEvent.scheduled());
    assert(!drain_manager);
    if (switchedOut())
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    verifyMemoryMode();

    assert(!threadContexts.empty());
    if (threadContexts.size() > 1)
        fatal("The atomic CPU only supports one thread.\n");

    if (thread->status() == ThreadContext::Active) {
        schedule(tickEvent, nextCycle());
        _status = BaseSimpleCPU::Running;
        notIdleFraction = 1;
    } else {
        _status = BaseSimpleCPU::Idle;
        notIdleFraction = 0;
    }

    system->totalNumInsts = 0;
}

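// Check whether a previously requested drain has now completed; if so,
// notify the DrainManager and clear the pending request.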
bool
AtomicSimpleCPU::tryCompleteDrain()
{
    if (!drain_manager)
        return false;

    DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
    if (!isDrained())
        return false;

    DPRINTF(Drain, "CPU done draining, processing drain event\n");
    drain_manager->signalDrainDone();
    drain_manager = NULL;

    return true;
}


void
AtomicSimpleCPU::switchOut()
{
    BaseSimpleCPU::switchOut();

    assert(!tickEvent.scheduled());
    assert(_status == BaseSimpleCPU::Running || _status == Idle);
    assert(isDrained());
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseSimpleCPU::takeOverFrom(oldCPU);

    // The tick event should have been descheduled by drain()
    assert(!tickEvent.scheduled());

    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

void
AtomicSimpleCPU::verifyMemoryMode() const
{
    if (!system->isAtomicMode()) {
        fatal("The atomic CPU requires the memory system to be in "
              "'atomic' mode.\n");
    }
}

void
AtomicSimpleCPU::activateContext(ThreadID thread_num, Cycles delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction = 1;
    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, clockEdge(delay));
    _status = BaseSimpleCPU::Running;
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == BaseSimpleCPU::Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction = 0;
    _status = Idle;
}


Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

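    // Perform the access in at most two pieces: the loop body runs once if
    // the request fits within a single cache line, and a second time for
    // the part beyond secondAddr when the access straddles a line boundary.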
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                                req->isLLSC() ? MemCmd::LoadLockedReq :
                                MemCmd::ReadReq);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (fastmem && system->isMemAddr(pkt.getAddr()))
                    system->getPhysMem().access(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're reading into to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

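    // As in readMem(), the access is performed in at most two pieces when
    // it crosses a cache line boundary.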
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

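            // A store conditional only accesses memory if handleLockedWrite()
            // reports that the reservation is still valid; otherwise
            // do_access suppresses the access and the outcome is returned
            // through the request's extra data below.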
            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && system->isMemAddr(pkt.getAddr()))
                        system->getPhysMem().access(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're writing from to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

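    // Execute up to 'width' instructions per CPU tick. If a locked
    // read-modify-write sequence is in flight (the 'locked' flag set by
    // readMem()), keep executing until the matching write releases it so
    // the sequence completes atomically.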
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle) {
            tryCompleteDrain();
            return;
        }

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if(decoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
                    ifetch_pkt.dataStatic(&inst);

                    if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
                        system->getPhysMem().access(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction directly
                    // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                                  curStaticInst->isFirstMicroop()))
                instCnt++;

            // profile for SimPoints if enabled and macro inst is finished
            if (simpoint && curStaticInst && (fault == NoFault) &&
                (!curStaticInst->isMicroop() ||
                 curStaticInst->isLastMicroop())) {
                profileSimPoint();
            }

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                // the atomic cpu does its accounting in ticks, so
                // keep counting in ticks but round to the clock
                // period
                latency += divCeil(stall_ticks, clockPeriod()) *
                    clockPeriod();
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    if (tryCompleteDrain())
        return;

    // instruction takes at least one cycle
    if (latency < clockPeriod())
        latency = clockPeriod();

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}

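// Accumulate basic block vector (BBV) data for SimPoint profiling: each
// committed macro-op is credited to its basic block, and a frequency
// record ("T:<id>:<count> ...") is written to simpointStream once
// intervalSize instructions have been profiled.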
void
AtomicSimpleCPU::profileSimPoint()
{
    if (!currentBBVInstCount)
        currentBBV.first = thread->pcState().instAddr();

    ++intervalCount;
    ++currentBBVInstCount;

    // If inst is control inst, assume end of basic block.
    if (curStaticInst->isControl()) {
        currentBBV.second = thread->pcState().instAddr();

        auto map_itr = bbMap.find(currentBBV);
        if (map_itr == bbMap.end()) {
            // If a new (previously unseen) basic block is found,
            // add a new unique id, record num of insts and insert into bbMap.
            BBInfo info;
            info.id = bbMap.size() + 1;
            info.insts = currentBBVInstCount;
            info.count = currentBBVInstCount;
            bbMap.insert(std::make_pair(currentBBV, info));
        } else {
            // If basic block is seen before, just increment the count by the
            // number of insts in basic block.
            BBInfo& info = map_itr->second;
            assert(info.insts == currentBBVInstCount);
            info.count += currentBBVInstCount;
        }
        currentBBVInstCount = 0;

        // Reached end of interval if the sum of the current inst count
        // (intervalCount) and the excessive inst count from the previous
        // interval (intervalDrift) is greater than/equal to the interval size.
        if (intervalCount + intervalDrift >= intervalSize) {
            // summarize interval and display BBV info
            std::vector<pair<uint64_t, uint64_t> > counts;
            for (auto map_itr = bbMap.begin(); map_itr != bbMap.end();
                    ++map_itr) {
                BBInfo& info = map_itr->second;
                if (info.count != 0) {
                    counts.push_back(std::make_pair(info.id, info.count));
                    info.count = 0;
                }
            }
            std::sort(counts.begin(), counts.end());

            // Print output BBV info
            *simpointStream << "T";
            for (auto cnt_itr = counts.begin(); cnt_itr != counts.end();
                    ++cnt_itr) {
                *simpointStream << ":" << cnt_itr->first
                                << ":" << cnt_itr->second << " ";
            }
            *simpointStream << "\n";

            intervalDrift = (intervalCount + intervalDrift) - intervalSize;
            intervalCount = 0;
        }
    }
}

////////////////////////////////////////////////////////////////////////
//
// AtomicSimpleCPU Simulation Object
//
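// Factory method invoked by the generated parameter class when the Python
// configuration instantiates an AtomicSimpleCPU.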
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
    if (!FullSystem && workload.size() != 1)
        panic("only one workload allowed");
    return new AtomicSimpleCPU(this);
}