/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());

    if (FullSystem && !params()->switched_out) {
        ThreadID size = threadContexts.size();
        for (ThreadID i = 0; i < size; ++i) {
            ThreadContext *tc = threadContexts[i];
            // initialize CPU, including PC
            TheISA::initCPU(tc, tc->contextId());
        }
    }

    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      drain_manager(NULL),
      icachePort(name() + ".icache_port", this),
      dcachePort(name() + ".dcache_port", this),
      fastmem(p->fastmem),
      simpoint(p->simpoint_profile),
      intervalSize(p->simpoint_interval),
      intervalCount(0),
      intervalDrift(0),
      simpointStream(NULL),
      currentBBV(0, 0),
      currentBBVInstCount(0)
{
    _status = Idle;

    if (simpoint) {
        simpointStream = simout.create(p->simpoint_profile_file, false);
    }
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
    if (simpointStream) {
        simout.close(simpointStream);
    }
}

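// Drain the CPU in preparation for a checkpoint, switch-out, or memory
// system mode change. Returns the number of objects that still have to
// drain: 0 if the CPU is already quiescent, or 1 if it must first finish
// the current instruction and will signal the DrainManager once it has.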
unsigned int
AtomicSimpleCPU::drain(DrainManager *dm)
{
    assert(!drain_manager);
    if (switchedOut())
        return 0;

    if (!isDrained()) {
        DPRINTF(Drain, "Requesting drain: %s\n", pcState());
        drain_manager = dm;
        return 1;
    } else {
        if (tickEvent.scheduled())
            deschedule(tickEvent);

        DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
        return 0;
    }
}

void
AtomicSimpleCPU::drainResume()
{
    assert(!tickEvent.scheduled());
    assert(!drain_manager);
    if (switchedOut())
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    verifyMemoryMode();

    assert(!threadContexts.empty());
    if (threadContexts.size() > 1)
        fatal("The atomic CPU only supports one thread.\n");

    if (thread->status() == ThreadContext::Active) {
        schedule(tickEvent, nextCycle());
        _status = BaseSimpleCPU::Running;
        notIdleFraction = 1;
    } else {
        _status = BaseSimpleCPU::Idle;
        notIdleFraction = 0;
    }

    system->totalNumInsts = 0;
}

bool
AtomicSimpleCPU::tryCompleteDrain()
{
    if (!drain_manager)
        return false;

    DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
    if (!isDrained())
        return false;

    DPRINTF(Drain, "CPU done draining, processing drain event\n");
    drain_manager->signalDrainDone();
    drain_manager = NULL;

    return true;
}


void
AtomicSimpleCPU::switchOut()
{
    BaseSimpleCPU::switchOut();

    assert(!tickEvent.scheduled());
    assert(_status == BaseSimpleCPU::Running || _status == Idle);
    assert(isDrained());
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseSimpleCPU::takeOverFrom(oldCPU);

    // The tick event should have been descheduled by drain()
    assert(!tickEvent.scheduled());

    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

void
AtomicSimpleCPU::verifyMemoryMode() const
{
    if (!system->isAtomicMode()) {
        fatal("The atomic CPU requires the memory system to be in "
              "'atomic' mode.\n");
    }
}

void
AtomicSimpleCPU::activateContext(ThreadID thread_num, Cycles delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction = 1;
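    // Charge the cycles that elapsed while the context was suspended:
    // lastActivate was just set when the thread woke up, lastSuspend
    // when it went to sleep.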
    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);

    // Make sure ticks are still on multiples of cycles
    schedule(tickEvent, clockEdge(delay));
    _status = BaseSimpleCPU::Running;
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == BaseSimpleCPU::Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction = 0;
    _status = Idle;
}


Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t *data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    // The size of the data we're trying to read.
    int fullSize = size;

    // The address of the second part of this access if it needs to be split
    // across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;
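    // Worked example, assuming a 64-byte cache line: addr = 60 and
    // size = 8 give secondAddr = roundDown(67, 64) = 64 > 60, so the
    // first pass of the loop below reads bytes 60-63 (size = 4) and a
    // second pass reads the remaining bytes 64-67.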

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                                req->isLLSC() ? MemCmd::LoadLockedReq :
                                MemCmd::ReadReq);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (fastmem && system->isMemAddr(pkt.getAddr()))
                    system->getPhysMem().access(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        // If there's a fault, return it, unless it came from a prefetch,
        // in which case it is simply squashed.
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        // If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        // Move the pointer we're reading into to the correct location.
        data += size;
        // Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        // And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    // The size of the data we're trying to write.
    int fullSize = size;

    // The address of the second part of this access if it needs to be split
    // across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true; // flag to suppress cache access

            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }
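            // For a store conditional, TheISA::handleLockedWrite()
            // checks the lock flag set by the earlier load locked; if
            // the check fails it suppresses the access (do_access ==
            // false) and reports the failure through the request's
            // extra data, which is copied back into *res below.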

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && system->isMemAddr(pkt.getAddr()))
                        system->getPhysMem().access(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        // If there's a fault or we don't need to access a second cache
        // line, stop now.
        if (fault != NoFault || secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        // Move the pointer we're writing from to the correct location.
        data += size;
        // Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        // And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

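    // Execute up to 'width' instructions per tick, but keep iterating
    // past that while an LL/SC sequence is in flight (locked) so the
    // load locked and its store conditional complete without the CPU
    // rescheduling in between.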
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle) {
            tryCompleteDrain();
            return;
        }

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if(decoder.needMoreBytes())
                //{
                icache_access = true;
                Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
                ifetch_pkt.dataStatic(&inst);

                if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
                    system->getPhysMem().access(&ifetch_pkt);
                else
                    icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                assert(!ifetch_pkt.isError());

                // ifetch_req is initialized to read the instruction directly
                // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                                  curStaticInst->isFirstMicroop()))
                instCnt++;

            // profile for SimPoints if enabled and macro inst is finished
            if (simpoint && curStaticInst && (fault == NoFault) &&
                (!curStaticInst->isMicroop() ||
                 curStaticInst->isLastMicroop())) {
                profileSimPoint();
            }

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                // the atomic cpu does its accounting in ticks, so
                // keep counting in ticks but round to the clock
                // period
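                // (e.g., a 1500-tick stall with a 1000-tick clock
                // period is charged as 2000 ticks, i.e. two full
                // cycles)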
                latency += divCeil(stall_ticks, clockPeriod()) *
                           clockPeriod();
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    if (tryCompleteDrain())
        return;

    // instruction takes at least one cycle
    if (latency < clockPeriod())
        latency = clockPeriod();

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}

void
AtomicSimpleCPU::profileSimPoint()
{
    if (!currentBBVInstCount)
        currentBBV.first = thread->pcState().instAddr();

    ++intervalCount;
    ++currentBBVInstCount;

    // If inst is control inst, assume end of basic block.
    if (curStaticInst->isControl()) {
        currentBBV.second = thread->pcState().instAddr();

        auto map_itr = bbMap.find(currentBBV);
        if (map_itr == bbMap.end()) {
            // If a new (previously unseen) basic block is found,
            // add a new unique id, record num of insts and insert into bbMap.
            BBInfo info;
            info.id = bbMap.size() + 1;
            info.insts = currentBBVInstCount;
            info.count = currentBBVInstCount;
            bbMap.insert(std::make_pair(currentBBV, info));
        } else {
            // If basic block is seen before, just increment the count by the
            // number of insts in basic block.
            BBInfo& info = map_itr->second;
            assert(info.insts == currentBBVInstCount);
            info.count += currentBBVInstCount;
        }
        currentBBVInstCount = 0;

        // Reached end of interval if the sum of the current inst count
        // (intervalCount) and the excess inst count from the previous
        // interval (intervalDrift) is greater than/equal to the interval
        // size.
        if (intervalCount + intervalDrift >= intervalSize) {
            // summarize interval and display BBV info
            std::vector<pair<uint64_t, uint64_t> > counts;
            for (auto map_itr = bbMap.begin(); map_itr != bbMap.end();
                 ++map_itr) {
                BBInfo& info = map_itr->second;
                if (info.count != 0) {
                    counts.push_back(std::make_pair(info.id, info.count));
                    info.count = 0;
                }
            }
            std::sort(counts.begin(), counts.end());

            // Print output BBV info
            *simpointStream << "T";
            for (auto cnt_itr = counts.begin(); cnt_itr != counts.end();
                 ++cnt_itr) {
                *simpointStream << ":" << cnt_itr->first
                                << ":" << cnt_itr->second << " ";
            }
            *simpointStream << "\n";
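            // Each interval thus emits one line in the SimPoint basic
            // block vector (.bb) format, e.g. "T:1:4000 :2:6000 ",
            // pairing each basic block id with its weighted count.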

            intervalDrift = (intervalCount + intervalDrift) - intervalSize;
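            // e.g., with intervalSize = 10M insts, an interval that
            // ends at 10,000,123 insts carries 123 insts of drift into
            // the next interval.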
            intervalCount = 0;
        }
    }
}

////////////////////////////////////////////////////////////////////////
//
// AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
    if (!FullSystem && workload.size() != 1)
        panic("only one workload allowed");
    return new AtomicSimpleCPU(this);
}