atomic.cc revision 9443
/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();

    if (!params()->switched_out &&
        system->getMemoryMode() != Enums::atomic) {
        fatal("The atomic CPU requires the memory system to be in "
              "'atomic' mode.\n");
    }

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());

    if (FullSystem && !params()->switched_out) {
        ThreadID size = threadContexts.size();
        for (ThreadID i = 0; i < size; ++i) {
            ThreadContext *tc = threadContexts[i];
            // initialize CPU, including PC
            TheISA::initCPU(tc, tc->contextId());
        }
    }

    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      drain_manager(NULL),
      icachePort(name() + ".icache_port", this),
      dcachePort(name() + ".dcache_port", this),
      fastmem(p->fastmem)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

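// Returns the number of outstanding drain requests: 0 if the CPU is
// already drained (or switched out), 1 if it must first finish the
// current instruction, in which case signalDrainDone() is called later
// from tryCompleteDrain().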
unsigned int
AtomicSimpleCPU::drain(DrainManager *dm)
{
    assert(!drain_manager);
    if (_status == SwitchedOut)
        return 0;

    if (!isDrained()) {
        DPRINTF(Drain, "Requesting drain: %s\n", pcState());
        drain_manager = dm;
        return 1;
    } else {
        if (tickEvent.scheduled())
            deschedule(tickEvent);

        DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
        return 0;
    }
}

void
AtomicSimpleCPU::drainResume()
{
    assert(!drain_manager);
    if (_status == Idle || _status == SwitchedOut)
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    if (system->getMemoryMode() != Enums::atomic) {
        fatal("The atomic CPU requires the memory system to be in "
              "'atomic' mode.\n");
    }

    assert(!tickEvent.scheduled());
    if (thread->status() == ThreadContext::Active)
        schedule(tickEvent, nextCycle());

    system->totalNumInsts = 0;
}

bool
AtomicSimpleCPU::tryCompleteDrain()
{
    if (!drain_manager)
        return false;

    DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
    if (!isDrained())
        return false;

    DPRINTF(Drain, "CPU done draining, processing drain event\n");
    drain_manager->signalDrainDone();
    drain_manager = NULL;

    return true;
}


void
AtomicSimpleCPU::switchOut()
{
    BaseSimpleCPU::switchOut();

    assert(!tickEvent.scheduled());
    assert(_status == BaseSimpleCPU::Running || _status == Idle);
    assert(isDrained());

    _status = SwitchedOut;
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseSimpleCPU::takeOverFrom(oldCPU);

    // The tick event should have been descheduled by drain()
    assert(!tickEvent.scheduled());

    assert(!threadContexts.empty());
    if (threadContexts.size() > 1)
        fatal("The atomic CPU only supports one thread.\n");

    // If the ThreadContext is active, mark the CPU as running.
    if (thread->status() == ThreadContext::Active)
        _status = BaseSimpleCPU::Running;
    else
        _status = Idle;

    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}


void
AtomicSimpleCPU::activateContext(ThreadID thread_num, Cycles delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);

    // Make sure ticks are still on multiples of cycles
    schedule(tickEvent, clockEdge(delay));
    _status = BaseSimpleCPU::Running;
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == BaseSimpleCPU::Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    // The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    // The size of the data we're trying to read.
    int fullSize = size;

    // The address of the second part of this access if it needs to be split
    // across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;
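
    // Illustrative example (values assumed for illustration): with
    // blockSize == 64 (0x40), addr == 0x3c, and size == 8,
    // addr + size - 1 == 0x43 rounds down to secondAddr == 0x40, so the
    // first pass below accesses the 4 bytes at 0x3c-0x3f and a second
    // pass accesses the remaining 4 bytes starting at 0x40.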

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                                req->isLLSC() ? MemCmd::LoadLockedReq :
                                MemCmd::ReadReq);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (fastmem && system->isMemAddr(pkt.getAddr()))
                    system->getPhysMem().access(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

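            // For load-locked requests, the ISA-specific helper from
            // arch/locked_mem.hh records the locked address in the
            // thread context so that a later store-conditional can be
            // checked against it.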
            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        // If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        // If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        // Move the pointer we're reading into to the correct location.
        data += size;
        // Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        // And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    // The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    // The size of the data we're trying to write.
    int fullSize = size;

    // The address of the second part of this access if it needs to be split
    // across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

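            // A store-conditional is sent as a StoreCondReq;
            // handleLockedWrite() returns false if the lock check fails,
            // in which case the memory access is suppressed and the
            // failure result travels back through the request's extra
            // data (copied into *res below).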
            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && system->isMemAddr(pkt.getAddr()))
                        system->getPhysMem().access(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        // If there's a fault or we don't need to access a second cache
        // line, stop now.
        if (fault != NoFault || secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        // Move the pointer we're writing from to the correct location.
        data += size;
        // Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        // And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle) {
            tryCompleteDrain();
            return;
        }

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

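        // A new fetch is only needed when not executing out of the
        // microcode ROM and not inside a decoded macro-op; in those two
        // cases the next micro-op comes from the ROM or the macro-op
        // rather than from instruction memory.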
        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if(decoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
                    ifetch_pkt.dataStatic(&inst);

                    if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
                        system->getPhysMem().access(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction
                    // directly into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

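            // Illustrative arithmetic (numbers assumed): with a 500-tick
            // clock period and stall_ticks == 1200, the rounding below
            // charges divCeil(1200, 500) * 500 == 1500 ticks, i.e. the
            // stall is rounded up to three whole cycles.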
            if (stall_ticks) {
                // the atomic cpu does its accounting in ticks, so
                // keep counting in ticks but round to the clock
                // period
                latency += divCeil(stall_ticks, clockPeriod()) *
                    clockPeriod();
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    if (tryCompleteDrain())
        return;

    // instruction takes at least one cycle
    if (latency < clockPeriod())
        latency = clockPeriod();

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
    if (!FullSystem && workload.size() != 1)
        panic("only one workload allowed");
    return new AtomicSimpleCPU(this);
}