atomic.cc revision 9342
/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());

    if (FullSystem && !params()->defer_registration) {
        ThreadID size = threadContexts.size();
        for (ThreadID i = 0; i < size; ++i) {
            ThreadContext *tc = threadContexts[i];
            // initialize CPU, including PC
            TheISA::initCPU(tc, tc->contextId());
        }
    }

    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + ".icache_port", this),
      dcachePort(name() + ".dcache_port", this),
      fastmem(p->fastmem)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

void
AtomicSimpleCPU::serialize(ostream &os)
{
    Drainable::State so_state(getDrainState());
    SERIALIZE_ENUM(so_state);
    SERIALIZE_SCALAR(locked);
    BaseSimpleCPU::serialize(os);
    nameOut(os, csprintf("%s.tickEvent", name()));
    tickEvent.serialize(os);
}

void
AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
    Drainable::State so_state;
    UNSERIALIZE_ENUM(so_state);
    UNSERIALIZE_SCALAR(locked);
    BaseSimpleCPU::unserialize(cp, section);
    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}

unsigned int
AtomicSimpleCPU::drain(DrainManager *drain_manager)
{
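    // The atomic CPU completes all of its work within a single call chain,
    // so there are no outstanding transactions to wait for: report that we
    // are drained immediately.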
    setDrainState(Drainable::Drained);
    return 0;
}

void
AtomicSimpleCPU::drainResume()
{
    if (_status == Idle || _status == SwitchedOut)
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    assert(system->getMemoryMode() == Enums::atomic);

    setDrainState(Drainable::Running);
    if (thread->status() == ThreadContext::Active) {
        if (!tickEvent.scheduled())
            schedule(tickEvent, nextCycle());
    }
    system->totalNumInsts = 0;
}

void
AtomicSimpleCPU::switchOut()
{
    assert(_status == BaseSimpleCPU::Running || _status == Idle);
    _status = SwitchedOut;

    tickEvent.squash();
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseCPU::takeOverFrom(oldCPU);

    assert(!tickEvent.scheduled());

    // if any of this CPU's ThreadContexts are active, mark the CPU as
    // running and schedule its tick event.
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];
        if (tc->status() == ThreadContext::Active &&
            _status != BaseSimpleCPU::Running) {
            _status = BaseSimpleCPU::Running;
            schedule(tickEvent, nextCycle());
            break;
        }
    }
    if (_status != BaseSimpleCPU::Running) {
        _status = Idle;
    }
    assert(threadContexts.size() == 1);
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}


void
AtomicSimpleCPU::activateContext(ThreadID thread_num, Cycles delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
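    // Charge the cycles that elapsed while the thread was suspended so that
    // numCycles keeps tracking total simulated cycles for this CPU.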
    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, clockEdge(delay));
    _status = BaseSimpleCPU::Running;
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == BaseSimpleCPU::Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t *data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

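    // The loop below runs at most twice: once for the (possibly truncated)
    // first part of the access, and once more if the access crosses a cache
    // line boundary into secondAddr.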
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                                req->isLLSC() ? MemCmd::LoadLockedReq :
                                MemCmd::ReadReq);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
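                // With fastmem enabled, bypass the port and cache hierarchy
                // and access the system's physical memory directly.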
                if (fastmem && system->isMemAddr(pkt.getAddr()))
                    system->getPhysMem().access(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Advance the destination pointer past the part we just read.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

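    // As in readMem, the loop below runs at most twice: once per cache line
    // touched by a split access.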
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

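            // handleLockedWrite decides whether a store-conditional should
            // still proceed; if the reservation was lost it suppresses the
            // access, and the SC result is returned via the extra data below.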
            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && system->isMemAddr(pkt.getAddr()))
                        system->getPhysMem().access(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

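            // For non-swap accesses that want a result (store-conditional),
            // hand back the request's extra data, which holds the outcome.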
            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Advance the source data pointer past the part we just wrote.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

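    // Execute up to 'width' instructions per cycle; keep going past that
    // while a locked (read-modify-write) access is outstanding so the whole
    // sequence completes within this tick.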
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle)
            return;

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if(decoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
                    ifetch_pkt.dataStatic(&inst);

                    if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
                        system->getPhysMem().access(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction directly
                    // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                // the atomic cpu does its accounting in ticks, so
                // keep counting in ticks but round to the clock
                // period
                latency += divCeil(stall_ticks, clockPeriod()) *
                    clockPeriod();
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    // instruction takes at least one cycle
    if (latency < clockPeriod())
        latency = clockPeriod();

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
    if (!FullSystem && workload.size() != 1)
        panic("only one workload allowed");
    return new AtomicSimpleCPU(this);
}
569