atomic.cc revision 9058:cc47e11ccec1
/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());

    if (FullSystem && !params()->defer_registration) {
        ThreadID size = threadContexts.size();
        for (ThreadID i = 0; i < size; ++i) {
            ThreadContext *tc = threadContexts[i];
            // initialize CPU, including PC
            TheISA::initCPU(tc, tc->contextId());
        }
    }

    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + "-iport", this),
      dcachePort(name() + "-dport", this),
      fastmem(p->fastmem)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

void
AtomicSimpleCPU::serialize(ostream &os)
{
    SimObject::State so_state = SimObject::getState();
    SERIALIZE_ENUM(so_state);
    SERIALIZE_SCALAR(locked);
    BaseSimpleCPU::serialize(os);
    nameOut(os, csprintf("%s.tickEvent", name()));
    tickEvent.serialize(os);
}

void
AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
    SimObject::State so_state;
    UNSERIALIZE_ENUM(so_state);
    UNSERIALIZE_SCALAR(locked);
    BaseSimpleCPU::unserialize(cp, section);
    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}

void
AtomicSimpleCPU::resume()
{
    if (_status == Idle || _status == SwitchedOut)
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    assert(system->getMemoryMode() == Enums::atomic);

    changeState(SimObject::Running);
    if (thread->status() == ThreadContext::Active) {
        if (!tickEvent.scheduled())
            schedule(tickEvent, nextCycle());
    }
    system->totalNumInsts = 0;
}

void
AtomicSimpleCPU::switchOut()
{
    assert(_status == Running || _status == Idle);
    _status = SwitchedOut;

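    // squash() marks the event as squashed rather than descheduling it;
    // if the event is still scheduled it fires as a no-op.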
    tickEvent.squash();
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseCPU::takeOverFrom(oldCPU);

    assert(!tickEvent.scheduled());

    // if any of this CPU's ThreadContexts are active, mark the CPU as
    // running and schedule its tick event.
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];
        if (tc->status() == ThreadContext::Active && _status != Running) {
            _status = Running;
            schedule(tickEvent, nextCycle());
            break;
        }
    }
    if (_status != Running) {
        _status = Idle;
    }
    assert(threadContexts.size() == 1);
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}


void
AtomicSimpleCPU::activateContext(ThreadID thread_num, int delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
    numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
    _status = Running;
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t *data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;
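    // Worked example: with a 64-byte block, an 8-byte read at addr 0x3c
    // gives secondAddr = roundDown(0x43, 64) = 0x40, so this first pass
    // covers the 4 bytes at [0x3c, 0x40) and the second loop iteration
    // covers the 4 bytes at [0x40, 0x44).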

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                                req->isLLSC() ? MemCmd::LoadLockedReq :
                                MemCmd::ReadReq);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (fastmem && system->isMemAddr(pkt.getAddr()))
                    system->getPhysMem().access(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

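            // A load-locked also records the locked address in the
            // thread's ISA state so that a later store-conditional can be
            // validated against it.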
            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're reading into to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;
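    // Splitting across a line boundary works exactly as in readMem() above.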

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // suppresses the memory access when false

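            // For a store-conditional, handleLockedWrite() checks the lock
            // state; it returns false when the SC fails, in which case the
            // memory access is skipped and the failure result is reported
            // through the request's extra data.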
            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && system->isMemAddr(pkt.getAddr()))
                        system->getPhysMem().access(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

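            // Hand the result (e.g. SC success/failure) back to the
            // instruction through *res.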
            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're writing from to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

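    // Execute up to 'width' instructions per tick; keep going past that
    // while 'locked' is set so a locked read-modify-write sequence
    // completes within a single tick() invocation.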
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle)
            return;

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if (decoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
                    ifetch_pkt.dataStatic(&inst);

                    if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
                        system->getPhysMem().access(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction directly
                    // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

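            // Round the stall up to a whole number of CPU cycles, e.g.
            // with a 500-tick clock period a 1200-tick stall is charged
            // as 1500 ticks (three full cycles).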
            if (stall_ticks) {
                Tick stall_cycles = stall_ticks / ticks(1);
                Tick aligned_stall_ticks = ticks(stall_cycles);

                if (aligned_stall_ticks < stall_ticks)
                    aligned_stall_ticks += ticks(1);

                latency += aligned_stall_ticks;
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    // instruction takes at least one cycle
    if (latency < ticks(1))
        latency = ticks(1);

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
    if (!FullSystem && workload.size() != 1)
        panic("only one workload allowed");
    return new AtomicSimpleCPU(this);
}