/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());

    if (FullSystem) {
        ThreadID size = threadContexts.size();
        for (ThreadID i = 0; i < size; ++i) {
            ThreadContext *tc = threadContexts[i];
            // initialize CPU, including PC
            TheISA::initCPU(tc, tc->contextId());
        }
    }

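    // With fastmem enabled, remember where physical memory lives so that
    // accesses hitting it can bypass the cache ports later on.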
    if (fastmem) {
        AddrRangeList pmAddrList = system->physmem->getAddrRanges();
        physMemAddr = *pmAddrList.begin();
    }
    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + "-iport", this), dcachePort(name() + "-dport", this),
      fastmem(p->fastmem)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

void
AtomicSimpleCPU::serialize(ostream &os)
{
    SimObject::State so_state = SimObject::getState();
    SERIALIZE_ENUM(so_state);
    SERIALIZE_SCALAR(locked);
    BaseSimpleCPU::serialize(os);
    nameOut(os, csprintf("%s.tickEvent", name()));
    tickEvent.serialize(os);
}

void
AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
    SimObject::State so_state;
    UNSERIALIZE_ENUM(so_state);
    UNSERIALIZE_SCALAR(locked);
    BaseSimpleCPU::unserialize(cp, section);
    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}

void
AtomicSimpleCPU::resume()
{
    if (_status == Idle || _status == SwitchedOut)
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    assert(system->getMemoryMode() == Enums::atomic);

    changeState(SimObject::Running);
    if (thread->status() == ThreadContext::Active) {
        if (!tickEvent.scheduled())
            schedule(tickEvent, nextCycle());
    }
    system->totalNumInsts = 0;
}

void
AtomicSimpleCPU::switchOut()
{
    assert(_status == Running || _status == Idle);
    _status = SwitchedOut;

    tickEvent.squash();
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseCPU::takeOverFrom(oldCPU);

    assert(!tickEvent.scheduled());

    // if any of this CPU's ThreadContexts are active, mark the CPU as
    // running and schedule its tick event.
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];
        if (tc->status() == ThreadContext::Active && _status != Running) {
            _status = Running;
            schedule(tickEvent, nextCycle());
            break;
        }
    }
    if (_status != Running) {
        _status = Idle;
    }
    assert(threadContexts.size() == 1);
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}


void
AtomicSimpleCPU::activateContext(ThreadID thread_num, int delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
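    // Charge the cycles this context spent suspended so numCycles stays
    // in step with simulated time.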
    numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
    _status = Running;
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
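    //For example, with 64-byte blocks an 8-byte read starting at 0x3c
    //rounds down to secondAddr = 0x40, so the access straddles a boundary
    //and must be split in two.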
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                    req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
                    Packet::Broadcast);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
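                // When fastmem is enabled, accesses that hit physical
                // memory bypass the cache port and go straight to physmem.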
                if (fastmem && pkt.getAddr() == physMemAddr)
                    dcache_latency += system->physmem->doAtomicAccess(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it -- unless it came from a prefetch,
        //in which case the fault is silently dropped.
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're reading into to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

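            // Store conditionals and swaps need their own commands;
            // handleLockedWrite() returns false when the store conditional
            // should fail, in which case the memory access is skipped.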
            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd, Packet::Broadcast);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && pkt.getAddr() == physMemAddr)
                        dcache_latency += system->physmem->doAtomicAccess(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

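            // Hand the request's extra data (e.g. the store conditional
            // result) back to the caller.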
            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're writing from to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

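    // Execute up to 'width' instructions per tick; keep going beyond that
    // while a locked access sequence is still outstanding so its paired
    // write can complete.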
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle)
            return;

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the predecoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if(predecoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
                                               Packet::Broadcast);
                    ifetch_pkt.dataStatic(&inst);

                    if (fastmem && ifetch_pkt.getAddr() == physMemAddr)
                        icache_latency =
                            system->physmem->doAtomicAccess(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction directly
                    // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                Tick stall_cycles = stall_ticks / ticks(1);
                Tick aligned_stall_ticks = ticks(stall_cycles);

                // round the stall up to the next full cycle so the tick
                // stays aligned to cycle boundaries
                if (aligned_stall_ticks < stall_ticks)
                    aligned_stall_ticks += ticks(1);

                latency += aligned_stall_ticks;
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    // instruction takes at least one cycle
    if (latency < ticks(1))
        latency = ticks(1);

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
    if (!FullSystem && workload.size() != 1)
        panic("only one workload allowed");
    return new AtomicSimpleCPU(this);
}