// atomic.cc, revision 8707:489489c67fd9
/*
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

Port *
AtomicSimpleCPU::getPort(const string &if_name, int idx)
{
    if (if_name == "dcache_port")
        return &dcachePort;
    else if (if_name == "icache_port")
        return &icachePort;
    else if (if_name == "physmem_port") {
        hasPhysMemPort = true;
        return &physmemPort;
    }
    else
        panic("AtomicSimpleCPU::getPort: unknown port '%s'\n", if_name);
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();
#if FULL_SYSTEM
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];

        // initialize CPU, including PC
        TheISA::initCPU(tc, tc->contextId());
    }

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());
#endif
    if (hasPhysMemPort) {
        bool snoop = false;
        AddrRangeList pmAddrList;
        physmemPort.getPeerAddressRanges(pmAddrList, snoop);
        physMemAddr = *pmAddrList.begin();
    }
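    // Note: when the physmem_port is connected, accesses whose physical
    // address falls in physical memory's range bypass the cache hierarchy
    // and go straight to memory (see the sendAtomic() fast paths in
    // readMem(), writeMem(), and tick() below), which speeds up
    // atomic-mode simulation.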
    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + "-iport", this), dcachePort(name() + "-dport", this),
      physmemPort(name() + "-pport", this), hasPhysMemPort(false)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

void
AtomicSimpleCPU::serialize(ostream &os)
{
    SimObject::State so_state = SimObject::getState();
    SERIALIZE_ENUM(so_state);
    SERIALIZE_SCALAR(locked);
    BaseSimpleCPU::serialize(os);
    nameOut(os, csprintf("%s.tickEvent", name()));
    tickEvent.serialize(os);
}

void
AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
    SimObject::State so_state;
    UNSERIALIZE_ENUM(so_state);
    UNSERIALIZE_SCALAR(locked);
    BaseSimpleCPU::unserialize(cp, section);
    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}

void
AtomicSimpleCPU::resume()
{
    if (_status == Idle || _status == SwitchedOut)
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    assert(system->getMemoryMode() == Enums::atomic);

    changeState(SimObject::Running);
    if (thread->status() == ThreadContext::Active) {
        if (!tickEvent.scheduled())
            schedule(tickEvent, nextCycle());
    }
    system->totalNumInsts = 0;
}

void
AtomicSimpleCPU::switchOut()
{
    assert(_status == Running || _status == Idle);
    _status = SwitchedOut;

    tickEvent.squash();
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort);

    assert(!tickEvent.scheduled());

    // if any of this CPU's ThreadContexts are active, mark the CPU as
    // running and schedule its tick event.
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];
        if (tc->status() == ThreadContext::Active && _status != Running) {
            _status = Running;
            schedule(tickEvent, nextCycle());
            break;
        }
    }
    if (_status != Running) {
        _status = Idle;
    }
    assert(threadContexts.size() == 1);
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}


void
AtomicSimpleCPU::activateContext(int thread_num, int delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
    numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);

    // Make sure ticks are still on multiples of cycles
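    // (For example, assuming a 500-tick clock period: if curTick() is 1250
    // and delay is 2, curTick() + ticks(delay) is 2250, which nextCycle()
    // rounds up to the cycle boundary at 2500.)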
    schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
    _status = Running;
}


void
AtomicSimpleCPU::suspendContext(int thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t *data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    // The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    // The size of the data we're trying to read.
    int fullSize = size;

    // The address of the second part of this access if it needs to be split
    // across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;
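    // Worked example, assuming a 64-byte block size: an 8-byte read at
    // address 0x3C ends at 0x43, so secondAddr = roundDown(0x43, 64) = 0x40,
    // which is past 0x3C.  The first iteration below then reads the 4 bytes
    // at 0x3C-0x3F, and a second iteration reads the remaining 4 bytes
    // starting at 0x40.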

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                    req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
                    Packet::Broadcast);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
                    dcache_latency += physmemPort.sendAtomic(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        // If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        // If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        // Advance the destination pointer past the bytes just read.
        data += size;
        // Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        // And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    // The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    // The size of the data we're trying to write.
    int fullSize = size;

    // The address of the second part of this access if it needs to be split
    // across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;
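    // (The split-access setup here mirrors readMem() above; see the worked
    // example there.)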

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }
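            // If a store-conditional fails its reservation check,
            // handleLockedWrite() returns false: do_access then suppresses
            // the memory access below, and the success/failure code is
            // returned through the request's extra data, copied back into
            // *res at the end of this iteration.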

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd, Packet::Broadcast);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
                        dcache_latency += physmemPort.sendAtomic(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        // If there's a fault or we don't need to access a second cache line,
        // stop now.
        if (fault != NoFault || secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        // Advance the source pointer past the bytes just written.
        data += size;
        // Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        // And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

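    // Execute up to 'width' instructions per tick event; if an LL/SC
    // sequence is in flight ('locked' is set by a locked read below and
    // cleared by the matching locked write), keep executing past 'width'
    // so the sequence completes within this tick event.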
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle)
            return;

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the predecoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if (predecoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
                                               Packet::Broadcast);
                    ifetch_pkt.dataStatic(&inst);

                    if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
                        icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction
                    // directly into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

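            // Round the stall time up to a whole number of cycles.  For
            // example, with ticks(1) == 500 and stall_ticks == 1200:
            // stall_cycles == 2, so aligned_stall_ticks starts at 1000 and
            // is rounded up by a full cycle to 1500.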
            if (stall_ticks) {
                Tick stall_cycles = stall_ticks / ticks(1);
                Tick aligned_stall_ticks = ticks(stall_cycles);

                if (aligned_stall_ticks < stall_ticks)
                    aligned_stall_ticks += ticks(1);

                latency += aligned_stall_ticks;
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    // instruction takes at least one cycle
    if (latency < ticks(1))
        latency = ticks(1);

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
#if !FULL_SYSTEM
    if (workload.size() != 1)
        panic("only one workload allowed");
#endif
    return new AtomicSimpleCPU(this);
}