atomic.cc revision 8706
/*
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

Port *
AtomicSimpleCPU::getPort(const string &if_name, int idx)
{
    if (if_name == "dcache_port")
        return &dcachePort;
    else if (if_name == "icache_port")
        return &icachePort;
    else if (if_name == "physmem_port") {
        hasPhysMemPort = true;
        return &physmemPort;
    }
    else
        panic("No such port: %s\n", if_name);
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();
#if FULL_SYSTEM
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];

        // initialize CPU, including PC
        TheISA::initCPU(tc, tc->contextId());
    }

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());
#endif
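    // Record the address range served directly by physical memory so that
    // later accesses falling within it can bypass the cache ports.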
    if (hasPhysMemPort) {
        bool snoop = false;
        AddrRangeList pmAddrList;
        physmemPort.getPeerAddressRanges(pmAddrList, snoop);
        physMemAddr = *pmAddrList.begin();
    }
    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

bool
AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt)
{
    panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
    return true;
}

Tick
AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt)
{
    //Snooping a coherence request, just return
    return 0;
}

void
AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt)
{
    //No internal storage to update, just return
    return;
}

void
AtomicSimpleCPU::CpuPort::recvStatusChange(Status status)
{
    if (status == RangeChange) {
        if (!snoopRangeSent) {
            snoopRangeSent = true;
            sendStatusChange(Port::RangeChange);
        }
        return;
    }

    panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!");
}

void
AtomicSimpleCPU::CpuPort::recvRetry()
{
    panic("AtomicSimpleCPU doesn't expect recvRetry callback!");
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + "-iport", this), dcachePort(name() + "-dport", this),
      physmemPort(name() + "-pmport", this), hasPhysMemPort(false)
{
    _status = Idle;

    icachePort.snoopRangeSent = false;
    dcachePort.snoopRangeSent = false;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

void
AtomicSimpleCPU::serialize(ostream &os)
{
    SimObject::State so_state = SimObject::getState();
    SERIALIZE_ENUM(so_state);
    SERIALIZE_SCALAR(locked);
    BaseSimpleCPU::serialize(os);
    nameOut(os, csprintf("%s.tickEvent", name()));
    tickEvent.serialize(os);
}

void
AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
    SimObject::State so_state;
    UNSERIALIZE_ENUM(so_state);
    UNSERIALIZE_SCALAR(locked);
    BaseSimpleCPU::unserialize(cp, section);
    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}

void
AtomicSimpleCPU::resume()
{
    if (_status == Idle || _status == SwitchedOut)
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    assert(system->getMemoryMode() == Enums::atomic);

    changeState(SimObject::Running);
    if (thread->status() == ThreadContext::Active) {
        if (!tickEvent.scheduled())
            schedule(tickEvent, nextCycle());
    }
    system->totalNumInsts = 0;
}

void
AtomicSimpleCPU::switchOut()
{
    assert(_status == Running || _status == Idle);
    _status = SwitchedOut;

    tickEvent.squash();
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort);

    assert(!tickEvent.scheduled());

    // if any of this CPU's ThreadContexts are active, mark the CPU as
    // running and schedule its tick event.
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];
        if (tc->status() == ThreadContext::Active && _status != Running) {
            _status = Running;
            schedule(tickEvent, nextCycle());
            break;
        }
    }
    if (_status != Running) {
        _status = Idle;
    }
    assert(threadContexts.size() == 1);
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}


void
AtomicSimpleCPU::activateContext(int thread_num, int delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
    numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
    _status = Running;
}


void
AtomicSimpleCPU::suspendContext(int thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

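    // The loop below executes at most twice: once for the first (or only)
    // part of the access, and a second time if the access has to be split
    // across a cache line boundary.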
    while (1) {
        req->setVirt(0, addr, size, flags, thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                    req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
                    Packet::Broadcast);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
                    dcache_latency += physmemPort.sendAtomic(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it, unless it came from a prefetch,
        //which is simply dropped.
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Advance the pointer we're reading into to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

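    // As in readMem(), this loop executes at most twice: the second pass
    // handles the portion of a write that spills over a cache line boundary.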
    while (1) {
        req->setVirt(0, addr, size, flags, thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd, Packet::Broadcast);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
                        dcache_latency += physmemPort.sendAtomic(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr) {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Advance the pointer we're writing from to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

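    // Execute up to 'width' instructions per tick; if a locked
    // read-modify-write sequence is in progress, keep executing until it
    // completes so the sequence is not interrupted.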
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle)
            return;

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the predecoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if(predecoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
                                               Packet::Broadcast);
                    ifetch_pkt.dataStatic(&inst);

                    if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
                        icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction directly
                    // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                // Round the stall up to a whole number of CPU cycles.
                Tick stall_cycles = stall_ticks / ticks(1);
                Tick aligned_stall_ticks = ticks(stall_cycles);

                if (aligned_stall_ticks < stall_ticks)
                    aligned_stall_ticks += ticks(1);

                latency += aligned_stall_ticks;
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    // instruction takes at least one cycle
    if (latency < ticks(1))
        latency = ticks(1);

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
#if !FULL_SYSTEM
    if (workload.size() != 1)
        panic("only one workload allowed");
#endif
    return new AtomicSimpleCPU(this);
}
