// atomic.cc, revision 8737:770ccf3af571
/*
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"

using namespace std;
using namespace TheISA;

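// Event wrapper for the CPU's cycle-by-cycle execution: each time the
// scheduled tick event fires, it calls back into tick().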
AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

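// Look up one of the CPU's memory-side ports by name. Requesting
// "physmem_port" also records that the CPU is wired directly to
// physical memory, which lets init() cache that memory's address range.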
Port *
AtomicSimpleCPU::getPort(const string &if_name, int idx)
{
    if (if_name == "dcache_port")
        return &dcachePort;
    else if (if_name == "icache_port")
        return &icachePort;
    else if (if_name == "physmem_port") {
        hasPhysMemPort = true;
        return &physmemPort;
    }
    else
        panic("No such port: %s\n", if_name);
}

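// Late initialization: in full-system mode, initialize each thread
// context (including the PC) and the memory proxies; cache the address
// range of a directly connected physical memory, if any; and bind the
// static fetch/read/write requests to this CPU's context ID.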
void
AtomicSimpleCPU::init()
{
    BaseCPU::init();
#if FULL_SYSTEM
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];

        // initialize CPU, including PC
        TheISA::initCPU(tc, tc->contextId());
    }

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());
#endif
    if (hasPhysMemPort) {
        AddrRangeList pmAddrList = physmemPort.getPeer()->getAddrRanges();
        physMemAddr = *pmAddrList.begin();
    }
    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + "-iport", this), dcachePort(name() + "-dport", this),
      physmemPort(name() + "-pport", this), hasPhysMemPort(false)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

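// Checkpointing: save the SimObject drain state, the LL/SC 'locked'
// flag, the base-CPU state, and the tick event (under its own section
// name so unserialize() can find it).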
void
AtomicSimpleCPU::serialize(ostream &os)
{
    SimObject::State so_state = SimObject::getState();
    SERIALIZE_ENUM(so_state);
    SERIALIZE_SCALAR(locked);
    BaseSimpleCPU::serialize(os);
    nameOut(os, csprintf("%s.tickEvent", name()));
    tickEvent.serialize(os);
}

void
AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
    SimObject::State so_state;
    UNSERIALIZE_ENUM(so_state);
    UNSERIALIZE_SCALAR(locked);
    BaseSimpleCPU::unserialize(cp, section);
    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}

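// Restart ticking after a drain or checkpoint restore. Idle and
// switched-out CPUs stay quiescent; otherwise the tick event is put
// back on the next cycle boundary. The system must be in atomic
// memory mode.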
void
AtomicSimpleCPU::resume()
{
    if (_status == Idle || _status == SwitchedOut)
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    assert(system->getMemoryMode() == Enums::atomic);

    changeState(SimObject::Running);
    if (thread->status() == ThreadContext::Active) {
        if (!tickEvent.scheduled())
            schedule(tickEvent, nextCycle());
    }
    system->totalNumInsts = 0;
}

void
AtomicSimpleCPU::switchOut()
{
    assert(_status == Running || _status == Idle);
    _status = SwitchedOut;

    tickEvent.squash();
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseCPU::takeOverFrom(oldCPU);

    assert(!tickEvent.scheduled());

    // if any of this CPU's ThreadContexts are active, mark the CPU as
    // running and schedule its tick event.
    ThreadID size = threadContexts.size();
    for (ThreadID i = 0; i < size; ++i) {
        ThreadContext *tc = threadContexts[i];
        if (tc->status() == ThreadContext::Active && _status != Running) {
            _status = Running;
            schedule(tickEvent, nextCycle());
            break;
        }
    }
    if (_status != Running) {
        _status = Idle;
    }
    assert(threadContexts.size() == 1);
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}


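// Wake the (single) thread: the cycles between the last suspend and
// this activation are credited to numCycles, and the first tick is
// scheduled on a cycle boundary 'delay' cycles from now.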
void
AtomicSimpleCPU::activateContext(ThreadID thread_num, int delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
    numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
    _status = Running;
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


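// Read 'size' bytes into 'data' from virtual address 'addr'. An access
// that crosses a cache-line boundary is split into two atomic accesses.
// For example, assuming a 64-byte block size, an 8-byte read at 0x3c is
// issued as 4 bytes at 0x3c-0x3f followed by 4 bytes at 0x40-0x43;
// dcache_latency accumulates the latency of both halves.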
Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                    req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
                    Packet::Broadcast);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
                    dcache_latency += physmemPort.sendAtomic(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Advance the destination pointer past the bytes already read.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


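// Write 'size' bytes from 'data' to virtual address 'addr', splitting
// the access across cache lines exactly as readMem() does.
// Store-conditionals are sent as StoreCondReq and may be suppressed by
// handleLockedWrite(); (conditional) swaps return the old memory
// contents through *res.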
Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd, Packet::Broadcast);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
                        dcache_latency += physmemPort.sendAtomic(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Advance the source pointer past the bytes already written.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


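// Main simulation loop: execute up to 'width' instructions per tick
// event, continuing beyond that while an LL/SC sequence holds the
// 'locked' flag, then reschedule the next tick after the accumulated
// latency.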
void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle)
            return;

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the predecoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if(predecoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
                                               Packet::Broadcast);
                    ifetch_pkt.dataStatic(&inst);

                    if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
                        icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction directly
                    // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

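            // Charge memory stalls in whole CPU cycles: stall_ticks is
            // rounded up to the next multiple of the clock period, so,
            // e.g., a 750-tick stall on a 500-tick clock costs 1000
            // ticks (two cycles).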
            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                Tick stall_cycles = stall_ticks / ticks(1);
                Tick aligned_stall_ticks = ticks(stall_cycles);

                if (aligned_stall_ticks < stall_ticks)
                    aligned_stall_ticks += ticks(1);

                latency += aligned_stall_ticks;
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    // instruction takes at least one cycle
    if (latency < ticks(1))
        latency = ticks(1);

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
#if !FULL_SYSTEM
    if (workload.size() != 1)
        panic("only one workload allowed");
#endif
    return new AtomicSimpleCPU(this);
}