/*
 * Copyright 2014 Google, Inc.
 * Copyright (c) 2012-2013 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());

    if (FullSystem && !params()->switched_out) {
        ThreadID size = threadContexts.size();
        for (ThreadID i = 0; i < size; ++i) {
            ThreadContext *tc = threadContexts[i];
            // initialize CPU, including PC
            TheISA::initCPU(tc, tc->contextId());
        }
    }

    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + ".icache_port", this),
      dcachePort(name() + ".dcache_port", this),
      fastmem(p->fastmem), dcache_access(false), dcache_latency(0),
      ppCommit(nullptr)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

DrainState
AtomicSimpleCPU::drain()
{
    if (switchedOut())
        return DrainState::Drained;

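    // If the CPU is in the middle of a microcoded instruction, keep it
    // running; tick() calls tryCompleteDrain() and signals drain completion
    // once isDrained() returns true.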
    if (!isDrained()) {
        DPRINTF(Drain, "Requesting drain: %s\n", pcState());
        return DrainState::Draining;
    } else {
        if (tickEvent.scheduled())
            deschedule(tickEvent);

        DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
        return DrainState::Drained;
    }
}

void
AtomicSimpleCPU::drainResume()
{
    assert(!tickEvent.scheduled());
    if (switchedOut())
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    verifyMemoryMode();

    assert(!threadContexts.empty());
    if (threadContexts.size() > 1)
        fatal("The atomic CPU only supports one thread.\n");

    if (thread->status() == ThreadContext::Active) {
        schedule(tickEvent, nextCycle());
        _status = BaseSimpleCPU::Running;
        notIdleFraction = 1;
    } else {
        _status = BaseSimpleCPU::Idle;
        notIdleFraction = 0;
    }
}

bool
AtomicSimpleCPU::tryCompleteDrain()
{
    if (drainState() != DrainState::Draining)
        return false;

    DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
    if (!isDrained())
        return false;

    DPRINTF(Drain, "CPU done draining, processing drain event\n");
    signalDrainDone();

    return true;
}


void
AtomicSimpleCPU::switchOut()
{
    BaseSimpleCPU::switchOut();

    assert(!tickEvent.scheduled());
    assert(_status == BaseSimpleCPU::Running || _status == Idle);
    assert(isDrained());
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseSimpleCPU::takeOverFrom(oldCPU);

    // The tick event should have been descheduled by drain()
    assert(!tickEvent.scheduled());

    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

void
AtomicSimpleCPU::verifyMemoryMode() const
{
    if (!system->isAtomicMode()) {
        fatal("The atomic CPU requires the memory system to be in "
              "'atomic' mode.\n");
    }
}

void
AtomicSimpleCPU::activateContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction = 1;
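    // Account for the cycles that elapsed while the thread was suspended so
    // numCycles stays consistent with the current tick.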
    Cycles delta = ticksToCycles(thread->lastActivate - thread->lastSuspend);
    numCycles += delta;
    ppCycles->notify(delta);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, clockEdge(Cycles(0)));
    _status = BaseSimpleCPU::Running;
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == BaseSimpleCPU::Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction = 0;
    _status = Idle;
}


Tick
AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // X86 ISA: Snooping an invalidation for monitor/mwait
    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
    if (cpu->getAddrMonitor()->doMonitor(pkt)) {
        cpu->wakeup();
    }

    // if snoop invalidates, release any associated locks
    if (pkt->isInvalidate()) {
        DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
    }

    return 0;
}

void
AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // X86 ISA: Snooping an invalidation for monitor/mwait
    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
    if (cpu->getAddrMonitor()->doMonitor(pkt)) {
        cpu->wakeup();
    }

    // if snoop invalidates, release any associated locks
    if (pkt->isInvalidate()) {
        DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
    }
}

Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData)
        traceData->setMem(addr, size, flags);

    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    req->taskId(taskId());
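    // The loop below runs at most twice: once for the (possibly truncated)
    // first portion of the access, and once more if the access crosses a
    // cache line boundary.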
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt(req, Packet::makeReadCmd(req));
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
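                // With fastmem, bypass the data port and access backing
                // physical memory directly when the address is in range.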
                if (fastmem && system->isMemAddr(pkt.getAddr()))
                    system->getPhysMem().access(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr)
        {
            if (req->isLockedRMW() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're reading into to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{

    static uint8_t zero_array[64] = {};

    if (data == NULL) {
        assert(size <= 64);
        assert(flags & Request::CACHE_BLOCK_ZERO);
        // This must be a cache block cleaning request
        data = zero_array;
    }

    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData)
        traceData->setMem(addr, size, flags);

    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    req->taskId(taskId());
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

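            // Store-conditionals consult the local lock state; if
            // handleLockedWrite() says the SC must fail, the memory access
            // below is skipped entirely.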
            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req, dcachePort.cacheBlockMask);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && system->isMemAddr(pkt.getAddr()))
                        system->getPhysMem().access(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr)
        {
            if (req->isLockedRMW() && fault == NoFault) {
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're writing from to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

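    // Execute up to 'width' instructions per tick; keep going past 'width'
    // while 'locked' is set so that a locked RMW sequence started in
    // readMem() completes within this tick.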
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;
        ppCycles->notify(1);

        if (!curStaticInst || !curStaticInst->isDelayedCommit()) {
            checkForInterrupts();
            checkPcEventQueue();
        }

        // We must have just got suspended by a PC event
        if (_status == Idle) {
            tryCompleteDrain();
            return;
        }

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            ifetch_req.taskId(taskId());
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if (decoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
                    ifetch_pkt.dataStatic(&inst);

                    if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
                        system->getPhysMem().access(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction directly
                    // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault) {
                    countInst();
                    ppCommit->notify(std::make_pair(thread, curStaticInst));
                }
                else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                // the atomic cpu does its accounting in ticks, so
                // keep counting in ticks but round to the clock
                // period
                latency += divCeil(stall_ticks, clockPeriod()) *
                    clockPeriod();
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    if (tryCompleteDrain())
        return;

    // instruction takes at least one cycle
    if (latency < clockPeriod())
        latency = clockPeriod();

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}

void
AtomicSimpleCPU::regProbePoints()
{
    BaseCPU::regProbePoints();

    ppCommit = new ProbePointArg<pair<SimpleThread*, const StaticInstPtr>>
                                (getProbeManager(), "Commit");
}

void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}

////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
    if (!FullSystem && workload.size() != 1)
        panic("only one workload allowed");
    return new AtomicSimpleCPU(this);
}