fetch_impl.hh revision 3594:e401993e0cbb
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 *          Korey Sewell
30 */
31
32#include "config/use_checker.hh"
33
34#include "arch/isa_traits.hh"
35#include "arch/utility.hh"
36#include "cpu/checker/cpu.hh"
37#include "cpu/exetrace.hh"
38#include "cpu/o3/fetch.hh"
39#include "mem/packet.hh"
40#include "mem/request.hh"
41#include "sim/byteswap.hh"
42#include "sim/host.hh"
43#include "sim/root.hh"
44
45#if FULL_SYSTEM
46#include "arch/tlb.hh"
47#include "arch/vtophys.hh"
48#include "sim/system.hh"
49#endif // FULL_SYSTEM
50
51#include <algorithm>
52
template<class Impl>
Tick
DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
{
    // Fetch only issues timing-mode accesses, so an atomic callback is a
    // programming error.  panic() does not return; the return below only
    // satisfies the compiler.
    panic("DefaultFetch doesn't expect recvAtomic callback!");
    return curTick;
}
60
61template<class Impl>
62void
63DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
64{
65    DPRINTF(Fetch, "DefaultFetch doesn't update its state from a "
66            "functional call.");
67}
68
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
{
    // Address-range changes are benign and expected; anything else is a
    // callback fetch does not support.
    if (status == RangeChange)
        return;

    panic("DefaultFetch doesn't expect recvStatusChange callback!");
}
78
79template<class Impl>
80bool
81DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
82{
83    DPRINTF(Fetch, "Received timing\n");
84    if (pkt->isResponse()) {
85        fetch->processCacheCompletion(pkt);
86    }
87    //else Snooped a coherence request, just return
88    return true;
89}
90
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvRetry()
{
    // The memory system is ready again; forward the retry to fetch so it
    // can resend the blocked packet.
    fetch->recvRetry();
}
97
98template<class Impl>
99DefaultFetch<Impl>::DefaultFetch(Params *params)
100    : branchPred(params),
101      decodeToFetchDelay(params->decodeToFetchDelay),
102      renameToFetchDelay(params->renameToFetchDelay),
103      iewToFetchDelay(params->iewToFetchDelay),
104      commitToFetchDelay(params->commitToFetchDelay),
105      fetchWidth(params->fetchWidth),
106      cacheBlocked(false),
107      retryPkt(NULL),
108      retryTid(-1),
109      numThreads(params->numberOfThreads),
110      numFetchingThreads(params->smtNumFetchingThreads),
111      interruptPending(false),
112      drainPending(false),
113      switchedOut(false)
114{
115    if (numThreads > Impl::MaxThreads)
116        fatal("numThreads is not a valid value\n");
117
118    // Set fetch stage's status to inactive.
119    _status = Inactive;
120
121    std::string policy = params->smtFetchPolicy;
122
123    // Convert string to lowercase
124    std::transform(policy.begin(), policy.end(), policy.begin(),
125                   (int(*)(int)) tolower);
126
127    // Figure out fetch policy
128    if (policy == "singlethread") {
129        fetchPolicy = SingleThread;
130        if (numThreads > 1)
131            panic("Invalid Fetch Policy for a SMT workload.");
132    } else if (policy == "roundrobin") {
133        fetchPolicy = RoundRobin;
134        DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
135    } else if (policy == "branch") {
136        fetchPolicy = Branch;
137        DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
138    } else if (policy == "iqcount") {
139        fetchPolicy = IQ;
140        DPRINTF(Fetch, "Fetch policy set to IQ count\n");
141    } else if (policy == "lsqcount") {
142        fetchPolicy = LSQ;
143        DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
144    } else {
145        fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
146              " RoundRobin,LSQcount,IQcount}\n");
147    }
148
149    // Size of cache block.
150    cacheBlkSize = 64;
151
152    // Create mask to get rid of offset bits.
153    cacheBlkMask = (cacheBlkSize - 1);
154
155    for (int tid=0; tid < numThreads; tid++) {
156
157        fetchStatus[tid] = Running;
158
159        priorityList.push_back(tid);
160
161        memReq[tid] = NULL;
162
163        // Create space to store a cache line.
164        cacheData[tid] = new uint8_t[cacheBlkSize];
165        cacheDataPC[tid] = 0;
166        cacheDataValid[tid] = false;
167
168        delaySlotInfo[tid].branchSeqNum = -1;
169        delaySlotInfo[tid].numInsts = 0;
170        delaySlotInfo[tid].targetAddr = 0;
171        delaySlotInfo[tid].targetReady = false;
172
173        stalls[tid].decode = false;
174        stalls[tid].rename = false;
175        stalls[tid].iew = false;
176        stalls[tid].commit = false;
177    }
178
179    // Get the size of an instruction.
180    instSize = sizeof(TheISA::MachInst);
181}
182
183template <class Impl>
184std::string
185DefaultFetch<Impl>::name() const
186{
187    return cpu->name() + ".fetch";
188}
189
template <class Impl>
void
DefaultFetch<Impl>::regStats()
{
    // Register all of fetch's statistics with the stats package.  Each
    // .prereq() on a stat itself means the stat is only printed when it
    // is non-zero.
    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".Insts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);

    fetchedBranches
        .name(name() + ".Branches")
        .desc("Number of branches that fetch encountered")
        .prereq(fetchedBranches);

    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);

    fetchCycles
        .name(name() + ".Cycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);

    fetchSquashCycles
        .name(name() + ".SquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);

    fetchIdleCycles
        .name(name() + ".IdleCycles")
        .desc("Number of cycles fetch was idle")
        .prereq(fetchIdleCycles);

    fetchBlockedCycles
        .name(name() + ".BlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);

    fetchedCacheLines
        .name(name() + ".CacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetchMiscStallCycles
        .name(name() + ".MiscStallCycles")
        .desc("Number of cycles fetch has spent waiting on interrupts, or "
              "bad addresses, or out of MSHRs")
        .prereq(fetchMiscStallCycles);

    fetchIcacheSquashes
        .name(name() + ".IcacheSquashes")
        .desc("Number of outstanding Icache misses that were squashed")
        .prereq(fetchIcacheSquashes);

    // Distribution of instructions fetched per cycle, bucketed by one,
    // from 0 up to the machine's fetch width.
    fetchNisnDist
        .init(/* base value */ 0,
              /* last value */ fetchWidth,
              /* bucket size */ 1)
        .name(name() + ".rateDist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf);

    // Derived (formula) stats below; they are computed from the simple
    // stats registered above plus the CPU's cycle count.
    idleRate
        .name(name() + ".idleRate")
        .desc("Percent of cycles fetch was idle")
        .prereq(idleRate);
    idleRate = fetchIdleCycles * 100 / cpu->numCycles;

    branchRate
        .name(name() + ".branchRate")
        .desc("Number of branch fetches per cycle")
        .flags(Stats::total);
    branchRate = fetchedBranches / cpu->numCycles;

    fetchRate
        .name(name() + ".rate")
        .desc("Number of inst fetches per cycle")
        .flags(Stats::total);
    fetchRate = fetchedInsts / cpu->numCycles;

    // The branch predictor registers its own stats.
    branchPred.regStats();
}
279
template<class Impl>
void
DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
{
    // Give fetch its back-pointer to the CPU; this also triggers creation
    // of the instruction-cache port and activates the stage.
    DPRINTF(Fetch, "Setting the CPU pointer.\n");
    cpu = cpu_ptr;

    // Name is finally available, so create the port.
    icachePort = new IcachePort(this);

#if USE_CHECKER
    // The checker CPU (if present) shares fetch's icache port.
    if (cpu->checker) {
        cpu->checker->setIcachePort(icachePort);
    }
#endif

    // Schedule fetch to get the correct PC from the CPU
    // scheduleFetchStartupEvent(1);

    // Fetch needs to start fetching instructions at the very beginning,
    // so it must start up in active state.
    switchToActive();
}
303
304template<class Impl>
305void
306DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
307{
308    DPRINTF(Fetch, "Setting the time buffer pointer.\n");
309    timeBuffer = time_buffer;
310
311    // Create wires to get information from proper places in time buffer.
312    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
313    fromRename = timeBuffer->getWire(-renameToFetchDelay);
314    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
315    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
316}
317
template<class Impl>
void
DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
    // The active-threads list is owned by the CPU; fetch only keeps a
    // pointer so it can iterate the currently active thread IDs.
    DPRINTF(Fetch, "Setting active threads list pointer.\n");
    activeThreads = at_ptr;
}
325
template<class Impl>
void
DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    // Hook up the fetch->decode queue and grab the slot fetch writes to.
    DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
    fetchQueue = fq_ptr;

    // Create wire to write information to proper place in fetch queue.
    toDecode = fetchQueue->getWire(0);
}
336
template<class Impl>
void
DefaultFetch<Impl>::initStage()
{
    // Setup PC and nextPC with initial state read from the CPU's
    // architectural state for each thread.
    for (int tid = 0; tid < numThreads; tid++) {
        PC[tid] = cpu->readPC(tid);
        nextPC[tid] = cpu->readNextPC(tid);
#if ISA_HAS_DELAY_SLOT
        // ISAs with a branch delay slot track one further PC.
        nextNPC[tid] = cpu->readNextNPC(tid);
#endif
    }
}
350
351template<class Impl>
352void
353DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
354{
355    unsigned tid = pkt->req->getThreadNum();
356
357    DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
358
359    // Only change the status if it's still waiting on the icache access
360    // to return.
361    if (fetchStatus[tid] != IcacheWaitResponse ||
362        pkt->req != memReq[tid] ||
363        isSwitchedOut()) {
364        ++fetchIcacheSquashes;
365        delete pkt->req;
366        delete pkt;
367        return;
368    }
369
370    memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
371    cacheDataValid[tid] = true;
372
373    if (!drainPending) {
374        // Wake up the CPU (if it went to sleep and was waiting on
375        // this completion event).
376        cpu->wakeCPU();
377
378        DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
379                tid);
380
381        switchToActive();
382    }
383
384    // Only switch to IcacheAccessComplete if we're not stalled as well.
385    if (checkStall(tid)) {
386        fetchStatus[tid] = Blocked;
387    } else {
388        fetchStatus[tid] = IcacheAccessComplete;
389    }
390
391    // Reset the mem req to NULL.
392    delete pkt->req;
393    delete pkt;
394    memReq[tid] = NULL;
395}
396
template <class Impl>
bool
DefaultFetch<Impl>::drain()
{
    // Fetch is ready to drain at any time, so signal the CPU immediately
    // and remember that a drain is pending (fetch() checks this flag).
    cpu->signalDrained();
    drainPending = true;
    return true;
}
406
template <class Impl>
void
DefaultFetch<Impl>::resume()
{
    // Clearing the flag lets fetch() start issuing accesses again.
    drainPending = false;
}
413
template <class Impl>
void
DefaultFetch<Impl>::switchOut()
{
    // Mark the stage switched out so in-flight cache responses are
    // dropped (see processCacheCompletion).
    switchedOut = true;
    // Branch predictor needs to have its state cleared.
    branchPred.switchOut();
}
422
423template <class Impl>
424void
425DefaultFetch<Impl>::takeOverFrom()
426{
427    // Reset all state
428    for (int i = 0; i < Impl::MaxThreads; ++i) {
429        stalls[i].decode = 0;
430        stalls[i].rename = 0;
431        stalls[i].iew = 0;
432        stalls[i].commit = 0;
433        PC[i] = cpu->readPC(i);
434        nextPC[i] = cpu->readNextPC(i);
435#if ISA_HAS_DELAY_SLOT
436        nextNPC[i] = cpu->readNextNPC(i);
437        delaySlotInfo[i].branchSeqNum = -1;
438        delaySlotInfo[i].numInsts = 0;
439        delaySlotInfo[i].targetAddr = 0;
440        delaySlotInfo[i].targetReady = false;
441#endif
442        fetchStatus[i] = Running;
443    }
444    numInst = 0;
445    wroteToTimeBuffer = false;
446    _status = Inactive;
447    switchedOut = false;
448    interruptPending = false;
449    branchPred.takeOverFrom();
450}
451
template <class Impl>
void
DefaultFetch<Impl>::wakeFromQuiesce()
{
    DPRINTF(Fetch, "Waking up from quiesce\n");
    // Hopefully this is safe
    // @todo: Allow other threads to wake from quiesce.
    // Currently only thread 0 is resumed.
    fetchStatus[0] = Running;
}
461
462template <class Impl>
463inline void
464DefaultFetch<Impl>::switchToActive()
465{
466    if (_status == Inactive) {
467        DPRINTF(Activity, "Activating stage.\n");
468
469        cpu->activateStage(O3CPU::FetchIdx);
470
471        _status = Active;
472    }
473}
474
475template <class Impl>
476inline void
477DefaultFetch<Impl>::switchToInactive()
478{
479    if (_status == Active) {
480        DPRINTF(Activity, "Deactivating stage.\n");
481
482        cpu->deactivateStage(O3CPU::FetchIdx);
483
484        _status = Inactive;
485    }
486}
487
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
                                          Addr &next_NPC)
{
    // Do branch prediction check here.
    // A bit of a misnomer...next_PC is actually the current PC until
    // this function updates it.  Returns whether the branch was
    // predicted taken.
    bool predict_taken;

    if (!inst->isControl()) {
        // Not a branch: simply advance the PC(s) sequentially.
#if ISA_HAS_DELAY_SLOT
        Addr cur_PC = next_PC;
        next_PC  = cur_PC + instSize;      //next_NPC;
        next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
        inst->setPredTarg(next_NPC);
#else
        next_PC = next_PC + instSize;
        inst->setPredTarg(next_PC);
#endif
        return false;
    }

    int tid = inst->threadNumber;
#if ISA_HAS_DELAY_SLOT
    // pred_PC comes back from the predictor holding the predicted target.
    Addr pred_PC = next_PC;
    predict_taken = branchPred.predict(inst, pred_PC, tid);

    if (predict_taken) {
        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
    } else {
        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
    }

    if (predict_taken) {
        // Taken: the delay-slot instruction (next_NPC) executes first,
        // then control transfers to the predicted target.
        next_PC = next_NPC;
        next_NPC = pred_PC;

        // Update delay slot info
        ++delaySlotInfo[tid].numInsts;
        delaySlotInfo[tid].targetAddr = pred_PC;
        DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
                delaySlotInfo[tid].numInsts);
    } else { // !predict_taken
        if (inst->isCondDelaySlot()) {
            next_PC = pred_PC;
            // Conditional delay slot: when predicted not-taken, the
            // delay-slot instruction is skipped (annulled).
        } else {
            next_PC = next_NPC;
            // Ordinary not-taken branch: no delay-slot bookkeeping is
            // needed since there is no target to jump to.
        }

        next_NPC = next_NPC + instSize;
    }
#else
    predict_taken = branchPred.predict(inst, next_PC, tid);
#endif

    ++fetchedBranches;

    if (predict_taken) {
        ++predictedBranches;
    }

    return predict_taken;
}
556
template <class Impl>
bool
DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid)
{
    // Attempt to translate fetch_PC and issue a timing read for the
    // cache block containing it.  Returns false when the access could
    // not even be attempted; returns true otherwise, with any
    // translation fault reported through ret_fault.
    Fault fault = NoFault;

    //AlphaDep
    if (cacheBlocked || isSwitchedOut() ||
            (interruptPending && (fetch_PC & 0x3))) {
        // Hold off fetch from getting new instructions when:
        // Cache is blocked, or
        // while an interrupt is pending and we're not in PAL mode, or
        // fetch is switched out.
        return false;
    }

    // Align the fetch PC so it's at the start of a cache block.
    fetch_PC = icacheBlockAlignPC(fetch_PC);

    // If we've already got the block, no need to try to fetch it again.
    if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
        return true;
    }

    // Setup the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    // Build request here.
    RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, 0,
                                     fetch_PC, cpu->readCpuId(), tid);

    memReq[tid] = mem_req;

    // Translate the instruction request.
    fault = cpu->translateInstReq(mem_req, cpu->thread[tid]);

    // In the case of faults, the fetch stage may need to stall and wait
    // for the ITB miss to be handled.

    // If translation was successful, attempt to read the first
    // instruction.
    if (fault == NoFault) {
#if 0
        if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
            memReq[tid]->isUncacheable()) {
            DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
                    "misspeculating path)!",
                    memReq[tid]->paddr);
            ret_fault = TheISA::genMachineCheckFault();
            return false;
        }
#endif

        // Build packet here.
        PacketPtr data_pkt = new Packet(mem_req,
                                        Packet::ReadReq, Packet::Broadcast);
        data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);

        cacheDataPC[tid] = fetch_PC;
        cacheDataValid[tid] = false;

        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");

        fetchedCacheLines++;

        // Now do the timing access to see whether or not the instruction
        // exists within the cache.
        if (!icachePort->sendTiming(data_pkt)) {
            if (data_pkt->result == Packet::BadAddress) {
                fault = TheISA::genMachineCheckFault();
                delete mem_req;
                memReq[tid] = NULL;
            }
            // NOTE(review): on the BadAddress path above, the generated
            // fault is never propagated to ret_fault (we return false
            // below before the assignment), and data_pkt is still
            // installed as the retry packet even though its request was
            // deleted -- confirm this is intended.
            assert(retryPkt == NULL);
            assert(retryTid == -1);
            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
            fetchStatus[tid] = IcacheWaitRetry;
            retryPkt = data_pkt;
            retryTid = tid;
            cacheBlocked = true;
            return false;
        }

        DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid);

        lastIcacheStall[tid] = curTick;

        DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
                "response.\n", tid);

        fetchStatus[tid] = IcacheWaitResponse;
    } else {
        // Translation faulted: free the request and report the fault.
        delete mem_req;
        memReq[tid] = NULL;
    }

    ret_fault = fault;
    return true;
}
655
template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
{
    // Common squash work: redirect the thread's PCs to new_PC, discard
    // any outstanding or retrying icache access, and enter Squashing.
    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
            tid, new_PC);

    PC[tid] = new_PC;
    nextPC[tid] = new_PC + instSize;
    nextNPC[tid] = new_PC + (2 * instSize);

    // Clear the icache miss if it's outstanding.
    if (fetchStatus[tid] == IcacheWaitResponse) {
        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
                tid);
        // memReq is only nulled here; the request itself is freed when
        // its (now stale) response arrives in processCacheCompletion.
        memReq[tid] = NULL;
    }

    // Get rid of the retrying packet if it was from this thread.
    if (retryTid == tid) {
        assert(cacheBlocked);
        cacheBlocked = false;
        retryTid = -1;
        delete retryPkt->req;
        delete retryPkt;
        retryPkt = NULL;
    }

    fetchStatus[tid] = Squashing;

    ++fetchSquashCycles;
}
688
template<class Impl>
void
DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
                                     const InstSeqNum &seq_num,
                                     unsigned tid)
{
    // Squash triggered by decode (e.g. a mispredicted branch detected at
    // decode); seq_num is the last instruction to keep.
    DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);

    doSquash(new_PC, tid);

#if ISA_HAS_DELAY_SLOT
    // Discard delay-slot bookkeeping if its branch was squashed too.
    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
        delaySlotInfo[tid].numInsts = 0;
        delaySlotInfo[tid].targetAddr = 0;
        delaySlotInfo[tid].targetReady = false;
    }
#endif

    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
    cpu->removeInstsUntil(seq_num, tid);
}
711
712template<class Impl>
713bool
714DefaultFetch<Impl>::checkStall(unsigned tid) const
715{
716    bool ret_val = false;
717
718    if (cpu->contextSwitch) {
719        DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
720        ret_val = true;
721    } else if (stalls[tid].decode) {
722        DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
723        ret_val = true;
724    } else if (stalls[tid].rename) {
725        DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
726        ret_val = true;
727    } else if (stalls[tid].iew) {
728        DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
729        ret_val = true;
730    } else if (stalls[tid].commit) {
731        DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
732        ret_val = true;
733    }
734
735    return ret_val;
736}
737
738template<class Impl>
739typename DefaultFetch<Impl>::FetchStatus
740DefaultFetch<Impl>::updateFetchStatus()
741{
742    //Check Running
743    std::list<unsigned>::iterator threads = (*activeThreads).begin();
744
745    while (threads != (*activeThreads).end()) {
746
747        unsigned tid = *threads++;
748
749        if (fetchStatus[tid] == Running ||
750            fetchStatus[tid] == Squashing ||
751            fetchStatus[tid] == IcacheAccessComplete) {
752
753            if (_status == Inactive) {
754                DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
755
756                if (fetchStatus[tid] == IcacheAccessComplete) {
757                    DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
758                            "completion\n",tid);
759                }
760
761                cpu->activateStage(O3CPU::FetchIdx);
762            }
763
764            return Active;
765        }
766    }
767
768    // Stage is switching from active to inactive, notify CPU of it.
769    if (_status == Active) {
770        DPRINTF(Activity, "Deactivating stage.\n");
771
772        cpu->deactivateStage(O3CPU::FetchIdx);
773    }
774
775    return Inactive;
776}
777
template <class Impl>
void
DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
                           bool squash_delay_slot, unsigned tid)
{
    // Squash triggered by commit: redirect fetch and remove every
    // instruction younger than the committed squash point from the CPU.
    DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);

    doSquash(new_PC, tid);

#if ISA_HAS_DELAY_SLOT
    // Discard delay-slot bookkeeping if its branch was squashed too.
    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
        delaySlotInfo[tid].numInsts = 0;
        delaySlotInfo[tid].targetAddr = 0;
        delaySlotInfo[tid].targetReady = false;
    }

    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
#else
    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB(tid, true, 0);
#endif
}
801
template <class Impl>
void
DefaultFetch<Impl>::tick()
{
    // Main per-cycle entry point: update each active thread's status
    // from the time-buffer signals, then run fetch for each fetching
    // thread and record activity.
    std::list<unsigned>::iterator threads = (*activeThreads).begin();
    bool status_change = false;

    wroteToTimeBuffer = false;

    while (threads != (*activeThreads).end()) {
        unsigned tid = *threads++;

        // Check the signals for each thread to determine the proper status
        // for each thread.
        bool updated_status = checkSignalsAndUpdate(tid);
        status_change =  status_change || updated_status;
    }

    DPRINTF(Fetch, "Running stage.\n");

    // Reset the number of the instruction we're fetching.
    numInst = 0;

#if FULL_SYSTEM
    // Latch interrupt state signalled by commit (thread 0's signal slot
    // carries the CPU-wide interrupt flags).
    if (fromCommit->commitInfo[0].interruptPending) {
        interruptPending = true;
    }

    if (fromCommit->commitInfo[0].clearInterrupt) {
        interruptPending = false;
    }
#endif

    for (threadFetched = 0; threadFetched < numFetchingThreads;
         threadFetched++) {
        // Fetch each of the actively fetching threads.
        fetch(status_change);
    }

    // Record number of instructions fetched this cycle for distribution.
    fetchNisnDist.sample(numInst);

    if (status_change) {
        // Change the fetch stage status if there was a status change.
        _status = updateFetchStatus();
    }

    // If there was activity this cycle, inform the CPU of it.
    if (wroteToTimeBuffer || cpu->contextSwitch) {
        DPRINTF(Activity, "Activity this cycle.\n");

        cpu->activityThisCycle();
    }
}
856
template <class Impl>
bool
DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
{
    // Examine all backwards-travelling signals for this thread (block/
    // unblock from each stage, squashes from commit and decode) and set
    // fetchStatus[tid] accordingly.  Returns true if the status changed.

    // Update the per thread stall statuses.
    if (fromDecode->decodeBlock[tid]) {
        stalls[tid].decode = true;
    }

    if (fromDecode->decodeUnblock[tid]) {
        assert(stalls[tid].decode);
        assert(!fromDecode->decodeBlock[tid]);
        stalls[tid].decode = false;
    }

    if (fromRename->renameBlock[tid]) {
        stalls[tid].rename = true;
    }

    if (fromRename->renameUnblock[tid]) {
        assert(stalls[tid].rename);
        assert(!fromRename->renameBlock[tid]);
        stalls[tid].rename = false;
    }

    if (fromIEW->iewBlock[tid]) {
        stalls[tid].iew = true;
    }

    if (fromIEW->iewUnblock[tid]) {
        assert(stalls[tid].iew);
        assert(!fromIEW->iewBlock[tid]);
        stalls[tid].iew = false;
    }

    if (fromCommit->commitBlock[tid]) {
        stalls[tid].commit = true;
    }

    if (fromCommit->commitUnblock[tid]) {
        assert(stalls[tid].commit);
        assert(!fromCommit->commitBlock[tid]);
        stalls[tid].commit = false;
    }

    // Check squash signals from commit.
    if (fromCommit->commitInfo[tid].squash) {

        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from commit.\n",tid);

#if ISA_HAS_DELAY_SLOT
    InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
#else
    InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
#endif
        // In any case, squash.
        squash(fromCommit->commitInfo[tid].nextPC,
               doneSeqNum,
               fromCommit->commitInfo[tid].squashDelaySlot,
               tid);

        // Also check if there's a mispredict that happened.
        if (fromCommit->commitInfo[tid].branchMispredict) {
            branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
                              fromCommit->commitInfo[tid].nextPC,
                              fromCommit->commitInfo[tid].branchTaken,
                              tid);
        } else {
            branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
                              tid);
        }

        return true;
    } else if (fromCommit->commitInfo[tid].doneSeqNum) {
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcasted.
        branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
    }

    // Check ROB squash signals from commit.
    if (fromCommit->commitInfo[tid].robSquashing) {
        DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);

        // Continue to squash.
        fetchStatus[tid] = Squashing;

        return true;
    }

    // Check squash signals from decode.
    if (fromDecode->decodeInfo[tid].squash) {
        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from decode.\n",tid);

        // Update the branch predictor.
        if (fromDecode->decodeInfo[tid].branchMispredict) {
            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
                              fromDecode->decodeInfo[tid].nextPC,
                              fromDecode->decodeInfo[tid].branchTaken,
                              tid);
        } else {
            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
                              tid);
        }

        if (fetchStatus[tid] != Squashing) {

#if ISA_HAS_DELAY_SLOT
            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
#else
            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
#endif
            // Squash unless we're already squashing
            squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
                             doneSeqNum,
                             tid);

            return true;
        }
    }

    // A stall from a later stage blocks fetch, unless it is still waiting
    // on an icache response (that transition happens on completion).
    if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) {
        DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);

        fetchStatus[tid] = Blocked;

        return true;
    }

    if (fetchStatus[tid] == Blocked ||
        fetchStatus[tid] == Squashing) {
        // Switch status to running if fetch isn't being told to block or
        // squash this cycle.
        DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
                tid);

        fetchStatus[tid] = Running;

        return true;
    }

    // If we've reached this point, we have not gotten any signals that
    // cause fetch to change its status.  Fetch remains the same as before.
    return false;
}
1003
/**
 * Performs one cycle of instruction fetch: selects a thread via the SMT
 * fetch policy, reads instructions out of the fetched cache line, creates
 * DynInsts for them, and places them on the queue heading to decode.
 * Fetch stops at the end of the cache block, at the fetch bandwidth limit,
 * or after a predicted-taken branch (plus its delay slot on delay-slot ISAs).
 * On a fetch fault in full-system mode, a dummy nop carrying the fault is
 * sent to decode so commit can handle the trap.
 *
 * @param status_change Set to true if the fetch status of the selected
 *                      thread changed during this cycle.
 */
template<class Impl>
void
DefaultFetch<Impl>::fetch(bool &status_change)
{
    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////
    int tid = getFetchingThread(fetchPolicy);

    if (tid == -1 || drainPending) {
        DPRINTF(Fetch,"There are no more threads available to fetch from.\n");

        // Breaks looping condition in tick()
        threadFetched = numFetchingThreads;
        return;
    }

    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);

    // The current PC.  Reference, so advancing fetch_PC below directly
    // updates this thread's architectural fetch PC.
    Addr &fetch_PC = PC[tid];

    // Fault code for memory access.
    Fault fault = NoFault;

    // If returning from the delay of a cache miss, then update the status
    // to running, otherwise do the cache access.  Possibly move this up
    // to tick() function.
    if (fetchStatus[tid] == IcacheAccessComplete) {
        DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
                tid);

        fetchStatus[tid] = Running;
        status_change = true;
    } else if (fetchStatus[tid] == Running) {
        DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
                "instruction, starting at PC %08p.\n",
                tid, fetch_PC);

        // May initiate a timing icache access; on failure we record why
        // (cache port blocked vs. other stall) and give up this cycle.
        bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
        if (!fetch_success) {
            if (cacheBlocked) {
                ++icacheStallCycles;
            } else {
                ++fetchMiscStallCycles;
            }
            return;
        }
    } else {
        // Not in a fetchable state; just account the stall cycle.
        if (fetchStatus[tid] == Idle) {
            ++fetchIdleCycles;
        } else if (fetchStatus[tid] == Blocked) {
            ++fetchBlockedCycles;
        } else if (fetchStatus[tid] == Squashing) {
            ++fetchSquashCycles;
        } else if (fetchStatus[tid] == IcacheWaitResponse) {
            ++icacheStallCycles;
        }

        // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
        // fetch should do nothing.
        return;
    }

    ++fetchCycles;

    // If we had a stall due to an icache miss, then return.
    // (fetchCacheLine above may have transitioned us into this state.)
    if (fetchStatus[tid] == IcacheWaitResponse) {
        ++icacheStallCycles;
        status_change = true;
        return;
    }

    Addr next_PC = fetch_PC;
    Addr next_NPC = next_PC + instSize;
    InstSeqNum inst_seq;
    MachInst inst;
    ExtMachInst ext_inst;
    // @todo: Fix this hack.
    // Byte offset into the cache block, forced to a 4-byte boundary
    // (& ~3 clears the low two bits).
    unsigned offset = (fetch_PC & cacheBlkMask) & ~3;

    if (fault == NoFault) {
        // If the read of the first instruction was successful, then grab the
        // instructions from the rest of the cache line and put them into the
        // queue heading to decode.

        DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
                "decode.\n",tid);

        // Need to keep track of whether or not a predicted branch
        // ended this fetch block.
        bool predicted_branch = false;

        // Need to keep track of whether or not a delay slot
        // instruction has been fetched

        // Keep fetching until the block ends, bandwidth is exhausted, or a
        // predicted branch (with no outstanding delay slots) terminates
        // the fetch group.
        for (;
             offset < cacheBlkSize &&
                 numInst < fetchWidth &&
                 (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
             ++numInst) {

            // Get a sequence number.
            inst_seq = cpu->getAndIncrementInstSeq();

            // Make sure this is a valid index.
            assert(offset <= cacheBlkSize - instSize);

            // Get the instruction from the array of the cache line.
            // gtoh converts from guest byte order to host byte order.
            inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
                        (&cacheData[tid][offset]));

#if THE_ISA == ALPHA_ISA
            ext_inst = TheISA::makeExtMI(inst, fetch_PC);
#elif THE_ISA == SPARC_ISA
            ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC());
#endif

            // Create a new DynInst from the instruction fetched.
            DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
                                                 next_PC,
                                                 inst_seq, cpu);
            instruction->setTid(tid);

            instruction->setASID(tid);

            instruction->setThreadState(cpu->thread[tid]);

            DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
                    "[sn:%lli]\n",
                    tid, instruction->readPC(), inst_seq);

            DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
                    tid, instruction->staticInst->disassemble(fetch_PC));

            instruction->traceData =
                Trace::getInstRecord(curTick, cpu->tcBase(tid),
                                     instruction->staticInst,
                                     instruction->readPC());

            // Consult the branch predictor; updates next_PC/next_NPC and
            // returns true if this instruction is a predicted-taken branch.
            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
                                                     next_NPC);

            // Add instruction to the CPU's list of instructions.
            instruction->setInstListIt(cpu->addInst(instruction));

            // Write the instruction to the first slot in the queue
            // that heads to decode.
            toDecode->insts[numInst] = instruction;

            toDecode->size++;

            // Increment stat of fetched instructions.
            ++fetchedInsts;

            // Move to the next instruction, unless we have a branch.
            fetch_PC = next_PC;

            if (instruction->isQuiesce()) {
                // NOTE(review): format string below has no conversion
                // specifier for the curTick argument — confirm intent.
                DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
                        curTick);
                fetchStatus[tid] = QuiescePending;
                ++numInst;
                status_change = true;
                break;
            }

            offset += instSize;

#if ISA_HAS_DELAY_SLOT
            if (predicted_branch) {
                // Remember which branch owns the pending delay slot(s).
                delaySlotInfo[tid].branchSeqNum = inst_seq;

                DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
                        tid, inst_seq);
                continue;
            } else if (delaySlotInfo[tid].numInsts > 0) {
                --delaySlotInfo[tid].numInsts;

                // It's OK to set PC to target of branch
                if (delaySlotInfo[tid].numInsts == 0) {
                    delaySlotInfo[tid].targetReady = true;

                    // Break the looping condition
                    predicted_branch = true;
                }

                DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
                        " process.\n", tid, delaySlotInfo[tid].numInsts);
            }
#endif
        }

        // Report why the fetch loop terminated (debug only).
        if (offset >= cacheBlkSize) {
            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
                    "block.\n", tid);
        } else if (numInst >= fetchWidth) {
            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                    "for this cycle.\n", tid);
        } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
            DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
                    "instruction encountered.\n", tid);
        }
    }

    if (numInst > 0) {
        wroteToTimeBuffer = true;
    }

    // Now that fetching is completed, update the PC to signify what the next
    // cycle will be.
    if (fault == NoFault) {
#if ISA_HAS_DELAY_SLOT
        if (delaySlotInfo[tid].targetReady &&
            delaySlotInfo[tid].numInsts == 0) {
            // Set PC to target
            PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
            nextPC[tid] = next_PC + instSize;        //next_NPC
            nextNPC[tid] = next_PC + (2 * instSize);

            delaySlotInfo[tid].targetReady = false;
        } else {
            PC[tid] = next_PC;
            nextPC[tid] = next_NPC;
            nextNPC[tid] = next_NPC + instSize;
        }

        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
#else
        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
        PC[tid] = next_PC;
        nextPC[tid] = next_PC + instSize;
#endif
    } else {
        // We shouldn't be in an icache miss and also have a fault (an ITB
        // miss)
        if (fetchStatus[tid] == IcacheWaitResponse) {
            panic("Fetch should have exited prior to this!");
        }

        // Send the fault to commit.  This thread will not do anything
        // until commit handles the fault.  The only other way it can
        // wake up is if a squash comes along and changes the PC.
#if FULL_SYSTEM
        assert(numInst != fetchWidth);
        // Get a sequence number.
        inst_seq = cpu->getAndIncrementInstSeq();
        // We will use a nop in order to carry the fault.
        ext_inst = TheISA::NoopMachInst;

        // Create a new DynInst from the dummy nop.
        DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
                                             next_PC,
                                             inst_seq, cpu);
        instruction->setPredTarg(next_PC + instSize);
        instruction->setTid(tid);

        instruction->setASID(tid);

        instruction->setThreadState(cpu->thread[tid]);

        instruction->traceData = NULL;

        instruction->setInstListIt(cpu->addInst(instruction));

        // Attach the fault so commit can raise the trap.
        instruction->fault = fault;

        toDecode->insts[numInst] = instruction;
        toDecode->size++;

        DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);

        fetchStatus[tid] = TrapPending;
        status_change = true;
#else // !FULL_SYSTEM
        fetchStatus[tid] = TrapPending;
        status_change = true;

#endif // FULL_SYSTEM
        DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %08p",
                tid, fault->name(), PC[tid]);
    }
}
1287
1288template<class Impl>
1289void
1290DefaultFetch<Impl>::recvRetry()
1291{
1292    if (retryPkt != NULL) {
1293        assert(cacheBlocked);
1294        assert(retryTid != -1);
1295        assert(fetchStatus[retryTid] == IcacheWaitRetry);
1296
1297        if (icachePort->sendTiming(retryPkt)) {
1298            fetchStatus[retryTid] = IcacheWaitResponse;
1299            retryPkt = NULL;
1300            retryTid = -1;
1301            cacheBlocked = false;
1302        }
1303    } else {
1304        assert(retryTid == -1);
1305        // Access has been squashed since it was sent out.  Just clear
1306        // the cache being blocked.
1307        cacheBlocked = false;
1308    }
1309}
1310
1311///////////////////////////////////////
1312//                                   //
1313//  SMT FETCH POLICY MAINTAINED HERE //
1314//                                   //
1315///////////////////////////////////////
1316template<class Impl>
1317int
1318DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
1319{
1320    if (numThreads > 1) {
1321        switch (fetch_priority) {
1322
1323          case SingleThread:
1324            return 0;
1325
1326          case RoundRobin:
1327            return roundRobin();
1328
1329          case IQ:
1330            return iqCount();
1331
1332          case LSQ:
1333            return lsqCount();
1334
1335          case Branch:
1336            return branchCount();
1337
1338          default:
1339            return -1;
1340        }
1341    } else {
1342        int tid = *((*activeThreads).begin());
1343
1344        if (fetchStatus[tid] == Running ||
1345            fetchStatus[tid] == IcacheAccessComplete ||
1346            fetchStatus[tid] == Idle) {
1347            return tid;
1348        } else {
1349            return -1;
1350        }
1351    }
1352
1353}
1354
1355
1356template<class Impl>
1357int
1358DefaultFetch<Impl>::roundRobin()
1359{
1360    std::list<unsigned>::iterator pri_iter = priorityList.begin();
1361    std::list<unsigned>::iterator end      = priorityList.end();
1362
1363    int high_pri;
1364
1365    while (pri_iter != end) {
1366        high_pri = *pri_iter;
1367
1368        assert(high_pri <= numThreads);
1369
1370        if (fetchStatus[high_pri] == Running ||
1371            fetchStatus[high_pri] == IcacheAccessComplete ||
1372            fetchStatus[high_pri] == Idle) {
1373
1374            priorityList.erase(pri_iter);
1375            priorityList.push_back(high_pri);
1376
1377            return high_pri;
1378        }
1379
1380        pri_iter++;
1381    }
1382
1383    return -1;
1384}
1385
1386template<class Impl>
1387int
1388DefaultFetch<Impl>::iqCount()
1389{
1390    std::priority_queue<unsigned> PQ;
1391
1392    std::list<unsigned>::iterator threads = (*activeThreads).begin();
1393
1394    while (threads != (*activeThreads).end()) {
1395        unsigned tid = *threads++;
1396
1397        PQ.push(fromIEW->iewInfo[tid].iqCount);
1398    }
1399
1400    while (!PQ.empty()) {
1401
1402        unsigned high_pri = PQ.top();
1403
1404        if (fetchStatus[high_pri] == Running ||
1405            fetchStatus[high_pri] == IcacheAccessComplete ||
1406            fetchStatus[high_pri] == Idle)
1407            return high_pri;
1408        else
1409            PQ.pop();
1410
1411    }
1412
1413    return -1;
1414}
1415
1416template<class Impl>
1417int
1418DefaultFetch<Impl>::lsqCount()
1419{
1420    std::priority_queue<unsigned> PQ;
1421
1422
1423    std::list<unsigned>::iterator threads = (*activeThreads).begin();
1424
1425    while (threads != (*activeThreads).end()) {
1426        unsigned tid = *threads++;
1427
1428        PQ.push(fromIEW->iewInfo[tid].ldstqCount);
1429    }
1430
1431    while (!PQ.empty()) {
1432
1433        unsigned high_pri = PQ.top();
1434
1435        if (fetchStatus[high_pri] == Running ||
1436            fetchStatus[high_pri] == IcacheAccessComplete ||
1437            fetchStatus[high_pri] == Idle)
1438            return high_pri;
1439        else
1440            PQ.pop();
1441
1442    }
1443
1444    return -1;
1445}
1446
1447template<class Impl>
1448int
1449DefaultFetch<Impl>::branchCount()
1450{
1451    std::list<unsigned>::iterator threads = (*activeThreads).begin();
1452    panic("Branch Count Fetch policy unimplemented\n");
1453    return *threads;
1454}
1455