fetch_impl.hh revision 3536:89aa06409e4d
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 *          Korey Sewell
30 */
31
32#include "config/use_checker.hh"
33
34#include "arch/isa_traits.hh"
35#include "arch/utility.hh"
36#include "cpu/checker/cpu.hh"
37#include "cpu/exetrace.hh"
38#include "cpu/o3/fetch.hh"
39#include "mem/packet.hh"
40#include "mem/request.hh"
41#include "sim/byteswap.hh"
42#include "sim/host.hh"
43#include "sim/root.hh"
44
45#if FULL_SYSTEM
46#include "arch/tlb.hh"
47#include "arch/vtophys.hh"
48#include "sim/system.hh"
49#endif // FULL_SYSTEM
50
51#include <algorithm>
52
53template<class Impl>
54Tick
55DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
56{
57    panic("DefaultFetch doesn't expect recvAtomic callback!");
58    return curTick;
59}
60
61template<class Impl>
62void
63DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
64{
65    warn("Default fetch doesn't update it's state from a functional call.");
66}
67
68template<class Impl>
69void
70DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
71{
72    if (status == RangeChange)
73        return;
74
75    panic("DefaultFetch doesn't expect recvStatusChange callback!");
76}
77
78template<class Impl>
79bool
80DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
81{
82    if (pkt->isResponse()) {
83        fetch->processCacheCompletion(pkt);
84    }
85    //else Snooped a coherence request, just return
86    return true;
87}
88
89template<class Impl>
90void
91DefaultFetch<Impl>::IcachePort::recvRetry()
92{
93    fetch->recvRetry();
94}
95
96template<class Impl>
97DefaultFetch<Impl>::DefaultFetch(Params *params)
98    : branchPred(params),
99      decodeToFetchDelay(params->decodeToFetchDelay),
100      renameToFetchDelay(params->renameToFetchDelay),
101      iewToFetchDelay(params->iewToFetchDelay),
102      commitToFetchDelay(params->commitToFetchDelay),
103      fetchWidth(params->fetchWidth),
104      cacheBlocked(false),
105      retryPkt(NULL),
106      retryTid(-1),
107      numThreads(params->numberOfThreads),
108      numFetchingThreads(params->smtNumFetchingThreads),
109      interruptPending(false),
110      drainPending(false),
111      switchedOut(false)
112{
113    if (numThreads > Impl::MaxThreads)
114        fatal("numThreads is not a valid value\n");
115
116    // Set fetch stage's status to inactive.
117    _status = Inactive;
118
119    std::string policy = params->smtFetchPolicy;
120
121    // Convert string to lowercase
122    std::transform(policy.begin(), policy.end(), policy.begin(),
123                   (int(*)(int)) tolower);
124
125    // Figure out fetch policy
126    if (policy == "singlethread") {
127        fetchPolicy = SingleThread;
128        if (numThreads > 1)
129            panic("Invalid Fetch Policy for a SMT workload.");
130    } else if (policy == "roundrobin") {
131        fetchPolicy = RoundRobin;
132        DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
133    } else if (policy == "branch") {
134        fetchPolicy = Branch;
135        DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
136    } else if (policy == "iqcount") {
137        fetchPolicy = IQ;
138        DPRINTF(Fetch, "Fetch policy set to IQ count\n");
139    } else if (policy == "lsqcount") {
140        fetchPolicy = LSQ;
141        DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
142    } else {
143        fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
144              " RoundRobin,LSQcount,IQcount}\n");
145    }
146
147    // Size of cache block.
148    cacheBlkSize = 64;
149
150    // Create mask to get rid of offset bits.
151    cacheBlkMask = (cacheBlkSize - 1);
152
153    for (int tid=0; tid < numThreads; tid++) {
154
155        fetchStatus[tid] = Running;
156
157        priorityList.push_back(tid);
158
159        memReq[tid] = NULL;
160
161        // Create space to store a cache line.
162        cacheData[tid] = new uint8_t[cacheBlkSize];
163        cacheDataPC[tid] = 0;
164        cacheDataValid[tid] = false;
165
166        delaySlotInfo[tid].branchSeqNum = -1;
167        delaySlotInfo[tid].numInsts = 0;
168        delaySlotInfo[tid].targetAddr = 0;
169        delaySlotInfo[tid].targetReady = false;
170
171        stalls[tid].decode = false;
172        stalls[tid].rename = false;
173        stalls[tid].iew = false;
174        stalls[tid].commit = false;
175    }
176
177    // Get the size of an instruction.
178    instSize = sizeof(TheISA::MachInst);
179}
180
181template <class Impl>
182std::string
183DefaultFetch<Impl>::name() const
184{
185    return cpu->name() + ".fetch";
186}
187
188template <class Impl>
189void
190DefaultFetch<Impl>::regStats()
191{
192    icacheStallCycles
193        .name(name() + ".icacheStallCycles")
194        .desc("Number of cycles fetch is stalled on an Icache miss")
195        .prereq(icacheStallCycles);
196
197    fetchedInsts
198        .name(name() + ".Insts")
199        .desc("Number of instructions fetch has processed")
200        .prereq(fetchedInsts);
201
202    fetchedBranches
203        .name(name() + ".Branches")
204        .desc("Number of branches that fetch encountered")
205        .prereq(fetchedBranches);
206
207    predictedBranches
208        .name(name() + ".predictedBranches")
209        .desc("Number of branches that fetch has predicted taken")
210        .prereq(predictedBranches);
211
212    fetchCycles
213        .name(name() + ".Cycles")
214        .desc("Number of cycles fetch has run and was not squashing or"
215              " blocked")
216        .prereq(fetchCycles);
217
218    fetchSquashCycles
219        .name(name() + ".SquashCycles")
220        .desc("Number of cycles fetch has spent squashing")
221        .prereq(fetchSquashCycles);
222
223    fetchIdleCycles
224        .name(name() + ".IdleCycles")
225        .desc("Number of cycles fetch was idle")
226        .prereq(fetchIdleCycles);
227
228    fetchBlockedCycles
229        .name(name() + ".BlockedCycles")
230        .desc("Number of cycles fetch has spent blocked")
231        .prereq(fetchBlockedCycles);
232
233    fetchedCacheLines
234        .name(name() + ".CacheLines")
235        .desc("Number of cache lines fetched")
236        .prereq(fetchedCacheLines);
237
238    fetchMiscStallCycles
239        .name(name() + ".MiscStallCycles")
240        .desc("Number of cycles fetch has spent waiting on interrupts, or "
241              "bad addresses, or out of MSHRs")
242        .prereq(fetchMiscStallCycles);
243
244    fetchIcacheSquashes
245        .name(name() + ".IcacheSquashes")
246        .desc("Number of outstanding Icache misses that were squashed")
247        .prereq(fetchIcacheSquashes);
248
249    fetchNisnDist
250        .init(/* base value */ 0,
251              /* last value */ fetchWidth,
252              /* bucket size */ 1)
253        .name(name() + ".rateDist")
254        .desc("Number of instructions fetched each cycle (Total)")
255        .flags(Stats::pdf);
256
257    idleRate
258        .name(name() + ".idleRate")
259        .desc("Percent of cycles fetch was idle")
260        .prereq(idleRate);
261    idleRate = fetchIdleCycles * 100 / cpu->numCycles;
262
263    branchRate
264        .name(name() + ".branchRate")
265        .desc("Number of branch fetches per cycle")
266        .flags(Stats::total);
267    branchRate = fetchedBranches / cpu->numCycles;
268
269    fetchRate
270        .name(name() + ".rate")
271        .desc("Number of inst fetches per cycle")
272        .flags(Stats::total);
273    fetchRate = fetchedInsts / cpu->numCycles;
274
275    branchPred.regStats();
276}
277
278template<class Impl>
279void
280DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
281{
282    DPRINTF(Fetch, "Setting the CPU pointer.\n");
283    cpu = cpu_ptr;
284
285    // Name is finally available, so create the port.
286    icachePort = new IcachePort(this);
287
288#if USE_CHECKER
289    if (cpu->checker) {
290        cpu->checker->setIcachePort(icachePort);
291    }
292#endif
293
294    // Schedule fetch to get the correct PC from the CPU
295    // scheduleFetchStartupEvent(1);
296
297    // Fetch needs to start fetching instructions at the very beginning,
298    // so it must start up in active state.
299    switchToActive();
300}
301
302template<class Impl>
303void
304DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
305{
306    DPRINTF(Fetch, "Setting the time buffer pointer.\n");
307    timeBuffer = time_buffer;
308
309    // Create wires to get information from proper places in time buffer.
310    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
311    fromRename = timeBuffer->getWire(-renameToFetchDelay);
312    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
313    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
314}
315
316template<class Impl>
317void
318DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
319{
320    DPRINTF(Fetch, "Setting active threads list pointer.\n");
321    activeThreads = at_ptr;
322}
323
324template<class Impl>
325void
326DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
327{
328    DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
329    fetchQueue = fq_ptr;
330
331    // Create wire to write information to proper place in fetch queue.
332    toDecode = fetchQueue->getWire(0);
333}
334
335template<class Impl>
336void
337DefaultFetch<Impl>::initStage()
338{
339    // Setup PC and nextPC with initial state.
340    for (int tid = 0; tid < numThreads; tid++) {
341        PC[tid] = cpu->readPC(tid);
342        nextPC[tid] = cpu->readNextPC(tid);
343#if ISA_HAS_DELAY_SLOT
344        nextNPC[tid] = cpu->readNextNPC(tid);
345#endif
346    }
347}
348
349template<class Impl>
350void
351DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
352{
353    unsigned tid = pkt->req->getThreadNum();
354
355    DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
356
357    // Only change the status if it's still waiting on the icache access
358    // to return.
359    if (fetchStatus[tid] != IcacheWaitResponse ||
360        pkt->req != memReq[tid] ||
361        isSwitchedOut()) {
362        ++fetchIcacheSquashes;
363        delete pkt->req;
364        delete pkt;
365        return;
366    }
367
368    memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
369    cacheDataValid[tid] = true;
370
371    if (!drainPending) {
372        // Wake up the CPU (if it went to sleep and was waiting on
373        // this completion event).
374        cpu->wakeCPU();
375
376        DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
377                tid);
378
379        switchToActive();
380    }
381
382    // Only switch to IcacheAccessComplete if we're not stalled as well.
383    if (checkStall(tid)) {
384        fetchStatus[tid] = Blocked;
385    } else {
386        fetchStatus[tid] = IcacheAccessComplete;
387    }
388
389    // Reset the mem req to NULL.
390    delete pkt->req;
391    delete pkt;
392    memReq[tid] = NULL;
393}
394
395template <class Impl>
396bool
397DefaultFetch<Impl>::drain()
398{
399    // Fetch is ready to drain at any time.
400    cpu->signalDrained();
401    drainPending = true;
402    return true;
403}
404
405template <class Impl>
406void
407DefaultFetch<Impl>::resume()
408{
409    drainPending = false;
410}
411
412template <class Impl>
413void
414DefaultFetch<Impl>::switchOut()
415{
416    switchedOut = true;
417    // Branch predictor needs to have its state cleared.
418    branchPred.switchOut();
419}
420
421template <class Impl>
422void
423DefaultFetch<Impl>::takeOverFrom()
424{
425    // Reset all state
426    for (int i = 0; i < Impl::MaxThreads; ++i) {
427        stalls[i].decode = 0;
428        stalls[i].rename = 0;
429        stalls[i].iew = 0;
430        stalls[i].commit = 0;
431        PC[i] = cpu->readPC(i);
432        nextPC[i] = cpu->readNextPC(i);
433#if ISA_HAS_DELAY_SLOT
434        nextNPC[i] = cpu->readNextNPC(i);
435        delaySlotInfo[i].branchSeqNum = -1;
436        delaySlotInfo[i].numInsts = 0;
437        delaySlotInfo[i].targetAddr = 0;
438        delaySlotInfo[i].targetReady = false;
439#endif
440        fetchStatus[i] = Running;
441    }
442    numInst = 0;
443    wroteToTimeBuffer = false;
444    _status = Inactive;
445    switchedOut = false;
446    interruptPending = false;
447    branchPred.takeOverFrom();
448}
449
450template <class Impl>
451void
452DefaultFetch<Impl>::wakeFromQuiesce()
453{
454    DPRINTF(Fetch, "Waking up from quiesce\n");
455    // Hopefully this is safe
456    // @todo: Allow other threads to wake from quiesce.
457    fetchStatus[0] = Running;
458}
459
460template <class Impl>
461inline void
462DefaultFetch<Impl>::switchToActive()
463{
464    if (_status == Inactive) {
465        DPRINTF(Activity, "Activating stage.\n");
466
467        cpu->activateStage(O3CPU::FetchIdx);
468
469        _status = Active;
470    }
471}
472
473template <class Impl>
474inline void
475DefaultFetch<Impl>::switchToInactive()
476{
477    if (_status == Active) {
478        DPRINTF(Activity, "Deactivating stage.\n");
479
480        cpu->deactivateStage(O3CPU::FetchIdx);
481
482        _status = Inactive;
483    }
484}
485
486template <class Impl>
487bool
488DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
489                                          Addr &next_NPC)
490{
491    // Do branch prediction check here.
492    // A bit of a misnomer...next_PC is actually the current PC until
493    // this function updates it.
494    bool predict_taken;
495
496    if (!inst->isControl()) {
497#if ISA_HAS_DELAY_SLOT
498        Addr cur_PC = next_PC;
499        next_PC  = cur_PC + instSize;      //next_NPC;
500        next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
501        inst->setPredTarg(next_NPC);
502#else
503        next_PC = next_PC + instSize;
504        inst->setPredTarg(next_PC);
505#endif
506        return false;
507    }
508
509    int tid = inst->threadNumber;
510#if ISA_HAS_DELAY_SLOT
511    Addr pred_PC = next_PC;
512    predict_taken = branchPred.predict(inst, pred_PC, tid);
513
514    if (predict_taken) {
515        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
516    } else {
517        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
518    }
519
520    if (predict_taken) {
521        next_PC = next_NPC;
522        next_NPC = pred_PC;
523
524        // Update delay slot info
525        ++delaySlotInfo[tid].numInsts;
526        delaySlotInfo[tid].targetAddr = pred_PC;
527        DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
528                delaySlotInfo[tid].numInsts);
529    } else { // !predict_taken
530        if (inst->isCondDelaySlot()) {
531            next_PC = pred_PC;
532            // The delay slot is skipped here if there is on
533            // prediction
534        } else {
535            next_PC = next_NPC;
536            // No need to declare a delay slot here since
537            // there is no for the pred. target to jump
538        }
539
540        next_NPC = next_NPC + instSize;
541    }
542#else
543    predict_taken = branchPred.predict(inst, next_PC, tid);
544#endif
545
546    ++fetchedBranches;
547
548    if (predict_taken) {
549        ++predictedBranches;
550    }
551
552    return predict_taken;
553}
554
555template <class Impl>
556bool
557DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid)
558{
559    Fault fault = NoFault;
560
561    //AlphaDep
562    if (cacheBlocked || isSwitchedOut() ||
563            (interruptPending && (fetch_PC & 0x3))) {
564        // Hold off fetch from getting new instructions when:
565        // Cache is blocked, or
566        // while an interrupt is pending and we're not in PAL mode, or
567        // fetch is switched out.
568        return false;
569    }
570
571    // Align the fetch PC so it's at the start of a cache block.
572    fetch_PC = icacheBlockAlignPC(fetch_PC);
573
574    // If we've already got the block, no need to try to fetch it again.
575    if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
576        return true;
577    }
578
579    // Setup the memReq to do a read of the first instruction's address.
580    // Set the appropriate read size and flags as well.
581    // Build request here.
582    RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, 0,
583                                     fetch_PC, cpu->readCpuId(), tid);
584
585    memReq[tid] = mem_req;
586
587    // Translate the instruction request.
588    fault = cpu->translateInstReq(mem_req, cpu->thread[tid]);
589
590    // In the case of faults, the fetch stage may need to stall and wait
591    // for the ITB miss to be handled.
592
593    // If translation was successful, attempt to read the first
594    // instruction.
595    if (fault == NoFault) {
596#if 0
597        if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
598            memReq[tid]->isUncacheable()) {
599            DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
600                    "misspeculating path)!",
601                    memReq[tid]->paddr);
602            ret_fault = TheISA::genMachineCheckFault();
603            return false;
604        }
605#endif
606
607        // Build packet here.
608        PacketPtr data_pkt = new Packet(mem_req,
609                                        Packet::ReadReq, Packet::Broadcast);
610        data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
611
612        cacheDataPC[tid] = fetch_PC;
613        cacheDataValid[tid] = false;
614
615        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
616
617        fetchedCacheLines++;
618
619        // Now do the timing access to see whether or not the instruction
620        // exists within the cache.
621        if (!icachePort->sendTiming(data_pkt)) {
622            if (data_pkt->result == Packet::BadAddress) {
623                fault = TheISA::genMachineCheckFault();
624                delete mem_req;
625                memReq[tid] = NULL;
626            }
627            assert(retryPkt == NULL);
628            assert(retryTid == -1);
629            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
630            fetchStatus[tid] = IcacheWaitRetry;
631            retryPkt = data_pkt;
632            retryTid = tid;
633            cacheBlocked = true;
634            return false;
635        }
636
637        DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid);
638
639        lastIcacheStall[tid] = curTick;
640
641        DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
642                "response.\n", tid);
643
644        fetchStatus[tid] = IcacheWaitResponse;
645    } else {
646        delete mem_req;
647        memReq[tid] = NULL;
648    }
649
650    ret_fault = fault;
651    return true;
652}
653
654template <class Impl>
655inline void
656DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
657{
658    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
659            tid, new_PC);
660
661    PC[tid] = new_PC;
662    nextPC[tid] = new_PC + instSize;
663    nextNPC[tid] = new_PC + (2 * instSize);
664
665    // Clear the icache miss if it's outstanding.
666    if (fetchStatus[tid] == IcacheWaitResponse) {
667        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
668                tid);
669        memReq[tid] = NULL;
670    }
671
672    // Get rid of the retrying packet if it was from this thread.
673    if (retryTid == tid) {
674        assert(cacheBlocked);
675        cacheBlocked = false;
676        retryTid = -1;
677        delete retryPkt->req;
678        delete retryPkt;
679        retryPkt = NULL;
680    }
681
682    fetchStatus[tid] = Squashing;
683
684    ++fetchSquashCycles;
685}
686
687template<class Impl>
688void
689DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
690                                     const InstSeqNum &seq_num,
691                                     unsigned tid)
692{
693    DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
694
695    doSquash(new_PC, tid);
696
697#if ISA_HAS_DELAY_SLOT
698    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
699        delaySlotInfo[tid].numInsts = 0;
700        delaySlotInfo[tid].targetAddr = 0;
701        delaySlotInfo[tid].targetReady = false;
702    }
703#endif
704
705    // Tell the CPU to remove any instructions that are in flight between
706    // fetch and decode.
707    cpu->removeInstsUntil(seq_num, tid);
708}
709
710template<class Impl>
711bool
712DefaultFetch<Impl>::checkStall(unsigned tid) const
713{
714    bool ret_val = false;
715
716    if (cpu->contextSwitch) {
717        DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
718        ret_val = true;
719    } else if (stalls[tid].decode) {
720        DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
721        ret_val = true;
722    } else if (stalls[tid].rename) {
723        DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
724        ret_val = true;
725    } else if (stalls[tid].iew) {
726        DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
727        ret_val = true;
728    } else if (stalls[tid].commit) {
729        DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
730        ret_val = true;
731    }
732
733    return ret_val;
734}
735
736template<class Impl>
737typename DefaultFetch<Impl>::FetchStatus
738DefaultFetch<Impl>::updateFetchStatus()
739{
740    //Check Running
741    std::list<unsigned>::iterator threads = (*activeThreads).begin();
742
743    while (threads != (*activeThreads).end()) {
744
745        unsigned tid = *threads++;
746
747        if (fetchStatus[tid] == Running ||
748            fetchStatus[tid] == Squashing ||
749            fetchStatus[tid] == IcacheAccessComplete) {
750
751            if (_status == Inactive) {
752                DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
753
754                if (fetchStatus[tid] == IcacheAccessComplete) {
755                    DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
756                            "completion\n",tid);
757                }
758
759                cpu->activateStage(O3CPU::FetchIdx);
760            }
761
762            return Active;
763        }
764    }
765
766    // Stage is switching from active to inactive, notify CPU of it.
767    if (_status == Active) {
768        DPRINTF(Activity, "Deactivating stage.\n");
769
770        cpu->deactivateStage(O3CPU::FetchIdx);
771    }
772
773    return Inactive;
774}
775
776template <class Impl>
777void
778DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
779                           bool squash_delay_slot, unsigned tid)
780{
781    DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
782
783    doSquash(new_PC, tid);
784
785#if ISA_HAS_DELAY_SLOT
786    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
787        delaySlotInfo[tid].numInsts = 0;
788        delaySlotInfo[tid].targetAddr = 0;
789        delaySlotInfo[tid].targetReady = false;
790    }
791
792    // Tell the CPU to remove any instructions that are not in the ROB.
793    cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
794#else
795    // Tell the CPU to remove any instructions that are not in the ROB.
796    cpu->removeInstsNotInROB(tid, true, 0);
797#endif
798}
799
800template <class Impl>
801void
802DefaultFetch<Impl>::tick()
803{
804    std::list<unsigned>::iterator threads = (*activeThreads).begin();
805    bool status_change = false;
806
807    wroteToTimeBuffer = false;
808
809    while (threads != (*activeThreads).end()) {
810        unsigned tid = *threads++;
811
812        // Check the signals for each thread to determine the proper status
813        // for each thread.
814        bool updated_status = checkSignalsAndUpdate(tid);
815        status_change =  status_change || updated_status;
816    }
817
818    DPRINTF(Fetch, "Running stage.\n");
819
820    // Reset the number of the instruction we're fetching.
821    numInst = 0;
822
823#if FULL_SYSTEM
824    if (fromCommit->commitInfo[0].interruptPending) {
825        interruptPending = true;
826    }
827
828    if (fromCommit->commitInfo[0].clearInterrupt) {
829        interruptPending = false;
830    }
831#endif
832
833    for (threadFetched = 0; threadFetched < numFetchingThreads;
834         threadFetched++) {
835        // Fetch each of the actively fetching threads.
836        fetch(status_change);
837    }
838
839    // Record number of instructions fetched this cycle for distribution.
840    fetchNisnDist.sample(numInst);
841
842    if (status_change) {
843        // Change the fetch stage status if there was a status change.
844        _status = updateFetchStatus();
845    }
846
847    // If there was activity this cycle, inform the CPU of it.
848    if (wroteToTimeBuffer || cpu->contextSwitch) {
849        DPRINTF(Activity, "Activity this cycle.\n");
850
851        cpu->activityThisCycle();
852    }
853}
854
855template <class Impl>
856bool
857DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
858{
859    // Update the per thread stall statuses.
860    if (fromDecode->decodeBlock[tid]) {
861        stalls[tid].decode = true;
862    }
863
864    if (fromDecode->decodeUnblock[tid]) {
865        assert(stalls[tid].decode);
866        assert(!fromDecode->decodeBlock[tid]);
867        stalls[tid].decode = false;
868    }
869
870    if (fromRename->renameBlock[tid]) {
871        stalls[tid].rename = true;
872    }
873
874    if (fromRename->renameUnblock[tid]) {
875        assert(stalls[tid].rename);
876        assert(!fromRename->renameBlock[tid]);
877        stalls[tid].rename = false;
878    }
879
880    if (fromIEW->iewBlock[tid]) {
881        stalls[tid].iew = true;
882    }
883
884    if (fromIEW->iewUnblock[tid]) {
885        assert(stalls[tid].iew);
886        assert(!fromIEW->iewBlock[tid]);
887        stalls[tid].iew = false;
888    }
889
890    if (fromCommit->commitBlock[tid]) {
891        stalls[tid].commit = true;
892    }
893
894    if (fromCommit->commitUnblock[tid]) {
895        assert(stalls[tid].commit);
896        assert(!fromCommit->commitBlock[tid]);
897        stalls[tid].commit = false;
898    }
899
900    // Check squash signals from commit.
901    if (fromCommit->commitInfo[tid].squash) {
902
903        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
904                "from commit.\n",tid);
905
906#if ISA_HAS_DELAY_SLOT
907    InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
908#else
909    InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
910#endif
911        // In any case, squash.
912        squash(fromCommit->commitInfo[tid].nextPC,
913               doneSeqNum,
914               fromCommit->commitInfo[tid].squashDelaySlot,
915               tid);
916
917        // Also check if there's a mispredict that happened.
918        if (fromCommit->commitInfo[tid].branchMispredict) {
919            branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
920                              fromCommit->commitInfo[tid].nextPC,
921                              fromCommit->commitInfo[tid].branchTaken,
922                              tid);
923        } else {
924            branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
925                              tid);
926        }
927
928        return true;
929    } else if (fromCommit->commitInfo[tid].doneSeqNum) {
930        // Update the branch predictor if it wasn't a squashed instruction
931        // that was broadcasted.
932        branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
933    }
934
935    // Check ROB squash signals from commit.
936    if (fromCommit->commitInfo[tid].robSquashing) {
937        DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);
938
939        // Continue to squash.
940        fetchStatus[tid] = Squashing;
941
942        return true;
943    }
944
945    // Check squash signals from decode.
946    if (fromDecode->decodeInfo[tid].squash) {
947        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
948                "from decode.\n",tid);
949
950        // Update the branch predictor.
951        if (fromDecode->decodeInfo[tid].branchMispredict) {
952            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
953                              fromDecode->decodeInfo[tid].nextPC,
954                              fromDecode->decodeInfo[tid].branchTaken,
955                              tid);
956        } else {
957            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
958                              tid);
959        }
960
961        if (fetchStatus[tid] != Squashing) {
962
963#if ISA_HAS_DELAY_SLOT
964            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
965#else
966            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
967#endif
968            // Squash unless we're already squashing
969            squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
970                             doneSeqNum,
971                             tid);
972
973            return true;
974        }
975    }
976
977    if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) {
978        DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
979
980        fetchStatus[tid] = Blocked;
981
982        return true;
983    }
984
985    if (fetchStatus[tid] == Blocked ||
986        fetchStatus[tid] == Squashing) {
987        // Switch status to running if fetch isn't being told to block or
988        // squash this cycle.
989        DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
990                tid);
991
992        fetchStatus[tid] = Running;
993
994        return true;
995    }
996
997    // If we've reached this point, we have not gotten any signals that
998    // cause fetch to change its status.  Fetch remains the same as before.
999    return false;
1000}
1001
1002template<class Impl>
1003void
1004DefaultFetch<Impl>::fetch(bool &status_change)
1005{
1006    //////////////////////////////////////////
1007    // Start actual fetch
1008    //////////////////////////////////////////
1009    int tid = getFetchingThread(fetchPolicy);
1010
1011    if (tid == -1 || drainPending) {
1012        DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1013
1014        // Breaks looping condition in tick()
1015        threadFetched = numFetchingThreads;
1016        return;
1017    }
1018
1019    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1020
1021    // The current PC.
1022    Addr &fetch_PC = PC[tid];
1023
1024    // Fault code for memory access.
1025    Fault fault = NoFault;
1026
1027    // If returning from the delay of a cache miss, then update the status
1028    // to running, otherwise do the cache access.  Possibly move this up
1029    // to tick() function.
1030    if (fetchStatus[tid] == IcacheAccessComplete) {
1031        DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
1032                tid);
1033
1034        fetchStatus[tid] = Running;
1035        status_change = true;
1036    } else if (fetchStatus[tid] == Running) {
1037        DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
1038                "instruction, starting at PC %08p.\n",
1039                tid, fetch_PC);
1040
1041        bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
1042        if (!fetch_success) {
1043            if (cacheBlocked) {
1044                ++icacheStallCycles;
1045            } else {
1046                ++fetchMiscStallCycles;
1047            }
1048            return;
1049        }
1050    } else {
1051        if (fetchStatus[tid] == Idle) {
1052            ++fetchIdleCycles;
1053        } else if (fetchStatus[tid] == Blocked) {
1054            ++fetchBlockedCycles;
1055        } else if (fetchStatus[tid] == Squashing) {
1056            ++fetchSquashCycles;
1057        } else if (fetchStatus[tid] == IcacheWaitResponse) {
1058            ++icacheStallCycles;
1059        }
1060
1061        // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
1062        // fetch should do nothing.
1063        return;
1064    }
1065
1066    ++fetchCycles;
1067
1068    // If we had a stall due to an icache miss, then return.
1069    if (fetchStatus[tid] == IcacheWaitResponse) {
1070        ++icacheStallCycles;
1071        status_change = true;
1072        return;
1073    }
1074
1075    Addr next_PC = fetch_PC;
1076    Addr next_NPC = next_PC + instSize;
1077    InstSeqNum inst_seq;
1078    MachInst inst;
1079    ExtMachInst ext_inst;
1080    // @todo: Fix this hack.
1081    unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
1082
1083    if (fault == NoFault) {
1084        // If the read of the first instruction was successful, then grab the
1085        // instructions from the rest of the cache line and put them into the
1086        // queue heading to decode.
1087
1088        DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
1089                "decode.\n",tid);
1090
1091        // Need to keep track of whether or not a predicted branch
1092        // ended this fetch block.
1093        bool predicted_branch = false;
1094
1095        // Need to keep track of whether or not a delay slot
1096        // instruction has been fetched
1097
1098        for (;
1099             offset < cacheBlkSize &&
1100                 numInst < fetchWidth &&
1101                 (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
1102             ++numInst) {
1103
1104            // Get a sequence number.
1105            inst_seq = cpu->getAndIncrementInstSeq();
1106
1107            // Make sure this is a valid index.
1108            assert(offset <= cacheBlkSize - instSize);
1109
1110            // Get the instruction from the array of the cache line.
1111            inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
1112                        (&cacheData[tid][offset]));
1113
1114#if THE_ISA == ALPHA_ISA
1115            ext_inst = TheISA::makeExtMI(inst, fetch_PC);
1116#elif THE_ISA == SPARC_ISA
1117            ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC());
1118#endif
1119
1120            // Create a new DynInst from the instruction fetched.
1121            DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
1122                                                 next_PC,
1123                                                 inst_seq, cpu);
1124            instruction->setTid(tid);
1125
1126            instruction->setASID(tid);
1127
1128            instruction->setThreadState(cpu->thread[tid]);
1129
1130            DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
1131                    "[sn:%lli]\n",
1132                    tid, instruction->readPC(), inst_seq);
1133
1134            DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
1135                    tid, instruction->staticInst->disassemble(fetch_PC));
1136
1137            instruction->traceData =
1138                Trace::getInstRecord(curTick, cpu->tcBase(tid),
1139                                     instruction->staticInst,
1140                                     instruction->readPC());
1141
1142            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
1143                                                     next_NPC);
1144
1145            // Add instruction to the CPU's list of instructions.
1146            instruction->setInstListIt(cpu->addInst(instruction));
1147
1148            // Write the instruction to the first slot in the queue
1149            // that heads to decode.
1150            toDecode->insts[numInst] = instruction;
1151
1152            toDecode->size++;
1153
1154            // Increment stat of fetched instructions.
1155            ++fetchedInsts;
1156
1157            // Move to the next instruction, unless we have a branch.
1158            fetch_PC = next_PC;
1159
1160            if (instruction->isQuiesce()) {
1161//                warn("%lli: Quiesce instruction encountered, halting fetch!",
1162//                     curTick);
1163                fetchStatus[tid] = QuiescePending;
1164                ++numInst;
1165                status_change = true;
1166                break;
1167            }
1168
1169            offset += instSize;
1170
1171#if ISA_HAS_DELAY_SLOT
1172            if (predicted_branch) {
1173                delaySlotInfo[tid].branchSeqNum = inst_seq;
1174
1175                DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
1176                        tid, inst_seq);
1177                continue;
1178            } else if (delaySlotInfo[tid].numInsts > 0) {
1179                --delaySlotInfo[tid].numInsts;
1180
1181                // It's OK to set PC to target of branch
1182                if (delaySlotInfo[tid].numInsts == 0) {
1183                    delaySlotInfo[tid].targetReady = true;
1184
1185                    // Break the looping condition
1186                    predicted_branch = true;
1187                }
1188
1189                DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
1190                        " process.\n", tid, delaySlotInfo[tid].numInsts);
1191            }
1192#endif
1193        }
1194
1195        if (offset >= cacheBlkSize) {
1196            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
1197                    "block.\n", tid);
1198        } else if (numInst >= fetchWidth) {
1199            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
1200                    "for this cycle.\n", tid);
1201        } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
1202            DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
1203                    "instruction encountered.\n", tid);
1204        }
1205    }
1206
1207    if (numInst > 0) {
1208        wroteToTimeBuffer = true;
1209    }
1210
1211    // Now that fetching is completed, update the PC to signify what the next
1212    // cycle will be.
1213    if (fault == NoFault) {
1214#if ISA_HAS_DELAY_SLOT
1215        if (delaySlotInfo[tid].targetReady &&
1216            delaySlotInfo[tid].numInsts == 0) {
1217            // Set PC to target
1218            PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
1219            nextPC[tid] = next_PC + instSize;        //next_NPC
1220            nextNPC[tid] = next_PC + (2 * instSize);
1221
1222            delaySlotInfo[tid].targetReady = false;
1223        } else {
1224            PC[tid] = next_PC;
1225            nextPC[tid] = next_NPC;
1226            nextNPC[tid] = next_NPC + instSize;
1227        }
1228
1229        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
1230#else
1231        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
1232        PC[tid] = next_PC;
1233        nextPC[tid] = next_PC + instSize;
1234#endif
1235    } else {
1236        // We shouldn't be in an icache miss and also have a fault (an ITB
1237        // miss)
1238        if (fetchStatus[tid] == IcacheWaitResponse) {
1239            panic("Fetch should have exited prior to this!");
1240        }
1241
1242        // Send the fault to commit.  This thread will not do anything
1243        // until commit handles the fault.  The only other way it can
1244        // wake up is if a squash comes along and changes the PC.
1245#if FULL_SYSTEM
1246        assert(numInst != fetchWidth);
1247        // Get a sequence number.
1248        inst_seq = cpu->getAndIncrementInstSeq();
1249        // We will use a nop in order to carry the fault.
1250        ext_inst = TheISA::NoopMachInst;
1251
1252        // Create a new DynInst from the dummy nop.
1253        DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
1254                                             next_PC,
1255                                             inst_seq, cpu);
1256        instruction->setPredTarg(next_PC + instSize);
1257        instruction->setTid(tid);
1258
1259        instruction->setASID(tid);
1260
1261        instruction->setThreadState(cpu->thread[tid]);
1262
1263        instruction->traceData = NULL;
1264
1265        instruction->setInstListIt(cpu->addInst(instruction));
1266
1267        instruction->fault = fault;
1268
1269        toDecode->insts[numInst] = instruction;
1270        toDecode->size++;
1271
1272        DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);
1273
1274        fetchStatus[tid] = TrapPending;
1275        status_change = true;
1276
1277//        warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
1278#else // !FULL_SYSTEM
1279        warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
1280#endif // FULL_SYSTEM
1281    }
1282}
1283
1284template<class Impl>
1285void
1286DefaultFetch<Impl>::recvRetry()
1287{
1288    if (retryPkt != NULL) {
1289        assert(cacheBlocked);
1290        assert(retryTid != -1);
1291        assert(fetchStatus[retryTid] == IcacheWaitRetry);
1292
1293        if (icachePort->sendTiming(retryPkt)) {
1294            fetchStatus[retryTid] = IcacheWaitResponse;
1295            retryPkt = NULL;
1296            retryTid = -1;
1297            cacheBlocked = false;
1298        }
1299    } else {
1300        assert(retryTid == -1);
1301        // Access has been squashed since it was sent out.  Just clear
1302        // the cache being blocked.
1303        cacheBlocked = false;
1304    }
1305}
1306
1307///////////////////////////////////////
1308//                                   //
1309//  SMT FETCH POLICY MAINTAINED HERE //
1310//                                   //
1311///////////////////////////////////////
1312template<class Impl>
1313int
1314DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
1315{
1316    if (numThreads > 1) {
1317        switch (fetch_priority) {
1318
1319          case SingleThread:
1320            return 0;
1321
1322          case RoundRobin:
1323            return roundRobin();
1324
1325          case IQ:
1326            return iqCount();
1327
1328          case LSQ:
1329            return lsqCount();
1330
1331          case Branch:
1332            return branchCount();
1333
1334          default:
1335            return -1;
1336        }
1337    } else {
1338        int tid = *((*activeThreads).begin());
1339
1340        if (fetchStatus[tid] == Running ||
1341            fetchStatus[tid] == IcacheAccessComplete ||
1342            fetchStatus[tid] == Idle) {
1343            return tid;
1344        } else {
1345            return -1;
1346        }
1347    }
1348
1349}
1350
1351
1352template<class Impl>
1353int
1354DefaultFetch<Impl>::roundRobin()
1355{
1356    std::list<unsigned>::iterator pri_iter = priorityList.begin();
1357    std::list<unsigned>::iterator end      = priorityList.end();
1358
1359    int high_pri;
1360
1361    while (pri_iter != end) {
1362        high_pri = *pri_iter;
1363
1364        assert(high_pri <= numThreads);
1365
1366        if (fetchStatus[high_pri] == Running ||
1367            fetchStatus[high_pri] == IcacheAccessComplete ||
1368            fetchStatus[high_pri] == Idle) {
1369
1370            priorityList.erase(pri_iter);
1371            priorityList.push_back(high_pri);
1372
1373            return high_pri;
1374        }
1375
1376        pri_iter++;
1377    }
1378
1379    return -1;
1380}
1381
1382template<class Impl>
1383int
1384DefaultFetch<Impl>::iqCount()
1385{
1386    std::priority_queue<unsigned> PQ;
1387
1388    std::list<unsigned>::iterator threads = (*activeThreads).begin();
1389
1390    while (threads != (*activeThreads).end()) {
1391        unsigned tid = *threads++;
1392
1393        PQ.push(fromIEW->iewInfo[tid].iqCount);
1394    }
1395
1396    while (!PQ.empty()) {
1397
1398        unsigned high_pri = PQ.top();
1399
1400        if (fetchStatus[high_pri] == Running ||
1401            fetchStatus[high_pri] == IcacheAccessComplete ||
1402            fetchStatus[high_pri] == Idle)
1403            return high_pri;
1404        else
1405            PQ.pop();
1406
1407    }
1408
1409    return -1;
1410}
1411
1412template<class Impl>
1413int
1414DefaultFetch<Impl>::lsqCount()
1415{
1416    std::priority_queue<unsigned> PQ;
1417
1418
1419    std::list<unsigned>::iterator threads = (*activeThreads).begin();
1420
1421    while (threads != (*activeThreads).end()) {
1422        unsigned tid = *threads++;
1423
1424        PQ.push(fromIEW->iewInfo[tid].ldstqCount);
1425    }
1426
1427    while (!PQ.empty()) {
1428
1429        unsigned high_pri = PQ.top();
1430
1431        if (fetchStatus[high_pri] == Running ||
1432            fetchStatus[high_pri] == IcacheAccessComplete ||
1433            fetchStatus[high_pri] == Idle)
1434            return high_pri;
1435        else
1436            PQ.pop();
1437
1438    }
1439
1440    return -1;
1441}
1442
1443template<class Impl>
1444int
1445DefaultFetch<Impl>::branchCount()
1446{
1447    std::list<unsigned>::iterator threads = (*activeThreads).begin();
1448    panic("Branch Count Fetch policy unimplemented\n");
1449    return *threads;
1450}
1451