fetch_impl.hh revision 13559:e9983a972327
14776Sgblack@eecs.umich.edu/*
26365Sgblack@eecs.umich.edu * Copyright (c) 2010-2014 ARM Limited
34776Sgblack@eecs.umich.edu * Copyright (c) 2012-2013 AMD
44776Sgblack@eecs.umich.edu * All rights reserved.
54776Sgblack@eecs.umich.edu *
64776Sgblack@eecs.umich.edu * The license below extends only to copyright in the software and shall
74776Sgblack@eecs.umich.edu * not be construed as granting a license to any other intellectual
84776Sgblack@eecs.umich.edu * property including but not limited to intellectual property relating
94776Sgblack@eecs.umich.edu * to a hardware implementation of the functionality of the software
104776Sgblack@eecs.umich.edu * licensed hereunder.  You may use the software subject to the license
114776Sgblack@eecs.umich.edu * terms below provided that you ensure that this notice is replicated
124776Sgblack@eecs.umich.edu * unmodified and in its entirety in all distributions of the software,
134776Sgblack@eecs.umich.edu * modified or unmodified, in source code or in binary form.
144776Sgblack@eecs.umich.edu *
154776Sgblack@eecs.umich.edu * Copyright (c) 2004-2006 The Regents of The University of Michigan
164776Sgblack@eecs.umich.edu * All rights reserved.
174776Sgblack@eecs.umich.edu *
184776Sgblack@eecs.umich.edu * Redistribution and use in source and binary forms, with or without
194776Sgblack@eecs.umich.edu * modification, are permitted provided that the following conditions are
204776Sgblack@eecs.umich.edu * met: redistributions of source code must retain the above copyright
214776Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer;
224776Sgblack@eecs.umich.edu * redistributions in binary form must reproduce the above copyright
234776Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer in the
244776Sgblack@eecs.umich.edu * documentation and/or other materials provided with the distribution;
254776Sgblack@eecs.umich.edu * neither the name of the copyright holders nor the names of its
264776Sgblack@eecs.umich.edu * contributors may be used to endorse or promote products derived from
274776Sgblack@eecs.umich.edu * this software without specific prior written permission.
286365Sgblack@eecs.umich.edu *
294776Sgblack@eecs.umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
304776Sgblack@eecs.umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
314776Sgblack@eecs.umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
324776Sgblack@eecs.umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
334776Sgblack@eecs.umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
344776Sgblack@eecs.umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
354776Sgblack@eecs.umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
364776Sgblack@eecs.umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
374776Sgblack@eecs.umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
384776Sgblack@eecs.umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
394776Sgblack@eecs.umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
405523Snate@binkert.org *
416409Sgblack@eecs.umich.edu * Authors: Kevin Lim
424776Sgblack@eecs.umich.edu *          Korey Sewell
435523Snate@binkert.org */
445523Snate@binkert.org
455523Snate@binkert.org#ifndef __CPU_O3_FETCH_IMPL_HH__
464776Sgblack@eecs.umich.edu#define __CPU_O3_FETCH_IMPL_HH__
474776Sgblack@eecs.umich.edu
484776Sgblack@eecs.umich.edu#include <algorithm>
494776Sgblack@eecs.umich.edu#include <cstring>
504776Sgblack@eecs.umich.edu#include <list>
514776Sgblack@eecs.umich.edu#include <map>
524776Sgblack@eecs.umich.edu#include <queue>
534776Sgblack@eecs.umich.edu
545049Sgblack@eecs.umich.edu#include "arch/generic/tlb.hh"
555049Sgblack@eecs.umich.edu#include "arch/isa_traits.hh"
564776Sgblack@eecs.umich.edu#include "arch/utility.hh"
574776Sgblack@eecs.umich.edu#include "arch/vtophys.hh"
584776Sgblack@eecs.umich.edu#include "base/random.hh"
594776Sgblack@eecs.umich.edu#include "base/types.hh"
604776Sgblack@eecs.umich.edu#include "config/the_isa.hh"
616365Sgblack@eecs.umich.edu#include "cpu/base.hh"
626365Sgblack@eecs.umich.edu//#include "cpu/checker/cpu.hh"
634830Sgblack@eecs.umich.edu#include "cpu/o3/fetch.hh"
644830Sgblack@eecs.umich.edu#include "cpu/exetrace.hh"
657811Ssteve.reinhardt@amd.com#include "debug/Activity.hh"
66#include "debug/Drain.hh"
67#include "debug/Fetch.hh"
68#include "debug/O3PipeView.hh"
69#include "mem/packet.hh"
70#include "params/DerivO3CPU.hh"
71#include "sim/byteswap.hh"
72#include "sim/core.hh"
73#include "sim/eventq.hh"
74#include "sim/full_system.hh"
75#include "sim/system.hh"
76#include "cpu/o3/isa_specific.hh"
77
78using namespace std;
79
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
    : fetchPolicy(params->smtFetchPolicy),
      cpu(_cpu),
      branchPred(nullptr),
      decodeToFetchDelay(params->decodeToFetchDelay),
      renameToFetchDelay(params->renameToFetchDelay),
      iewToFetchDelay(params->iewToFetchDelay),
      commitToFetchDelay(params->commitToFetchDelay),
      fetchWidth(params->fetchWidth),
      decodeWidth(params->decodeWidth),
      retryPkt(NULL),
      retryTid(InvalidThreadID),
      cacheBlkSize(cpu->cacheLineSize()),
      fetchBufferSize(params->fetchBufferSize),
      // NOTE(review): the mask is only meaningful if fetchBufferSize is a
      // power of two; the divisibility check below does not fully enforce
      // that -- confirm the parameter is validated at configuration time.
      fetchBufferMask(fetchBufferSize - 1),
      fetchQueueSize(params->fetchQueueSize),
      numThreads(params->numThreads),
      numFetchingThreads(params->smtNumFetchingThreads),
      finishTranslationEvent(this)
{
    // Reject configurations that exceed the compile-time limits or that
    // don't fit the cache line geometry.
    if (numThreads > Impl::MaxThreads)
        fatal("numThreads (%d) is larger than compiled limit (%d),\n"
              "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
              numThreads, static_cast<int>(Impl::MaxThreads));
    if (fetchWidth > Impl::MaxWidth)
        fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
             "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
             fetchWidth, static_cast<int>(Impl::MaxWidth));
    if (fetchBufferSize > cacheBlkSize)
        fatal("fetch buffer size (%u bytes) is greater than the cache "
              "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
    if (cacheBlkSize % fetchBufferSize)
        fatal("cache block (%u bytes) is not a multiple of the "
              "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);

    // Figure out fetch policy
    panic_if(fetchPolicy == FetchPolicy::SingleThread && numThreads > 1,
             "Invalid Fetch Policy for a SMT workload.");

    // Get the size of an instruction.
    instSize = sizeof(TheISA::MachInst);

    // Initialize all MaxThreads slots (not just numThreads) so unused
    // entries hold well-defined values.
    for (int i = 0; i < Impl::MaxThreads; i++) {
        fetchStatus[i] = Idle;
        decoder[i] = nullptr;
        pc[i] = 0;
        fetchOffset[i] = 0;
        macroop[i] = nullptr;
        delayedCommit[i] = false;
        memReq[i] = nullptr;
        stalls[i] = {false, false};
        fetchBuffer[i] = NULL;
        fetchBufferPC[i] = 0;
        fetchBufferValid[i] = false;
        lastIcacheStall[i] = 0;
        issuePipelinedIfetch[i] = false;
    }

    branchPred = params->branchPred;

    // Per active thread: an ISA decoder and a fetch buffer.
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        decoder[tid] = new TheISA::Decoder(params->isa[tid]);
        // Create space to buffer the cache line data,
        // which may not hold the entire cache line.
        fetchBuffer[tid] = new uint8_t[fetchBufferSize];
    }
}
148
149template <class Impl>
150std::string
151DefaultFetch<Impl>::name() const
152{
153    return cpu->name() + ".fetch";
154}
155
156template <class Impl>
157void
158DefaultFetch<Impl>::regProbePoints()
159{
160    ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
161    ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
162                                                       "FetchRequest");
163
164}
165
template <class Impl>
void
DefaultFetch<Impl>::regStats()
{
    // Register all fetch-stage statistics with the stats framework.
    // .prereq(stat) means the stat is only printed if it is nonzero.

    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".Insts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);

    fetchedBranches
        .name(name() + ".Branches")
        .desc("Number of branches that fetch encountered")
        .prereq(fetchedBranches);

    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);

    fetchCycles
        .name(name() + ".Cycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);

    fetchSquashCycles
        .name(name() + ".SquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);

    fetchTlbCycles
        .name(name() + ".TlbCycles")
        .desc("Number of cycles fetch has spent waiting for tlb")
        .prereq(fetchTlbCycles);

    fetchIdleCycles
        .name(name() + ".IdleCycles")
        .desc("Number of cycles fetch was idle")
        .prereq(fetchIdleCycles);

    fetchBlockedCycles
        .name(name() + ".BlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);

    fetchedCacheLines
        .name(name() + ".CacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetchMiscStallCycles
        .name(name() + ".MiscStallCycles")
        .desc("Number of cycles fetch has spent waiting on interrupts, or "
              "bad addresses, or out of MSHRs")
        .prereq(fetchMiscStallCycles);

    fetchPendingDrainCycles
        .name(name() + ".PendingDrainCycles")
        .desc("Number of cycles fetch has spent waiting on pipes to drain")
        .prereq(fetchPendingDrainCycles);

    fetchNoActiveThreadStallCycles
        .name(name() + ".NoActiveThreadStallCycles")
        .desc("Number of stall cycles due to no active thread to fetch from")
        .prereq(fetchNoActiveThreadStallCycles);

    fetchPendingTrapStallCycles
        .name(name() + ".PendingTrapStallCycles")
        .desc("Number of stall cycles due to pending traps")
        .prereq(fetchPendingTrapStallCycles);

    fetchPendingQuiesceStallCycles
        .name(name() + ".PendingQuiesceStallCycles")
        .desc("Number of stall cycles due to pending quiesce instructions")
        .prereq(fetchPendingQuiesceStallCycles);

    fetchIcacheWaitRetryStallCycles
        .name(name() + ".IcacheWaitRetryStallCycles")
        .desc("Number of stall cycles due to full MSHR")
        .prereq(fetchIcacheWaitRetryStallCycles);

    fetchIcacheSquashes
        .name(name() + ".IcacheSquashes")
        .desc("Number of outstanding Icache misses that were squashed")
        .prereq(fetchIcacheSquashes);

    fetchTlbSquashes
        .name(name() + ".ItlbSquashes")
        .desc("Number of outstanding ITLB misses that were squashed")
        .prereq(fetchTlbSquashes);

    // Distribution of instructions fetched per cycle, one bucket per count
    // from 0 up to the configured fetch width.
    fetchNisnDist
        .init(/* base value */ 0,
              /* last value */ fetchWidth,
              /* bucket size */ 1)
        .name(name() + ".rateDist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf);

    // Derived (formula) stats computed from the counters above.
    idleRate
        .name(name() + ".idleRate")
        .desc("Percent of cycles fetch was idle")
        .prereq(idleRate);
    idleRate = fetchIdleCycles * 100 / cpu->numCycles;

    branchRate
        .name(name() + ".branchRate")
        .desc("Number of branch fetches per cycle")
        .flags(Stats::total);
    branchRate = fetchedBranches / cpu->numCycles;

    fetchRate
        .name(name() + ".rate")
        .desc("Number of inst fetches per cycle")
        .flags(Stats::total);
    fetchRate = fetchedInsts / cpu->numCycles;
}
288
289template<class Impl>
290void
291DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
292{
293    timeBuffer = time_buffer;
294
295    // Create wires to get information from proper places in time buffer.
296    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
297    fromRename = timeBuffer->getWire(-renameToFetchDelay);
298    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
299    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
300}
301
302template<class Impl>
303void
304DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
305{
306    activeThreads = at_ptr;
307}
308
309template<class Impl>
310void
311DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
312{
313    // Create wire to write information to proper place in fetch time buf.
314    toDecode = ftb_ptr->getWire(0);
315}
316
317template<class Impl>
318void
319DefaultFetch<Impl>::startupStage()
320{
321    assert(priorityList.empty());
322    resetStage();
323
324    // Fetch needs to start fetching instructions at the very beginning,
325    // so it must start up in active state.
326    switchToActive();
327}
328
329template<class Impl>
330void
331DefaultFetch<Impl>::resetStage()
332{
333    numInst = 0;
334    interruptPending = false;
335    cacheBlocked = false;
336
337    priorityList.clear();
338
339    // Setup PC and nextPC with initial state.
340    for (ThreadID tid = 0; tid < numThreads; ++tid) {
341        fetchStatus[tid] = Running;
342        pc[tid] = cpu->pcState(tid);
343        fetchOffset[tid] = 0;
344        macroop[tid] = NULL;
345
346        delayedCommit[tid] = false;
347        memReq[tid] = NULL;
348
349        stalls[tid].decode = false;
350        stalls[tid].drain = false;
351
352        fetchBufferPC[tid] = 0;
353        fetchBufferValid[tid] = false;
354
355        fetchQueue[tid].clear();
356
357        priorityList.push_back(tid);
358    }
359
360    wroteToTimeBuffer = false;
361    _status = Inactive;
362}
363
template<class Impl>
void
DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
{
    // Handle an I-cache response: copy the returned data into this
    // thread's fetch buffer and move the thread out of IcacheWaitResponse.
    ThreadID tid = cpu->contextToThread(pkt->req->contextId());

    DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
    assert(!cpu->switchedOut());

    // Only change the status if it's still waiting on the icache access
    // to return.  Otherwise the request was squashed; count it and drop
    // the packet.
    if (fetchStatus[tid] != IcacheWaitResponse ||
        pkt->req != memReq[tid]) {
        ++fetchIcacheSquashes;
        delete pkt;
        return;
    }

    // Copy the response data into the per-thread fetch buffer and mark
    // the buffer usable.
    memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
    fetchBufferValid[tid] = true;

    // Wake up the CPU (if it went to sleep and was waiting on
    // this completion event).
    cpu->wakeCPU();

    DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
            tid);

    switchToActive();

    // Only switch to IcacheAccessComplete if we're not stalled as well.
    if (checkStall(tid)) {
        fetchStatus[tid] = Blocked;
    } else {
        fetchStatus[tid] = IcacheAccessComplete;
    }

    // Record access latency on the request and fire the completion probe
    // before freeing the packet.
    pkt->req->setAccessLatency();
    cpu->ppInstAccessComplete->notify(pkt);
    // Reset the mem req to NULL.
    delete pkt;
    memReq[tid] = NULL;
}
407
408template <class Impl>
409void
410DefaultFetch<Impl>::drainResume()
411{
412    for (ThreadID i = 0; i < numThreads; ++i) {
413        stalls[i].decode = false;
414        stalls[i].drain = false;
415    }
416}
417
418template <class Impl>
419void
420DefaultFetch<Impl>::drainSanityCheck() const
421{
422    assert(isDrained());
423    assert(retryPkt == NULL);
424    assert(retryTid == InvalidThreadID);
425    assert(!cacheBlocked);
426    assert(!interruptPending);
427
428    for (ThreadID i = 0; i < numThreads; ++i) {
429        assert(!memReq[i]);
430        assert(fetchStatus[i] == Idle || stalls[i].drain);
431    }
432
433    branchPred->drainSanityCheck();
434}
435
436template <class Impl>
437bool
438DefaultFetch<Impl>::isDrained() const
439{
440    /* Make sure that threads are either idle of that the commit stage
441     * has signaled that draining has completed by setting the drain
442     * stall flag. This effectively forces the pipeline to be disabled
443     * until the whole system is drained (simulation may continue to
444     * drain other components).
445     */
446    for (ThreadID i = 0; i < numThreads; ++i) {
447        // Verify fetch queues are drained
448        if (!fetchQueue[i].empty())
449            return false;
450
451        // Return false if not idle or drain stalled
452        if (fetchStatus[i] != Idle) {
453            if (fetchStatus[i] == Blocked && stalls[i].drain)
454                continue;
455            else
456                return false;
457        }
458    }
459
460    /* The pipeline might start up again in the middle of the drain
461     * cycle if the finish translation event is scheduled, so make
462     * sure that's not the case.
463     */
464    return !finishTranslationEvent.scheduled();
465}
466
467template <class Impl>
468void
469DefaultFetch<Impl>::takeOverFrom()
470{
471    assert(cpu->getInstPort().isConnected());
472    resetStage();
473
474}
475
476template <class Impl>
477void
478DefaultFetch<Impl>::drainStall(ThreadID tid)
479{
480    assert(cpu->isDraining());
481    assert(!stalls[tid].drain);
482    DPRINTF(Drain, "%i: Thread drained.\n", tid);
483    stalls[tid].drain = true;
484}
485
486template <class Impl>
487void
488DefaultFetch<Impl>::wakeFromQuiesce()
489{
490    DPRINTF(Fetch, "Waking up from quiesce\n");
491    // Hopefully this is safe
492    // @todo: Allow other threads to wake from quiesce.
493    fetchStatus[0] = Running;
494}
495
496template <class Impl>
497inline void
498DefaultFetch<Impl>::switchToActive()
499{
500    if (_status == Inactive) {
501        DPRINTF(Activity, "Activating stage.\n");
502
503        cpu->activateStage(O3CPU::FetchIdx);
504
505        _status = Active;
506    }
507}
508
509template <class Impl>
510inline void
511DefaultFetch<Impl>::switchToInactive()
512{
513    if (_status == Active) {
514        DPRINTF(Activity, "Deactivating stage.\n");
515
516        cpu->deactivateStage(O3CPU::FetchIdx);
517
518        _status = Inactive;
519    }
520}
521
522template <class Impl>
523void
524DefaultFetch<Impl>::deactivateThread(ThreadID tid)
525{
526    // Update priority list
527    auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
528    if (thread_it != priorityList.end()) {
529        priorityList.erase(thread_it);
530    }
531}
532
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(
        const DynInstPtr &inst, TheISA::PCState &nextPC)
{
    // Do branch prediction check here.
    // A bit of a misnomer...next_PC is actually the current PC until
    // this function updates it.  Returns whether the branch was predicted
    // taken; also stores the predicted target/taken flag on the
    // instruction.
    bool predict_taken;

    if (!inst->isControl()) {
        // Non-control instruction: simply advance the PC sequentially and
        // record that as the (trivially not-taken) prediction.
        TheISA::advancePC(nextPC, inst->staticInst);
        inst->setPredTarg(nextPC);
        inst->setPredTaken(false);
        return false;
    }

    // Consult the branch predictor; it updates nextPC in place with the
    // predicted target.
    ThreadID tid = inst->threadNumber;
    predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
                                        nextPC, tid);

    if (predict_taken) {
        DPRINTF(Fetch, "[tid:%i]: [sn:%i]:  Branch predicted to be taken to %s.\n",
                tid, inst->seqNum, nextPC);
    } else {
        DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
                tid, inst->seqNum);
    }

    DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
            tid, inst->seqNum, nextPC);
    inst->setPredTarg(nextPC);
    inst->setPredTaken(predict_taken);

    // Update stats: every control instruction counts as a fetched branch;
    // predicted-taken ones are counted separately.
    ++fetchedBranches;

    if (predict_taken) {
        ++predictedBranches;
    }

    return predict_taken;
}
575
template <class Impl>
bool
DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
{
    // Start fetching the fetch-buffer-sized block containing vaddr for
    // thread tid.  Returns false if the access cannot be initiated (cache
    // blocked or interrupt pending); returns true once the ITLB
    // translation has been kicked off.
    Fault fault = NoFault;

    assert(!cpu->switchedOut());

    // @todo: not sure if these should block translation.
    //AlphaDep
    if (cacheBlocked) {
        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
                tid);
        return false;
    } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
        // Hold off fetch from getting new instructions when:
        // Cache is blocked, or
        // while an interrupt is pending and we're not in PAL mode, or
        // fetch is switched out.
        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
                tid);
        return false;
    }

    // Align the fetch address to the start of a fetch buffer segment.
    Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);

    DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
            tid, fetchBufferBlockPC, vaddr);

    // Setup the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    // Build request here.
    RequestPtr mem_req = std::make_shared<Request>(
        tid, fetchBufferBlockPC, fetchBufferSize,
        Request::INST_FETCH, cpu->instMasterId(), pc,
        cpu->thread[tid]->contextId());

    mem_req->taskId(cpu->taskId());

    // Remember the outstanding request so squashes and completions can be
    // matched against it.
    memReq[tid] = mem_req;

    // Initiate translation of the icache block.  finishTranslation() is
    // invoked (via FetchTranslation) when the ITLB responds.
    fetchStatus[tid] = ItlbWait;
    FetchTranslation *trans = new FetchTranslation(this);
    cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
                              trans, BaseTLB::Execute);
    return true;
}
625
template <class Impl>
void
DefaultFetch<Impl>::finishTranslation(const Fault &fault,
                                      const RequestPtr &mem_req)
{
    // ITLB translation callback: on success, send the I-cache read; on a
    // fault, inject a nop carrying the fault so commit can handle it.
    ThreadID tid = cpu->contextToThread(mem_req->contextId());
    Addr fetchBufferBlockPC = mem_req->getVaddr();

    assert(!cpu->switchedOut());

    // Wake up CPU if it was idle
    cpu->wakeCPU();

    // Ignore responses for requests that have since been squashed.
    if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
        mem_req->getVaddr() != memReq[tid]->getVaddr()) {
        DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
                tid);
        ++fetchTlbSquashes;
        return;
    }


    // If translation was successful, attempt to read the icache block.
    if (fault == NoFault) {
        // Check that we're not going off into random memory
        // If we have, just wait around for commit to squash something and put
        // us on the right track
        if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
            warn("Address %#x is outside of physical memory, stopping fetch\n",
                    mem_req->getPaddr());
            fetchStatus[tid] = NoGoodAddr;
            memReq[tid] = NULL;
            return;
        }

        // Build packet here.
        PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
        data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);

        // The fetch buffer is invalid until the response data arrives in
        // processCacheCompletion().
        fetchBufferPC[tid] = fetchBufferBlockPC;
        fetchBufferValid[tid] = false;
        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");

        fetchedCacheLines++;

        // Access the cache.
        if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
            // Port refused the packet (e.g. no MSHRs); stash it and retry
            // when the port signals readiness.
            assert(retryPkt == NULL);
            assert(retryTid == InvalidThreadID);
            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);

            fetchStatus[tid] = IcacheWaitRetry;
            retryPkt = data_pkt;
            retryTid = tid;
            cacheBlocked = true;
        } else {
            DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
            DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
                    "response.\n", tid);
            lastIcacheStall[tid] = curTick();
            fetchStatus[tid] = IcacheWaitResponse;
            // Notify Fetch Request probe when a packet containing a fetch
            // request is successfully sent
            ppFetchRequestSent->notify(mem_req);
        }
    } else {
        // Don't send an instruction to decode if we can't handle it.
        // Retry the fault delivery next cycle when there is room.
        if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
            assert(!finishTranslationEvent.scheduled());
            finishTranslationEvent.setFault(fault);
            finishTranslationEvent.setReq(mem_req);
            cpu->schedule(finishTranslationEvent,
                          cpu->clockEdge(Cycles(1)));
            return;
        }
        DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
                tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
        // Translation faulted, icache request won't be sent.
        memReq[tid] = NULL;

        // Send the fault to commit.  This thread will not do anything
        // until commit handles the fault.  The only other way it can
        // wake up is if a squash comes along and changes the PC.
        TheISA::PCState fetchPC = pc[tid];

        DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
        // We will use a nop in ordier to carry the fault.
        DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr,
                                           NULL, fetchPC, fetchPC, false);
        instruction->setNotAnInst();

        instruction->setPredTarg(fetchPC);
        instruction->fault = fault;
        wroteToTimeBuffer = true;

        DPRINTF(Activity, "Activity this cycle.\n");
        cpu->activityThisCycle();

        // Thread stays trapped until commit processes the fault (or a
        // squash redirects the PC).
        fetchStatus[tid] = TrapPending;

        DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
        DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
                tid, fault->name(), pc[tid]);
    }
    // Re-evaluate whether the stage as a whole is active.
    _status = updateFetchStatus();
}
732
template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
                             const DynInstPtr squashInst, ThreadID tid)
{
    // Redirect thread tid to newPC, discarding all in-flight fetch state:
    // outstanding memory/TLB requests, the retry packet, the fetch queue,
    // and any partially-decoded macroop.
    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
            tid, newPC);

    pc[tid] = newPC;
    fetchOffset[tid] = 0;
    // Resuming mid-macroop: keep the squashing instruction's macroop only
    // if it matches the new PC; otherwise restart decoding cleanly.
    if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
        macroop[tid] = squashInst->macroop;
    else
        macroop[tid] = NULL;
    decoder[tid]->reset();

    // Clear the icache miss if it's outstanding.
    if (fetchStatus[tid] == IcacheWaitResponse) {
        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
                tid);
        memReq[tid] = NULL;
    } else if (fetchStatus[tid] == ItlbWait) {
        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
                tid);
        memReq[tid] = NULL;
    }

    // Get rid of the retrying packet if it was from this thread.
    if (retryTid == tid) {
        assert(cacheBlocked);
        if (retryPkt) {
            delete retryPkt;
        }
        retryPkt = NULL;
        retryTid = InvalidThreadID;
    }

    fetchStatus[tid] = Squashing;

    // Empty fetch queue
    fetchQueue[tid].clear();

    // microops are being squashed, it is not known whether the
    // youngest non-squashed microop was marked delayed commit
    // or not. Setting the flag to true ensures that the
    // interrupts are not handled when they cannot be, though
    // some opportunities to handle interrupts may be missed.
    delayedCommit[tid] = true;

    ++fetchSquashCycles;
}
784
785template<class Impl>
786void
787DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
788                                     const DynInstPtr squashInst,
789                                     const InstSeqNum seq_num, ThreadID tid)
790{
791    DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);
792
793    doSquash(newPC, squashInst, tid);
794
795    // Tell the CPU to remove any instructions that are in flight between
796    // fetch and decode.
797    cpu->removeInstsUntil(seq_num, tid);
798}
799
800template<class Impl>
801bool
802DefaultFetch<Impl>::checkStall(ThreadID tid) const
803{
804    bool ret_val = false;
805
806    if (stalls[tid].drain) {
807        assert(cpu->isDraining());
808        DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
809        ret_val = true;
810    }
811
812    return ret_val;
813}
814
815template<class Impl>
816typename DefaultFetch<Impl>::FetchStatus
817DefaultFetch<Impl>::updateFetchStatus()
818{
819    //Check Running
820    list<ThreadID>::iterator threads = activeThreads->begin();
821    list<ThreadID>::iterator end = activeThreads->end();
822
823    while (threads != end) {
824        ThreadID tid = *threads++;
825
826        if (fetchStatus[tid] == Running ||
827            fetchStatus[tid] == Squashing ||
828            fetchStatus[tid] == IcacheAccessComplete) {
829
830            if (_status == Inactive) {
831                DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
832
833                if (fetchStatus[tid] == IcacheAccessComplete) {
834                    DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
835                            "completion\n",tid);
836                }
837
838                cpu->activateStage(O3CPU::FetchIdx);
839            }
840
841            return Active;
842        }
843    }
844
845    // Stage is switching from active to inactive, notify CPU of it.
846    if (_status == Active) {
847        DPRINTF(Activity, "Deactivating stage.\n");
848
849        cpu->deactivateStage(O3CPU::FetchIdx);
850    }
851
852    return Inactive;
853}
854
855template <class Impl>
856void
857DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
858                           const InstSeqNum seq_num, DynInstPtr squashInst,
859                           ThreadID tid)
860{
861    DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);
862
863    doSquash(newPC, squashInst, tid);
864
865    // Tell the CPU to remove any instructions that are not in the ROB.
866    cpu->removeInstsNotInROB(tid);
867}
868
// Main per-cycle work of the fetch stage: update each thread's status from
// incoming time-buffer signals, fetch for the active threads, kick off
// pipelined I-cache accesses, and forward queued instructions to decode
// (rate-limited by decodeWidth, starting from a randomly chosen thread).
template <class Impl>
void
DefaultFetch<Impl>::tick()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();
    bool status_change = false;

    wroteToTimeBuffer = false;

    // Clear per-thread pipelined-ifetch requests; fetch() may re-set them.
    for (ThreadID i = 0; i < numThreads; ++i) {
        issuePipelinedIfetch[i] = false;
    }

    while (threads != end) {
        ThreadID tid = *threads++;

        // Check the signals for each thread to determine the proper status
        // for each thread.
        bool updated_status = checkSignalsAndUpdate(tid);
        status_change =  status_change || updated_status;
    }

    DPRINTF(Fetch, "Running stage.\n");

    // Latch interrupt state broadcast by commit (carried on slot 0).
    if (FullSystem) {
        if (fromCommit->commitInfo[0].interruptPending) {
            interruptPending = true;
        }

        if (fromCommit->commitInfo[0].clearInterrupt) {
            interruptPending = false;
        }
    }

    for (threadFetched = 0; threadFetched < numFetchingThreads;
         threadFetched++) {
        // Fetch each of the actively fetching threads.
        fetch(status_change);
    }

    // Record number of instructions fetched this cycle for distribution.
    fetchNisnDist.sample(numInst);

    if (status_change) {
        // Change the fetch stage status if there was a status change.
        _status = updateFetchStatus();
    }

    // Issue the next I-cache request if possible.
    for (ThreadID i = 0; i < numThreads; ++i) {
        if (issuePipelinedIfetch[i]) {
            pipelineIcacheAccesses(i);
        }
    }

    // Send instructions enqueued into the fetch queue to decode.
    // Limit rate by fetchWidth.  Stall if decode is stalled.
    unsigned insts_to_decode = 0;
    unsigned available_insts = 0;

    for (auto tid : *activeThreads) {
        if (!stalls[tid].decode) {
            available_insts += fetchQueue[tid].size();
        }
    }

    // Pick a random thread to start trying to grab instructions from
    auto tid_itr = activeThreads->begin();
    std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));

    // Round-robin over threads from the random start, draining each
    // non-stalled fetch queue until decodeWidth is exhausted.
    while (available_insts != 0 && insts_to_decode < decodeWidth) {
        ThreadID tid = *tid_itr;
        if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
            const auto& inst = fetchQueue[tid].front();
            toDecode->insts[toDecode->size++] = inst;
            DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
                    "fetch queue. Fetch queue size: %i.\n",
                    tid, inst->seqNum, fetchQueue[tid].size());

            wroteToTimeBuffer = true;
            fetchQueue[tid].pop_front();
            insts_to_decode++;
            available_insts--;
        }

        tid_itr++;
        // Wrap around if at end of active threads list
        if (tid_itr == activeThreads->end())
            tid_itr = activeThreads->begin();
    }

    // If there was activity this cycle, inform the CPU of it.
    if (wroteToTimeBuffer) {
        DPRINTF(Activity, "Activity this cycle.\n");
        cpu->activityThisCycle();
    }

    // Reset the number of the instruction we've fetched.
    numInst = 0;
}
970
// Examine the time-buffer signals arriving from decode and commit for one
// thread and update that thread's fetch status accordingly.  Returns true
// if the thread's status changed this cycle.  Priority order matters:
// commit squashes override decode squashes, which override stall handling.
template <class Impl>
bool
DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
{
    // Update the per thread stall statuses.
    if (fromDecode->decodeBlock[tid]) {
        stalls[tid].decode = true;
    }

    if (fromDecode->decodeUnblock[tid]) {
        assert(stalls[tid].decode);
        assert(!fromDecode->decodeBlock[tid]);
        stalls[tid].decode = false;
    }

    // Check squash signals from commit.
    if (fromCommit->commitInfo[tid].squash) {

        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from commit.\n",tid);
        // In any case, squash.
        squash(fromCommit->commitInfo[tid].pc,
               fromCommit->commitInfo[tid].doneSeqNum,
               fromCommit->commitInfo[tid].squashInst, tid);

        // If it was a branch mispredict on a control instruction, update the
        // branch predictor with that instruction, otherwise just kill the
        // invalid state we generated in after sequence number
        if (fromCommit->commitInfo[tid].mispredictInst &&
            fromCommit->commitInfo[tid].mispredictInst->isControl()) {
            branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
                              fromCommit->commitInfo[tid].pc,
                              fromCommit->commitInfo[tid].branchTaken,
                              tid);
        } else {
            branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
                              tid);
        }

        return true;
    } else if (fromCommit->commitInfo[tid].doneSeqNum) {
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcasted.
        branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
    }

    // Check squash signals from decode.
    if (fromDecode->decodeInfo[tid].squash) {
        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from decode.\n",tid);

        // Update the branch predictor.
        if (fromDecode->decodeInfo[tid].branchMispredict) {
            branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
                              fromDecode->decodeInfo[tid].nextPC,
                              fromDecode->decodeInfo[tid].branchTaken,
                              tid);
        } else {
            branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
                              tid);
        }

        if (fetchStatus[tid] != Squashing) {

            DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
                fromDecode->decodeInfo[tid].nextPC);
            // Squash unless we're already squashing
            squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
                             fromDecode->decodeInfo[tid].squashInst,
                             fromDecode->decodeInfo[tid].doneSeqNum,
                             tid);

            return true;
        }
    }

    // Block on a downstream stall, but never while waiting on the I-cache,
    // TLB, or a quiesce — those states must complete/resolve first.
    if (checkStall(tid) &&
        fetchStatus[tid] != IcacheWaitResponse &&
        fetchStatus[tid] != IcacheWaitRetry &&
        fetchStatus[tid] != ItlbWait &&
        fetchStatus[tid] != QuiescePending) {
        DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);

        fetchStatus[tid] = Blocked;

        return true;
    }

    if (fetchStatus[tid] == Blocked ||
        fetchStatus[tid] == Squashing) {
        // Switch status to running if fetch isn't being told to block or
        // squash this cycle.
        DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
                tid);

        fetchStatus[tid] = Running;

        return true;
    }

    // If we've reached this point, we have not gotten any signals that
    // cause fetch to change its status.  Fetch remains the same as before.
    return false;
}
1075
// Construct a DynInst for a decoded (micro-)op: assign it a global sequence
// number, attach thread state, optionally create trace data, register it
// with the CPU's instruction list, and append it to this thread's fetch
// queue.  Returns the newly built instruction.
template<class Impl>
typename Impl::DynInstPtr
DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
                              StaticInstPtr curMacroop, TheISA::PCState thisPC,
                              TheISA::PCState nextPC, bool trace)
{
    // Get a sequence number.
    InstSeqNum seq = cpu->getAndIncrementInstSeq();

    // Create a new DynInst from the instruction fetched.
    DynInstPtr instruction =
        new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
    instruction->setTid(tid);

    instruction->setASID(tid);

    instruction->setThreadState(cpu->thread[tid]);

    DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
            "[sn:%lli].\n", tid, thisPC.instAddr(),
            thisPC.microPC(), seq);

    DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
            instruction->staticInst->
            disassemble(thisPC.instAddr()));

#if TRACING_ON
    if (trace) {
        instruction->traceData =
            cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
                    instruction->staticInst, thisPC, curMacroop);
    }
#else
    instruction->traceData = NULL;
#endif

    // Add instruction to the CPU's list of instructions.
    instruction->setInstListIt(cpu->addInst(instruction));

    // Write the instruction to the first slot in the queue
    // that heads to decode.
    assert(numInst < fetchWidth);
    fetchQueue[tid].push_back(instruction);
    assert(fetchQueue[tid].size() <= fetchQueueSize);
    DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
            tid, fetchQueue[tid].size(), fetchQueueSize);
    //toDecode->insts[toDecode->size++] = instruction;

    // Keep track of if we can take an interrupt at this boundary
    delayedCommit[tid] = instruction->isDelayedCommit();

    return instruction;
}
1129
// Fetch instructions for one thread this cycle: drive I-cache state
// transitions, feed bytes from the fetch buffer to the decoder, build
// DynInsts for every (micro-)op produced, consult the branch predictor
// for the next PC, and finally decide whether to pipeline an I-cache
// access for the next fetch buffer block.  status_change is set when
// this thread's fetch status transitions.
template<class Impl>
void
DefaultFetch<Impl>::fetch(bool &status_change)
{
    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////
    ThreadID tid = getFetchingThread();

    assert(!cpu->switchedOut());

    if (tid == InvalidThreadID) {
        // Breaks looping condition in tick()
        threadFetched = numFetchingThreads;

        if (numThreads == 1) {  // @todo Per-thread stats
            profileStall(0);
        }

        return;
    }

    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);

    // The current PC.
    TheISA::PCState thisPC = pc[tid];

    Addr pcOffset = fetchOffset[tid];
    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;

    bool inRom = isRomMicroPC(thisPC.microPC());

    // If returning from the delay of a cache miss, then update the status
    // to running, otherwise do the cache access.  Possibly move this up
    // to tick() function.
    if (fetchStatus[tid] == IcacheAccessComplete) {
        DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);

        fetchStatus[tid] = Running;
        status_change = true;
    } else if (fetchStatus[tid] == Running) {
        // Align the fetch PC so its at the start of a fetch buffer segment.
        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

        // If buffer is no longer valid or fetchAddr has moved to point
        // to the next cache block, AND we have no remaining ucode
        // from a macro-op, then start fetch from icache.
        if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
            && !inRom && !macroop[tid]) {
            DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
                    "instruction, starting at PC %s.\n", tid, thisPC);

            fetchCacheLine(fetchAddr, tid, thisPC.instAddr());

            // Attribute this stalled cycle to whichever wait state the
            // cache line fetch left us in.
            if (fetchStatus[tid] == IcacheWaitResponse)
                ++icacheStallCycles;
            else if (fetchStatus[tid] == ItlbWait)
                ++fetchTlbCycles;
            else
                ++fetchMiscStallCycles;
            return;
        } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
            // Stall CPU if an interrupt is posted and we're not issuing
            // an delayed commit micro-op currently (delayed commit instructions
            // are not interruptable by interrupts, only faults)
            ++fetchMiscStallCycles;
            DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);
            return;
        }
    } else {
        if (fetchStatus[tid] == Idle) {
            ++fetchIdleCycles;
            DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
        }

        // Status is Idle, so fetch should do nothing.
        return;
    }

    ++fetchCycles;

    TheISA::PCState nextPC = thisPC;

    StaticInstPtr staticInst = NULL;
    StaticInstPtr curMacroop = macroop[tid];

    // If the read of the first instruction was successful, then grab the
    // instructions from the rest of the cache line and put them into the
    // queue heading to decode.

    DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
            "decode.\n", tid);

    // Need to keep track of whether or not a predicted branch
    // ended this fetch block.
    bool predictedBranch = false;

    // Need to halt fetch if quiesce instruction detected
    bool quiesce = false;

    TheISA::MachInst *cacheInsts =
        reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);

    const unsigned numInsts = fetchBufferSize / instSize;
    unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;

    // Loop through instruction memory from the cache.
    // Keep issuing while fetchWidth is available and branch is not
    // predicted taken
    while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
           && !predictedBranch && !quiesce) {
        // We need to process more memory if we aren't going to get a
        // StaticInst from the rom, the current macroop, or what's already
        // in the decoder.
        bool needMem = !inRom && !curMacroop &&
            !decoder[tid]->instReady();
        fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

        if (needMem) {
            // If buffer is no longer valid or fetchAddr has moved to point
            // to the next cache block then start fetch from icache.
            if (!fetchBufferValid[tid] ||
                fetchBufferBlockPC != fetchBufferPC[tid])
                break;

            if (blkOffset >= numInsts) {
                // We need to process more memory, but we've run out of the
                // current block.
                break;
            }

            MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
            decoder[tid]->moreBytes(thisPC, fetchAddr, inst);

            if (decoder[tid]->needMoreBytes()) {
                blkOffset++;
                fetchAddr += instSize;
                pcOffset += instSize;
            }
        }

        // Extract as many instructions and/or microops as we can from
        // the memory we've processed so far.
        do {
            if (!(curMacroop || inRom)) {
                if (decoder[tid]->instReady()) {
                    staticInst = decoder[tid]->decode(thisPC);

                    // Increment stat of fetched instructions.
                    ++fetchedInsts;

                    if (staticInst->isMacroop()) {
                        curMacroop = staticInst;
                    } else {
                        pcOffset = 0;
                    }
                } else {
                    // We need more bytes for this instruction so blkOffset and
                    // pcOffset will be updated
                    break;
                }
            }
            // Whether we're moving to a new macroop because we're at the
            // end of the current one, or the branch predictor incorrectly
            // thinks we are...
            bool newMacro = false;
            if (curMacroop || inRom) {
                if (inRom) {
                    staticInst = cpu->microcodeRom.fetchMicroop(
                            thisPC.microPC(), curMacroop);
                } else {
                    staticInst = curMacroop->fetchMicroop(thisPC.microPC());
                }
                newMacro |= staticInst->isLastMicroop();
            }

            DynInstPtr instruction =
                buildInst(tid, staticInst, curMacroop,
                          thisPC, nextPC, true);

            ppFetch->notify(instruction);
            numInst++;

#if TRACING_ON
            if (DTRACE(O3PipeView)) {
                instruction->fetchTick = curTick();
            }
#endif

            nextPC = thisPC;

            // If we're branching after this instruction, quit fetching
            // from the same block.
            predictedBranch |= thisPC.branching();
            predictedBranch |=
                lookupAndUpdateNextPC(instruction, nextPC);
            if (predictedBranch) {
                DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
            }

            newMacro |= thisPC.instAddr() != nextPC.instAddr();

            // Move to the next instruction, unless we have a branch.
            thisPC = nextPC;
            inRom = isRomMicroPC(thisPC.microPC());

            if (newMacro) {
                fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
                blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
                pcOffset = 0;
                curMacroop = NULL;
            }

            if (instruction->isQuiesce()) {
                DPRINTF(Fetch,
                        "Quiesce instruction encountered, halting fetch!\n");
                fetchStatus[tid] = QuiescePending;
                status_change = true;
                quiesce = true;
                break;
            }
        } while ((curMacroop || decoder[tid]->instReady()) &&
                 numInst < fetchWidth &&
                 fetchQueue[tid].size() < fetchQueueSize);

        // Re-evaluate whether the next instruction to fetch is in micro-op ROM
        // or not.
        inRom = isRomMicroPC(thisPC.microPC());
    }

    if (predictedBranch) {
        DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
                "instruction encountered.\n", tid);
    } else if (numInst >= fetchWidth) {
        DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                "for this cycle.\n", tid);
    } else if (blkOffset >= fetchBufferSize) {
        // NOTE(review): blkOffset counts instruction slots but
        // fetchBufferSize is in bytes (numInsts = fetchBufferSize /
        // instSize above), so this debug-message condition looks
        // unit-mismatched and likely never triggers — confirm.
        DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the"
                "fetch buffer.\n", tid);
    }

    // Persist per-thread decode state for the next cycle.
    macroop[tid] = curMacroop;
    fetchOffset[tid] = pcOffset;

    if (numInst > 0) {
        wroteToTimeBuffer = true;
    }

    pc[tid] = thisPC;

    // pipeline a fetch if we're crossing a fetch buffer boundary and not in
    // a state that would preclude fetching
    fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
    issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
        fetchStatus[tid] != IcacheWaitResponse &&
        fetchStatus[tid] != ItlbWait &&
        fetchStatus[tid] != IcacheWaitRetry &&
        fetchStatus[tid] != QuiescePending &&
        !curMacroop;
}
1392
1393template<class Impl>
1394void
1395DefaultFetch<Impl>::recvReqRetry()
1396{
1397    if (retryPkt != NULL) {
1398        assert(cacheBlocked);
1399        assert(retryTid != InvalidThreadID);
1400        assert(fetchStatus[retryTid] == IcacheWaitRetry);
1401
1402        if (cpu->getInstPort().sendTimingReq(retryPkt)) {
1403            fetchStatus[retryTid] = IcacheWaitResponse;
1404            // Notify Fetch Request probe when a retryPkt is successfully sent.
1405            // Note that notify must be called before retryPkt is set to NULL.
1406            ppFetchRequestSent->notify(retryPkt->req);
1407            retryPkt = NULL;
1408            retryTid = InvalidThreadID;
1409            cacheBlocked = false;
1410        }
1411    } else {
1412        assert(retryTid == InvalidThreadID);
1413        // Access has been squashed since it was sent out.  Just clear
1414        // the cache being blocked.
1415        cacheBlocked = false;
1416    }
1417}
1418
1419///////////////////////////////////////
1420//                                   //
1421//  SMT FETCH POLICY MAINTAINED HERE //
1422//                                   //
1423///////////////////////////////////////
1424template<class Impl>
1425ThreadID
1426DefaultFetch<Impl>::getFetchingThread()
1427{
1428    if (numThreads > 1) {
1429        switch (fetchPolicy) {
1430          case FetchPolicy::RoundRobin:
1431            return roundRobin();
1432          case FetchPolicy::IQCount:
1433            return iqCount();
1434          case FetchPolicy::LSQCount:
1435            return lsqCount();
1436          case FetchPolicy::Branch:
1437            return branchCount();
1438          default:
1439            return InvalidThreadID;
1440        }
1441    } else {
1442        list<ThreadID>::iterator thread = activeThreads->begin();
1443        if (thread == activeThreads->end()) {
1444            return InvalidThreadID;
1445        }
1446
1447        ThreadID tid = *thread;
1448
1449        if (fetchStatus[tid] == Running ||
1450            fetchStatus[tid] == IcacheAccessComplete ||
1451            fetchStatus[tid] == Idle) {
1452            return tid;
1453        } else {
1454            return InvalidThreadID;
1455        }
1456    }
1457}
1458
1459
1460template<class Impl>
1461ThreadID
1462DefaultFetch<Impl>::roundRobin()
1463{
1464    list<ThreadID>::iterator pri_iter = priorityList.begin();
1465    list<ThreadID>::iterator end      = priorityList.end();
1466
1467    ThreadID high_pri;
1468
1469    while (pri_iter != end) {
1470        high_pri = *pri_iter;
1471
1472        assert(high_pri <= numThreads);
1473
1474        if (fetchStatus[high_pri] == Running ||
1475            fetchStatus[high_pri] == IcacheAccessComplete ||
1476            fetchStatus[high_pri] == Idle) {
1477
1478            priorityList.erase(pri_iter);
1479            priorityList.push_back(high_pri);
1480
1481            return high_pri;
1482        }
1483
1484        pri_iter++;
1485    }
1486
1487    return InvalidThreadID;
1488}
1489
1490template<class Impl>
1491ThreadID
1492DefaultFetch<Impl>::iqCount()
1493{
1494    //sorted from lowest->highest
1495    std::priority_queue<unsigned,vector<unsigned>,
1496                        std::greater<unsigned> > PQ;
1497    std::map<unsigned, ThreadID> threadMap;
1498
1499    list<ThreadID>::iterator threads = activeThreads->begin();
1500    list<ThreadID>::iterator end = activeThreads->end();
1501
1502    while (threads != end) {
1503        ThreadID tid = *threads++;
1504        unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1505
1506        //we can potentially get tid collisions if two threads
1507        //have the same iqCount, but this should be rare.
1508        PQ.push(iqCount);
1509        threadMap[iqCount] = tid;
1510    }
1511
1512    while (!PQ.empty()) {
1513        ThreadID high_pri = threadMap[PQ.top()];
1514
1515        if (fetchStatus[high_pri] == Running ||
1516            fetchStatus[high_pri] == IcacheAccessComplete ||
1517            fetchStatus[high_pri] == Idle)
1518            return high_pri;
1519        else
1520            PQ.pop();
1521
1522    }
1523
1524    return InvalidThreadID;
1525}
1526
1527template<class Impl>
1528ThreadID
1529DefaultFetch<Impl>::lsqCount()
1530{
1531    //sorted from lowest->highest
1532    std::priority_queue<unsigned,vector<unsigned>,
1533                        std::greater<unsigned> > PQ;
1534    std::map<unsigned, ThreadID> threadMap;
1535
1536    list<ThreadID>::iterator threads = activeThreads->begin();
1537    list<ThreadID>::iterator end = activeThreads->end();
1538
1539    while (threads != end) {
1540        ThreadID tid = *threads++;
1541        unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1542
1543        //we can potentially get tid collisions if two threads
1544        //have the same iqCount, but this should be rare.
1545        PQ.push(ldstqCount);
1546        threadMap[ldstqCount] = tid;
1547    }
1548
1549    while (!PQ.empty()) {
1550        ThreadID high_pri = threadMap[PQ.top()];
1551
1552        if (fetchStatus[high_pri] == Running ||
1553            fetchStatus[high_pri] == IcacheAccessComplete ||
1554            fetchStatus[high_pri] == Idle)
1555            return high_pri;
1556        else
1557            PQ.pop();
1558    }
1559
1560    return InvalidThreadID;
1561}
1562
1563template<class Impl>
1564ThreadID
1565DefaultFetch<Impl>::branchCount()
1566{
1567#if 0
1568    list<ThreadID>::iterator thread = activeThreads->begin();
1569    assert(thread != activeThreads->end());
1570    ThreadID tid = *thread;
1571#endif
1572
1573    panic("Branch Count Fetch policy unimplemented\n");
1574    return InvalidThreadID;
1575}
1576
1577template<class Impl>
1578void
1579DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
1580{
1581    if (!issuePipelinedIfetch[tid]) {
1582        return;
1583    }
1584
1585    // The next PC to access.
1586    TheISA::PCState thisPC = pc[tid];
1587
1588    if (isRomMicroPC(thisPC.microPC())) {
1589        return;
1590    }
1591
1592    Addr pcOffset = fetchOffset[tid];
1593    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1594
1595    // Align the fetch PC so its at the start of a fetch buffer segment.
1596    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1597
1598    // Unless buffer already got the block, fetch it from icache.
1599    if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1600        DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
1601                "starting at PC %s.\n", tid, thisPC);
1602
1603        fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1604    }
1605}
1606
// Account for a cycle in which no thread could fetch: bump the stall-cycle
// statistic matching this thread's state and emit a debug trace of the
// reason.  The first two checks (drain, no active threads) take precedence
// over the per-status checks.
template<class Impl>
void
DefaultFetch<Impl>::profileStall(ThreadID tid) {
    DPRINTF(Fetch,"There are no more threads available to fetch from.\n");

    // @todo Per-thread stats

    if (stalls[tid].drain) {
        ++fetchPendingDrainCycles;
        DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
    } else if (activeThreads->empty()) {
        ++fetchNoActiveThreadStallCycles;
        DPRINTF(Fetch, "Fetch has no active thread!\n");
    } else if (fetchStatus[tid] == Blocked) {
        ++fetchBlockedCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
    } else if (fetchStatus[tid] == Squashing) {
        ++fetchSquashCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
    } else if (fetchStatus[tid] == IcacheWaitResponse) {
        ++icacheStallCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
                tid);
    } else if (fetchStatus[tid] == ItlbWait) {
        ++fetchTlbCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
                "finish!\n", tid);
    } else if (fetchStatus[tid] == TrapPending) {
        ++fetchPendingTrapStallCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
                tid);
    } else if (fetchStatus[tid] == QuiescePending) {
        ++fetchPendingQuiesceStallCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
                "instruction!\n", tid);
    } else if (fetchStatus[tid] == IcacheWaitRetry) {
        ++fetchIcacheWaitRetryStallCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
                tid);
    } else if (fetchStatus[tid] == NoGoodAddr) {
            // NOTE(review): this branch traces but increments no stat
            // counter, unlike every other state — confirm intentional.
            DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
                    tid);
    } else {
        DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
             tid, fetchStatus[tid]);
    }
}
1654
1655#endif//__CPU_O3_FETCH_IMPL_HH__
1656