fetch_impl.hh revision 10329:12e3be8203a5
1/*
2 * Copyright (c) 2010-2014 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2004-2006 The Regents of The University of Michigan
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Kevin Lim
41 *          Korey Sewell
42 */
43
44#ifndef __CPU_O3_FETCH_IMPL_HH__
45#define __CPU_O3_FETCH_IMPL_HH__
46
47#include <algorithm>
48#include <cstring>
49#include <list>
50#include <map>
51#include <queue>
52
53#include "arch/isa_traits.hh"
54#include "arch/tlb.hh"
55#include "arch/utility.hh"
56#include "arch/vtophys.hh"
57#include "base/types.hh"
58#include "config/the_isa.hh"
59#include "cpu/base.hh"
60//#include "cpu/checker/cpu.hh"
61#include "cpu/o3/fetch.hh"
62#include "cpu/exetrace.hh"
63#include "debug/Activity.hh"
64#include "debug/Drain.hh"
65#include "debug/Fetch.hh"
66#include "debug/O3PipeView.hh"
67#include "mem/packet.hh"
68#include "params/DerivO3CPU.hh"
69#include "sim/byteswap.hh"
70#include "sim/core.hh"
71#include "sim/eventq.hh"
72#include "sim/full_system.hh"
73#include "sim/system.hh"
74
75using namespace std;
76
77template<class Impl>
78DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
79    : cpu(_cpu),
80      decodeToFetchDelay(params->decodeToFetchDelay),
81      renameToFetchDelay(params->renameToFetchDelay),
82      iewToFetchDelay(params->iewToFetchDelay),
83      commitToFetchDelay(params->commitToFetchDelay),
84      fetchWidth(params->fetchWidth),
85      decodeWidth(params->decodeWidth),
86      retryPkt(NULL),
87      retryTid(InvalidThreadID),
88      cacheBlkSize(cpu->cacheLineSize()),
89      fetchBufferSize(params->fetchBufferSize),
90      fetchBufferMask(fetchBufferSize - 1),
91      fetchQueueSize(params->fetchQueueSize),
92      numThreads(params->numThreads),
93      numFetchingThreads(params->smtNumFetchingThreads),
94      finishTranslationEvent(this)
95{
96    if (numThreads > Impl::MaxThreads)
97        fatal("numThreads (%d) is larger than compiled limit (%d),\n"
98              "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
99              numThreads, static_cast<int>(Impl::MaxThreads));
100    if (fetchWidth > Impl::MaxWidth)
101        fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
102             "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
103             fetchWidth, static_cast<int>(Impl::MaxWidth));
104    if (fetchBufferSize > cacheBlkSize)
105        fatal("fetch buffer size (%u bytes) is greater than the cache "
106              "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
107    if (cacheBlkSize % fetchBufferSize)
108        fatal("cache block (%u bytes) is not a multiple of the "
109              "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
110
111    std::string policy = params->smtFetchPolicy;
112
113    // Convert string to lowercase
114    std::transform(policy.begin(), policy.end(), policy.begin(),
115                   (int(*)(int)) tolower);
116
117    // Figure out fetch policy
118    if (policy == "singlethread") {
119        fetchPolicy = SingleThread;
120        if (numThreads > 1)
121            panic("Invalid Fetch Policy for a SMT workload.");
122    } else if (policy == "roundrobin") {
123        fetchPolicy = RoundRobin;
124        DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
125    } else if (policy == "branch") {
126        fetchPolicy = Branch;
127        DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
128    } else if (policy == "iqcount") {
129        fetchPolicy = IQ;
130        DPRINTF(Fetch, "Fetch policy set to IQ count\n");
131    } else if (policy == "lsqcount") {
132        fetchPolicy = LSQ;
133        DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
134    } else {
135        fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
136              " RoundRobin,LSQcount,IQcount}\n");
137    }
138
139    // Get the size of an instruction.
140    instSize = sizeof(TheISA::MachInst);
141
142    for (int i = 0; i < Impl::MaxThreads; i++) {
143        decoder[i] = NULL;
144        fetchBuffer[i] = NULL;
145        fetchBufferPC[i] = 0;
146        fetchBufferValid[i] = false;
147    }
148
149    branchPred = params->branchPred;
150
151    for (ThreadID tid = 0; tid < numThreads; tid++) {
152        decoder[tid] = new TheISA::Decoder;
153        // Create space to buffer the cache line data,
154        // which may not hold the entire cache line.
155        fetchBuffer[tid] = new uint8_t[fetchBufferSize];
156    }
157}
158
159template <class Impl>
160std::string
161DefaultFetch<Impl>::name() const
162{
163    return cpu->name() + ".fetch";
164}
165
166template <class Impl>
167void
168DefaultFetch<Impl>::regProbePoints()
169{
170    ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
171}
172
173template <class Impl>
174void
175DefaultFetch<Impl>::regStats()
176{
177    icacheStallCycles
178        .name(name() + ".icacheStallCycles")
179        .desc("Number of cycles fetch is stalled on an Icache miss")
180        .prereq(icacheStallCycles);
181
182    fetchedInsts
183        .name(name() + ".Insts")
184        .desc("Number of instructions fetch has processed")
185        .prereq(fetchedInsts);
186
187    fetchedBranches
188        .name(name() + ".Branches")
189        .desc("Number of branches that fetch encountered")
190        .prereq(fetchedBranches);
191
192    predictedBranches
193        .name(name() + ".predictedBranches")
194        .desc("Number of branches that fetch has predicted taken")
195        .prereq(predictedBranches);
196
197    fetchCycles
198        .name(name() + ".Cycles")
199        .desc("Number of cycles fetch has run and was not squashing or"
200              " blocked")
201        .prereq(fetchCycles);
202
203    fetchSquashCycles
204        .name(name() + ".SquashCycles")
205        .desc("Number of cycles fetch has spent squashing")
206        .prereq(fetchSquashCycles);
207
208    fetchTlbCycles
209        .name(name() + ".TlbCycles")
210        .desc("Number of cycles fetch has spent waiting for tlb")
211        .prereq(fetchTlbCycles);
212
213    fetchIdleCycles
214        .name(name() + ".IdleCycles")
215        .desc("Number of cycles fetch was idle")
216        .prereq(fetchIdleCycles);
217
218    fetchBlockedCycles
219        .name(name() + ".BlockedCycles")
220        .desc("Number of cycles fetch has spent blocked")
221        .prereq(fetchBlockedCycles);
222
223    fetchedCacheLines
224        .name(name() + ".CacheLines")
225        .desc("Number of cache lines fetched")
226        .prereq(fetchedCacheLines);
227
228    fetchMiscStallCycles
229        .name(name() + ".MiscStallCycles")
230        .desc("Number of cycles fetch has spent waiting on interrupts, or "
231              "bad addresses, or out of MSHRs")
232        .prereq(fetchMiscStallCycles);
233
234    fetchPendingDrainCycles
235        .name(name() + ".PendingDrainCycles")
236        .desc("Number of cycles fetch has spent waiting on pipes to drain")
237        .prereq(fetchPendingDrainCycles);
238
239    fetchNoActiveThreadStallCycles
240        .name(name() + ".NoActiveThreadStallCycles")
241        .desc("Number of stall cycles due to no active thread to fetch from")
242        .prereq(fetchNoActiveThreadStallCycles);
243
244    fetchPendingTrapStallCycles
245        .name(name() + ".PendingTrapStallCycles")
246        .desc("Number of stall cycles due to pending traps")
247        .prereq(fetchPendingTrapStallCycles);
248
249    fetchPendingQuiesceStallCycles
250        .name(name() + ".PendingQuiesceStallCycles")
251        .desc("Number of stall cycles due to pending quiesce instructions")
252        .prereq(fetchPendingQuiesceStallCycles);
253
254    fetchIcacheWaitRetryStallCycles
255        .name(name() + ".IcacheWaitRetryStallCycles")
256        .desc("Number of stall cycles due to full MSHR")
257        .prereq(fetchIcacheWaitRetryStallCycles);
258
259    fetchIcacheSquashes
260        .name(name() + ".IcacheSquashes")
261        .desc("Number of outstanding Icache misses that were squashed")
262        .prereq(fetchIcacheSquashes);
263
264    fetchTlbSquashes
265        .name(name() + ".ItlbSquashes")
266        .desc("Number of outstanding ITLB misses that were squashed")
267        .prereq(fetchTlbSquashes);
268
269    fetchNisnDist
270        .init(/* base value */ 0,
271              /* last value */ fetchWidth,
272              /* bucket size */ 1)
273        .name(name() + ".rateDist")
274        .desc("Number of instructions fetched each cycle (Total)")
275        .flags(Stats::pdf);
276
277    idleRate
278        .name(name() + ".idleRate")
279        .desc("Percent of cycles fetch was idle")
280        .prereq(idleRate);
281    idleRate = fetchIdleCycles * 100 / cpu->numCycles;
282
283    branchRate
284        .name(name() + ".branchRate")
285        .desc("Number of branch fetches per cycle")
286        .flags(Stats::total);
287    branchRate = fetchedBranches / cpu->numCycles;
288
289    fetchRate
290        .name(name() + ".rate")
291        .desc("Number of inst fetches per cycle")
292        .flags(Stats::total);
293    fetchRate = fetchedInsts / cpu->numCycles;
294}
295
296template<class Impl>
297void
298DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
299{
300    timeBuffer = time_buffer;
301
302    // Create wires to get information from proper places in time buffer.
303    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
304    fromRename = timeBuffer->getWire(-renameToFetchDelay);
305    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
306    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
307}
308
309template<class Impl>
310void
311DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
312{
313    activeThreads = at_ptr;
314}
315
316template<class Impl>
317void
318DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
319{
320    // Create wire to write information to proper place in fetch time buf.
321    toDecode = ftb_ptr->getWire(0);
322}
323
324template<class Impl>
325void
326DefaultFetch<Impl>::startupStage()
327{
328    assert(priorityList.empty());
329    resetStage();
330
331    // Fetch needs to start fetching instructions at the very beginning,
332    // so it must start up in active state.
333    switchToActive();
334}
335
336template<class Impl>
337void
338DefaultFetch<Impl>::resetStage()
339{
340    numInst = 0;
341    interruptPending = false;
342    cacheBlocked = false;
343
344    priorityList.clear();
345    fetchQueue.clear();
346
347    // Setup PC and nextPC with initial state.
348    for (ThreadID tid = 0; tid < numThreads; ++tid) {
349        fetchStatus[tid] = Running;
350        pc[tid] = cpu->pcState(tid);
351        fetchOffset[tid] = 0;
352        macroop[tid] = NULL;
353
354        delayedCommit[tid] = false;
355        memReq[tid] = NULL;
356
357        stalls[tid].decode = false;
358        stalls[tid].drain = false;
359
360        fetchBufferPC[tid] = 0;
361        fetchBufferValid[tid] = false;
362
363        priorityList.push_back(tid);
364    }
365
366    wroteToTimeBuffer = false;
367    _status = Inactive;
368}
369
370template<class Impl>
371void
372DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
373{
374    ThreadID tid = pkt->req->threadId();
375
376    DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
377    assert(!cpu->switchedOut());
378
379    // Only change the status if it's still waiting on the icache access
380    // to return.
381    if (fetchStatus[tid] != IcacheWaitResponse ||
382        pkt->req != memReq[tid]) {
383        ++fetchIcacheSquashes;
384        delete pkt->req;
385        delete pkt;
386        return;
387    }
388
389    memcpy(fetchBuffer[tid], pkt->getPtr<uint8_t>(), fetchBufferSize);
390    fetchBufferValid[tid] = true;
391
392    // Wake up the CPU (if it went to sleep and was waiting on
393    // this completion event).
394    cpu->wakeCPU();
395
396    DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
397            tid);
398
399    switchToActive();
400
401    // Only switch to IcacheAccessComplete if we're not stalled as well.
402    if (checkStall(tid)) {
403        fetchStatus[tid] = Blocked;
404    } else {
405        fetchStatus[tid] = IcacheAccessComplete;
406    }
407
408    pkt->req->setAccessLatency();
409    cpu->ppInstAccessComplete->notify(pkt);
410    // Reset the mem req to NULL.
411    delete pkt->req;
412    delete pkt;
413    memReq[tid] = NULL;
414}
415
416template <class Impl>
417void
418DefaultFetch<Impl>::drainResume()
419{
420    for (ThreadID i = 0; i < Impl::MaxThreads; ++i)
421        stalls[i].drain = false;
422}
423
424template <class Impl>
425void
426DefaultFetch<Impl>::drainSanityCheck() const
427{
428    assert(isDrained());
429    assert(retryPkt == NULL);
430    assert(retryTid == InvalidThreadID);
431    assert(!cacheBlocked);
432    assert(!interruptPending);
433
434    for (ThreadID i = 0; i < numThreads; ++i) {
435        assert(!memReq[i]);
436        assert(fetchStatus[i] == Idle || stalls[i].drain);
437    }
438
439    branchPred->drainSanityCheck();
440}
441
442template <class Impl>
443bool
444DefaultFetch<Impl>::isDrained() const
445{
446    /* Make sure that threads are either idle of that the commit stage
447     * has signaled that draining has completed by setting the drain
448     * stall flag. This effectively forces the pipeline to be disabled
449     * until the whole system is drained (simulation may continue to
450     * drain other components).
451     */
452    for (ThreadID i = 0; i < numThreads; ++i) {
453        if (!(fetchStatus[i] == Idle ||
454              (fetchStatus[i] == Blocked && stalls[i].drain)))
455            return false;
456    }
457
458    // Not drained if fetch queue contains entries
459    if (!fetchQueue.empty())
460        return false;
461
462    /* The pipeline might start up again in the middle of the drain
463     * cycle if the finish translation event is scheduled, so make
464     * sure that's not the case.
465     */
466    return !finishTranslationEvent.scheduled();
467}
468
469template <class Impl>
470void
471DefaultFetch<Impl>::takeOverFrom()
472{
473    assert(cpu->getInstPort().isConnected());
474    resetStage();
475
476}
477
478template <class Impl>
479void
480DefaultFetch<Impl>::drainStall(ThreadID tid)
481{
482    assert(cpu->isDraining());
483    assert(!stalls[tid].drain);
484    DPRINTF(Drain, "%i: Thread drained.\n", tid);
485    stalls[tid].drain = true;
486}
487
488template <class Impl>
489void
490DefaultFetch<Impl>::wakeFromQuiesce()
491{
492    DPRINTF(Fetch, "Waking up from quiesce\n");
493    // Hopefully this is safe
494    // @todo: Allow other threads to wake from quiesce.
495    fetchStatus[0] = Running;
496}
497
498template <class Impl>
499inline void
500DefaultFetch<Impl>::switchToActive()
501{
502    if (_status == Inactive) {
503        DPRINTF(Activity, "Activating stage.\n");
504
505        cpu->activateStage(O3CPU::FetchIdx);
506
507        _status = Active;
508    }
509}
510
511template <class Impl>
512inline void
513DefaultFetch<Impl>::switchToInactive()
514{
515    if (_status == Active) {
516        DPRINTF(Activity, "Deactivating stage.\n");
517
518        cpu->deactivateStage(O3CPU::FetchIdx);
519
520        _status = Inactive;
521    }
522}
523
524template <class Impl>
525bool
526DefaultFetch<Impl>::lookupAndUpdateNextPC(
527        DynInstPtr &inst, TheISA::PCState &nextPC)
528{
529    // Do branch prediction check here.
530    // A bit of a misnomer...next_PC is actually the current PC until
531    // this function updates it.
532    bool predict_taken;
533
534    if (!inst->isControl()) {
535        TheISA::advancePC(nextPC, inst->staticInst);
536        inst->setPredTarg(nextPC);
537        inst->setPredTaken(false);
538        return false;
539    }
540
541    ThreadID tid = inst->threadNumber;
542    predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
543                                        nextPC, tid);
544
545    if (predict_taken) {
546        DPRINTF(Fetch, "[tid:%i]: [sn:%i]:  Branch predicted to be taken to %s.\n",
547                tid, inst->seqNum, nextPC);
548    } else {
549        DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
550                tid, inst->seqNum);
551    }
552
553    DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
554            tid, inst->seqNum, nextPC);
555    inst->setPredTarg(nextPC);
556    inst->setPredTaken(predict_taken);
557
558    ++fetchedBranches;
559
560    if (predict_taken) {
561        ++predictedBranches;
562    }
563
564    return predict_taken;
565}
566
567template <class Impl>
568bool
569DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
570{
571    Fault fault = NoFault;
572
573    assert(!cpu->switchedOut());
574
575    // @todo: not sure if these should block translation.
576    //AlphaDep
577    if (cacheBlocked) {
578        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
579                tid);
580        return false;
581    } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
582        // Hold off fetch from getting new instructions when:
583        // Cache is blocked, or
584        // while an interrupt is pending and we're not in PAL mode, or
585        // fetch is switched out.
586        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
587                tid);
588        return false;
589    }
590
591    // Align the fetch address to the start of a fetch buffer segment.
592    Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
593
594    DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
595            tid, fetchBufferBlockPC, vaddr);
596
597    // Setup the memReq to do a read of the first instruction's address.
598    // Set the appropriate read size and flags as well.
599    // Build request here.
600    RequestPtr mem_req =
601        new Request(tid, fetchBufferBlockPC, fetchBufferSize,
602                    Request::INST_FETCH, cpu->instMasterId(), pc,
603                    cpu->thread[tid]->contextId(), tid);
604
605    mem_req->taskId(cpu->taskId());
606
607    memReq[tid] = mem_req;
608
609    // Initiate translation of the icache block
610    fetchStatus[tid] = ItlbWait;
611    FetchTranslation *trans = new FetchTranslation(this);
612    cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
613                              trans, BaseTLB::Execute);
614    return true;
615}
616
617template <class Impl>
618void
619DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
620{
621    ThreadID tid = mem_req->threadId();
622    Addr fetchBufferBlockPC = mem_req->getVaddr();
623
624    assert(!cpu->switchedOut());
625
626    // Wake up CPU if it was idle
627    cpu->wakeCPU();
628
629    if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
630        mem_req->getVaddr() != memReq[tid]->getVaddr()) {
631        DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
632                tid);
633        ++fetchTlbSquashes;
634        delete mem_req;
635        return;
636    }
637
638
639    // If translation was successful, attempt to read the icache block.
640    if (fault == NoFault) {
641        // Check that we're not going off into random memory
642        // If we have, just wait around for commit to squash something and put
643        // us on the right track
644        if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
645            warn("Address %#x is outside of physical memory, stopping fetch\n",
646                    mem_req->getPaddr());
647            fetchStatus[tid] = NoGoodAddr;
648            delete mem_req;
649            memReq[tid] = NULL;
650            return;
651        }
652
653        // Build packet here.
654        PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
655        data_pkt->dataDynamicArray(new uint8_t[fetchBufferSize]);
656
657        fetchBufferPC[tid] = fetchBufferBlockPC;
658        fetchBufferValid[tid] = false;
659        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
660
661        fetchedCacheLines++;
662
663        // Access the cache.
664        if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
665            assert(retryPkt == NULL);
666            assert(retryTid == InvalidThreadID);
667            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
668
669            fetchStatus[tid] = IcacheWaitRetry;
670            retryPkt = data_pkt;
671            retryTid = tid;
672            cacheBlocked = true;
673        } else {
674            DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
675            DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
676                    "response.\n", tid);
677            lastIcacheStall[tid] = curTick();
678            fetchStatus[tid] = IcacheWaitResponse;
679        }
680    } else {
681        // Don't send an instruction to decode if we can't handle it.
682        if (!(numInst < fetchWidth) || !(fetchQueue.size() < fetchQueueSize)) {
683            assert(!finishTranslationEvent.scheduled());
684            finishTranslationEvent.setFault(fault);
685            finishTranslationEvent.setReq(mem_req);
686            cpu->schedule(finishTranslationEvent,
687                          cpu->clockEdge(Cycles(1)));
688            return;
689        }
690        DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
691                tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
692        // Translation faulted, icache request won't be sent.
693        delete mem_req;
694        memReq[tid] = NULL;
695
696        // Send the fault to commit.  This thread will not do anything
697        // until commit handles the fault.  The only other way it can
698        // wake up is if a squash comes along and changes the PC.
699        TheISA::PCState fetchPC = pc[tid];
700
701        DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
702        // We will use a nop in ordier to carry the fault.
703        DynInstPtr instruction = buildInst(tid,
704                decoder[tid]->decode(TheISA::NoopMachInst, fetchPC.instAddr()),
705                NULL, fetchPC, fetchPC, false);
706
707        instruction->setPredTarg(fetchPC);
708        instruction->fault = fault;
709        wroteToTimeBuffer = true;
710
711        DPRINTF(Activity, "Activity this cycle.\n");
712        cpu->activityThisCycle();
713
714        fetchStatus[tid] = TrapPending;
715
716        DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
717        DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
718                tid, fault->name(), pc[tid]);
719    }
720    _status = updateFetchStatus();
721}
722
723template <class Impl>
724inline void
725DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
726                             const DynInstPtr squashInst, ThreadID tid)
727{
728    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
729            tid, newPC);
730
731    pc[tid] = newPC;
732    fetchOffset[tid] = 0;
733    if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
734        macroop[tid] = squashInst->macroop;
735    else
736        macroop[tid] = NULL;
737    decoder[tid]->reset();
738
739    // Clear the icache miss if it's outstanding.
740    if (fetchStatus[tid] == IcacheWaitResponse) {
741        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
742                tid);
743        memReq[tid] = NULL;
744    } else if (fetchStatus[tid] == ItlbWait) {
745        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
746                tid);
747        memReq[tid] = NULL;
748    }
749
750    // Get rid of the retrying packet if it was from this thread.
751    if (retryTid == tid) {
752        assert(cacheBlocked);
753        if (retryPkt) {
754            delete retryPkt->req;
755            delete retryPkt;
756        }
757        retryPkt = NULL;
758        retryTid = InvalidThreadID;
759    }
760
761    fetchStatus[tid] = Squashing;
762
763    // Empty fetch queue
764    auto inst_itr = fetchQueue.begin();
765    while (inst_itr != fetchQueue.end()) {
766        if ((*inst_itr)->threadNumber == tid)
767            inst_itr = fetchQueue.erase(inst_itr);
768         else
769            ++inst_itr;
770    }
771
772    // microops are being squashed, it is not known wheather the
773    // youngest non-squashed microop was  marked delayed commit
774    // or not. Setting the flag to true ensures that the
775    // interrupts are not handled when they cannot be, though
776    // some opportunities to handle interrupts may be missed.
777    delayedCommit[tid] = true;
778
779    ++fetchSquashCycles;
780}
781
782template<class Impl>
783void
784DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
785                                     const DynInstPtr squashInst,
786                                     const InstSeqNum seq_num, ThreadID tid)
787{
788    DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);
789
790    doSquash(newPC, squashInst, tid);
791
792    // Tell the CPU to remove any instructions that are in flight between
793    // fetch and decode.
794    cpu->removeInstsUntil(seq_num, tid);
795}
796
797template<class Impl>
798bool
799DefaultFetch<Impl>::checkStall(ThreadID tid) const
800{
801    bool ret_val = false;
802
803    if (cpu->contextSwitch) {
804        DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
805        ret_val = true;
806    } else if (stalls[tid].drain) {
807        assert(cpu->isDraining());
808        DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
809        ret_val = true;
810    }
811
812    return ret_val;
813}
814
815template<class Impl>
816typename DefaultFetch<Impl>::FetchStatus
817DefaultFetch<Impl>::updateFetchStatus()
818{
819    //Check Running
820    list<ThreadID>::iterator threads = activeThreads->begin();
821    list<ThreadID>::iterator end = activeThreads->end();
822
823    while (threads != end) {
824        ThreadID tid = *threads++;
825
826        if (fetchStatus[tid] == Running ||
827            fetchStatus[tid] == Squashing ||
828            fetchStatus[tid] == IcacheAccessComplete) {
829
830            if (_status == Inactive) {
831                DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
832
833                if (fetchStatus[tid] == IcacheAccessComplete) {
834                    DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
835                            "completion\n",tid);
836                }
837
838                cpu->activateStage(O3CPU::FetchIdx);
839            }
840
841            return Active;
842        }
843    }
844
845    // Stage is switching from active to inactive, notify CPU of it.
846    if (_status == Active) {
847        DPRINTF(Activity, "Deactivating stage.\n");
848
849        cpu->deactivateStage(O3CPU::FetchIdx);
850    }
851
852    return Inactive;
853}
854
855template <class Impl>
856void
857DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
858                           const InstSeqNum seq_num, DynInstPtr squashInst,
859                           ThreadID tid)
860{
861    DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);
862
863    doSquash(newPC, squashInst, tid);
864
865    // Tell the CPU to remove any instructions that are not in the ROB.
866    cpu->removeInstsNotInROB(tid);
867}
868
869template <class Impl>
870void
871DefaultFetch<Impl>::tick()
872{
873    list<ThreadID>::iterator threads = activeThreads->begin();
874    list<ThreadID>::iterator end = activeThreads->end();
875    bool status_change = false;
876
877    wroteToTimeBuffer = false;
878
879    for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
880        issuePipelinedIfetch[i] = false;
881    }
882
883    while (threads != end) {
884        ThreadID tid = *threads++;
885
886        // Check the signals for each thread to determine the proper status
887        // for each thread.
888        bool updated_status = checkSignalsAndUpdate(tid);
889        status_change =  status_change || updated_status;
890    }
891
892    DPRINTF(Fetch, "Running stage.\n");
893
894    if (FullSystem) {
895        if (fromCommit->commitInfo[0].interruptPending) {
896            interruptPending = true;
897        }
898
899        if (fromCommit->commitInfo[0].clearInterrupt) {
900            interruptPending = false;
901        }
902    }
903
904    for (threadFetched = 0; threadFetched < numFetchingThreads;
905         threadFetched++) {
906        // Fetch each of the actively fetching threads.
907        fetch(status_change);
908    }
909
910    // Record number of instructions fetched this cycle for distribution.
911    fetchNisnDist.sample(numInst);
912
913    if (status_change) {
914        // Change the fetch stage status if there was a status change.
915        _status = updateFetchStatus();
916    }
917
918    // If there was activity this cycle, inform the CPU of it.
919    if (wroteToTimeBuffer || cpu->contextSwitch) {
920        DPRINTF(Activity, "Activity this cycle.\n");
921
922        cpu->activityThisCycle();
923    }
924
925    // Issue the next I-cache request if possible.
926    for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
927        if (issuePipelinedIfetch[i]) {
928            pipelineIcacheAccesses(i);
929        }
930    }
931
932    // Send instructions enqueued into the fetch queue to decode.
933    // Limit rate by fetchWidth.  Stall if decode is stalled.
934    unsigned instsToDecode = 0;
935    while(!fetchQueue.empty() &&
936          instsToDecode < decodeWidth &&
937          !stalls[fetchQueue.front()->threadNumber].decode) {
938        auto inst = fetchQueue.front();
939        toDecode->insts[toDecode->size++] = inst;
940        DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
941                "fetch queue. Fetch queue size: %i.\n",
942                inst->threadNumber, inst->seqNum, fetchQueue.size());
943        fetchQueue.pop_front();
944        instsToDecode++;
945    }
946
947    // Reset the number of the instruction we've fetched.
948    numInst = 0;
949}
950
951template <class Impl>
952bool
953DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
954{
955    // Update the per thread stall statuses.
956    if (fromDecode->decodeBlock[tid]) {
957        stalls[tid].decode = true;
958    }
959
960    if (fromDecode->decodeUnblock[tid]) {
961        assert(stalls[tid].decode);
962        assert(!fromDecode->decodeBlock[tid]);
963        stalls[tid].decode = false;
964    }
965
966    // Check squash signals from commit.
967    if (fromCommit->commitInfo[tid].squash) {
968
969        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
970                "from commit.\n",tid);
971        // In any case, squash.
972        squash(fromCommit->commitInfo[tid].pc,
973               fromCommit->commitInfo[tid].doneSeqNum,
974               fromCommit->commitInfo[tid].squashInst, tid);
975
976        // If it was a branch mispredict on a control instruction, update the
977        // branch predictor with that instruction, otherwise just kill the
978        // invalid state we generated in after sequence number
979        if (fromCommit->commitInfo[tid].mispredictInst &&
980            fromCommit->commitInfo[tid].mispredictInst->isControl()) {
981            branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
982                              fromCommit->commitInfo[tid].pc,
983                              fromCommit->commitInfo[tid].branchTaken,
984                              tid);
985        } else {
986            branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
987                              tid);
988        }
989
990        return true;
991    } else if (fromCommit->commitInfo[tid].doneSeqNum) {
992        // Update the branch predictor if it wasn't a squashed instruction
993        // that was broadcasted.
994        branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
995    }
996
997    // Check squash signals from decode.
998    if (fromDecode->decodeInfo[tid].squash) {
999        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1000                "from decode.\n",tid);
1001
1002        // Update the branch predictor.
1003        if (fromDecode->decodeInfo[tid].branchMispredict) {
1004            branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1005                              fromDecode->decodeInfo[tid].nextPC,
1006                              fromDecode->decodeInfo[tid].branchTaken,
1007                              tid);
1008        } else {
1009            branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1010                              tid);
1011        }
1012
1013        if (fetchStatus[tid] != Squashing) {
1014
1015            DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
1016                fromDecode->decodeInfo[tid].nextPC);
1017            // Squash unless we're already squashing
1018            squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
1019                             fromDecode->decodeInfo[tid].squashInst,
1020                             fromDecode->decodeInfo[tid].doneSeqNum,
1021                             tid);
1022
1023            return true;
1024        }
1025    }
1026
1027    if (checkStall(tid) &&
1028        fetchStatus[tid] != IcacheWaitResponse &&
1029        fetchStatus[tid] != IcacheWaitRetry &&
1030        fetchStatus[tid] != ItlbWait &&
1031        fetchStatus[tid] != QuiescePending) {
1032        DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
1033
1034        fetchStatus[tid] = Blocked;
1035
1036        return true;
1037    }
1038
1039    if (fetchStatus[tid] == Blocked ||
1040        fetchStatus[tid] == Squashing) {
1041        // Switch status to running if fetch isn't being told to block or
1042        // squash this cycle.
1043        DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
1044                tid);
1045
1046        fetchStatus[tid] = Running;
1047
1048        return true;
1049    }
1050
1051    // If we've reached this point, we have not gotten any signals that
1052    // cause fetch to change its status.  Fetch remains the same as before.
1053    return false;
1054}
1055
1056template<class Impl>
1057typename Impl::DynInstPtr
1058DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
1059                              StaticInstPtr curMacroop, TheISA::PCState thisPC,
1060                              TheISA::PCState nextPC, bool trace)
1061{
1062    // Get a sequence number.
1063    InstSeqNum seq = cpu->getAndIncrementInstSeq();
1064
1065    // Create a new DynInst from the instruction fetched.
1066    DynInstPtr instruction =
1067        new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
1068    instruction->setTid(tid);
1069
1070    instruction->setASID(tid);
1071
1072    instruction->setThreadState(cpu->thread[tid]);
1073
1074    DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
1075            "[sn:%lli].\n", tid, thisPC.instAddr(),
1076            thisPC.microPC(), seq);
1077
1078    DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
1079            instruction->staticInst->
1080            disassemble(thisPC.instAddr()));
1081
1082#if TRACING_ON
1083    if (trace) {
1084        instruction->traceData =
1085            cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
1086                    instruction->staticInst, thisPC, curMacroop);
1087    }
1088#else
1089    instruction->traceData = NULL;
1090#endif
1091
1092    // Add instruction to the CPU's list of instructions.
1093    instruction->setInstListIt(cpu->addInst(instruction));
1094
1095    // Write the instruction to the first slot in the queue
1096    // that heads to decode.
1097    assert(numInst < fetchWidth);
1098    fetchQueue.push_back(instruction);
1099    assert(fetchQueue.size() <= fetchQueueSize);
1100    DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
1101            tid, fetchQueue.size(), fetchQueueSize);
1102    //toDecode->insts[toDecode->size++] = instruction;
1103
1104    // Keep track of if we can take an interrupt at this boundary
1105    delayedCommit[tid] = instruction->isDelayedCommit();
1106
1107    return instruction;
1108}
1109
1110template<class Impl>
1111void
1112DefaultFetch<Impl>::fetch(bool &status_change)
1113{
1114    //////////////////////////////////////////
1115    // Start actual fetch
1116    //////////////////////////////////////////
1117    ThreadID tid = getFetchingThread(fetchPolicy);
1118
1119    assert(!cpu->switchedOut());
1120
1121    if (tid == InvalidThreadID) {
1122        // Breaks looping condition in tick()
1123        threadFetched = numFetchingThreads;
1124
1125        if (numThreads == 1) {  // @todo Per-thread stats
1126            profileStall(0);
1127        }
1128
1129        return;
1130    }
1131
1132    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1133
1134    // The current PC.
1135    TheISA::PCState thisPC = pc[tid];
1136
1137    Addr pcOffset = fetchOffset[tid];
1138    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1139
1140    bool inRom = isRomMicroPC(thisPC.microPC());
1141
1142    // If returning from the delay of a cache miss, then update the status
1143    // to running, otherwise do the cache access.  Possibly move this up
1144    // to tick() function.
1145    if (fetchStatus[tid] == IcacheAccessComplete) {
1146        DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);
1147
1148        fetchStatus[tid] = Running;
1149        status_change = true;
1150    } else if (fetchStatus[tid] == Running) {
1151        // Align the fetch PC so its at the start of a fetch buffer segment.
1152        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1153
1154        // If buffer is no longer valid or fetchAddr has moved to point
1155        // to the next cache block, AND we have no remaining ucode
1156        // from a macro-op, then start fetch from icache.
1157        if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
1158            && !inRom && !macroop[tid]) {
1159            DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
1160                    "instruction, starting at PC %s.\n", tid, thisPC);
1161
1162            fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1163
1164            if (fetchStatus[tid] == IcacheWaitResponse)
1165                ++icacheStallCycles;
1166            else if (fetchStatus[tid] == ItlbWait)
1167                ++fetchTlbCycles;
1168            else
1169                ++fetchMiscStallCycles;
1170            return;
1171        } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
1172            // Stall CPU if an interrupt is posted and we're not issuing
1173            // an delayed commit micro-op currently (delayed commit instructions
1174            // are not interruptable by interrupts, only faults)
1175            ++fetchMiscStallCycles;
1176            DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);
1177            return;
1178        }
1179    } else {
1180        if (fetchStatus[tid] == Idle) {
1181            ++fetchIdleCycles;
1182            DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
1183        }
1184
1185        // Status is Idle, so fetch should do nothing.
1186        return;
1187    }
1188
1189    ++fetchCycles;
1190
1191    TheISA::PCState nextPC = thisPC;
1192
1193    StaticInstPtr staticInst = NULL;
1194    StaticInstPtr curMacroop = macroop[tid];
1195
1196    // If the read of the first instruction was successful, then grab the
1197    // instructions from the rest of the cache line and put them into the
1198    // queue heading to decode.
1199
1200    DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
1201            "decode.\n", tid);
1202
1203    // Need to keep track of whether or not a predicted branch
1204    // ended this fetch block.
1205    bool predictedBranch = false;
1206
1207    TheISA::MachInst *cacheInsts =
1208        reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
1209
1210    const unsigned numInsts = fetchBufferSize / instSize;
1211    unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1212
1213    // Loop through instruction memory from the cache.
1214    // Keep issuing while fetchWidth is available and branch is not
1215    // predicted taken
1216    while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize
1217           && !predictedBranch) {
1218        // We need to process more memory if we aren't going to get a
1219        // StaticInst from the rom, the current macroop, or what's already
1220        // in the decoder.
1221        bool needMem = !inRom && !curMacroop &&
1222            !decoder[tid]->instReady();
1223        fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1224        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1225
1226        if (needMem) {
1227            // If buffer is no longer valid or fetchAddr has moved to point
1228            // to the next cache block then start fetch from icache.
1229            if (!fetchBufferValid[tid] ||
1230                fetchBufferBlockPC != fetchBufferPC[tid])
1231                break;
1232
1233            if (blkOffset >= numInsts) {
1234                // We need to process more memory, but we've run out of the
1235                // current block.
1236                break;
1237            }
1238
1239            if (ISA_HAS_DELAY_SLOT && pcOffset == 0) {
1240                // Walk past any annulled delay slot instructions.
1241                Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask;
1242                while (fetchAddr != pcAddr && blkOffset < numInsts) {
1243                    blkOffset++;
1244                    fetchAddr += instSize;
1245                }
1246                if (blkOffset >= numInsts)
1247                    break;
1248            }
1249
1250            MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
1251            decoder[tid]->moreBytes(thisPC, fetchAddr, inst);
1252
1253            if (decoder[tid]->needMoreBytes()) {
1254                blkOffset++;
1255                fetchAddr += instSize;
1256                pcOffset += instSize;
1257            }
1258        }
1259
1260        // Extract as many instructions and/or microops as we can from
1261        // the memory we've processed so far.
1262        do {
1263            if (!(curMacroop || inRom)) {
1264                if (decoder[tid]->instReady()) {
1265                    staticInst = decoder[tid]->decode(thisPC);
1266
1267                    // Increment stat of fetched instructions.
1268                    ++fetchedInsts;
1269
1270                    if (staticInst->isMacroop()) {
1271                        curMacroop = staticInst;
1272                    } else {
1273                        pcOffset = 0;
1274                    }
1275                } else {
1276                    // We need more bytes for this instruction so blkOffset and
1277                    // pcOffset will be updated
1278                    break;
1279                }
1280            }
1281            // Whether we're moving to a new macroop because we're at the
1282            // end of the current one, or the branch predictor incorrectly
1283            // thinks we are...
1284            bool newMacro = false;
1285            if (curMacroop || inRom) {
1286                if (inRom) {
1287                    staticInst = cpu->microcodeRom.fetchMicroop(
1288                            thisPC.microPC(), curMacroop);
1289                } else {
1290                    staticInst = curMacroop->fetchMicroop(thisPC.microPC());
1291                }
1292                newMacro |= staticInst->isLastMicroop();
1293            }
1294
1295            DynInstPtr instruction =
1296                buildInst(tid, staticInst, curMacroop,
1297                          thisPC, nextPC, true);
1298
1299            ppFetch->notify(instruction);
1300            numInst++;
1301
1302#if TRACING_ON
1303            if (DTRACE(O3PipeView)) {
1304                instruction->fetchTick = curTick();
1305            }
1306#endif
1307
1308            nextPC = thisPC;
1309
1310            // If we're branching after this instruction, quit fetching
1311            // from the same block.
1312            predictedBranch |= thisPC.branching();
1313            predictedBranch |=
1314                lookupAndUpdateNextPC(instruction, nextPC);
1315            if (predictedBranch) {
1316                DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
1317            }
1318
1319            newMacro |= thisPC.instAddr() != nextPC.instAddr();
1320
1321            // Move to the next instruction, unless we have a branch.
1322            thisPC = nextPC;
1323            inRom = isRomMicroPC(thisPC.microPC());
1324
1325            if (newMacro) {
1326                fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
1327                blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1328                pcOffset = 0;
1329                curMacroop = NULL;
1330            }
1331
1332            if (instruction->isQuiesce()) {
1333                DPRINTF(Fetch,
1334                        "Quiesce instruction encountered, halting fetch!");
1335                fetchStatus[tid] = QuiescePending;
1336                status_change = true;
1337                break;
1338            }
1339        } while ((curMacroop || decoder[tid]->instReady()) &&
1340                 numInst < fetchWidth && fetchQueue.size() < fetchQueueSize);
1341    }
1342
1343    if (predictedBranch) {
1344        DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
1345                "instruction encountered.\n", tid);
1346    } else if (numInst >= fetchWidth) {
1347        DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
1348                "for this cycle.\n", tid);
1349    } else if (blkOffset >= fetchBufferSize) {
1350        DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the"
1351                "fetch buffer.\n", tid);
1352    }
1353
1354    macroop[tid] = curMacroop;
1355    fetchOffset[tid] = pcOffset;
1356
1357    if (numInst > 0) {
1358        wroteToTimeBuffer = true;
1359    }
1360
1361    pc[tid] = thisPC;
1362
1363    // pipeline a fetch if we're crossing a fetch buffer boundary and not in
1364    // a state that would preclude fetching
1365    fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1366    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1367    issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
1368        fetchStatus[tid] != IcacheWaitResponse &&
1369        fetchStatus[tid] != ItlbWait &&
1370        fetchStatus[tid] != IcacheWaitRetry &&
1371        fetchStatus[tid] != QuiescePending &&
1372        !curMacroop;
1373}
1374
1375template<class Impl>
1376void
1377DefaultFetch<Impl>::recvRetry()
1378{
1379    if (retryPkt != NULL) {
1380        assert(cacheBlocked);
1381        assert(retryTid != InvalidThreadID);
1382        assert(fetchStatus[retryTid] == IcacheWaitRetry);
1383
1384        if (cpu->getInstPort().sendTimingReq(retryPkt)) {
1385            fetchStatus[retryTid] = IcacheWaitResponse;
1386            retryPkt = NULL;
1387            retryTid = InvalidThreadID;
1388            cacheBlocked = false;
1389        }
1390    } else {
1391        assert(retryTid == InvalidThreadID);
1392        // Access has been squashed since it was sent out.  Just clear
1393        // the cache being blocked.
1394        cacheBlocked = false;
1395    }
1396}
1397
1398///////////////////////////////////////
1399//                                   //
1400//  SMT FETCH POLICY MAINTAINED HERE //
1401//                                   //
1402///////////////////////////////////////
1403template<class Impl>
1404ThreadID
1405DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
1406{
1407    if (numThreads > 1) {
1408        switch (fetch_priority) {
1409
1410          case SingleThread:
1411            return 0;
1412
1413          case RoundRobin:
1414            return roundRobin();
1415
1416          case IQ:
1417            return iqCount();
1418
1419          case LSQ:
1420            return lsqCount();
1421
1422          case Branch:
1423            return branchCount();
1424
1425          default:
1426            return InvalidThreadID;
1427        }
1428    } else {
1429        list<ThreadID>::iterator thread = activeThreads->begin();
1430        if (thread == activeThreads->end()) {
1431            return InvalidThreadID;
1432        }
1433
1434        ThreadID tid = *thread;
1435
1436        if (fetchStatus[tid] == Running ||
1437            fetchStatus[tid] == IcacheAccessComplete ||
1438            fetchStatus[tid] == Idle) {
1439            return tid;
1440        } else {
1441            return InvalidThreadID;
1442        }
1443    }
1444}
1445
1446
1447template<class Impl>
1448ThreadID
1449DefaultFetch<Impl>::roundRobin()
1450{
1451    list<ThreadID>::iterator pri_iter = priorityList.begin();
1452    list<ThreadID>::iterator end      = priorityList.end();
1453
1454    ThreadID high_pri;
1455
1456    while (pri_iter != end) {
1457        high_pri = *pri_iter;
1458
1459        assert(high_pri <= numThreads);
1460
1461        if (fetchStatus[high_pri] == Running ||
1462            fetchStatus[high_pri] == IcacheAccessComplete ||
1463            fetchStatus[high_pri] == Idle) {
1464
1465            priorityList.erase(pri_iter);
1466            priorityList.push_back(high_pri);
1467
1468            return high_pri;
1469        }
1470
1471        pri_iter++;
1472    }
1473
1474    return InvalidThreadID;
1475}
1476
1477template<class Impl>
1478ThreadID
1479DefaultFetch<Impl>::iqCount()
1480{
1481    std::priority_queue<unsigned> PQ;
1482    std::map<unsigned, ThreadID> threadMap;
1483
1484    list<ThreadID>::iterator threads = activeThreads->begin();
1485    list<ThreadID>::iterator end = activeThreads->end();
1486
1487    while (threads != end) {
1488        ThreadID tid = *threads++;
1489        unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1490
1491        PQ.push(iqCount);
1492        threadMap[iqCount] = tid;
1493    }
1494
1495    while (!PQ.empty()) {
1496        ThreadID high_pri = threadMap[PQ.top()];
1497
1498        if (fetchStatus[high_pri] == Running ||
1499            fetchStatus[high_pri] == IcacheAccessComplete ||
1500            fetchStatus[high_pri] == Idle)
1501            return high_pri;
1502        else
1503            PQ.pop();
1504
1505    }
1506
1507    return InvalidThreadID;
1508}
1509
1510template<class Impl>
1511ThreadID
1512DefaultFetch<Impl>::lsqCount()
1513{
1514    std::priority_queue<unsigned> PQ;
1515    std::map<unsigned, ThreadID> threadMap;
1516
1517    list<ThreadID>::iterator threads = activeThreads->begin();
1518    list<ThreadID>::iterator end = activeThreads->end();
1519
1520    while (threads != end) {
1521        ThreadID tid = *threads++;
1522        unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1523
1524        PQ.push(ldstqCount);
1525        threadMap[ldstqCount] = tid;
1526    }
1527
1528    while (!PQ.empty()) {
1529        ThreadID high_pri = threadMap[PQ.top()];
1530
1531        if (fetchStatus[high_pri] == Running ||
1532            fetchStatus[high_pri] == IcacheAccessComplete ||
1533            fetchStatus[high_pri] == Idle)
1534            return high_pri;
1535        else
1536            PQ.pop();
1537    }
1538
1539    return InvalidThreadID;
1540}
1541
1542template<class Impl>
1543ThreadID
1544DefaultFetch<Impl>::branchCount()
1545{
1546#if 0
1547    list<ThreadID>::iterator thread = activeThreads->begin();
1548    assert(thread != activeThreads->end());
1549    ThreadID tid = *thread;
1550#endif
1551
1552    panic("Branch Count Fetch policy unimplemented\n");
1553    return InvalidThreadID;
1554}
1555
1556template<class Impl>
1557void
1558DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
1559{
1560    if (!issuePipelinedIfetch[tid]) {
1561        return;
1562    }
1563
1564    // The next PC to access.
1565    TheISA::PCState thisPC = pc[tid];
1566
1567    if (isRomMicroPC(thisPC.microPC())) {
1568        return;
1569    }
1570
1571    Addr pcOffset = fetchOffset[tid];
1572    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1573
1574    // Align the fetch PC so its at the start of a fetch buffer segment.
1575    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1576
1577    // Unless buffer already got the block, fetch it from icache.
1578    if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1579        DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
1580                "starting at PC %s.\n", tid, thisPC);
1581
1582        fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1583    }
1584}
1585
1586template<class Impl>
1587void
1588DefaultFetch<Impl>::profileStall(ThreadID tid) {
1589    DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1590
1591    // @todo Per-thread stats
1592
1593    if (stalls[tid].drain) {
1594        ++fetchPendingDrainCycles;
1595        DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1596    } else if (activeThreads->empty()) {
1597        ++fetchNoActiveThreadStallCycles;
1598        DPRINTF(Fetch, "Fetch has no active thread!\n");
1599    } else if (fetchStatus[tid] == Blocked) {
1600        ++fetchBlockedCycles;
1601        DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
1602    } else if (fetchStatus[tid] == Squashing) {
1603        ++fetchSquashCycles;
1604        DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
1605    } else if (fetchStatus[tid] == IcacheWaitResponse) {
1606        ++icacheStallCycles;
1607        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
1608                tid);
1609    } else if (fetchStatus[tid] == ItlbWait) {
1610        ++fetchTlbCycles;
1611        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
1612                "finish!\n", tid);
1613    } else if (fetchStatus[tid] == TrapPending) {
1614        ++fetchPendingTrapStallCycles;
1615        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
1616                tid);
1617    } else if (fetchStatus[tid] == QuiescePending) {
1618        ++fetchPendingQuiesceStallCycles;
1619        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
1620                "instruction!\n", tid);
1621    } else if (fetchStatus[tid] == IcacheWaitRetry) {
1622        ++fetchIcacheWaitRetryStallCycles;
1623        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
1624                tid);
1625    } else if (fetchStatus[tid] == NoGoodAddr) {
1626            DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
1627                    tid);
1628    } else {
1629        DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
1630             tid, fetchStatus[tid]);
1631    }
1632}
1633
1634#endif//__CPU_O3_FETCH_IMPL_HH__
1635