1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 * Korey Sewell
30 */
31
32#include <algorithm>
33#include <cstring>
34
32#include "config/use_checker.hh"
33
34#include "arch/isa_traits.hh"
35#include "arch/utility.hh"
36#include "cpu/checker/cpu.hh"
37#include "cpu/exetrace.hh"
38#include "cpu/o3/fetch.hh"
39#include "mem/packet.hh"
40#include "mem/request.hh"
41#include "sim/byteswap.hh"
42#include "sim/host.hh"
43#include "sim/core.hh"
44
45#if FULL_SYSTEM
46#include "arch/tlb.hh"
47#include "arch/vtophys.hh"
48#include "sim/system.hh"
49#endif // FULL_SYSTEM
50
51#include <algorithm>
52
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::setPeer(Port *port)
{
    // Record the peer in the base Port, then notify the fetch stage so
    // it can (re)query the peer's block size and size its line buffers.
    Port::setPeer(port);

    fetch->setIcache();
}
61
template<class Impl>
Tick
DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
{
    // Fetch only issues timing-mode accesses, so an atomic callback is
    // a protocol violation; panic() does not return, the return value
    // below only satisfies the signature.
    panic("DefaultFetch doesn't expect recvAtomic callback!");
    return curTick;
}
69
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
{
    // Functional accesses are deliberately ignored: the cached fetch
    // line is not kept coherent with them.  Only a debug trace is
    // emitted.
    DPRINTF(Fetch, "DefaultFetch doesn't update its state from a "
            "functional call.");
}
77
78template<class Impl>
79void
80DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
81{
82 if (status == RangeChange) {
83 if (!snoopRangeSent) {
84 snoopRangeSent = true;
85 sendStatusChange(Port::RangeChange);
86 }
87 return;
88 }
89
90 panic("DefaultFetch doesn't expect recvStatusChange callback!");
91}
92
93template<class Impl>
94bool
95DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
96{
97 DPRINTF(Fetch, "Received timing\n");
98 if (pkt->isResponse()) {
99 fetch->processCacheCompletion(pkt);
100 }
101 //else Snooped a coherence request, just return
102 return true;
103}
104
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvRetry()
{
    // The cache can accept requests again; let fetch re-send the
    // packet it had stashed when the port refused it.
    fetch->recvRetry();
}
111
// Construct the fetch stage: latch the inter-stage delays and fetch
// width from the parameter object, decode the SMT fetch policy string,
// and create the instruction-cache port.  The stage starts Inactive.
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, Params *params)
    : cpu(_cpu),
      branchPred(params),
      predecoder(NULL),
      decodeToFetchDelay(params->decodeToFetchDelay),
      renameToFetchDelay(params->renameToFetchDelay),
      iewToFetchDelay(params->iewToFetchDelay),
      commitToFetchDelay(params->commitToFetchDelay),
      fetchWidth(params->fetchWidth),
      cacheBlocked(false),
      retryPkt(NULL),
      retryTid(-1),  // -1 means no thread is currently waiting on a retry
      numThreads(params->numberOfThreads),
      numFetchingThreads(params->smtNumFetchingThreads),
      interruptPending(false),
      drainPending(false),
      switchedOut(false)
{
    if (numThreads > Impl::MaxThreads)
        fatal("numThreads is not a valid value\n");

    // Set fetch stage's status to inactive.
    _status = Inactive;

    std::string policy = params->smtFetchPolicy;

    // Convert string to lowercase so the policy name is matched
    // case-insensitively.
    std::transform(policy.begin(), policy.end(), policy.begin(),
                   (int(*)(int)) tolower);

    // Figure out fetch policy
    if (policy == "singlethread") {
        fetchPolicy = SingleThread;
        if (numThreads > 1)
            panic("Invalid Fetch Policy for a SMT workload.");
    } else if (policy == "roundrobin") {
        fetchPolicy = RoundRobin;
        DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
    } else if (policy == "branch") {
        fetchPolicy = Branch;
        DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
    } else if (policy == "iqcount") {
        fetchPolicy = IQ;
        DPRINTF(Fetch, "Fetch policy set to IQ count\n");
    } else if (policy == "lsqcount") {
        fetchPolicy = LSQ;
        DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
    } else {
        fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
              " RoundRobin,LSQcount,IQcount}\n");
    }

    // Get the size of an instruction.
    instSize = sizeof(TheISA::MachInst);

    // Name is finally available, so create the port.
    icachePort = new IcachePort(this);

    icachePort->snoopRangeSent = false;

#if USE_CHECKER
    // Give the checker CPU access to the same icache port, if present.
    if (cpu->checker) {
        cpu->checker->setIcachePort(icachePort);
    }
#endif
}
179
// Return the hierarchical name of this stage ("<cpu>.fetch"), used for
// stat registration and trace output.
template <class Impl>
std::string
DefaultFetch<Impl>::name() const
{
    return cpu->name() + ".fetch";
}
186
// Register all fetch-stage statistics with the stats system.  Scalars
// use .prereq(self) so they are only printed when nonzero; the formula
// stats (idleRate, branchRate, fetchRate) are derived from the scalar
// counters and cpu->numCycles.
template <class Impl>
void
DefaultFetch<Impl>::regStats()
{
    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".Insts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);

    fetchedBranches
        .name(name() + ".Branches")
        .desc("Number of branches that fetch encountered")
        .prereq(fetchedBranches);

    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);

    fetchCycles
        .name(name() + ".Cycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);

    fetchSquashCycles
        .name(name() + ".SquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);

    fetchIdleCycles
        .name(name() + ".IdleCycles")
        .desc("Number of cycles fetch was idle")
        .prereq(fetchIdleCycles);

    fetchBlockedCycles
        .name(name() + ".BlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);

    fetchedCacheLines
        .name(name() + ".CacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetchMiscStallCycles
        .name(name() + ".MiscStallCycles")
        .desc("Number of cycles fetch has spent waiting on interrupts, or "
              "bad addresses, or out of MSHRs")
        .prereq(fetchMiscStallCycles);

    fetchIcacheSquashes
        .name(name() + ".IcacheSquashes")
        .desc("Number of outstanding Icache misses that were squashed")
        .prereq(fetchIcacheSquashes);

    // Distribution of instructions fetched per cycle, bucketed 0..fetchWidth.
    fetchNisnDist
        .init(/* base value */ 0,
              /* last value */ fetchWidth,
              /* bucket size */ 1)
        .name(name() + ".rateDist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf);

    idleRate
        .name(name() + ".idleRate")
        .desc("Percent of cycles fetch was idle")
        .prereq(idleRate);
    idleRate = fetchIdleCycles * 100 / cpu->numCycles;

    branchRate
        .name(name() + ".branchRate")
        .desc("Number of branch fetches per cycle")
        .flags(Stats::total);
    branchRate = fetchedBranches / cpu->numCycles;

    fetchRate
        .name(name() + ".rate")
        .desc("Number of inst fetches per cycle")
        .flags(Stats::total);
    fetchRate = fetchedInsts / cpu->numCycles;

    // The branch predictor registers its own stats.
    branchPred.regStats();
}
276
// Hook fetch up to the backwards time buffer and create read wires at
// the offsets matching each stage's communication delay.
template<class Impl>
void
DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
    timeBuffer = time_buffer;

    // Create wires to get information from proper places in time buffer.
    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
    fromRename = timeBuffer->getWire(-renameToFetchDelay);
    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}
289
// Store the pointer to the CPU's list of currently active thread ids.
// The list is owned by the CPU; fetch only reads it.
template<class Impl>
void
DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
    activeThreads = at_ptr;
}
296
// Hook fetch up to the fetch->decode queue and grab a wire for writing
// this cycle's output (offset 0 = current cycle).
template<class Impl>
void
DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    fetchQueue = fq_ptr;

    // Create wire to write information to proper place in fetch queue.
    toDecode = fetchQueue->getWire(0);
}
306
307template<class Impl>
308void
309DefaultFetch<Impl>::initStage()
310{
311 // Setup PC and nextPC with initial state.
312 for (int tid = 0; tid < numThreads; tid++) {
313 PC[tid] = cpu->readPC(tid);
314 nextPC[tid] = cpu->readNextPC(tid);
315 nextNPC[tid] = cpu->readNextNPC(tid);
316 }
317
318 for (int tid=0; tid < numThreads; tid++) {
319
320 fetchStatus[tid] = Running;
321
322 priorityList.push_back(tid);
323
324 memReq[tid] = NULL;
325
326 stalls[tid].decode = false;
327 stalls[tid].rename = false;
328 stalls[tid].iew = false;
329 stalls[tid].commit = false;
330 }
331
332 // Schedule fetch to get the correct PC from the CPU
333 // scheduleFetchStartupEvent(1);
334
335 // Fetch needs to start fetching instructions at the very beginning,
336 // so it must start up in active state.
337 switchToActive();
338}
339
340template<class Impl>
341void
342DefaultFetch<Impl>::setIcache()
343{
344 // Size of cache block.
345 cacheBlkSize = icachePort->peerBlockSize();
346
347 // Create mask to get rid of offset bits.
348 cacheBlkMask = (cacheBlkSize - 1);
349
350 for (int tid=0; tid < numThreads; tid++) {
351 // Create space to store a cache line.
352 cacheData[tid] = new uint8_t[cacheBlkSize];
353 cacheDataPC[tid] = 0;
354 cacheDataValid[tid] = false;
355 }
356}
357
358template<class Impl>
359void
360DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
361{
362 unsigned tid = pkt->req->getThreadNum();
363
364 DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
365
366 // Only change the status if it's still waiting on the icache access
367 // to return.
368 if (fetchStatus[tid] != IcacheWaitResponse ||
369 pkt->req != memReq[tid] ||
370 isSwitchedOut()) {
371 ++fetchIcacheSquashes;
372 delete pkt->req;
373 delete pkt;
374 return;
375 }
376
378 memcpy(cacheData[tid], pkt->getPtr(), cacheBlkSize);
377 memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
378 cacheDataValid[tid] = true;
379
380 if (!drainPending) {
381 // Wake up the CPU (if it went to sleep and was waiting on
382 // this completion event).
383 cpu->wakeCPU();
384
385 DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
386 tid);
387
388 switchToActive();
389 }
390
391 // Only switch to IcacheAccessComplete if we're not stalled as well.
392 if (checkStall(tid)) {
393 fetchStatus[tid] = Blocked;
394 } else {
395 fetchStatus[tid] = IcacheAccessComplete;
396 }
397
398 // Reset the mem req to NULL.
399 delete pkt->req;
400 delete pkt;
401 memReq[tid] = NULL;
402}
403
// Begin draining: fetch holds no inter-cycle state that must settle, so
// it signals the CPU immediately and simply stops initiating fetches
// (via the drainPending flag checked in fetch()).
template <class Impl>
bool
DefaultFetch<Impl>::drain()
{
    // Fetch is ready to drain at any time.
    cpu->signalDrained();
    drainPending = true;
    return true;
}
413
// Resume after a drain: clearing the flag re-enables fetching.
template <class Impl>
void
DefaultFetch<Impl>::resume()
{
    drainPending = false;
}
420
// Switch this fetch stage out of use (e.g. for a CPU model swap).
template <class Impl>
void
DefaultFetch<Impl>::switchOut()
{
    switchedOut = true;
    // Branch predictor needs to have its state cleared.
    branchPred.switchOut();
}
429
// Take over execution from another CPU model: reset all per-thread
// fetch state from the CPU's architectural PCs and mark the stage
// inactive until tick() reactivates it.
template <class Impl>
void
DefaultFetch<Impl>::takeOverFrom()
{
    // Reset all state
    for (int i = 0; i < Impl::MaxThreads; ++i) {
        stalls[i].decode = 0;
        stalls[i].rename = 0;
        stalls[i].iew = 0;
        stalls[i].commit = 0;
        PC[i] = cpu->readPC(i);
        nextPC[i] = cpu->readNextPC(i);
#if ISA_HAS_DELAY_SLOT
        // Delay-slot ISAs carry an architectural next-next PC.
        nextNPC[i] = cpu->readNextNPC(i);
#else
        // Otherwise next-next PC is simply sequential.
        nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
#endif
        fetchStatus[i] = Running;
    }
    numInst = 0;
    wroteToTimeBuffer = false;
    _status = Inactive;
    switchedOut = false;
    interruptPending = false;
    branchPred.takeOverFrom();
}
456
// Wake fetch after a quiesce instruction completes.  Only thread 0 is
// woken; see the TODO below for the SMT limitation.
template <class Impl>
void
DefaultFetch<Impl>::wakeFromQuiesce()
{
    DPRINTF(Fetch, "Waking up from quiesce\n");
    // Hopefully this is safe
    // @todo: Allow other threads to wake from quiesce.
    fetchStatus[0] = Running;
}
466
// Transition the stage to Active, telling the CPU's activity tracker
// only on an actual Inactive -> Active edge.
template <class Impl>
inline void
DefaultFetch<Impl>::switchToActive()
{
    if (_status == Inactive) {
        DPRINTF(Activity, "Activating stage.\n");

        cpu->activateStage(O3CPU::FetchIdx);

        _status = Active;
    }
}
479
// Transition the stage to Inactive, telling the CPU's activity tracker
// only on an actual Active -> Inactive edge.
template <class Impl>
inline void
DefaultFetch<Impl>::switchToInactive()
{
    if (_status == Active) {
        DPRINTF(Activity, "Deactivating stage.\n");

        cpu->deactivateStage(O3CPU::FetchIdx);

        _status = Inactive;
    }
}
492
// Run branch prediction for one fetched instruction and advance
// next_PC/next_NPC accordingly.  Returns true iff the branch was
// predicted taken.  On entry next_PC is the instruction's own PC (see
// the misnomer note below); on exit it is the predicted next PC.
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
                                          Addr &next_NPC)
{
    // Do branch prediction check here.
    // A bit of a misnomer...next_PC is actually the current PC until
    // this function updates it.
    bool predict_taken;

    if (!inst->isControl()) {
        // Non-control instructions fall straight through: the predicted
        // target is simply the sequential PC pair.
        next_PC = next_NPC;
        next_NPC = next_NPC + instSize;
        inst->setPredTarg(next_PC, next_NPC);
        inst->setPredTaken(false);
        return false;
    }

    int tid = inst->threadNumber;
    // pred_PC is updated in place by the predictor when taken.
    Addr pred_PC = next_PC;
    predict_taken = branchPred.predict(inst, pred_PC, tid);

/*    if (predict_taken) {
        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
                tid, pred_PC);
    } else {
        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
    }*/

#if ISA_HAS_DELAY_SLOT
    // With a delay slot the instruction after the branch always
    // executes; the prediction lands in next_NPC instead.
    next_PC = next_NPC;
    if (predict_taken)
        next_NPC = pred_PC;
    else
        next_NPC += instSize;
#else
    if (predict_taken)
        next_PC = pred_PC;
    else
        next_PC += instSize;
    next_NPC = next_PC + instSize;
#endif
/*    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
            tid, next_PC, next_NPC);*/
    inst->setPredTarg(next_PC, next_NPC);
    inst->setPredTaken(predict_taken);

    ++fetchedBranches;

    if (predict_taken) {
        ++predictedBranches;
    }

    return predict_taken;
}
548
// Initiate a timing fetch of the cache line containing fetch_PC for
// thread tid.  Returns false when no access could even be attempted
// (cache blocked, stage switched out, interrupt pending, or out of
// MSHRs); returns true otherwise, with any translation fault reported
// through ret_fault.  A hit in the already-buffered line returns true
// immediately without a new access.
template <class Impl>
bool
DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid)
{
    Fault fault = NoFault;

    //AlphaDep
    if (cacheBlocked) {
        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
                tid);
        return false;
    } else if (isSwitchedOut()) {
        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n",
                tid);
        return false;
    } else if (interruptPending && !(fetch_PC & 0x3)) {
        // Hold off fetch from getting new instructions when:
        // Cache is blocked, or
        // while an interrupt is pending and we're not in PAL mode, or
        // fetch is switched out.
        // NOTE(review): the (fetch_PC & 0x3) test looks like the Alpha
        // PAL-mode check flagged by the AlphaDep comment above — confirm
        // before reusing for another ISA.
        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
                tid);
        return false;
    }

    // Align the fetch PC so it's at the start of a cache block.
    Addr block_PC = icacheBlockAlignPC(fetch_PC);

    // If we've already got the block, no need to try to fetch it again.
    if (cacheDataValid[tid] && block_PC == cacheDataPC[tid]) {
        return true;
    }

    // Setup the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    // Build request here.
    RequestPtr mem_req = new Request(tid, block_PC, cacheBlkSize, 0,
                                     fetch_PC, cpu->readCpuId(), tid);

    memReq[tid] = mem_req;

    // Translate the instruction request.
    fault = cpu->translateInstReq(mem_req, cpu->thread[tid]);

    // In the case of faults, the fetch stage may need to stall and wait
    // for the ITB miss to be handled.

    // If translation was successful, attempt to read the first
    // instruction.
    if (fault == NoFault) {
#if 0
        if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
            memReq[tid]->isUncacheable()) {
            DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
                    "misspeculating path)!",
                    memReq[tid]->paddr);
            ret_fault = TheISA::genMachineCheckFault();
            return false;
        }
#endif

        // Build packet here.  The packet owns a freshly allocated data
        // array; dataDynamicArray means the packet deletes it.
        PacketPtr data_pkt = new Packet(mem_req,
                                        MemCmd::ReadReq, Packet::Broadcast);
        data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);

        // Invalidate the buffered line now; it becomes valid again only
        // when the response arrives in processCacheCompletion().
        cacheDataPC[tid] = block_PC;
        cacheDataValid[tid] = false;

        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");

        fetchedCacheLines++;

        // Now do the timing access to see whether or not the instruction
        // exists within the cache.
        if (!icachePort->sendTiming(data_pkt)) {
            if (data_pkt->result == Packet::BadAddress) {
                fault = TheISA::genMachineCheckFault();
                delete mem_req;
                memReq[tid] = NULL;
                warn("Bad address!\n");
            }
            // Port refused the packet: stash it and wait for recvRetry().
            assert(retryPkt == NULL);
            assert(retryTid == -1);
            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
            fetchStatus[tid] = IcacheWaitRetry;
            retryPkt = data_pkt;
            retryTid = tid;
            cacheBlocked = true;
            return false;
        }

        DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid);

        lastIcacheStall[tid] = curTick;

        DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
                "response.\n", tid);

        fetchStatus[tid] = IcacheWaitResponse;
    } else {
        // Translation faulted; drop the request and report the fault.
        delete mem_req;
        memReq[tid] = NULL;
    }

    ret_fault = fault;
    return true;
}
657
// Common squash work: reset the thread's PCs to the given values, drop
// any in-flight icache request or stalled retry packet for this thread,
// and put the thread into Squashing state.
template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const Addr &new_PC,
                             const Addr &new_NPC, unsigned tid)
{
    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
            tid, new_PC, new_NPC);

    PC[tid] = new_PC;
    nextPC[tid] = new_NPC;
    nextNPC[tid] = new_NPC + instSize;

    // Clear the icache miss if it's outstanding.  The request itself is
    // not deleted here; the late response is recognized as stale and
    // freed in processCacheCompletion().
    if (fetchStatus[tid] == IcacheWaitResponse) {
        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
                tid);
        memReq[tid] = NULL;
    }

    // Get rid of the retrying packet if it was from this thread.
    if (retryTid == tid) {
        assert(cacheBlocked);
        if (retryPkt) {
            delete retryPkt->req;
            delete retryPkt;
        }
        retryPkt = NULL;
        retryTid = -1;
    }

    fetchStatus[tid] = Squashing;

    ++fetchSquashCycles;
}
692
// Squash triggered by decode (e.g. a decode-detected mispredict):
// reset fetch state and drop the younger in-flight instructions between
// fetch and decode.
template<class Impl>
void
DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
                                     const InstSeqNum &seq_num,
                                     unsigned tid)
{
    DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);

    doSquash(new_PC, new_NPC, tid);

    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
    cpu->removeInstsUntil(seq_num, tid);
}
707
708template<class Impl>
709bool
710DefaultFetch<Impl>::checkStall(unsigned tid) const
711{
712 bool ret_val = false;
713
714 if (cpu->contextSwitch) {
715 DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
716 ret_val = true;
717 } else if (stalls[tid].decode) {
718 DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
719 ret_val = true;
720 } else if (stalls[tid].rename) {
721 DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
722 ret_val = true;
723 } else if (stalls[tid].iew) {
724 DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
725 ret_val = true;
726 } else if (stalls[tid].commit) {
727 DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
728 ret_val = true;
729 }
730
731 return ret_val;
732}
733
734template<class Impl>
735typename DefaultFetch<Impl>::FetchStatus
736DefaultFetch<Impl>::updateFetchStatus()
737{
738 //Check Running
739 std::list<unsigned>::iterator threads = activeThreads->begin();
740 std::list<unsigned>::iterator end = activeThreads->end();
741
742 while (threads != end) {
743 unsigned tid = *threads++;
744
745 if (fetchStatus[tid] == Running ||
746 fetchStatus[tid] == Squashing ||
747 fetchStatus[tid] == IcacheAccessComplete) {
748
749 if (_status == Inactive) {
750 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
751
752 if (fetchStatus[tid] == IcacheAccessComplete) {
753 DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
754 "completion\n",tid);
755 }
756
757 cpu->activateStage(O3CPU::FetchIdx);
758 }
759
760 return Active;
761 }
762 }
763
764 // Stage is switching from active to inactive, notify CPU of it.
765 if (_status == Active) {
766 DPRINTF(Activity, "Deactivating stage.\n");
767
768 cpu->deactivateStage(O3CPU::FetchIdx);
769 }
770
771 return Inactive;
772}
773
774template <class Impl>
775void
776DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
778 const InstSeqNum &seq_num,
779 bool squash_delay_slot, unsigned tid)
777 const InstSeqNum &seq_num, unsigned tid)
778{
779 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
780
781 doSquash(new_PC, new_NPC, tid);
782
785#if ISA_HAS_DELAY_SLOT
783 // Tell the CPU to remove any instructions that are not in the ROB.
787 cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
788#else
789 // Tell the CPU to remove any instructions that are not in the ROB.
790 cpu->removeInstsNotInROB(tid, true, 0);
791#endif
784 cpu->removeInstsNotInROB(tid);
785}
786
// One cycle of the fetch stage: refresh each active thread's status
// from the backwards signals, latch interrupt state (full system), run
// fetch for each fetching thread, and report activity to the CPU.
template <class Impl>
void
DefaultFetch<Impl>::tick()
{
    std::list<unsigned>::iterator threads = activeThreads->begin();
    std::list<unsigned>::iterator end = activeThreads->end();
    bool status_change = false;

    wroteToTimeBuffer = false;

    while (threads != end) {
        unsigned tid = *threads++;

        // Check the signals for each thread to determine the proper status
        // for each thread.
        bool updated_status = checkSignalsAndUpdate(tid);
        status_change = status_change || updated_status;
    }

    DPRINTF(Fetch, "Running stage.\n");

    // Reset the number of the instruction we're fetching.
    numInst = 0;

#if FULL_SYSTEM
    // Interrupt signals travel on thread 0's commit wire.
    if (fromCommit->commitInfo[0].interruptPending) {
        interruptPending = true;
    }

    if (fromCommit->commitInfo[0].clearInterrupt) {
        interruptPending = false;
    }
#endif

    for (threadFetched = 0; threadFetched < numFetchingThreads;
         threadFetched++) {
        // Fetch each of the actively fetching threads.
        fetch(status_change);
    }

    // Record number of instructions fetched this cycle for distribution.
    fetchNisnDist.sample(numInst);

    if (status_change) {
        // Change the fetch stage status if there was a status change.
        _status = updateFetchStatus();
    }

    // If there was activity this cycle, inform the CPU of it.
    if (wroteToTimeBuffer || cpu->contextSwitch) {
        DPRINTF(Activity, "Activity this cycle.\n");

        cpu->activityThisCycle();
    }
}
842
843template <class Impl>
844bool
845DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
846{
847 // Update the per thread stall statuses.
848 if (fromDecode->decodeBlock[tid]) {
849 stalls[tid].decode = true;
850 }
851
852 if (fromDecode->decodeUnblock[tid]) {
853 assert(stalls[tid].decode);
854 assert(!fromDecode->decodeBlock[tid]);
855 stalls[tid].decode = false;
856 }
857
858 if (fromRename->renameBlock[tid]) {
859 stalls[tid].rename = true;
860 }
861
862 if (fromRename->renameUnblock[tid]) {
863 assert(stalls[tid].rename);
864 assert(!fromRename->renameBlock[tid]);
865 stalls[tid].rename = false;
866 }
867
868 if (fromIEW->iewBlock[tid]) {
869 stalls[tid].iew = true;
870 }
871
872 if (fromIEW->iewUnblock[tid]) {
873 assert(stalls[tid].iew);
874 assert(!fromIEW->iewBlock[tid]);
875 stalls[tid].iew = false;
876 }
877
878 if (fromCommit->commitBlock[tid]) {
879 stalls[tid].commit = true;
880 }
881
882 if (fromCommit->commitUnblock[tid]) {
883 assert(stalls[tid].commit);
884 assert(!fromCommit->commitBlock[tid]);
885 stalls[tid].commit = false;
886 }
887
888 // Check squash signals from commit.
889 if (fromCommit->commitInfo[tid].squash) {
890
891 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
892 "from commit.\n",tid);
900
901#if ISA_HAS_DELAY_SLOT
902 InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
903#else
904 InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
905#endif
893 // In any case, squash.
894 squash(fromCommit->commitInfo[tid].nextPC,
895 fromCommit->commitInfo[tid].nextNPC,
909 doneSeqNum,
910 fromCommit->commitInfo[tid].squashDelaySlot,
896 fromCommit->commitInfo[tid].doneSeqNum,
897 tid);
898
899 // Also check if there's a mispredict that happened.
900 if (fromCommit->commitInfo[tid].branchMispredict) {
901 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
902 fromCommit->commitInfo[tid].nextPC,
903 fromCommit->commitInfo[tid].branchTaken,
904 tid);
905 } else {
906 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
907 tid);
908 }
909
910 return true;
911 } else if (fromCommit->commitInfo[tid].doneSeqNum) {
912 // Update the branch predictor if it wasn't a squashed instruction
913 // that was broadcasted.
914 branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
915 }
916
917 // Check ROB squash signals from commit.
918 if (fromCommit->commitInfo[tid].robSquashing) {
919 DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);
920
921 // Continue to squash.
922 fetchStatus[tid] = Squashing;
923
924 return true;
925 }
926
927 // Check squash signals from decode.
928 if (fromDecode->decodeInfo[tid].squash) {
929 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
930 "from decode.\n",tid);
931
932 // Update the branch predictor.
933 if (fromDecode->decodeInfo[tid].branchMispredict) {
934 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
935 fromDecode->decodeInfo[tid].nextPC,
936 fromDecode->decodeInfo[tid].branchTaken,
937 tid);
938 } else {
939 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
940 tid);
941 }
942
943 if (fetchStatus[tid] != Squashing) {
944
959#if ISA_HAS_DELAY_SLOT
960 InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
961#else
962 InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
963#endif
945 DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
946 fromDecode->decodeInfo[tid].nextPC,
947 fromDecode->decodeInfo[tid].nextNPC);
948 // Squash unless we're already squashing
949 squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
950 fromDecode->decodeInfo[tid].nextNPC,
970 doneSeqNum,
951 fromDecode->decodeInfo[tid].doneSeqNum,
952 tid);
953
954 return true;
955 }
956 }
957
958 if (checkStall(tid) &&
959 fetchStatus[tid] != IcacheWaitResponse &&
960 fetchStatus[tid] != IcacheWaitRetry) {
961 DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
962
963 fetchStatus[tid] = Blocked;
964
965 return true;
966 }
967
968 if (fetchStatus[tid] == Blocked ||
969 fetchStatus[tid] == Squashing) {
970 // Switch status to running if fetch isn't being told to block or
971 // squash this cycle.
972 DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
973 tid);
974
975 fetchStatus[tid] = Running;
976
977 return true;
978 }
979
980 // If we've reached this point, we have not gotten any signals that
981 // cause fetch to change its status. Fetch remains the same as before.
982 return false;
983}
984
985template<class Impl>
986void
987DefaultFetch<Impl>::fetch(bool &status_change)
988{
989 //////////////////////////////////////////
990 // Start actual fetch
991 //////////////////////////////////////////
992 int tid = getFetchingThread(fetchPolicy);
993
994 if (tid == -1 || drainPending) {
995 DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
996
997 // Breaks looping condition in tick()
998 threadFetched = numFetchingThreads;
999 return;
1000 }
1001
1002 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1003
1004 // The current PC.
1005 Addr &fetch_PC = PC[tid];
1006
1007 Addr &fetch_NPC = nextPC[tid];
1008
1009 // Fault code for memory access.
1010 Fault fault = NoFault;
1011
1012 // If returning from the delay of a cache miss, then update the status
1013 // to running, otherwise do the cache access. Possibly move this up
1014 // to tick() function.
1015 if (fetchStatus[tid] == IcacheAccessComplete) {
1016 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
1017 tid);
1018
1019 fetchStatus[tid] = Running;
1020 status_change = true;
1021 } else if (fetchStatus[tid] == Running) {
1022 DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
1023 "instruction, starting at PC %08p.\n",
1024 tid, fetch_PC);
1025
1026 bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
1027 if (!fetch_success) {
1028 if (cacheBlocked) {
1029 ++icacheStallCycles;
1030 } else {
1031 ++fetchMiscStallCycles;
1032 }
1033 return;
1034 }
1035 } else {
1036 if (fetchStatus[tid] == Idle) {
1037 ++fetchIdleCycles;
1038 DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
1039 } else if (fetchStatus[tid] == Blocked) {
1040 ++fetchBlockedCycles;
1041 DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
1042 } else if (fetchStatus[tid] == Squashing) {
1043 ++fetchSquashCycles;
1044 DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
1045 } else if (fetchStatus[tid] == IcacheWaitResponse) {
1046 ++icacheStallCycles;
1047 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", tid);
1048 }
1049
1050 // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
1051 // fetch should do nothing.
1052 return;
1053 }
1054
1055 ++fetchCycles;
1056
1057 // If we had a stall due to an icache miss, then return.
1058 if (fetchStatus[tid] == IcacheWaitResponse) {
1059 ++icacheStallCycles;
1060 status_change = true;
1061 return;
1062 }
1063
1064 Addr next_PC = fetch_PC;
1065 Addr next_NPC = fetch_NPC;
1066
1067 InstSeqNum inst_seq;
1068 MachInst inst;
1069 ExtMachInst ext_inst;
1070 // @todo: Fix this hack.
1071 unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
1072
1073 if (fault == NoFault) {
1074 // If the read of the first instruction was successful, then grab the
1075 // instructions from the rest of the cache line and put them into the
1076 // queue heading to decode.
1077
1078 DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
1079 "decode.\n",tid);
1080
1081 // Need to keep track of whether or not a predicted branch
1082 // ended this fetch block.
1083 bool predicted_branch = false;
1084
1085 for (;
1086 offset < cacheBlkSize &&
1087 numInst < fetchWidth &&
1088 !predicted_branch;
1089 ++numInst) {
1090
1091 // If we're branching after this instruction, quite fetching
1092 // from the same block then.
1093 predicted_branch =
1094 (fetch_PC + sizeof(TheISA::MachInst) != fetch_NPC);
1095 if (predicted_branch) {
1096 DPRINTF(Fetch, "Branch detected with PC = %#x, NPC = %#x\n",
1097 fetch_PC, fetch_NPC);
1098 }
1099
1100
1101 // Get a sequence number.
1102 inst_seq = cpu->getAndIncrementInstSeq();
1103
1104 // Make sure this is a valid index.
1105 assert(offset <= cacheBlkSize - instSize);
1106
1107 // Get the instruction from the array of the cache line.
1108 inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
1109 (&cacheData[tid][offset]));
1110
1111 predecoder.setTC(cpu->thread[tid]->getTC());
1131 predecoder.moreBytes(fetch_PC, fetch_PC, inst);
1112 predecoder.moreBytes(fetch_PC, 0, inst);
1113
1114 ext_inst = predecoder.getExtMachInst();
1115
1116 // Create a new DynInst from the instruction fetched.
1117 DynInstPtr instruction = new DynInst(ext_inst,
1118 fetch_PC, fetch_NPC,
1119 next_PC, next_NPC,
1120 inst_seq, cpu);
1121 instruction->setTid(tid);
1122
1123 instruction->setASID(tid);
1124
1125 instruction->setThreadState(cpu->thread[tid]);
1126
1127 DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
1128 "[sn:%lli]\n",
1129 tid, instruction->readPC(), inst_seq);
1130
1131 //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
1132
1133 DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
1134 tid, instruction->staticInst->disassemble(fetch_PC));
1135
1155#if TRACING_ON
1136 instruction->traceData =
1137 Trace::getInstRecord(curTick, cpu->tcBase(tid),
1138 instruction->staticInst,
1139 instruction->readPC());
1160#else
1161 instruction->traceData = NULL;
1162#endif
1140
1141 ///FIXME This needs to be more robust in dealing with delay slots
1165#if !ISA_HAS_DELAY_SLOT
1166// predicted_branch |=
1167#endif
1142 lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
1143 predicted_branch |= (next_PC != fetch_NPC);
1144
1145 // Add instruction to the CPU's list of instructions.
1146 instruction->setInstListIt(cpu->addInst(instruction));
1147
1148 // Write the instruction to the first slot in the queue
1149 // that heads to decode.
1150 toDecode->insts[numInst] = instruction;
1151
1152 toDecode->size++;
1153
1154 // Increment stat of fetched instructions.
1155 ++fetchedInsts;
1156
1157 // Move to the next instruction, unless we have a branch.
1158 fetch_PC = next_PC;
1159 fetch_NPC = next_NPC;
1160
1161 if (instruction->isQuiesce()) {
1162 DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
1163 curTick);
1164 fetchStatus[tid] = QuiescePending;
1165 ++numInst;
1166 status_change = true;
1167 break;
1168 }
1169
1170 offset += instSize;
1171 }
1172
1173 if (offset >= cacheBlkSize) {
1174 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
1175 "block.\n", tid);
1176 } else if (numInst >= fetchWidth) {
1177 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
1178 "for this cycle.\n", tid);
1179 } else if (predicted_branch) {
1180 DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
1181 "instruction encountered.\n", tid);
1182 }
1183 }
1184
1185 if (numInst > 0) {
1186 wroteToTimeBuffer = true;
1187 }
1188
1189 // Now that fetching is completed, update the PC to signify what the next
1190 // cycle will be.
1191 if (fault == NoFault) {
1192 PC[tid] = next_PC;
1193 nextPC[tid] = next_NPC;
1194 nextNPC[tid] = next_NPC + instSize;
1221#if ISA_HAS_DELAY_SLOT
1222 DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
1223#else
1195 DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
1225#endif
1196 } else {
1197 // We shouldn't be in an icache miss and also have a fault (an ITB
1198 // miss)
1199 if (fetchStatus[tid] == IcacheWaitResponse) {
1200 panic("Fetch should have exited prior to this!");
1201 }
1202
1203 // Send the fault to commit. This thread will not do anything
1204 // until commit handles the fault. The only other way it can
1205 // wake up is if a squash comes along and changes the PC.
1206#if FULL_SYSTEM
1207 assert(numInst < fetchWidth);
1208 // Get a sequence number.
1209 inst_seq = cpu->getAndIncrementInstSeq();
1210 // We will use a nop in order to carry the fault.
1211 ext_inst = TheISA::NoopMachInst;
1212
1213 // Create a new DynInst from the dummy nop.
1214 DynInstPtr instruction = new DynInst(ext_inst,
1215 fetch_PC, fetch_NPC,
1216 next_PC, next_NPC,
1217 inst_seq, cpu);
1218 instruction->setPredTarg(next_PC, next_NPC);
1219 instruction->setTid(tid);
1220
1221 instruction->setASID(tid);
1222
1223 instruction->setThreadState(cpu->thread[tid]);
1224
1225 instruction->traceData = NULL;
1226
1227 instruction->setInstListIt(cpu->addInst(instruction));
1228
1229 instruction->fault = fault;
1230
1231 toDecode->insts[numInst] = instruction;
1232 toDecode->size++;
1233
1234 DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);
1235
1236 fetchStatus[tid] = TrapPending;
1237 status_change = true;
1238#else // !FULL_SYSTEM
1239 fetchStatus[tid] = TrapPending;
1240 status_change = true;
1241
1242#endif // FULL_SYSTEM
1243 DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %08p",
1244 tid, fault->name(), PC[tid]);
1245 }
1246}
1247
1248template<class Impl>
1249void
1250DefaultFetch<Impl>::recvRetry()
1251{
1252 if (retryPkt != NULL) {
1253 assert(cacheBlocked);
1254 assert(retryTid != -1);
1255 assert(fetchStatus[retryTid] == IcacheWaitRetry);
1256
1257 if (icachePort->sendTiming(retryPkt)) {
1258 fetchStatus[retryTid] = IcacheWaitResponse;
1259 retryPkt = NULL;
1260 retryTid = -1;
1261 cacheBlocked = false;
1262 }
1263 } else {
1264 assert(retryTid == -1);
1265 // Access has been squashed since it was sent out. Just clear
1266 // the cache being blocked.
1267 cacheBlocked = false;
1268 }
1269}
1270
1271///////////////////////////////////////
1272// //
1273// SMT FETCH POLICY MAINTAINED HERE //
1274// //
1275///////////////////////////////////////
1276template<class Impl>
1277int
1278DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
1279{
1280 if (numThreads > 1) {
1281 switch (fetch_priority) {
1282
1283 case SingleThread:
1284 return 0;
1285
1286 case RoundRobin:
1287 return roundRobin();
1288
1289 case IQ:
1290 return iqCount();
1291
1292 case LSQ:
1293 return lsqCount();
1294
1295 case Branch:
1296 return branchCount();
1297
1298 default:
1299 return -1;
1300 }
1301 } else {
1302 std::list<unsigned>::iterator thread = activeThreads->begin();
1303 assert(thread != activeThreads->end());
1304 int tid = *thread;
1305
1306 if (fetchStatus[tid] == Running ||
1307 fetchStatus[tid] == IcacheAccessComplete ||
1308 fetchStatus[tid] == Idle) {
1309 return tid;
1310 } else {
1311 return -1;
1312 }
1313 }
1314
1315}
1316
1317
1318template<class Impl>
1319int
1320DefaultFetch<Impl>::roundRobin()
1321{
1322 std::list<unsigned>::iterator pri_iter = priorityList.begin();
1323 std::list<unsigned>::iterator end = priorityList.end();
1324
1325 int high_pri;
1326
1327 while (pri_iter != end) {
1328 high_pri = *pri_iter;
1329
1330 assert(high_pri <= numThreads);
1331
1332 if (fetchStatus[high_pri] == Running ||
1333 fetchStatus[high_pri] == IcacheAccessComplete ||
1334 fetchStatus[high_pri] == Idle) {
1335
1336 priorityList.erase(pri_iter);
1337 priorityList.push_back(high_pri);
1338
1339 return high_pri;
1340 }
1341
1342 pri_iter++;
1343 }
1344
1345 return -1;
1346}
1347
1348template<class Impl>
1349int
1350DefaultFetch<Impl>::iqCount()
1351{
1352 std::priority_queue<unsigned> PQ;
1353
1354 std::list<unsigned>::iterator threads = activeThreads->begin();
1355 std::list<unsigned>::iterator end = activeThreads->end();
1356
1357 while (threads != end) {
1358 unsigned tid = *threads++;
1359
1360 PQ.push(fromIEW->iewInfo[tid].iqCount);
1361 }
1362
1363 while (!PQ.empty()) {
1364
1365 unsigned high_pri = PQ.top();
1366
1367 if (fetchStatus[high_pri] == Running ||
1368 fetchStatus[high_pri] == IcacheAccessComplete ||
1369 fetchStatus[high_pri] == Idle)
1370 return high_pri;
1371 else
1372 PQ.pop();
1373
1374 }
1375
1376 return -1;
1377}
1378
1379template<class Impl>
1380int
1381DefaultFetch<Impl>::lsqCount()
1382{
1383 std::priority_queue<unsigned> PQ;
1384
1385 std::list<unsigned>::iterator threads = activeThreads->begin();
1386 std::list<unsigned>::iterator end = activeThreads->end();
1387
1388 while (threads != end) {
1389 unsigned tid = *threads++;
1390
1391 PQ.push(fromIEW->iewInfo[tid].ldstqCount);
1392 }
1393
1394 while (!PQ.empty()) {
1395
1396 unsigned high_pri = PQ.top();
1397
1398 if (fetchStatus[high_pri] == Running ||
1399 fetchStatus[high_pri] == IcacheAccessComplete ||
1400 fetchStatus[high_pri] == Idle)
1401 return high_pri;
1402 else
1403 PQ.pop();
1404
1405 }
1406
1407 return -1;
1408}
1409
1410template<class Impl>
1411int
1412DefaultFetch<Impl>::branchCount()
1413{
1414 std::list<unsigned>::iterator thread = activeThreads->begin();
1415 assert(thread != activeThreads->end());
1416 unsigned tid = *thread;
1417
1418 panic("Branch Count Fetch policy unimplemented\n");
1419 return 0 * tid;
1420}