fetch_impl.hh revision 5712:199d31b47f7b
1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * Authors: Kevin Lim 29 * Korey Sewell 30 */ 31 32#include <algorithm> 33#include <cstring> 34 35#include "config/use_checker.hh" 36 37#include "arch/isa_traits.hh" 38#include "arch/utility.hh" 39#include "cpu/checker/cpu.hh" 40#include "cpu/exetrace.hh" 41#include "cpu/o3/fetch.hh" 42#include "mem/packet.hh" 43#include "mem/request.hh" 44#include "sim/byteswap.hh" 45#include "sim/host.hh" 46#include "sim/core.hh" 47 48#if FULL_SYSTEM 49#include "arch/tlb.hh" 50#include "arch/vtophys.hh" 51#include "sim/system.hh" 52#endif // FULL_SYSTEM 53 54#include "params/DerivO3CPU.hh" 55 56template<class Impl> 57void 58DefaultFetch<Impl>::IcachePort::setPeer(Port *port) 59{ 60 Port::setPeer(port); 61 62 fetch->setIcache(); 63} 64 65template<class Impl> 66Tick 67DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt) 68{ 69 panic("DefaultFetch doesn't expect recvAtomic callback!"); 70 return curTick; 71} 72 73template<class Impl> 74void 75DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt) 76{ 77 DPRINTF(Fetch, "DefaultFetch doesn't update its state from a " 78 "functional call."); 79} 80 81template<class Impl> 82void 83DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status) 84{ 85 if (status == RangeChange) { 86 if (!snoopRangeSent) { 87 snoopRangeSent = true; 88 sendStatusChange(Port::RangeChange); 89 } 90 return; 91 } 92 93 panic("DefaultFetch doesn't expect recvStatusChange callback!"); 94} 95 96template<class Impl> 97bool 98DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt) 99{ 100 DPRINTF(Fetch, "Received timing\n"); 101 if (pkt->isResponse()) { 102 fetch->processCacheCompletion(pkt); 103 } 104 //else Snooped a coherence request, just return 105 return true; 106} 107 108template<class Impl> 109void 110DefaultFetch<Impl>::IcachePort::recvRetry() 111{ 112 fetch->recvRetry(); 113} 114 115template<class Impl> 116DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) 117 : cpu(_cpu), 118 branchPred(params), 119 
predecoder(NULL), 120 decodeToFetchDelay(params->decodeToFetchDelay), 121 renameToFetchDelay(params->renameToFetchDelay), 122 iewToFetchDelay(params->iewToFetchDelay), 123 commitToFetchDelay(params->commitToFetchDelay), 124 fetchWidth(params->fetchWidth), 125 cacheBlocked(false), 126 retryPkt(NULL), 127 retryTid(-1), 128 numThreads(params->numThreads), 129 numFetchingThreads(params->smtNumFetchingThreads), 130 interruptPending(false), 131 drainPending(false), 132 switchedOut(false) 133{ 134 if (numThreads > Impl::MaxThreads) 135 fatal("numThreads is not a valid value\n"); 136 137 // Set fetch stage's status to inactive. 138 _status = Inactive; 139 140 std::string policy = params->smtFetchPolicy; 141 142 // Convert string to lowercase 143 std::transform(policy.begin(), policy.end(), policy.begin(), 144 (int(*)(int)) tolower); 145 146 // Figure out fetch policy 147 if (policy == "singlethread") { 148 fetchPolicy = SingleThread; 149 if (numThreads > 1) 150 panic("Invalid Fetch Policy for a SMT workload."); 151 } else if (policy == "roundrobin") { 152 fetchPolicy = RoundRobin; 153 DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); 154 } else if (policy == "branch") { 155 fetchPolicy = Branch; 156 DPRINTF(Fetch, "Fetch policy set to Branch Count\n"); 157 } else if (policy == "iqcount") { 158 fetchPolicy = IQ; 159 DPRINTF(Fetch, "Fetch policy set to IQ count\n"); 160 } else if (policy == "lsqcount") { 161 fetchPolicy = LSQ; 162 DPRINTF(Fetch, "Fetch policy set to LSQ count\n"); 163 } else { 164 fatal("Invalid Fetch Policy. Options Are: {SingleThread," 165 " RoundRobin,LSQcount,IQcount}\n"); 166 } 167 168 // Get the size of an instruction. 169 instSize = sizeof(TheISA::MachInst); 170 171 // Name is finally available, so create the port. 
172 icachePort = new IcachePort(this); 173 174 icachePort->snoopRangeSent = false; 175 176#if USE_CHECKER 177 if (cpu->checker) { 178 cpu->checker->setIcachePort(icachePort); 179 } 180#endif 181} 182 183template <class Impl> 184std::string 185DefaultFetch<Impl>::name() const 186{ 187 return cpu->name() + ".fetch"; 188} 189 190template <class Impl> 191void 192DefaultFetch<Impl>::regStats() 193{ 194 icacheStallCycles 195 .name(name() + ".icacheStallCycles") 196 .desc("Number of cycles fetch is stalled on an Icache miss") 197 .prereq(icacheStallCycles); 198 199 fetchedInsts 200 .name(name() + ".Insts") 201 .desc("Number of instructions fetch has processed") 202 .prereq(fetchedInsts); 203 204 fetchedBranches 205 .name(name() + ".Branches") 206 .desc("Number of branches that fetch encountered") 207 .prereq(fetchedBranches); 208 209 predictedBranches 210 .name(name() + ".predictedBranches") 211 .desc("Number of branches that fetch has predicted taken") 212 .prereq(predictedBranches); 213 214 fetchCycles 215 .name(name() + ".Cycles") 216 .desc("Number of cycles fetch has run and was not squashing or" 217 " blocked") 218 .prereq(fetchCycles); 219 220 fetchSquashCycles 221 .name(name() + ".SquashCycles") 222 .desc("Number of cycles fetch has spent squashing") 223 .prereq(fetchSquashCycles); 224 225 fetchIdleCycles 226 .name(name() + ".IdleCycles") 227 .desc("Number of cycles fetch was idle") 228 .prereq(fetchIdleCycles); 229 230 fetchBlockedCycles 231 .name(name() + ".BlockedCycles") 232 .desc("Number of cycles fetch has spent blocked") 233 .prereq(fetchBlockedCycles); 234 235 fetchedCacheLines 236 .name(name() + ".CacheLines") 237 .desc("Number of cache lines fetched") 238 .prereq(fetchedCacheLines); 239 240 fetchMiscStallCycles 241 .name(name() + ".MiscStallCycles") 242 .desc("Number of cycles fetch has spent waiting on interrupts, or " 243 "bad addresses, or out of MSHRs") 244 .prereq(fetchMiscStallCycles); 245 246 fetchIcacheSquashes 247 .name(name() + 
".IcacheSquashes") 248 .desc("Number of outstanding Icache misses that were squashed") 249 .prereq(fetchIcacheSquashes); 250 251 fetchNisnDist 252 .init(/* base value */ 0, 253 /* last value */ fetchWidth, 254 /* bucket size */ 1) 255 .name(name() + ".rateDist") 256 .desc("Number of instructions fetched each cycle (Total)") 257 .flags(Stats::pdf); 258 259 idleRate 260 .name(name() + ".idleRate") 261 .desc("Percent of cycles fetch was idle") 262 .prereq(idleRate); 263 idleRate = fetchIdleCycles * 100 / cpu->numCycles; 264 265 branchRate 266 .name(name() + ".branchRate") 267 .desc("Number of branch fetches per cycle") 268 .flags(Stats::total); 269 branchRate = fetchedBranches / cpu->numCycles; 270 271 fetchRate 272 .name(name() + ".rate") 273 .desc("Number of inst fetches per cycle") 274 .flags(Stats::total); 275 fetchRate = fetchedInsts / cpu->numCycles; 276 277 branchPred.regStats(); 278} 279 280template<class Impl> 281void 282DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) 283{ 284 timeBuffer = time_buffer; 285 286 // Create wires to get information from proper places in time buffer. 287 fromDecode = timeBuffer->getWire(-decodeToFetchDelay); 288 fromRename = timeBuffer->getWire(-renameToFetchDelay); 289 fromIEW = timeBuffer->getWire(-iewToFetchDelay); 290 fromCommit = timeBuffer->getWire(-commitToFetchDelay); 291} 292 293template<class Impl> 294void 295DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) 296{ 297 activeThreads = at_ptr; 298} 299 300template<class Impl> 301void 302DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) 303{ 304 fetchQueue = fq_ptr; 305 306 // Create wire to write information to proper place in fetch queue. 307 toDecode = fetchQueue->getWire(0); 308} 309 310template<class Impl> 311void 312DefaultFetch<Impl>::initStage() 313{ 314 // Setup PC and nextPC with initial state. 
315 for (int tid = 0; tid < numThreads; tid++) { 316 PC[tid] = cpu->readPC(tid); 317 nextPC[tid] = cpu->readNextPC(tid); 318 microPC[tid] = cpu->readMicroPC(tid); 319 } 320 321 for (int tid=0; tid < numThreads; tid++) { 322 323 fetchStatus[tid] = Running; 324 325 priorityList.push_back(tid); 326 327 memReq[tid] = NULL; 328 329 stalls[tid].decode = false; 330 stalls[tid].rename = false; 331 stalls[tid].iew = false; 332 stalls[tid].commit = false; 333 } 334 335 // Schedule fetch to get the correct PC from the CPU 336 // scheduleFetchStartupEvent(1); 337 338 // Fetch needs to start fetching instructions at the very beginning, 339 // so it must start up in active state. 340 switchToActive(); 341} 342 343template<class Impl> 344void 345DefaultFetch<Impl>::setIcache() 346{ 347 // Size of cache block. 348 cacheBlkSize = icachePort->peerBlockSize(); 349 350 // Create mask to get rid of offset bits. 351 cacheBlkMask = (cacheBlkSize - 1); 352 353 for (int tid=0; tid < numThreads; tid++) { 354 // Create space to store a cache line. 355 cacheData[tid] = new uint8_t[cacheBlkSize]; 356 cacheDataPC[tid] = 0; 357 cacheDataValid[tid] = false; 358 } 359} 360 361template<class Impl> 362void 363DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) 364{ 365 unsigned tid = pkt->req->getThreadNum(); 366 367 DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid); 368 369 assert(!pkt->wasNacked()); 370 371 // Only change the status if it's still waiting on the icache access 372 // to return. 373 if (fetchStatus[tid] != IcacheWaitResponse || 374 pkt->req != memReq[tid] || 375 isSwitchedOut()) { 376 ++fetchIcacheSquashes; 377 delete pkt->req; 378 delete pkt; 379 return; 380 } 381 382 memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize); 383 cacheDataValid[tid] = true; 384 385 if (!drainPending) { 386 // Wake up the CPU (if it went to sleep and was waiting on 387 // this completion event). 
388 cpu->wakeCPU(); 389 390 DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", 391 tid); 392 393 switchToActive(); 394 } 395 396 // Only switch to IcacheAccessComplete if we're not stalled as well. 397 if (checkStall(tid)) { 398 fetchStatus[tid] = Blocked; 399 } else { 400 fetchStatus[tid] = IcacheAccessComplete; 401 } 402 403 // Reset the mem req to NULL. 404 delete pkt->req; 405 delete pkt; 406 memReq[tid] = NULL; 407} 408 409template <class Impl> 410bool 411DefaultFetch<Impl>::drain() 412{ 413 // Fetch is ready to drain at any time. 414 cpu->signalDrained(); 415 drainPending = true; 416 return true; 417} 418 419template <class Impl> 420void 421DefaultFetch<Impl>::resume() 422{ 423 drainPending = false; 424} 425 426template <class Impl> 427void 428DefaultFetch<Impl>::switchOut() 429{ 430 switchedOut = true; 431 // Branch predictor needs to have its state cleared. 432 branchPred.switchOut(); 433} 434 435template <class Impl> 436void 437DefaultFetch<Impl>::takeOverFrom() 438{ 439 // Reset all state 440 for (int i = 0; i < Impl::MaxThreads; ++i) { 441 stalls[i].decode = 0; 442 stalls[i].rename = 0; 443 stalls[i].iew = 0; 444 stalls[i].commit = 0; 445 PC[i] = cpu->readPC(i); 446 nextPC[i] = cpu->readNextPC(i); 447 microPC[i] = cpu->readMicroPC(i); 448 fetchStatus[i] = Running; 449 } 450 numInst = 0; 451 wroteToTimeBuffer = false; 452 _status = Inactive; 453 switchedOut = false; 454 interruptPending = false; 455 branchPred.takeOverFrom(); 456} 457 458template <class Impl> 459void 460DefaultFetch<Impl>::wakeFromQuiesce() 461{ 462 DPRINTF(Fetch, "Waking up from quiesce\n"); 463 // Hopefully this is safe 464 // @todo: Allow other threads to wake from quiesce. 
465 fetchStatus[0] = Running; 466} 467 468template <class Impl> 469inline void 470DefaultFetch<Impl>::switchToActive() 471{ 472 if (_status == Inactive) { 473 DPRINTF(Activity, "Activating stage.\n"); 474 475 cpu->activateStage(O3CPU::FetchIdx); 476 477 _status = Active; 478 } 479} 480 481template <class Impl> 482inline void 483DefaultFetch<Impl>::switchToInactive() 484{ 485 if (_status == Active) { 486 DPRINTF(Activity, "Deactivating stage.\n"); 487 488 cpu->deactivateStage(O3CPU::FetchIdx); 489 490 _status = Inactive; 491 } 492} 493 494template <class Impl> 495bool 496DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, 497 Addr &next_NPC, Addr &next_MicroPC) 498{ 499 // Do branch prediction check here. 500 // A bit of a misnomer...next_PC is actually the current PC until 501 // this function updates it. 502 bool predict_taken; 503 504 if (!inst->isControl()) { 505 if (inst->isMicroop() && !inst->isLastMicroop()) { 506 next_MicroPC++; 507 } else { 508 next_PC = next_NPC; 509 next_NPC = next_NPC + instSize; 510 next_MicroPC = 0; 511 } 512 inst->setPredTarg(next_PC, next_NPC, next_MicroPC); 513 inst->setPredTaken(false); 514 return false; 515 } 516 517 //Assume for now that all control flow is to a different macroop which 518 //would reset the micro pc to 0. 
519 next_MicroPC = 0; 520 521 int tid = inst->threadNumber; 522 Addr pred_PC = next_PC; 523 predict_taken = branchPred.predict(inst, pred_PC, tid); 524 525/* if (predict_taken) { 526 DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n", 527 tid, pred_PC); 528 } else { 529 DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid); 530 }*/ 531 532#if ISA_HAS_DELAY_SLOT 533 next_PC = next_NPC; 534 if (predict_taken) 535 next_NPC = pred_PC; 536 else 537 next_NPC += instSize; 538#else 539 if (predict_taken) 540 next_PC = pred_PC; 541 else 542 next_PC += instSize; 543 next_NPC = next_PC + instSize; 544#endif 545/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", 546 tid, next_PC, next_NPC);*/ 547 inst->setPredTarg(next_PC, next_NPC, next_MicroPC); 548 inst->setPredTaken(predict_taken); 549 550 ++fetchedBranches; 551 552 if (predict_taken) { 553 ++predictedBranches; 554 } 555 556 return predict_taken; 557} 558 559template <class Impl> 560bool 561DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid) 562{ 563 Fault fault = NoFault; 564 565 //AlphaDep 566 if (cacheBlocked) { 567 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", 568 tid); 569 return false; 570 } else if (isSwitchedOut()) { 571 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n", 572 tid); 573 return false; 574 } else if (interruptPending && !(fetch_PC & 0x3)) { 575 // Hold off fetch from getting new instructions when: 576 // Cache is blocked, or 577 // while an interrupt is pending and we're not in PAL mode, or 578 // fetch is switched out. 579 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", 580 tid); 581 return false; 582 } 583 584 // Align the fetch PC so it's at the start of a cache block. 585 Addr block_PC = icacheBlockAlignPC(fetch_PC); 586 587 // If we've already got the block, no need to try to fetch it again. 
588 if (cacheDataValid[tid] && block_PC == cacheDataPC[tid]) { 589 return true; 590 } 591 592 // Setup the memReq to do a read of the first instruction's address. 593 // Set the appropriate read size and flags as well. 594 // Build request here. 595 RequestPtr mem_req = new Request(tid, block_PC, cacheBlkSize, 0, 596 fetch_PC, cpu->cpuId(), tid); 597 598 memReq[tid] = mem_req; 599 600 // Translate the instruction request. 601 fault = cpu->translateInstReq(mem_req, cpu->thread[tid]); 602 603 // In the case of faults, the fetch stage may need to stall and wait 604 // for the ITB miss to be handled. 605 606 // If translation was successful, attempt to read the first 607 // instruction. 608 if (fault == NoFault) { 609#if 0 610 if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) || 611 memReq[tid]->isUncacheable()) { 612 DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a " 613 "misspeculating path)!", 614 memReq[tid]->paddr); 615 ret_fault = TheISA::genMachineCheckFault(); 616 return false; 617 } 618#endif 619 620 // Build packet here. 621 PacketPtr data_pkt = new Packet(mem_req, 622 MemCmd::ReadReq, Packet::Broadcast); 623 data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); 624 625 cacheDataPC[tid] = block_PC; 626 cacheDataValid[tid] = false; 627 628 DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); 629 630 fetchedCacheLines++; 631 632 // Now do the timing access to see whether or not the instruction 633 // exists within the cache. 
634 if (!icachePort->sendTiming(data_pkt)) { 635 assert(retryPkt == NULL); 636 assert(retryTid == -1); 637 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); 638 fetchStatus[tid] = IcacheWaitRetry; 639 retryPkt = data_pkt; 640 retryTid = tid; 641 cacheBlocked = true; 642 return false; 643 } 644 645 DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid); 646 647 lastIcacheStall[tid] = curTick; 648 649 DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " 650 "response.\n", tid); 651 652 fetchStatus[tid] = IcacheWaitResponse; 653 } else { 654 delete mem_req; 655 memReq[tid] = NULL; 656 } 657 658 ret_fault = fault; 659 return true; 660} 661 662template <class Impl> 663inline void 664DefaultFetch<Impl>::doSquash(const Addr &new_PC, 665 const Addr &new_NPC, const Addr &new_microPC, unsigned tid) 666{ 667 DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", 668 tid, new_PC, new_NPC); 669 670 PC[tid] = new_PC; 671 nextPC[tid] = new_NPC; 672 microPC[tid] = new_microPC; 673 674 // Clear the icache miss if it's outstanding. 675 if (fetchStatus[tid] == IcacheWaitResponse) { 676 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", 677 tid); 678 memReq[tid] = NULL; 679 } 680 681 // Get rid of the retrying packet if it was from this thread. 682 if (retryTid == tid) { 683 assert(cacheBlocked); 684 if (retryPkt) { 685 delete retryPkt->req; 686 delete retryPkt; 687 } 688 retryPkt = NULL; 689 retryTid = -1; 690 } 691 692 fetchStatus[tid] = Squashing; 693 694 ++fetchSquashCycles; 695} 696 697template<class Impl> 698void 699DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC, 700 const Addr &new_MicroPC, 701 const InstSeqNum &seq_num, unsigned tid) 702{ 703 DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); 704 705 doSquash(new_PC, new_NPC, new_MicroPC, tid); 706 707 // Tell the CPU to remove any instructions that are in flight between 708 // fetch and decode. 
709 cpu->removeInstsUntil(seq_num, tid); 710} 711 712template<class Impl> 713bool 714DefaultFetch<Impl>::checkStall(unsigned tid) const 715{ 716 bool ret_val = false; 717 718 if (cpu->contextSwitch) { 719 DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid); 720 ret_val = true; 721 } else if (stalls[tid].decode) { 722 DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid); 723 ret_val = true; 724 } else if (stalls[tid].rename) { 725 DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid); 726 ret_val = true; 727 } else if (stalls[tid].iew) { 728 DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid); 729 ret_val = true; 730 } else if (stalls[tid].commit) { 731 DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid); 732 ret_val = true; 733 } 734 735 return ret_val; 736} 737 738template<class Impl> 739typename DefaultFetch<Impl>::FetchStatus 740DefaultFetch<Impl>::updateFetchStatus() 741{ 742 //Check Running 743 std::list<unsigned>::iterator threads = activeThreads->begin(); 744 std::list<unsigned>::iterator end = activeThreads->end(); 745 746 while (threads != end) { 747 unsigned tid = *threads++; 748 749 if (fetchStatus[tid] == Running || 750 fetchStatus[tid] == Squashing || 751 fetchStatus[tid] == IcacheAccessComplete) { 752 753 if (_status == Inactive) { 754 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid); 755 756 if (fetchStatus[tid] == IcacheAccessComplete) { 757 DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache" 758 "completion\n",tid); 759 } 760 761 cpu->activateStage(O3CPU::FetchIdx); 762 } 763 764 return Active; 765 } 766 } 767 768 // Stage is switching from active to inactive, notify CPU of it. 
769 if (_status == Active) { 770 DPRINTF(Activity, "Deactivating stage.\n"); 771 772 cpu->deactivateStage(O3CPU::FetchIdx); 773 } 774 775 return Inactive; 776} 777 778template <class Impl> 779void 780DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC, 781 const Addr &new_MicroPC, 782 const InstSeqNum &seq_num, unsigned tid) 783{ 784 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); 785 786 doSquash(new_PC, new_NPC, new_MicroPC, tid); 787 788 // Tell the CPU to remove any instructions that are not in the ROB. 789 cpu->removeInstsNotInROB(tid); 790} 791 792template <class Impl> 793void 794DefaultFetch<Impl>::tick() 795{ 796 std::list<unsigned>::iterator threads = activeThreads->begin(); 797 std::list<unsigned>::iterator end = activeThreads->end(); 798 bool status_change = false; 799 800 wroteToTimeBuffer = false; 801 802 while (threads != end) { 803 unsigned tid = *threads++; 804 805 // Check the signals for each thread to determine the proper status 806 // for each thread. 807 bool updated_status = checkSignalsAndUpdate(tid); 808 status_change = status_change || updated_status; 809 } 810 811 DPRINTF(Fetch, "Running stage.\n"); 812 813 // Reset the number of the instruction we're fetching. 814 numInst = 0; 815 816#if FULL_SYSTEM 817 if (fromCommit->commitInfo[0].interruptPending) { 818 interruptPending = true; 819 } 820 821 if (fromCommit->commitInfo[0].clearInterrupt) { 822 interruptPending = false; 823 } 824#endif 825 826 for (threadFetched = 0; threadFetched < numFetchingThreads; 827 threadFetched++) { 828 // Fetch each of the actively fetching threads. 829 fetch(status_change); 830 } 831 832 // Record number of instructions fetched this cycle for distribution. 833 fetchNisnDist.sample(numInst); 834 835 if (status_change) { 836 // Change the fetch stage status if there was a status change. 837 _status = updateFetchStatus(); 838 } 839 840 // If there was activity this cycle, inform the CPU of it. 
841 if (wroteToTimeBuffer || cpu->contextSwitch) { 842 DPRINTF(Activity, "Activity this cycle.\n"); 843 844 cpu->activityThisCycle(); 845 } 846} 847 848template <class Impl> 849bool 850DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) 851{ 852 // Update the per thread stall statuses. 853 if (fromDecode->decodeBlock[tid]) { 854 stalls[tid].decode = true; 855 } 856 857 if (fromDecode->decodeUnblock[tid]) { 858 assert(stalls[tid].decode); 859 assert(!fromDecode->decodeBlock[tid]); 860 stalls[tid].decode = false; 861 } 862 863 if (fromRename->renameBlock[tid]) { 864 stalls[tid].rename = true; 865 } 866 867 if (fromRename->renameUnblock[tid]) { 868 assert(stalls[tid].rename); 869 assert(!fromRename->renameBlock[tid]); 870 stalls[tid].rename = false; 871 } 872 873 if (fromIEW->iewBlock[tid]) { 874 stalls[tid].iew = true; 875 } 876 877 if (fromIEW->iewUnblock[tid]) { 878 assert(stalls[tid].iew); 879 assert(!fromIEW->iewBlock[tid]); 880 stalls[tid].iew = false; 881 } 882 883 if (fromCommit->commitBlock[tid]) { 884 stalls[tid].commit = true; 885 } 886 887 if (fromCommit->commitUnblock[tid]) { 888 assert(stalls[tid].commit); 889 assert(!fromCommit->commitBlock[tid]); 890 stalls[tid].commit = false; 891 } 892 893 // Check squash signals from commit. 894 if (fromCommit->commitInfo[tid].squash) { 895 896 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " 897 "from commit.\n",tid); 898 // In any case, squash. 899 squash(fromCommit->commitInfo[tid].nextPC, 900 fromCommit->commitInfo[tid].nextNPC, 901 fromCommit->commitInfo[tid].nextMicroPC, 902 fromCommit->commitInfo[tid].doneSeqNum, 903 tid); 904 905 // Also check if there's a mispredict that happened. 
906 if (fromCommit->commitInfo[tid].branchMispredict) { 907 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, 908 fromCommit->commitInfo[tid].nextPC, 909 fromCommit->commitInfo[tid].branchTaken, 910 tid); 911 } else { 912 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, 913 tid); 914 } 915 916 return true; 917 } else if (fromCommit->commitInfo[tid].doneSeqNum) { 918 // Update the branch predictor if it wasn't a squashed instruction 919 // that was broadcasted. 920 branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid); 921 } 922 923 // Check ROB squash signals from commit. 924 if (fromCommit->commitInfo[tid].robSquashing) { 925 DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid); 926 927 // Continue to squash. 928 fetchStatus[tid] = Squashing; 929 930 return true; 931 } 932 933 // Check squash signals from decode. 934 if (fromDecode->decodeInfo[tid].squash) { 935 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " 936 "from decode.\n",tid); 937 938 // Update the branch predictor. 
939 if (fromDecode->decodeInfo[tid].branchMispredict) { 940 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, 941 fromDecode->decodeInfo[tid].nextPC, 942 fromDecode->decodeInfo[tid].branchTaken, 943 tid); 944 } else { 945 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, 946 tid); 947 } 948 949 if (fetchStatus[tid] != Squashing) { 950 951 DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n", 952 fromDecode->decodeInfo[tid].nextPC, 953 fromDecode->decodeInfo[tid].nextNPC); 954 // Squash unless we're already squashing 955 squashFromDecode(fromDecode->decodeInfo[tid].nextPC, 956 fromDecode->decodeInfo[tid].nextNPC, 957 fromDecode->decodeInfo[tid].nextMicroPC, 958 fromDecode->decodeInfo[tid].doneSeqNum, 959 tid); 960 961 return true; 962 } 963 } 964 965 if (checkStall(tid) && 966 fetchStatus[tid] != IcacheWaitResponse && 967 fetchStatus[tid] != IcacheWaitRetry) { 968 DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); 969 970 fetchStatus[tid] = Blocked; 971 972 return true; 973 } 974 975 if (fetchStatus[tid] == Blocked || 976 fetchStatus[tid] == Squashing) { 977 // Switch status to running if fetch isn't being told to block or 978 // squash this cycle. 979 DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n", 980 tid); 981 982 fetchStatus[tid] = Running; 983 984 return true; 985 } 986 987 // If we've reached this point, we have not gotten any signals that 988 // cause fetch to change its status. Fetch remains the same as before. 
989 return false; 990} 991 992template<class Impl> 993void 994DefaultFetch<Impl>::fetch(bool &status_change) 995{ 996 ////////////////////////////////////////// 997 // Start actual fetch 998 ////////////////////////////////////////// 999 int tid = getFetchingThread(fetchPolicy); 1000 1001 if (tid == -1 || drainPending) { 1002 DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); 1003 1004 // Breaks looping condition in tick() 1005 threadFetched = numFetchingThreads; 1006 return; 1007 } 1008 1009 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); 1010 1011 // The current PC. 1012 Addr fetch_PC = PC[tid]; 1013 Addr fetch_NPC = nextPC[tid]; 1014 Addr fetch_MicroPC = microPC[tid]; 1015 1016 // Fault code for memory access. 1017 Fault fault = NoFault; 1018 1019 // If returning from the delay of a cache miss, then update the status 1020 // to running, otherwise do the cache access. Possibly move this up 1021 // to tick() function. 1022 if (fetchStatus[tid] == IcacheAccessComplete) { 1023 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", 1024 tid); 1025 1026 fetchStatus[tid] = Running; 1027 status_change = true; 1028 } else if (fetchStatus[tid] == Running) { 1029 DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " 1030 "instruction, starting at PC %08p.\n", 1031 tid, fetch_PC); 1032 1033 bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); 1034 if (!fetch_success) { 1035 if (cacheBlocked) { 1036 ++icacheStallCycles; 1037 } else { 1038 ++fetchMiscStallCycles; 1039 } 1040 return; 1041 } 1042 } else { 1043 if (fetchStatus[tid] == Idle) { 1044 ++fetchIdleCycles; 1045 DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid); 1046 } else if (fetchStatus[tid] == Blocked) { 1047 ++fetchBlockedCycles; 1048 DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid); 1049 } else if (fetchStatus[tid] == Squashing) { 1050 ++fetchSquashCycles; 1051 DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); 1052 } else if (fetchStatus[tid] == 
IcacheWaitResponse) {
            // Still waiting on an outstanding icache access; count the
            // stall cycle.
            ++icacheStallCycles;
            DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", tid);
        }

        // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
        // fetch should do nothing.
        return;
    }

    ++fetchCycles;

    // If we had a stall due to an icache miss, then return.
    if (fetchStatus[tid] == IcacheWaitResponse) {
        ++icacheStallCycles;
        status_change = true;
        return;
    }

    // Working copies of the PCs; these are advanced by the branch
    // predictor (lookupAndUpdateNextPC) as each instruction is fetched.
    Addr next_PC = fetch_PC;
    Addr next_NPC = fetch_NPC;
    Addr next_MicroPC = fetch_MicroPC;

    InstSeqNum inst_seq;
    MachInst inst;
    ExtMachInst ext_inst;
    // Byte offset of fetch_PC within its cache block, forced down to a
    // 4-byte boundary.
    // @todo: Fix this hack.
    unsigned offset = (fetch_PC & cacheBlkMask) & ~3;

    StaticInstPtr staticInst = NULL;
    StaticInstPtr macroop = NULL;

    if (fault == NoFault) {
        // If the read of the first instruction was successful, then grab the
        // instructions from the rest of the cache line and put them into the
        // queue heading to decode.

        DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
                "decode.\n",tid);

        // Need to keep track of whether or not a predicted branch
        // ended this fetch block.
        bool predicted_branch = false;

        // Fetch until the end of the cache block, the fetch bandwidth
        // limit, or a predicted branch is reached.
        while (offset < cacheBlkSize &&
               numInst < fetchWidth &&
               !predicted_branch) {

            // If we're branching after this instruction, quit fetching
            // from the same block then.  (A non-sequential NPC was set
            // up by the previous iteration's predictor update below.)
            predicted_branch =
                (fetch_PC + sizeof(TheISA::MachInst) != fetch_NPC);
            if (predicted_branch) {
                DPRINTF(Fetch, "Branch detected with PC = %#x, NPC = %#x\n",
                        fetch_PC, fetch_NPC);
            }

            // Make sure this is a valid index.
            assert(offset <= cacheBlkSize - instSize);

            if (!macroop) {
                // Get the instruction from the array of the cache line.
                inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
                            (&cacheData[tid][offset]));

                // Feed the raw bytes to the predecoder to build the
                // extended machine instruction.
                predecoder.setTC(cpu->thread[tid]->getTC());
                predecoder.moreBytes(fetch_PC, fetch_PC, inst);

                ext_inst = predecoder.getExtMachInst();
                staticInst = StaticInstPtr(ext_inst, fetch_PC);
                // A macro-op is expanded into micro-ops by the do-while
                // loop below.
                if (staticInst->isMacroop())
                    macroop = staticInst;
            }
            do {
                if (macroop) {
                    // Fetch the next micro-op of the current macro-op.
                    staticInst = macroop->fetchMicroop(fetch_MicroPC);
                    if (staticInst->isLastMicroop())
                        macroop = NULL;
                }

                // Get a sequence number.
                inst_seq = cpu->getAndIncrementInstSeq();

                // Create a new DynInst from the instruction fetched.
                DynInstPtr instruction = new DynInst(staticInst,
                                           fetch_PC, fetch_NPC, fetch_MicroPC,
                                           next_PC, next_NPC, next_MicroPC,
                                           inst_seq, cpu);
                instruction->setTid(tid);

                instruction->setASID(tid);

                instruction->setThreadState(cpu->thread[tid]);

                DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
                        "[sn:%lli]\n",
                        tid, instruction->readPC(), inst_seq);

                //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);

                DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
                        tid, instruction->staticInst->disassemble(fetch_PC));

#if TRACING_ON
                instruction->traceData =
                    cpu->getTracer()->getInstRecord(curTick, cpu->tcBase(tid),
                            instruction->staticInst, instruction->readPC());
#else
                instruction->traceData = NULL;
#endif

                ///FIXME This needs to be more robust in dealing with delay slots
                predicted_branch |=
                    lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);

                // Add instruction to the CPU's list of instructions.
                instruction->setInstListIt(cpu->addInst(instruction));

                // Write the instruction to the first slot in the queue
                // that heads to decode.
                toDecode->insts[numInst] = instruction;

                toDecode->size++;

                // Increment stat of fetched instructions.
                ++fetchedInsts;

                // Move to the next instruction, unless we have a branch.
                fetch_PC = next_PC;
                fetch_NPC = next_NPC;
                fetch_MicroPC = next_MicroPC;

                if (instruction->isQuiesce()) {
                    // NOTE(review): the format string has no conversion
                    // for the curTick argument and no trailing '\n' --
                    // looks like it was meant to print the tick.  TODO
                    // confirm and fix the format string.
                    DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
                            curTick);
                    fetchStatus[tid] = QuiescePending;
                    ++numInst;
                    status_change = true;
                    break;
                }

                ++numInst;
            } while (staticInst->isMicroop() &&
                     !staticInst->isLastMicroop() &&
                     numInst < fetchWidth);
            offset += instSize;
        }

        if (predicted_branch) {
            DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
                    "instruction encountered.\n", tid);
        } else if (numInst >= fetchWidth) {
            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                    "for this cycle.\n", tid);
        } else if (offset >= cacheBlkSize) {
            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
                    "block.\n", tid);
        }
    }

    if (numInst > 0) {
        wroteToTimeBuffer = true;
    }

    // Now that fetching is completed, update the PC to signify what the next
    // cycle will be.
    if (fault == NoFault) {
        PC[tid] = next_PC;
        nextPC[tid] = next_NPC;
        microPC[tid] = next_MicroPC;
        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
    } else {
        // We shouldn't be in an icache miss and also have a fault (an ITB
        // miss)
        if (fetchStatus[tid] == IcacheWaitResponse) {
            panic("Fetch should have exited prior to this!");
        }

        // Send the fault to commit. This thread will not do anything
        // until commit handles the fault. The only other way it can
        // wake up is if a squash comes along and changes the PC.
        assert(numInst < fetchWidth);
        // Get a sequence number.
        inst_seq = cpu->getAndIncrementInstSeq();
        // We will use a nop in order to carry the fault.
        ext_inst = TheISA::NoopMachInst;

        // Create a new DynInst from the dummy nop.
        DynInstPtr instruction = new DynInst(ext_inst,
                                             fetch_PC, fetch_NPC, fetch_MicroPC,
                                             next_PC, next_NPC, next_MicroPC,
                                             inst_seq, cpu);
        // Predict sequential execution past the carrier nop.
        instruction->setPredTarg(next_NPC, next_NPC + instSize, 0);
        instruction->setTid(tid);

        instruction->setASID(tid);

        instruction->setThreadState(cpu->thread[tid]);

        instruction->traceData = NULL;

        instruction->setInstListIt(cpu->addInst(instruction));

        // Attach the fault so commit handles it when this nop reaches
        // the head of the ROB.
        instruction->fault = fault;

        toDecode->insts[numInst] = instruction;
        toDecode->size++;

        DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);

        // Stall this thread until commit processes the trap (or a
        // squash redirects the PC).
        fetchStatus[tid] = TrapPending;
        status_change = true;

        // NOTE(review): format string is missing a trailing '\n'.
        DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %08p",
                tid, fault->name(), PC[tid]);
    }
}

// Called by the icache port when a previously rejected timing request
// may be retried.
template<class Impl>
void
DefaultFetch<Impl>::recvRetry()
{
    if (retryPkt != NULL) {
        // A fetch request is stalled waiting for retry; the bookkeeping
        // must agree on that.
        assert(cacheBlocked);
        assert(retryTid != -1);
        assert(fetchStatus[retryTid] == IcacheWaitRetry);

        if (icachePort->sendTiming(retryPkt)) {
            // Resend succeeded: the access is now outstanding, so clear
            // all retry state and unblock the cache.
            fetchStatus[retryTid] = IcacheWaitResponse;
            retryPkt = NULL;
            retryTid = -1;
            cacheBlocked = false;
        }
    } else {
        assert(retryTid == -1);
        // Access has been squashed since it was sent out. Just clear
        // the cache being blocked.
        cacheBlocked = false;
    }
}

///////////////////////////////////////
//                                   //
//  SMT FETCH POLICY MAINTAINED HERE //
//                                   //
///////////////////////////////////////

// Returns the thread ID to fetch from this cycle according to the
// configured SMT fetch policy, or -1 if no thread is fetchable.
template<class Impl>
int
DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
{
    if (numThreads > 1) {
        // Dispatch to the policy-specific selector.
        switch (fetch_priority) {

          case SingleThread:
            return 0;

          case RoundRobin:
            return roundRobin();

          case IQ:
            return iqCount();

          case LSQ:
            return lsqCount();

          case Branch:
            return branchCount();

          default:
            return -1;
        }
    } else {
        // Single-threaded: fetch from the lone active thread if it is
        // in a fetchable state.
        std::list<unsigned>::iterator thread = activeThreads->begin();
        assert(thread != activeThreads->end());
        int tid = *thread;

        if (fetchStatus[tid] == Running ||
            fetchStatus[tid] == IcacheAccessComplete ||
            fetchStatus[tid] == Idle) {
            return tid;
        } else {
            return -1;
        }
    }

}


// Round-robin policy: return the highest-priority fetchable thread and
// rotate it to the back of the priority list so others get a turn.
template<class Impl>
int
DefaultFetch<Impl>::roundRobin()
{
    std::list<unsigned>::iterator pri_iter = priorityList.begin();
    std::list<unsigned>::iterator end = priorityList.end();

    int high_pri;

    while (pri_iter != end) {
        high_pri = *pri_iter;

        // NOTE(review): valid thread IDs are 0..numThreads-1, so this
        // bound looks off by one; '<' would be the tight check.
        assert(high_pri <= numThreads);

        if (fetchStatus[high_pri] == Running ||
            fetchStatus[high_pri] == IcacheAccessComplete ||
            fetchStatus[high_pri] == Idle) {

            // Demote the chosen thread to the lowest priority.
            priorityList.erase(pri_iter);
            priorityList.push_back(high_pri);

            return high_pri;
        }

        pri_iter++;
    }

    return -1;
}

// IQ-count policy: select a thread based on instruction-queue occupancy
// reported by the IEW stage.
template<class Impl>
int
DefaultFetch<Impl>::iqCount()
{
    std::priority_queue<unsigned> PQ;

    std::list<unsigned>::iterator threads = activeThreads->begin();
    std::list<unsigned>::iterator end = activeThreads->end();

    while (threads != end) {
        unsigned tid = *threads++;

        PQ.push(fromIEW->iewInfo[tid].iqCount);
    }

    while (!PQ.empty()) {

        // NOTE(review): PQ holds IQ *counts*, yet the popped value is
        // used directly as a thread ID to index fetchStatus.  This only
        // works if a count happens to equal a valid tid; a count-to-tid
        // mapping is presumably needed.  Also std::priority_queue is a
        // max-heap, so this favors the thread with the *most* IQ
        // entries -- TODO confirm the intended policy.
        unsigned high_pri = PQ.top();

        if (fetchStatus[high_pri] == Running ||
            fetchStatus[high_pri] == IcacheAccessComplete ||
            fetchStatus[high_pri] == Idle)
            return high_pri;
        else
            PQ.pop();

    }

    return -1;
}

// LSQ-count policy: select a thread based on load/store-queue occupancy
// reported by the IEW stage.
template<class Impl>
int
DefaultFetch<Impl>::lsqCount()
{
    std::priority_queue<unsigned> PQ;

    std::list<unsigned>::iterator threads = activeThreads->begin();
    std::list<unsigned>::iterator end = activeThreads->end();

    while (threads != end) {
        unsigned tid = *threads++;

        PQ.push(fromIEW->iewInfo[tid].ldstqCount);
    }

    while (!PQ.empty()) {

        // NOTE(review): same count-used-as-thread-ID issue as in
        // iqCount() above -- the queue stores LSQ counts, not tids.
        unsigned high_pri = PQ.top();

        if (fetchStatus[high_pri] == Running ||
            fetchStatus[high_pri] == IcacheAccessComplete ||
            fetchStatus[high_pri] == Idle)
            return high_pri;
        else
            PQ.pop();

    }

    return -1;
}

// Branch-count policy: not implemented; always panics if selected.
template<class Impl>
int
DefaultFetch<Impl>::branchCount()
{
    std::list<unsigned>::iterator thread = activeThreads->begin();
    assert(thread != activeThreads->end());
    unsigned tid = *thread;

    panic("Branch Count Fetch policy unimplemented\n");
    // Unreachable after panic; the multiply keeps 'tid' used and
    // silences unused-variable warnings.
    return 0 * tid;
}