fetch_impl.hh revision 3070:0ca43be10749
1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */

#include "config/use_checker.hh"

#include "arch/isa_traits.hh"
#include "arch/utility.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/exetrace.hh"
#include "cpu/o3/fetch.hh"
#include "mem/packet.hh"
#include "mem/request.hh"
#include "sim/byteswap.hh"
#include "sim/host.hh"
#include "sim/root.hh"

#if FULL_SYSTEM
#include "arch/tlb.hh"
#include "arch/vtophys.hh"
#include "base/remote_gdb.hh"
#include "sim/system.hh"
#endif // FULL_SYSTEM

#include <algorithm>

// Fetch only issues timing-mode instruction reads, so an atomic access on
// its I-cache port is always a protocol error.
template<class Impl>
Tick
DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
{
    panic("DefaultFetch doesn't expect recvAtomic callback!");
    // Unreachable after panic(); value only satisfies the return type.
    return curTick;
}

// Functional accesses are likewise never expected on this port.
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
{
    panic("DefaultFetch doesn't expect recvFunctional callback!");
}

// Address-range changes are benign and ignored; any other status change
// on the I-cache port is unexpected.
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
{
    if (status == RangeChange)
        return;

    panic("DefaultFetch doesn't expect recvStatusChange callback!");
}

// A timing-mode response arrived from the I-cache; hand the packet to the
// fetch stage.  Always accepts the packet (returns true).
template<class Impl>
bool
DefaultFetch<Impl>::IcachePort::recvTiming(Packet *pkt)
{
    fetch->processCacheCompletion(pkt);
    return true;
}

// The cache signals it can accept a previously rejected request; let the
// fetch stage resend the stashed retry packet.
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvRetry()
{
    fetch->recvRetry();
}

// Construct the fetch stage: latch inter-stage delays and fetch width from
// the parameter object; all retry/interrupt/drain state starts cleared.
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(Params *params)
    : mem(params->mem),
      branchPred(params),
      decodeToFetchDelay(params->decodeToFetchDelay),
      renameToFetchDelay(params->renameToFetchDelay),
      iewToFetchDelay(params->iewToFetchDelay),
      commitToFetchDelay(params->commitToFetchDelay),
      fetchWidth(params->fetchWidth),
      cacheBlocked(false),
      retryPkt(NULL),
      retryTid(-1),
      numThreads(params->numberOfThreads),
      numFetchingThreads(params->smtNumFetchingThreads),
      interruptPending(false),
      drainPending(false),
      switchedOut(false)
{
    if (numThreads > Impl::MaxThreads)
        fatal("numThreads is not a valid value\n");

    // Set fetch stage's status to inactive.
    _status = Inactive;

    std::string policy = params->smtFetchPolicy;

    // Convert string to lowercase so the policy name is case-insensitive.
    std::transform(policy.begin(), policy.end(), policy.begin(),
                   (int(*)(int)) tolower);

    // Figure out fetch policy
    if (policy == "singlethread") {
        fetchPolicy = SingleThread;
        if (numThreads > 1)
            panic("Invalid Fetch Policy for a SMT workload.");
    } else if (policy == "roundrobin") {
        fetchPolicy = RoundRobin;
        DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
    } else if (policy == "branch") {
        fetchPolicy = Branch;
        DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
    } else if (policy == "iqcount") {
        fetchPolicy = IQ;
        DPRINTF(Fetch, "Fetch policy set to IQ count\n");
    } else if (policy == "lsqcount") {
        fetchPolicy = LSQ;
        DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
    } else {
        fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
              " RoundRobin,LSQcount,IQcount}\n");
    }

    // Size of cache block.
    // NOTE(review): hard-coded to 64 bytes rather than queried from the
    // actual I-cache configuration — confirm it matches the cache params.
    cacheBlkSize = 64;

    // Create mask to get rid of offset bits (requires cacheBlkSize to be
    // a power of two).
    cacheBlkMask = (cacheBlkSize - 1);

    // Per-thread initialization: status, fetch priority, and a private
    // cache-line buffer that holds the most recently fetched block.
    for (int tid=0; tid < numThreads; tid++) {

        fetchStatus[tid] = Running;

        priorityList.push_back(tid);

        memReq[tid] = NULL;

        // Create space to store a cache line.
        cacheData[tid] = new uint8_t[cacheBlkSize];
        cacheDataPC[tid] = 0;
        cacheDataValid[tid] = false;

        // Delay-slot bookkeeping (used by non-Alpha ISAs with branch
        // delay slots).
        delaySlotInfo[tid].branchSeqNum = -1;
        delaySlotInfo[tid].numInsts = 0;
        delaySlotInfo[tid].targetAddr = 0;
        delaySlotInfo[tid].targetReady = false;

        // No downstream stage is stalling us yet.
        stalls[tid].decode = false;
        stalls[tid].rename = false;
        stalls[tid].iew = false;
        stalls[tid].commit = false;
    }

    // Get the size of an instruction.
    instSize = sizeof(TheISA::MachInst);
}

// Fetch's name is derived from the owning CPU's name.
template <class Impl>
std::string
DefaultFetch<Impl>::name() const
{
    return cpu->name() + ".fetch";
}

// Register all of fetch's statistics with the stats database.  Must be
// called after setCPU() since the stat names use name().
template <class Impl>
void
DefaultFetch<Impl>::regStats()
{
    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".Insts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);

    fetchedBranches
        .name(name() + ".Branches")
        .desc("Number of branches that fetch encountered")
        .prereq(fetchedBranches);

    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);

    fetchCycles
        .name(name() + ".Cycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);

    fetchSquashCycles
        .name(name() + ".SquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);

    fetchIdleCycles
        .name(name() + ".IdleCycles")
        .desc("Number of cycles fetch was idle")
        .prereq(fetchIdleCycles);

    fetchBlockedCycles
        .name(name() + ".BlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);

    fetchedCacheLines
        .name(name() + ".CacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetchMiscStallCycles
        .name(name() + ".MiscStallCycles")
        .desc("Number of cycles fetch has spent waiting on interrupts, or "
              "bad addresses, or out of MSHRs")
        .prereq(fetchMiscStallCycles);

    fetchIcacheSquashes
        .name(name() + ".IcacheSquashes")
        .desc("Number of outstanding Icache misses that were squashed")
        .prereq(fetchIcacheSquashes);

    fetchNisnDist
        .init(/* base value
*/ 0,
              /* last value */ fetchWidth,
              /* bucket size */ 1)
        .name(name() + ".rateDist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf);

    idleRate
        .name(name() + ".idleRate")
        .desc("Percent of cycles fetch was idle")
        .prereq(idleRate);
    idleRate = fetchIdleCycles * 100 / cpu->numCycles;

    branchRate
        .name(name() + ".branchRate")
        .desc("Number of branch fetches per cycle")
        .flags(Stats::total);
    branchRate = fetchedBranches / cpu->numCycles;

    fetchRate
        .name(name() + ".rate")
        .desc("Number of inst fetches per cycle")
        .flags(Stats::total);
    fetchRate = fetchedInsts / cpu->numCycles;

    branchPred.regStats();
}

// Give fetch its back-pointer to the CPU, create the I-cache port (the
// port name needs the CPU's name), hook the checker up to the same port,
// and start the stage in the active state.
template<class Impl>
void
DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
{
    DPRINTF(Fetch, "Setting the CPU pointer.\n");
    cpu = cpu_ptr;

    // Name is finally available, so create the port.
    icachePort = new IcachePort(this);

#if USE_CHECKER
    if (cpu->checker) {
        cpu->checker->setIcachePort(icachePort);
    }
#endif

    // Schedule fetch to get the correct PC from the CPU
    // scheduleFetchStartupEvent(1);

    // Fetch needs to start fetching instructions at the very beginning,
    // so it must start up in active state.
    switchToActive();
}

// Store the main time buffer and set up read-only wires from the later
// stages, each offset backwards by that stage's communication delay.
template<class Impl>
void
DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
    DPRINTF(Fetch, "Setting the time buffer pointer.\n");
    timeBuffer = time_buffer;

    // Create wires to get information from proper places in time buffer.
    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
    fromRename = timeBuffer->getWire(-renameToFetchDelay);
    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}

// Record the CPU-owned list of currently active threads.
template<class Impl>
void
DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
    DPRINTF(Fetch, "Setting active threads list pointer.\n");
    activeThreads = at_ptr;
}

// Store the fetch->decode queue and grab the wire fetch writes into.
template<class Impl>
void
DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
    fetchQueue = fq_ptr;

    // Create wire to write information to proper place in fetch queue.
    toDecode = fetchQueue->getWire(0);
}

// Initialize each thread's PC/nextPC (and nextNPC for delay-slot ISAs)
// from the CPU's architectural state.
template<class Impl>
void
DefaultFetch<Impl>::initStage()
{
    // Setup PC and nextPC with initial state.
    for (int tid = 0; tid < numThreads; tid++) {
        PC[tid] = cpu->readPC(tid);
        nextPC[tid] = cpu->readNextPC(tid);
#if THE_ISA != ALPHA_ISA
        nextNPC[tid] = cpu->readNextNPC(tid);
#endif
    }
}

// Handle an I-cache response: discard stale/squashed responses, otherwise
// copy the returned line into the thread's fetch buffer and reactivate the
// stage.  Takes ownership of (and frees) the packet and its request.
template<class Impl>
void
DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
{
    unsigned tid = pkt->req->getThreadNum();

    DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);

    // Only change the status if it's still waiting on the icache access
    // to return.  A mismatched request means this response was already
    // squashed (e.g. by doSquash()), so just count it and drop it.
    if (fetchStatus[tid] != IcacheWaitResponse ||
        pkt->req != memReq[tid] ||
        isSwitchedOut()) {
        ++fetchIcacheSquashes;
        delete pkt->req;
        delete pkt;
        return;
    }

    // NOTE(review): getPtr's template argument looks like it should be
    // uint8_t rather than uint8_t* — harmless here since the result is
    // passed straight to memcpy, but worth confirming against Packet's API.
    memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
    cacheDataValid[tid] = true;

    if (!drainPending) {
        // Wake up the CPU (if it went to sleep and was waiting on
        // this completion event).
        cpu->wakeCPU();

        DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
                tid);

        switchToActive();
    }

    // Only switch to IcacheAccessComplete if we're not stalled as well.
    if (checkStall(tid)) {
        fetchStatus[tid] = Blocked;
    } else {
        fetchStatus[tid] = IcacheAccessComplete;
    }

    // Reset the mem req to NULL.
    delete pkt->req;
    delete pkt;
    memReq[tid] = NULL;
}

// Fetch has no in-flight state it must wait on, so it reports itself
// drained immediately and just remembers to stop initiating new fetches.
template <class Impl>
bool
DefaultFetch<Impl>::drain()
{
    // Fetch is ready to drain at any time.
    cpu->signalDrained();
    drainPending = true;
    return true;
}

// Resume normal fetching after a drain.
template <class Impl>
void
DefaultFetch<Impl>::resume()
{
    drainPending = false;
}

// Mark the stage switched out; only the branch predictor holds state
// that must be cleared.
template <class Impl>
void
DefaultFetch<Impl>::switchOut()
{
    switchedOut = true;
    // Branch predictor needs to have its state cleared.
    branchPred.switchOut();
}

// Reinitialize fetch when this CPU takes over execution from another:
// clear stalls and delay-slot state, reload PCs from the CPU, and reset
// the stage to an inactive, not-switched-out baseline.
template <class Impl>
void
DefaultFetch<Impl>::takeOverFrom()
{
    // Reset all state
    for (int i = 0; i < Impl::MaxThreads; ++i) {
        stalls[i].decode = 0;
        stalls[i].rename = 0;
        stalls[i].iew = 0;
        stalls[i].commit = 0;
        PC[i] = cpu->readPC(i);
        nextPC[i] = cpu->readNextPC(i);
#if THE_ISA != ALPHA_ISA
        nextNPC[i] = cpu->readNextNPC(i);
        delaySlotInfo[i].branchSeqNum = -1;
        delaySlotInfo[i].numInsts = 0;
        delaySlotInfo[i].targetAddr = 0;
        delaySlotInfo[i].targetReady = false;
#endif
        fetchStatus[i] = Running;
    }
    numInst = 0;
    wroteToTimeBuffer = false;
    _status = Inactive;
    switchedOut = false;
    branchPred.takeOverFrom();
}

// Restart fetching after a quiesce completes.
template <class Impl>
void
DefaultFetch<Impl>::wakeFromQuiesce()
{
    DPRINTF(Fetch, "Waking up from quiesce\n");
    // Hopefully this is safe
    // @todo: Allow other threads to wake from quiesce.
    fetchStatus[0] = Running;
}

// Activate the stage with the CPU's activity tracker (no-op if already
// active).
template <class Impl>
inline void
DefaultFetch<Impl>::switchToActive()
{
    if (_status == Inactive) {
        DPRINTF(Activity, "Activating stage.\n");

        cpu->activateStage(O3CPU::FetchIdx);

        _status = Active;
    }
}

// Deactivate the stage with the CPU's activity tracker (no-op if already
// inactive).
template <class Impl>
inline void
DefaultFetch<Impl>::switchToInactive()
{
    if (_status == Active) {
        DPRINTF(Activity, "Deactivating stage.\n");

        cpu->deactivateStage(O3CPU::FetchIdx);

        _status = Inactive;
    }
}

// Run branch prediction on a fetched instruction and advance next_PC /
// next_NPC accordingly.  Returns true if the branch was predicted taken.
// On delay-slot ISAs this also tracks how many delay-slot instructions
// still need to be fetched before redirecting to the predicted target.
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
                                          Addr &next_NPC)
{
    // Do branch prediction check here.
    // A bit of a misnomer...next_PC is actually the current PC until
    // this function updates it.
    bool predict_taken;

    if (!inst->isControl()) {
        // Non-control instructions simply fall through sequentially.
#if THE_ISA == ALPHA_ISA
        next_PC = next_PC + instSize;
        inst->setPredTarg(next_PC);
#else
        Addr cur_PC = next_PC;
        next_PC = cur_PC + instSize; //next_NPC;
        next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
        inst->setPredTarg(next_NPC);
#endif
        return false;
    }

    int tid = inst->threadNumber;
#if THE_ISA == ALPHA_ISA
    predict_taken = branchPred.predict(inst, next_PC, tid);
#else
    // pred_PC comes back from the predictor holding the predicted target.
    Addr pred_PC = next_PC;
    predict_taken = branchPred.predict(inst, pred_PC, tid);

    if (predict_taken) {
        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
    } else {
        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
    }

    if (predict_taken) {
        // Taken: execute the delay slot (old next_NPC) first, then the
        // predicted target.
        next_PC = next_NPC;
        next_NPC = pred_PC;

        // Update delay slot info
        ++delaySlotInfo[tid].numInsts;
        delaySlotInfo[tid].targetAddr = pred_PC;
        DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
                delaySlotInfo[tid].numInsts);
    } else { // !predict_taken
        if (inst->isCondDelaySlot()) {
            next_PC = pred_PC;
            // The delay
slot is skipped here if there is no
            // taken prediction (conditional delay-slot branch predicted
            // not taken annuls the slot).
        } else {
            next_PC = next_NPC;
            // No need to declare a delay slot here since
            // there is no predicted target to jump to.
        }

        next_NPC = next_NPC + instSize;
    }
#endif

    ++fetchedBranches;

    if (predict_taken) {
        ++predictedBranches;
    }

    return predict_taken;
}

// Initiate a timing read of the cache block containing fetch_PC for the
// given thread.  Returns false if fetch must stall this cycle (cache
// blocked, pending interrupt, or out of MSHRs); returns true if the block
// is already buffered, the access was sent, or translation faulted (the
// fault is reported through ret_fault).
template <class Impl>
bool
DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid)
{
    Fault fault = NoFault;

#if FULL_SYSTEM
    // Flag to say whether or not address is physical addr.
    unsigned flags = cpu->inPalMode(fetch_PC) ? PHYSICAL : 0;
#else
    unsigned flags = 0;
#endif // FULL_SYSTEM

    if (cacheBlocked || (interruptPending && flags == 0)) {
        // Hold off fetch from getting new instructions when:
        // Cache is blocked, or
        // while an interrupt is pending and we're not in PAL mode, or
        // fetch is switched out.
        return false;
    }

    // Align the fetch PC so it's at the start of a cache block.
    fetch_PC = icacheBlockAlignPC(fetch_PC);

    // If we've already got the block, no need to try to fetch it again.
    if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
        return true;
    }

    // Setup the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    // Build request here.
    RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags,
                                     fetch_PC, cpu->readCpuId(), tid);

    memReq[tid] = mem_req;

    // Translate the instruction request.
    fault = cpu->translateInstReq(mem_req, cpu->thread[tid]);

    // In the case of faults, the fetch stage may need to stall and wait
    // for the ITB miss to be handled.

    // If translation was successful, attempt to read the first
    // instruction.
    if (fault == NoFault) {
#if 0
        if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
            memReq[tid]->flags & UNCACHEABLE) {
            DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
                    "misspeculating path)!",
                    memReq[tid]->paddr);
            ret_fault = TheISA::genMachineCheckFault();
            return false;
        }
#endif

        // Build packet here.  The packet owns a fresh buffer that the
        // cache fills with the requested line.
        PacketPtr data_pkt = new Packet(mem_req,
                                        Packet::ReadReq, Packet::Broadcast);
        data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);

        // Invalidate the buffered line until the response comes back.
        cacheDataPC[tid] = fetch_PC;
        cacheDataValid[tid] = false;

        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");

        fetchedCacheLines++;

        // Now do the timing access to see whether or not the instruction
        // exists within the cache.
        if (!icachePort->sendTiming(data_pkt)) {
            // The cache refused the request (no MSHR available); stash
            // the packet so recvRetry() can resend it, and block further
            // fetches until then.
            assert(retryPkt == NULL);
            assert(retryTid == -1);
            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
            fetchStatus[tid] = IcacheWaitRetry;
            retryPkt = data_pkt;
            retryTid = tid;
            cacheBlocked = true;
            return false;
        }

        DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid);

        lastIcacheStall[tid] = curTick;

        DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
                "response.\n", tid);

        fetchStatus[tid] = IcacheWaitResponse;
    } else {
        // Translation faulted; drop the request and report the fault.
        delete mem_req;
        memReq[tid] = NULL;
    }

    ret_fault = fault;
    return true;
}

// Common squash work: redirect the thread's PCs to new_PC, drop any
// outstanding I-cache request or pending retry, and mark the thread
// Squashing.
template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
{
    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
            tid, new_PC);

    PC[tid] = new_PC;
    nextPC[tid] = new_PC + instSize;
    nextNPC[tid] = new_PC + (2 * instSize);

    // Clear the icache miss if it's outstanding.
    if (fetchStatus[tid] == IcacheWaitResponse) {
        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
                tid);
        // NOTE(review): the request is not freed here; processCacheCompletion
        // detects the mismatch with memReq[tid] when the stale response
        // arrives and deletes the request/packet there.
        memReq[tid] = NULL;
    }

    // Get rid of the retrying packet if it was from this thread.
    if (retryTid == tid) {
        assert(cacheBlocked);
        cacheBlocked = false;
        retryTid = -1;
        delete retryPkt->req;
        delete retryPkt;
        retryPkt = NULL;
    }

    fetchStatus[tid] = Squashing;

    ++fetchSquashCycles;
}

// Squash triggered by the decode stage (e.g. a branch mispredict caught
// at decode): redirect fetch and drop the in-flight instructions younger
// than seq_num.
template<class Impl>
void
DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
                                     const InstSeqNum &seq_num,
                                     unsigned tid)
{
    DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);

    doSquash(new_PC, tid);

#if THE_ISA != ALPHA_ISA
    // Clear pending delay-slot state if the squashed branch is at or
    // beyond the recorded delay-slot branch.
    if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
        delaySlotInfo[tid].numInsts = 0;
        delaySlotInfo[tid].targetAddr = 0;
        delaySlotInfo[tid].targetReady = false;
    }
#endif

    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
    cpu->removeInstsUntil(seq_num, tid);
}

// Return true if any condition (context switch, or a stall signalled by
// decode/rename/IEW/commit) should keep this thread from fetching.
template<class Impl>
bool
DefaultFetch<Impl>::checkStall(unsigned tid) const
{
    bool ret_val = false;

    if (cpu->contextSwitch) {
        DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
        ret_val = true;
    } else if (stalls[tid].decode) {
        DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
        ret_val = true;
    } else if (stalls[tid].rename) {
        DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
        ret_val = true;
    } else if (stalls[tid].iew) {
        DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
        ret_val = true;
    } else if (stalls[tid].commit) {
        DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
        ret_val = true;
    }

    return ret_val;
}

// Recompute the stage's overall status: Active if any active thread is
// Running, Squashing, or has a completed I-cache access; otherwise
// Inactive.  Also notifies the CPU on an Inactive->Active or
// Active->Inactive transition.
template<class Impl>
typename DefaultFetch<Impl>::FetchStatus
DefaultFetch<Impl>::updateFetchStatus()
{
    //Check Running
    std::list<unsigned>::iterator threads = (*activeThreads).begin();

    while (threads != (*activeThreads).end()) {

        unsigned tid = *threads++;

        if (fetchStatus[tid] == Running ||
            fetchStatus[tid] == Squashing ||
            fetchStatus[tid] == IcacheAccessComplete) {

            if (_status == Inactive) {
                DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);

                if (fetchStatus[tid] == IcacheAccessComplete) {
                    DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
                            "completion\n",tid);
                }

                cpu->activateStage(O3CPU::FetchIdx);
            }

            return Active;
        }
    }

    // Stage is switching from active to inactive, notify CPU of it.
    if (_status == Active) {
        DPRINTF(Activity, "Deactivating stage.\n");

        cpu->deactivateStage(O3CPU::FetchIdx);
    }

    return Inactive;
}

// Squash triggered by commit: redirect fetch and remove every instruction
// that has not yet reached the ROB.  squash_delay_slot/seq_num control
// delay-slot handling on non-Alpha ISAs.
template <class Impl>
void
DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
                           bool squash_delay_slot, unsigned tid)
{
    DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);

    doSquash(new_PC, tid);

#if THE_ISA == ALPHA_ISA
    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB(tid, true, 0);
#else
    // Clear pending delay-slot state if the squash covers the recorded
    // delay-slot branch.
    if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
        delaySlotInfo[tid].numInsts = 0;
        delaySlotInfo[tid].targetAddr = 0;
        delaySlotInfo[tid].targetReady = false;
    }

    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
#endif
}

// Main per-cycle entry point: update each active thread's status from the
// backwards signals, latch interrupt state (full-system only), then fetch
// for each fetching thread and report activity to the CPU.
template <class Impl>
void
DefaultFetch<Impl>::tick()
{
    std::list<unsigned>::iterator threads = (*activeThreads).begin();
    bool status_change = false;

    wroteToTimeBuffer = false;

    while (threads != (*activeThreads).end()) {
        unsigned tid = *threads++;

        // Check the signals for each thread to determine the proper status
        // for each thread.
        bool updated_status = checkSignalsAndUpdate(tid);
        status_change = status_change || updated_status;
    }

    DPRINTF(Fetch, "Running stage.\n");

    // Reset the number of the instruction we're fetching.
    numInst = 0;

#if FULL_SYSTEM
    if (fromCommit->commitInfo[0].interruptPending) {
        interruptPending = true;
    }

    if (fromCommit->commitInfo[0].clearInterrupt) {
        interruptPending = false;
    }
#endif

    for (threadFetched = 0; threadFetched < numFetchingThreads;
         threadFetched++) {
        // Fetch each of the actively fetching threads.
        fetch(status_change);
    }

    // Record number of instructions fetched this cycle for distribution.
    fetchNisnDist.sample(numInst);

    if (status_change) {
        // Change the fetch stage status if there was a status change.
        _status = updateFetchStatus();
    }

    // If there was activity this cycle, inform the CPU of it.
    if (wroteToTimeBuffer || cpu->contextSwitch) {
        DPRINTF(Activity, "Activity this cycle.\n");

        cpu->activityThisCycle();
    }
}

// Process all backwards signals for one thread (block/unblock from each
// later stage, squashes from commit and decode) and update the thread's
// fetch status.  Returns true if the status changed this cycle.
template <class Impl>
bool
DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
{
    // Update the per thread stall statuses.
    if (fromDecode->decodeBlock[tid]) {
        stalls[tid].decode = true;
    }

    if (fromDecode->decodeUnblock[tid]) {
        assert(stalls[tid].decode);
        assert(!fromDecode->decodeBlock[tid]);
        stalls[tid].decode = false;
    }

    if (fromRename->renameBlock[tid]) {
        stalls[tid].rename = true;
    }

    if (fromRename->renameUnblock[tid]) {
        assert(stalls[tid].rename);
        assert(!fromRename->renameBlock[tid]);
        stalls[tid].rename = false;
    }

    if (fromIEW->iewBlock[tid]) {
        stalls[tid].iew = true;
    }

    if (fromIEW->iewUnblock[tid]) {
        assert(stalls[tid].iew);
        assert(!fromIEW->iewBlock[tid]);
        stalls[tid].iew = false;
    }

    if (fromCommit->commitBlock[tid]) {
        stalls[tid].commit = true;
    }

    if (fromCommit->commitUnblock[tid]) {
        assert(stalls[tid].commit);
        assert(!fromCommit->commitBlock[tid]);
        stalls[tid].commit = false;
    }

    // Check squash signals from commit.
    if (fromCommit->commitInfo[tid].squash) {

        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from commit.\n",tid);

#if THE_ISA == ALPHA_ISA
        InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
#else
        // Delay-slot ISAs squash up to the branch-delay done sequence number.
        InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
#endif
        // In any case, squash.
        squash(fromCommit->commitInfo[tid].nextPC,
               doneSeqNum,
               fromCommit->commitInfo[tid].squashDelaySlot,
               tid);

        // Also check if there's a mispredict that happened.  A mispredict
        // also corrects the predictor with the actual target/direction.
        if (fromCommit->commitInfo[tid].branchMispredict) {
            branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
                              fromCommit->commitInfo[tid].nextPC,
                              fromCommit->commitInfo[tid].branchTaken,
                              tid);
        } else {
            branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
                              tid);
        }

        return true;
    } else if (fromCommit->commitInfo[tid].doneSeqNum) {
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcasted.
        branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
    }

    // Check ROB squash signals from commit.
    if (fromCommit->commitInfo[tid].robSquashing) {
        DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);

        // Continue to squash.
        fetchStatus[tid] = Squashing;

        return true;
    }

    // Check squash signals from decode.
    if (fromDecode->decodeInfo[tid].squash) {
        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from decode.\n",tid);

        // Update the branch predictor.
        if (fromDecode->decodeInfo[tid].branchMispredict) {
            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
                              fromDecode->decodeInfo[tid].nextPC,
                              fromDecode->decodeInfo[tid].branchTaken,
                              tid);
        } else {
            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
                              tid);
        }

        if (fetchStatus[tid] != Squashing) {

#if THE_ISA == ALPHA_ISA
            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
#else
            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
#endif
            // Squash unless we're already squashing
            squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
                             doneSeqNum,
                             tid);

            return true;
        }
    }

    // A stall blocks the thread unless we're still waiting on an I-cache
    // response (that transition happens when the response arrives).
    if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) {
        DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);

        fetchStatus[tid] = Blocked;

        return true;
    }

    if (fetchStatus[tid] == Blocked ||
        fetchStatus[tid] == Squashing) {
        // Switch status to running if fetch isn't being told to block or
        // squash this cycle.
        DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
                tid);

        fetchStatus[tid] = Running;

        return true;
    }

    // If we've reached this point, we have not gotten any signals that
    // cause fetch to change its status. Fetch remains the same as before.
    return false;
}

// Fetch instructions for one thread this cycle: pick a thread according to
// the fetch policy, read (or initiate a read of) its cache line, and push
// up to fetchWidth decoded-raw instructions toward decode.  status_change
// is set if the thread's fetch status changes.
template<class Impl>
void
DefaultFetch<Impl>::fetch(bool &status_change)
{
    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////
    int tid = getFetchingThread(fetchPolicy);

    if (tid == -1 || drainPending) {
        DPRINTF(Fetch,"There are no more threads available to fetch from.\n");

        // Breaks looping condition in tick()
        threadFetched = numFetchingThreads;
        return;
    }

    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);

    // The current PC.
    Addr &fetch_PC = PC[tid];

    // Fault code for memory access.
    Fault fault = NoFault;

    // If returning from the delay of a cache miss, then update the status
    // to running, otherwise do the cache access. Possibly move this up
    // to tick() function.
    if (fetchStatus[tid] == IcacheAccessComplete) {
        DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
                tid);

        fetchStatus[tid] = Running;
        status_change = true;
    } else if (fetchStatus[tid] == Running) {
        DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
                "instruction, starting at PC %08p.\n",
                tid, fetch_PC);

        bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
        if (!fetch_success) {
            // Could not even start the access: blame the blocked cache
            // or a miscellaneous stall (e.g. pending interrupt).
            if (cacheBlocked) {
                ++icacheStallCycles;
            } else {
                ++fetchMiscStallCycles;
            }
            return;
        }
    } else {
        // Not in a fetchable state; just account the cycle appropriately.
        if (fetchStatus[tid] == Idle) {
            ++fetchIdleCycles;
        } else if (fetchStatus[tid] == Blocked) {
            ++fetchBlockedCycles;
        } else if (fetchStatus[tid] == Squashing) {
            ++fetchSquashCycles;
        } else if (fetchStatus[tid] == IcacheWaitResponse) {
            ++icacheStallCycles;
        }

        // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
        // fetch should do nothing.
        return;
    }

    ++fetchCycles;

    // If we had a stall due to an icache miss, then return.
    if (fetchStatus[tid] == IcacheWaitResponse) {
        ++icacheStallCycles;
        status_change = true;
        return;
    }

    Addr next_PC = fetch_PC;
    Addr next_NPC = next_PC + instSize;
    InstSeqNum inst_seq;
    MachInst inst;
    ExtMachInst ext_inst;
    // @todo: Fix this hack.
    // Offset into the buffered cache line; the & ~3 presumably forces
    // 4-byte instruction alignment — confirm for ISAs with other sizes.
    unsigned offset = (fetch_PC & cacheBlkMask) & ~3;

    if (fault == NoFault) {
        // If the read of the first instruction was successful, then grab the
        // instructions from the rest of the cache line and put them into the
        // queue heading to decode.

        DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
                "decode.\n",tid);

        // Need to keep track of whether or not a predicted branch
        // ended this fetch block.
        bool predicted_branch = false;

        // Need to keep track of whether or not a delay slot
        // instruction has been fetched

        // Keep fetching until the end of the line, the fetch width is
        // exhausted, or a predicted-taken branch (with its delay slots,
        // if any) ends the block.
        for (;
             offset < cacheBlkSize &&
             numInst < fetchWidth &&
             (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
             ++numInst) {

            // Get a sequence number.
            inst_seq = cpu->getAndIncrementInstSeq();

            // Make sure this is a valid index.
            assert(offset <= cacheBlkSize - instSize);

            // Get the instruction from the array of the cache line,
            // converting from guest to host byte order.
            inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
                        (&cacheData[tid][offset]));

            ext_inst = TheISA::makeExtMI(inst, fetch_PC);

            // Create a new DynInst from the instruction fetched.
            DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
                                                 next_PC,
                                                 inst_seq, cpu);
            instruction->setTid(tid);

            instruction->setASID(tid);

            instruction->setThreadState(cpu->thread[tid]);

            DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
                    "[sn:%lli]\n",
                    tid, instruction->readPC(), inst_seq);

            DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
                    tid, instruction->staticInst->disassemble(fetch_PC));

            instruction->traceData =
                Trace::getInstRecord(curTick, cpu->tcBase(tid),
                                     instruction->staticInst,
                                     instruction->readPC());

            // Predict the branch (if any) and advance next_PC/next_NPC.
            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
                                                     next_NPC);

            // Add instruction to the CPU's list of instructions.
            instruction->setInstListIt(cpu->addInst(instruction));

            // Write the instruction to the first slot in the queue
            // that heads to decode.
            toDecode->insts[numInst] = instruction;

            toDecode->size++;

            // Increment stat of fetched instructions.
1149 ++fetchedInsts; 1150 1151 // Move to the next instruction, unless we have a branch. 1152 fetch_PC = next_PC; 1153 1154 if (instruction->isQuiesce()) { 1155 warn("cycle %lli: Quiesce instruction encountered, halting fetch!", 1156 curTick); 1157 fetchStatus[tid] = QuiescePending; 1158 ++numInst; 1159 status_change = true; 1160 break; 1161 } 1162 1163 offset += instSize; 1164 1165#if THE_ISA != ALPHA_ISA 1166 if (predicted_branch) { 1167 delaySlotInfo[tid].branchSeqNum = inst_seq; 1168 1169 DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n", 1170 tid, inst_seq); 1171 continue; 1172 } else if (delaySlotInfo[tid].numInsts > 0) { 1173 --delaySlotInfo[tid].numInsts; 1174 1175 // It's OK to set PC to target of branch 1176 if (delaySlotInfo[tid].numInsts == 0) { 1177 delaySlotInfo[tid].targetReady = true; 1178 1179 // Break the looping condition 1180 predicted_branch = true; 1181 } 1182 1183 DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to" 1184 " process.\n", tid, delaySlotInfo[tid].numInsts); 1185 } 1186#endif 1187 } 1188 1189 if (offset >= cacheBlkSize) { 1190 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " 1191 "block.\n", tid); 1192 } else if (numInst >= fetchWidth) { 1193 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " 1194 "for this cycle.\n", tid); 1195 } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) { 1196 DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " 1197 "instruction encountered.\n", tid); 1198 } 1199 } 1200 1201 if (numInst > 0) { 1202 wroteToTimeBuffer = true; 1203 } 1204 1205 // Now that fetching is completed, update the PC to signify what the next 1206 // cycle will be. 
1207 if (fault == NoFault) { 1208#if THE_ISA == ALPHA_ISA 1209 DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); 1210 PC[tid] = next_PC; 1211 nextPC[tid] = next_PC + instSize; 1212#else 1213 if (delaySlotInfo[tid].targetReady && 1214 delaySlotInfo[tid].numInsts == 0) { 1215 // Set PC to target 1216 PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC 1217 nextPC[tid] = next_PC + instSize; //next_NPC 1218 nextNPC[tid] = next_PC + (2 * instSize); 1219 1220 delaySlotInfo[tid].targetReady = false; 1221 } else { 1222 PC[tid] = next_PC; 1223 nextPC[tid] = next_NPC; 1224 nextNPC[tid] = next_NPC + instSize; 1225 } 1226 1227 DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]); 1228#endif 1229 } else { 1230 // We shouldn't be in an icache miss and also have a fault (an ITB 1231 // miss) 1232 if (fetchStatus[tid] == IcacheWaitResponse) { 1233 panic("Fetch should have exited prior to this!"); 1234 } 1235 1236 // Send the fault to commit. This thread will not do anything 1237 // until commit handles the fault. The only other way it can 1238 // wake up is if a squash comes along and changes the PC. 1239#if FULL_SYSTEM 1240 assert(numInst != fetchWidth); 1241 // Get a sequence number. 1242 inst_seq = cpu->getAndIncrementInstSeq(); 1243 // We will use a nop in order to carry the fault. 1244 ext_inst = TheISA::NoopMachInst; 1245 1246 // Create a new DynInst from the dummy nop. 
1247 DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, 1248 next_PC, 1249 inst_seq, cpu); 1250 instruction->setPredTarg(next_PC + instSize); 1251 instruction->setTid(tid); 1252 1253 instruction->setASID(tid); 1254 1255 instruction->setThreadState(cpu->thread[tid]); 1256 1257 instruction->traceData = NULL; 1258 1259 instruction->setInstListIt(cpu->addInst(instruction)); 1260 1261 instruction->fault = fault; 1262 1263 toDecode->insts[numInst] = instruction; 1264 toDecode->size++; 1265 1266 DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid); 1267 1268 fetchStatus[tid] = TrapPending; 1269 status_change = true; 1270 1271 warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]); 1272#else // !FULL_SYSTEM 1273 warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]); 1274#endif // FULL_SYSTEM 1275 } 1276} 1277 1278template<class Impl> 1279void 1280DefaultFetch<Impl>::recvRetry() 1281{ 1282 assert(cacheBlocked); 1283 if (retryPkt != NULL) { 1284 assert(retryTid != -1); 1285 assert(fetchStatus[retryTid] == IcacheWaitRetry); 1286 1287 if (icachePort->sendTiming(retryPkt)) { 1288 fetchStatus[retryTid] = IcacheWaitResponse; 1289 retryPkt = NULL; 1290 retryTid = -1; 1291 cacheBlocked = false; 1292 } 1293 } else { 1294 assert(retryTid == -1); 1295 // Access has been squashed since it was sent out. Just clear 1296 // the cache being blocked. 
1297 cacheBlocked = false; 1298 } 1299} 1300 1301/////////////////////////////////////// 1302// // 1303// SMT FETCH POLICY MAINTAINED HERE // 1304// // 1305/////////////////////////////////////// 1306template<class Impl> 1307int 1308DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority) 1309{ 1310 if (numThreads > 1) { 1311 switch (fetch_priority) { 1312 1313 case SingleThread: 1314 return 0; 1315 1316 case RoundRobin: 1317 return roundRobin(); 1318 1319 case IQ: 1320 return iqCount(); 1321 1322 case LSQ: 1323 return lsqCount(); 1324 1325 case Branch: 1326 return branchCount(); 1327 1328 default: 1329 return -1; 1330 } 1331 } else { 1332 int tid = *((*activeThreads).begin()); 1333 1334 if (fetchStatus[tid] == Running || 1335 fetchStatus[tid] == IcacheAccessComplete || 1336 fetchStatus[tid] == Idle) { 1337 return tid; 1338 } else { 1339 return -1; 1340 } 1341 } 1342 1343} 1344 1345 1346template<class Impl> 1347int 1348DefaultFetch<Impl>::roundRobin() 1349{ 1350 std::list<unsigned>::iterator pri_iter = priorityList.begin(); 1351 std::list<unsigned>::iterator end = priorityList.end(); 1352 1353 int high_pri; 1354 1355 while (pri_iter != end) { 1356 high_pri = *pri_iter; 1357 1358 assert(high_pri <= numThreads); 1359 1360 if (fetchStatus[high_pri] == Running || 1361 fetchStatus[high_pri] == IcacheAccessComplete || 1362 fetchStatus[high_pri] == Idle) { 1363 1364 priorityList.erase(pri_iter); 1365 priorityList.push_back(high_pri); 1366 1367 return high_pri; 1368 } 1369 1370 pri_iter++; 1371 } 1372 1373 return -1; 1374} 1375 1376template<class Impl> 1377int 1378DefaultFetch<Impl>::iqCount() 1379{ 1380 std::priority_queue<unsigned> PQ; 1381 1382 std::list<unsigned>::iterator threads = (*activeThreads).begin(); 1383 1384 while (threads != (*activeThreads).end()) { 1385 unsigned tid = *threads++; 1386 1387 PQ.push(fromIEW->iewInfo[tid].iqCount); 1388 } 1389 1390 while (!PQ.empty()) { 1391 1392 unsigned high_pri = PQ.top(); 1393 1394 if (fetchStatus[high_pri] == 
Running || 1395 fetchStatus[high_pri] == IcacheAccessComplete || 1396 fetchStatus[high_pri] == Idle) 1397 return high_pri; 1398 else 1399 PQ.pop(); 1400 1401 } 1402 1403 return -1; 1404} 1405 1406template<class Impl> 1407int 1408DefaultFetch<Impl>::lsqCount() 1409{ 1410 std::priority_queue<unsigned> PQ; 1411 1412 1413 std::list<unsigned>::iterator threads = (*activeThreads).begin(); 1414 1415 while (threads != (*activeThreads).end()) { 1416 unsigned tid = *threads++; 1417 1418 PQ.push(fromIEW->iewInfo[tid].ldstqCount); 1419 } 1420 1421 while (!PQ.empty()) { 1422 1423 unsigned high_pri = PQ.top(); 1424 1425 if (fetchStatus[high_pri] == Running || 1426 fetchStatus[high_pri] == IcacheAccessComplete || 1427 fetchStatus[high_pri] == Idle) 1428 return high_pri; 1429 else 1430 PQ.pop(); 1431 1432 } 1433 1434 return -1; 1435} 1436 1437template<class Impl> 1438int 1439DefaultFetch<Impl>::branchCount() 1440{ 1441 std::list<unsigned>::iterator threads = (*activeThreads).begin(); 1442 panic("Branch Count Fetch policy unimplemented\n"); 1443 return *threads; 1444} 1445