// fetch_impl.hh
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */

#include <algorithm>
#include <cstring>

#include "arch/isa_traits.hh"
#include "arch/tlb.hh"
#include "arch/utility.hh"
#include "arch/vtophys.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
#include "config/use_checker.hh"
#include "cpu/base.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/o3/fetch.hh"
#include "cpu/exetrace.hh"
#include "debug/Activity.hh"
#include "debug/Fetch.hh"
#include "mem/packet.hh"
#include "mem/request.hh"
#include "params/DerivO3CPU.hh"
#include "sim/byteswap.hh"
#include "sim/core.hh"
#include "sim/eventq.hh"
#include "sim/full_system.hh"
#include "sim/system.hh"

using namespace std;

template<class Impl>
void
DefaultFetch<Impl>::IcachePort::setPeer(Port *port)
{
    Port::setPeer(port);

    fetch->setIcache();
}

template<class Impl>
Tick
DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
{
    panic("DefaultFetch doesn't expect recvAtomic callback!");
    return curTick();
}

template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
{
    DPRINTF(Fetch, "DefaultFetch doesn't update its state from a "
            "functional call.\n");
}

template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
{
    if (status == RangeChange) {
        if (!snoopRangeSent) {
            snoopRangeSent = true;
            sendStatusChange(Port::RangeChange);
        }
        return;
    }

    panic("DefaultFetch doesn't expect recvStatusChange callback!");
}

template<class Impl>
bool
DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
{
    DPRINTF(Fetch, "Received timing\n");
    if (pkt->isResponse()) {
        // We shouldn't ever get a block in ownership state
        assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));

        fetch->processCacheCompletion(pkt);
    }
    // else: snooped a coherence request, just return.
    return true;
}

template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvRetry()
{
    fetch->recvRetry();
}

template<class Impl>
DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
    : cpu(_cpu),
      branchPred(params),
      predecoder(NULL),
      numInst(0),
      decodeToFetchDelay(params->decodeToFetchDelay),
      renameToFetchDelay(params->renameToFetchDelay),
      iewToFetchDelay(params->iewToFetchDelay),
      commitToFetchDelay(params->commitToFetchDelay),
      fetchWidth(params->fetchWidth),
      cacheBlocked(false),
      retryPkt(NULL),
      retryTid(InvalidThreadID),
      numThreads(params->numThreads),
      numFetchingThreads(params->smtNumFetchingThreads),
      interruptPending(false),
      drainPending(false),
      switchedOut(false),
      finishTranslationEvent(this)
{
    if (numThreads > Impl::MaxThreads)
        fatal("numThreads (%d) is larger than compiled limit (%d),\n"
              "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
              numThreads, static_cast<int>(Impl::MaxThreads));

    // Set fetch stage's status to inactive.
    _status = Inactive;

    std::string policy = params->smtFetchPolicy;

    // Convert string to lowercase.
    std::transform(policy.begin(), policy.end(), policy.begin(),
                   (int(*)(int)) tolower);
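    // (The cast selects the single-argument C library tolower(int) so that
    // std::transform receives an unambiguous function pointer.)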

    // Figure out fetch policy
    if (policy == "singlethread") {
        fetchPolicy = SingleThread;
        if (numThreads > 1)
            panic("Invalid Fetch Policy for an SMT workload.");
    } else if (policy == "roundrobin") {
        fetchPolicy = RoundRobin;
        DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
    } else if (policy == "branch") {
        fetchPolicy = Branch;
        DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
    } else if (policy == "iqcount") {
        fetchPolicy = IQ;
        DPRINTF(Fetch, "Fetch policy set to IQ count\n");
    } else if (policy == "lsqcount") {
        fetchPolicy = LSQ;
        DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
    } else {
        fatal("Invalid Fetch Policy. Options are: {SingleThread, RoundRobin,"
              " Branch, IQcount, LSQcount}\n");
    }
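
    // For illustration (assuming the usual Python config hook): the policy
    // string above typically comes from a line such as
    //     cpu.smtFetchPolicy = "RoundRobin"   # matched case-insensitively
    // in the simulation script.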

    // Get the size of an instruction.
    instSize = sizeof(TheISA::MachInst);

    // Name is finally available, so create the port.
    icachePort = new IcachePort(this);

    icachePort->snoopRangeSent = false;

#if USE_CHECKER
    if (cpu->checker) {
        cpu->checker->setIcachePort(icachePort);
    }
#endif
}

template <class Impl>
std::string
DefaultFetch<Impl>::name() const
{
    return cpu->name() + ".fetch";
}

template <class Impl>
void
DefaultFetch<Impl>::regStats()
{
    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".Insts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);

    fetchedBranches
        .name(name() + ".Branches")
        .desc("Number of branches that fetch encountered")
        .prereq(fetchedBranches);

    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);

    fetchCycles
        .name(name() + ".Cycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);

    fetchSquashCycles
        .name(name() + ".SquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);

    fetchTlbCycles
        .name(name() + ".TlbCycles")
        .desc("Number of cycles fetch has spent waiting for tlb")
        .prereq(fetchTlbCycles);

    fetchIdleCycles
        .name(name() + ".IdleCycles")
        .desc("Number of cycles fetch was idle")
        .prereq(fetchIdleCycles);

    fetchBlockedCycles
        .name(name() + ".BlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);

    fetchedCacheLines
        .name(name() + ".CacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetchMiscStallCycles
        .name(name() + ".MiscStallCycles")
        .desc("Number of cycles fetch has spent waiting on interrupts, "
              "bad addresses, or a lack of MSHRs")
        .prereq(fetchMiscStallCycles);

    fetchPendingDrainCycles
        .name(name() + ".PendingDrainCycles")
        .desc("Number of cycles fetch has spent waiting on pipes to drain")
        .prereq(fetchPendingDrainCycles);

    fetchNoActiveThreadStallCycles
        .name(name() + ".NoActiveThreadStallCycles")
        .desc("Number of stall cycles due to no active thread to fetch from")
        .prereq(fetchNoActiveThreadStallCycles);

    fetchPendingTrapStallCycles
        .name(name() + ".PendingTrapStallCycles")
        .desc("Number of stall cycles due to pending traps")
        .prereq(fetchPendingTrapStallCycles);

    fetchPendingQuiesceStallCycles
        .name(name() + ".PendingQuiesceStallCycles")
        .desc("Number of stall cycles due to pending quiesce instructions")
        .prereq(fetchPendingQuiesceStallCycles);

    fetchIcacheWaitRetryStallCycles
        .name(name() + ".IcacheWaitRetryStallCycles")
        .desc("Number of stall cycles due to full MSHR")
        .prereq(fetchIcacheWaitRetryStallCycles);

    fetchIcacheSquashes
        .name(name() + ".IcacheSquashes")
        .desc("Number of outstanding Icache misses that were squashed")
        .prereq(fetchIcacheSquashes);

    fetchTlbSquashes
        .name(name() + ".ItlbSquashes")
        .desc("Number of outstanding ITLB misses that were squashed")
        .prereq(fetchTlbSquashes);

    fetchNisnDist
        .init(/* base value */ 0,
              /* last value */ fetchWidth,
              /* bucket size */ 1)
        .name(name() + ".rateDist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf);

    idleRate
        .name(name() + ".idleRate")
        .desc("Percent of cycles fetch was idle")
        .prereq(idleRate);
    idleRate = fetchIdleCycles * 100 / cpu->numCycles;

    branchRate
        .name(name() + ".branchRate")
        .desc("Number of branch fetches per cycle")
        .flags(Stats::total);
    branchRate = fetchedBranches / cpu->numCycles;

    fetchRate
        .name(name() + ".rate")
        .desc("Number of inst fetches per cycle")
        .flags(Stats::total);
    fetchRate = fetchedInsts / cpu->numCycles;

    branchPred.regStats();
}

template<class Impl>
void
DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
    timeBuffer = time_buffer;

    // Create wires to get information from proper places in time buffer.
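    // A wire with a negative index reads the entry written that many cycles
    // in the past, which models the signal latency back from each stage.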
    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
    fromRename = timeBuffer->getWire(-renameToFetchDelay);
    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}

template<class Impl>
void
DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
{
    activeThreads = at_ptr;
}

template<class Impl>
void
DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    fetchQueue = fq_ptr;

    // Create wire to write information to proper place in fetch queue.
    toDecode = fetchQueue->getWire(0);
}

template<class Impl>
void
DefaultFetch<Impl>::initStage()
{
    // Setup PC and nextPC with initial state.
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        pc[tid] = cpu->pcState(tid);
        fetchOffset[tid] = 0;
        macroop[tid] = NULL;
        delayedCommit[tid] = false;
    }

    for (ThreadID tid = 0; tid < numThreads; tid++) {

        fetchStatus[tid] = Running;

        priorityList.push_back(tid);

        memReq[tid] = NULL;

        stalls[tid].decode = false;
        stalls[tid].rename = false;
        stalls[tid].iew = false;
        stalls[tid].commit = false;
    }

    // Schedule fetch to get the correct PC from the CPU
    // scheduleFetchStartupEvent(1);

    // Fetch needs to start fetching instructions at the very beginning,
    // so it must start up in active state.
    switchToActive();
}

template<class Impl>
void
DefaultFetch<Impl>::setIcache()
{
    // Size of cache block.
    cacheBlkSize = icachePort->peerBlockSize();

    // Create mask to get rid of offset bits.
    cacheBlkMask = (cacheBlkSize - 1);
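    // For illustration, assuming icacheBlockAlignPC() masks off the offset
    // bits as the comment above describes: with a 64-byte block the mask is
    // 0x3f, so icacheBlockAlignPC(0x1234) yields 0x1200.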

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        // Create space to store a cache line.
        cacheData[tid] = new uint8_t[cacheBlkSize];
        cacheDataPC[tid] = 0;
        cacheDataValid[tid] = false;
    }
}

template<class Impl>
void
DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
{
    ThreadID tid = pkt->req->threadId();

    DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);

    assert(!pkt->wasNacked());

    // Only change the status if it's still waiting on the icache access
    // to return.
    if (fetchStatus[tid] != IcacheWaitResponse ||
        pkt->req != memReq[tid] ||
        isSwitchedOut()) {
        ++fetchIcacheSquashes;
        delete pkt->req;
        delete pkt;
        return;
    }

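    // Copy the returned block into the per-thread fetch buffer; the main
    // fetch loop decodes instructions straight out of cacheData[tid].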
    memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize);
    cacheDataValid[tid] = true;

    if (!drainPending) {
        // Wake up the CPU (if it went to sleep and was waiting on
        // this completion event).
        cpu->wakeCPU();

        DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
                tid);

        switchToActive();
    }

    // Only switch to IcacheAccessComplete if we're not stalled as well.
    if (checkStall(tid)) {
        fetchStatus[tid] = Blocked;
    } else {
        fetchStatus[tid] = IcacheAccessComplete;
    }

    // Reset the mem req to NULL.
    delete pkt->req;
    delete pkt;
    memReq[tid] = NULL;
}

template <class Impl>
bool
DefaultFetch<Impl>::drain()
{
    // Fetch is ready to drain at any time.
    cpu->signalDrained();
    drainPending = true;
    return true;
}

template <class Impl>
void
DefaultFetch<Impl>::resume()
{
    drainPending = false;
}

template <class Impl>
void
DefaultFetch<Impl>::switchOut()
{
    switchedOut = true;
    // Branch predictor needs to have its state cleared.
    branchPred.switchOut();
}

template <class Impl>
void
DefaultFetch<Impl>::takeOverFrom()
{
    // Reset all state
    for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
        stalls[i].decode = 0;
        stalls[i].rename = 0;
        stalls[i].iew = 0;
        stalls[i].commit = 0;
        pc[i] = cpu->pcState(i);
        fetchStatus[i] = Running;
    }
    numInst = 0;
    wroteToTimeBuffer = false;
    _status = Inactive;
    switchedOut = false;
    interruptPending = false;
    branchPred.takeOverFrom();
}

template <class Impl>
void
DefaultFetch<Impl>::wakeFromQuiesce()
{
    DPRINTF(Fetch, "Waking up from quiesce\n");
    // Hopefully this is safe
    // @todo: Allow other threads to wake from quiesce.
    fetchStatus[0] = Running;
}

template <class Impl>
inline void
DefaultFetch<Impl>::switchToActive()
{
    if (_status == Inactive) {
        DPRINTF(Activity, "Activating stage.\n");

        cpu->activateStage(O3CPU::FetchIdx);

        _status = Active;
    }
}

template <class Impl>
inline void
DefaultFetch<Impl>::switchToInactive()
{
    if (_status == Active) {
        DPRINTF(Activity, "Deactivating stage.\n");

        cpu->deactivateStage(O3CPU::FetchIdx);

        _status = Inactive;
    }
}

template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(
        DynInstPtr &inst, TheISA::PCState &nextPC)
{
    // Do branch prediction check here.
    // A bit of a misnomer...nextPC is actually the current PC until
    // this function updates it.
    bool predict_taken;

    if (!inst->isControl()) {
        TheISA::advancePC(nextPC, inst->staticInst);
        inst->setPredTarg(nextPC);
        inst->setPredTaken(false);
        return false;
    }

    ThreadID tid = inst->threadNumber;
    predict_taken = branchPred.predict(inst, nextPC, tid);

    if (predict_taken) {
        DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n",
                tid, inst->seqNum, nextPC);
    } else {
        DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be not taken.\n",
                tid, inst->seqNum);
    }

    DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
            tid, inst->seqNum, nextPC);
    inst->setPredTarg(nextPC);
    inst->setPredTaken(predict_taken);

    ++fetchedBranches;

    if (predict_taken) {
        ++predictedBranches;
    }

    return predict_taken;
}

template <class Impl>
bool
DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
{
    Fault fault = NoFault;

    // @todo: not sure if these should block translation.
    // AlphaDep
    // Hold off fetch from getting new instructions when:
    // the cache is blocked, or
    // fetch is switched out, or
    // an interrupt is pending and we're not in PAL mode.
    if (cacheBlocked) {
        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
                tid);
        return false;
    } else if (isSwitchedOut()) {
        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n",
                tid);
        return false;
    } else if (checkInterrupt(pc)) {
        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
                tid);
        return false;
    }

    // Align the fetch address so it's at the start of a cache block.
    Addr block_PC = icacheBlockAlignPC(vaddr);

    DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
            tid, block_PC, vaddr);

    // Setup the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    // Build request here.
    RequestPtr mem_req =
        new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH,
                    pc, cpu->thread[tid]->contextId(), tid);

    memReq[tid] = mem_req;

    // Initiate translation of the icache block
    fetchStatus[tid] = ItlbWait;
    FetchTranslation *trans = new FetchTranslation(this);
    cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
                              trans, BaseTLB::Execute);
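    // translateTiming() may finish immediately or only after a TLB walk;
    // either way, the FetchTranslation object eventually calls back into
    // finishTranslation() below.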
    return true;
}

template <class Impl>
void
DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
{
    ThreadID tid = mem_req->threadId();
    Addr block_PC = mem_req->getVaddr();

    // Wake up CPU if it was idle
    cpu->wakeCPU();

    if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
        mem_req->getVaddr() != memReq[tid]->getVaddr() || isSwitchedOut()) {
        DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
                tid);
        ++fetchTlbSquashes;
        delete mem_req;
        return;
    }

    // If translation was successful, attempt to read the icache block.
    if (fault == NoFault) {
        // Check that we're not going off into random memory.
        // If we have, just wait around for commit to squash something and
        // put us on the right track.
        if (!cpu->system->isMemory(mem_req->getPaddr())) {
            warn("Address %#x is outside of physical memory, stopping fetch\n",
                 mem_req->getPaddr());
            fetchStatus[tid] = NoGoodAddr;
            delete mem_req;
            memReq[tid] = NULL;
            return;
        }

        // Build packet here.
        PacketPtr data_pkt = new Packet(mem_req,
                                        MemCmd::ReadReq, Packet::Broadcast);
        data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);

        cacheDataPC[tid] = block_PC;
        cacheDataValid[tid] = false;
        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");

        fetchedCacheLines++;

        // Access the cache.
        if (!icachePort->sendTiming(data_pkt)) {
            assert(retryPkt == NULL);
            assert(retryTid == InvalidThreadID);
            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);

            fetchStatus[tid] = IcacheWaitRetry;
            retryPkt = data_pkt;
            retryTid = tid;
            cacheBlocked = true;
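            // recvRetry() is expected to resend retryPkt once the cache can
            // accept it again, and to clear cacheBlocked at that point.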
        } else {
            DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
            DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
                    "response.\n", tid);

            lastIcacheStall[tid] = curTick();
            fetchStatus[tid] = IcacheWaitResponse;
        }
    } else {
        if (!(numInst < fetchWidth)) {
            assert(!finishTranslationEvent.scheduled());
            finishTranslationEvent.setFault(fault);
            finishTranslationEvent.setReq(mem_req);
            cpu->schedule(finishTranslationEvent,
                          cpu->nextCycle(curTick() + cpu->ticks(1)));
            return;
        }
        DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
                tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
        // Translation faulted, icache request won't be sent.
        delete mem_req;
        memReq[tid] = NULL;

        // Send the fault to commit. This thread will not do anything
        // until commit handles the fault. The only other way it can
        // wake up is if a squash comes along and changes the PC.
        TheISA::PCState fetchPC = pc[tid];

        DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
        // We will use a nop in order to carry the fault.
        DynInstPtr instruction = buildInst(tid,
                decoder.decode(TheISA::NoopMachInst, fetchPC.instAddr()),
                NULL, fetchPC, fetchPC, false);

        instruction->setPredTarg(fetchPC);
        instruction->fault = fault;
        wroteToTimeBuffer = true;

        DPRINTF(Activity, "Activity this cycle.\n");
        cpu->activityThisCycle();

        fetchStatus[tid] = TrapPending;

        DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
        DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
                tid, fault->name(), pc[tid]);
    }
    _status = updateFetchStatus();
}

template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
                             const DynInstPtr squashInst, ThreadID tid)
{
    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
            tid, newPC);

    pc[tid] = newPC;
    fetchOffset[tid] = 0;
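    // If the squash target is the same instruction address, keep the current
    // macroop so fetch can resume mid-macroop (this matters for microcoded
    // ISAs); otherwise start from a clean slate.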
    if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
        macroop[tid] = squashInst->macroop;
    else
        macroop[tid] = NULL;
    predecoder.reset();

    // Clear the icache miss if it's outstanding.
    if (fetchStatus[tid] == IcacheWaitResponse) {
        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
                tid);
        memReq[tid] = NULL;
    } else if (fetchStatus[tid] == ItlbWait) {
        DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
                tid);
        memReq[tid] = NULL;
    }

    // Get rid of the retrying packet if it was from this thread.
    if (retryTid == tid) {
        assert(cacheBlocked);
        if (retryPkt) {
            delete retryPkt->req;
            delete retryPkt;
        }
        retryPkt = NULL;
        retryTid = InvalidThreadID;
    }

    fetchStatus[tid] = Squashing;

    ++fetchSquashCycles;
}

template<class Impl>
void
DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
                                     const DynInstPtr squashInst,
                                     const InstSeqNum seq_num, ThreadID tid)
{
    DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);

    doSquash(newPC, squashInst, tid);

    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
    cpu->removeInstsUntil(seq_num, tid);
}

template<class Impl>
bool
DefaultFetch<Impl>::checkStall(ThreadID tid) const
{
    bool ret_val = false;

    if (cpu->contextSwitch) {
        DPRINTF(Fetch, "[tid:%i]: Stalling for a context switch.\n", tid);
        ret_val = true;
    } else if (stalls[tid].decode) {
        DPRINTF(Fetch, "[tid:%i]: Stall from Decode stage detected.\n", tid);
        ret_val = true;
    } else if (stalls[tid].rename) {
        DPRINTF(Fetch, "[tid:%i]: Stall from Rename stage detected.\n", tid);
        ret_val = true;
    } else if (stalls[tid].iew) {
        DPRINTF(Fetch, "[tid:%i]: Stall from IEW stage detected.\n", tid);
        ret_val = true;
    } else if (stalls[tid].commit) {
        DPRINTF(Fetch, "[tid:%i]: Stall from Commit stage detected.\n", tid);
        ret_val = true;
    }

    return ret_val;
}

template<class Impl>
typename DefaultFetch<Impl>::FetchStatus
DefaultFetch<Impl>::updateFetchStatus()
{
    // Check Running
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (fetchStatus[tid] == Running ||
            fetchStatus[tid] == Squashing ||
            fetchStatus[tid] == IcacheAccessComplete) {

            if (_status == Inactive) {
                DPRINTF(Activity, "[tid:%i]: Activating stage.\n", tid);

                if (fetchStatus[tid] == IcacheAccessComplete) {
                    DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
                            " completion\n", tid);
                }

                cpu->activateStage(O3CPU::FetchIdx);
            }

            return Active;
        }
    }

    // Stage is switching from active to inactive, notify CPU of it.
    if (_status == Active) {
        DPRINTF(Activity, "Deactivating stage.\n");

        cpu->deactivateStage(O3CPU::FetchIdx);
    }

    return Inactive;
}

template <class Impl>
void
DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
                           const InstSeqNum seq_num, DynInstPtr squashInst,
                           ThreadID tid)
{
    DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);

    doSquash(newPC, squashInst, tid);

    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB(tid);
}

template <class Impl>
void
DefaultFetch<Impl>::tick()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();
    bool status_change = false;

    wroteToTimeBuffer = false;

    for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
        issuePipelinedIfetch[i] = false;
    }

    while (threads != end) {
        ThreadID tid = *threads++;

        // Check the signals for each thread to determine the proper status
        // for each thread.
        bool updated_status = checkSignalsAndUpdate(tid);
        status_change = status_change || updated_status;
    }

    DPRINTF(Fetch, "Running stage.\n");

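    // Interrupt status in full-system mode is carried on thread 0's commit
    // signals only.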
    if (FullSystem) {
        if (fromCommit->commitInfo[0].interruptPending) {
            interruptPending = true;
        }

        if (fromCommit->commitInfo[0].clearInterrupt) {
            interruptPending = false;
        }
    }

    for (threadFetched = 0; threadFetched < numFetchingThreads;
         threadFetched++) {
        // Fetch each of the actively fetching threads.
        fetch(status_change);
    }

    // Record number of instructions fetched this cycle for distribution.
    fetchNisnDist.sample(numInst);

    if (status_change) {
        // Change the fetch stage status if there was a status change.
        _status = updateFetchStatus();
    }

    // If there was activity this cycle, inform the CPU of it.
    if (wroteToTimeBuffer || cpu->contextSwitch) {
        DPRINTF(Activity, "Activity this cycle.\n");

        cpu->activityThisCycle();
    }

    // Issue the next I-cache request if possible.
    for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
        if (issuePipelinedIfetch[i]) {
            pipelineIcacheAccesses(i);
        }
    }

    // Reset the number of instructions we've fetched this cycle.
    numInst = 0;
}

template <class Impl>
bool
DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
{
    // Update the per thread stall statuses.
    if (fromDecode->decodeBlock[tid]) {
        stalls[tid].decode = true;
    }

    if (fromDecode->decodeUnblock[tid]) {
        assert(stalls[tid].decode);
        assert(!fromDecode->decodeBlock[tid]);
        stalls[tid].decode = false;
    }

    if (fromRename->renameBlock[tid]) {
        stalls[tid].rename = true;
    }

    if (fromRename->renameUnblock[tid]) {
        assert(stalls[tid].rename);
        assert(!fromRename->renameBlock[tid]);
        stalls[tid].rename = false;
    }

    if (fromIEW->iewBlock[tid]) {
        stalls[tid].iew = true;
    }

    if (fromIEW->iewUnblock[tid]) {
        assert(stalls[tid].iew);
        assert(!fromIEW->iewBlock[tid]);
        stalls[tid].iew = false;
    }

    if (fromCommit->commitBlock[tid]) {
        stalls[tid].commit = true;
    }

    if (fromCommit->commitUnblock[tid]) {
        assert(stalls[tid].commit);
        assert(!fromCommit->commitBlock[tid]);
        stalls[tid].commit = false;
    }

    // Check squash signals from commit.
    if (fromCommit->commitInfo[tid].squash) {

        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from commit.\n", tid);
        // In any case, squash.
        squash(fromCommit->commitInfo[tid].pc,
               fromCommit->commitInfo[tid].doneSeqNum,
               fromCommit->commitInfo[tid].squashInst, tid);

        // If it was a branch mispredict on a control instruction, update the
        // branch predictor with that instruction; otherwise just kill the
        // invalid state we generated after the squashing sequence number.
        if (fromCommit->commitInfo[tid].mispredictInst &&
            fromCommit->commitInfo[tid].mispredictInst->isControl()) {
            branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
                              fromCommit->commitInfo[tid].pc,
                              fromCommit->commitInfo[tid].branchTaken,
                              tid);
        } else {
            branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
                              tid);
        }

        return true;
    } else if (fromCommit->commitInfo[tid].doneSeqNum) {
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcast.
        branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
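        // (A doneSeqNum of 0 means commit reported no completed instruction
        // this cycle, so this branch is skipped in that case.)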
    }

    // Check ROB squash signals from commit.
    if (fromCommit->commitInfo[tid].robSquashing) {
        DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);

        // Continue to squash.
        fetchStatus[tid] = Squashing;

        return true;
    }

    // Check squash signals from decode.
    if (fromDecode->decodeInfo[tid].squash) {
        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from decode.\n", tid);

        // Update the branch predictor.
        if (fromDecode->decodeInfo[tid].branchMispredict) {
            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
                              fromDecode->decodeInfo[tid].nextPC,
                              fromDecode->decodeInfo[tid].branchTaken,
                              tid);
        } else {
            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
                              tid);
        }

        if (fetchStatus[tid] != Squashing) {

1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2004-2006 The Regents of The University of Michigan
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Kevin Lim
41 * Korey Sewell
42 */
43
44#include <algorithm>
45#include <cstring>
46
47#include "arch/isa_traits.hh"
48#include "arch/tlb.hh"
49#include "arch/utility.hh"
50#include "arch/vtophys.hh"
51#include "base/types.hh"
52#include "config/the_isa.hh"
53#include "config/use_checker.hh"
54#include "cpu/base.hh"
55#include "cpu/checker/cpu.hh"
56#include "cpu/o3/fetch.hh"
57#include "cpu/exetrace.hh"
58#include "debug/Activity.hh"
59#include "debug/Fetch.hh"
60#include "mem/packet.hh"
61#include "mem/request.hh"
62#include "params/DerivO3CPU.hh"
63#include "sim/byteswap.hh"
64#include "sim/core.hh"
65#include "sim/eventq.hh"
66#include "sim/full_system.hh"
67#include "sim/system.hh"
68
69using namespace std;
70
71template<class Impl>
72void
73DefaultFetch<Impl>::IcachePort::setPeer(Port *port)
74{
75 Port::setPeer(port);
76
77 fetch->setIcache();
78}
79
80template<class Impl>
81Tick
82DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
83{
84 panic("DefaultFetch doesn't expect recvAtomic callback!");
85 return curTick();
86}
87
88template<class Impl>
89void
90DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
91{
92 DPRINTF(Fetch, "DefaultFetch doesn't update its state from a "
93 "functional call.\n");
94}
95
96template<class Impl>
97void
98DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
99{
100 if (status == RangeChange) {
101 if (!snoopRangeSent) {
102 snoopRangeSent = true;
103 sendStatusChange(Port::RangeChange);
104 }
105 return;
106 }
107
108 panic("DefaultFetch doesn't expect recvStatusChange callback!");
109}
110
111template<class Impl>
112bool
113DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
114{
115 DPRINTF(Fetch, "Received timing\n");
116 if (pkt->isResponse()) {
117 // We shouldn't ever get a block in ownership state
118 assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
119
120 fetch->processCacheCompletion(pkt);
121 }
122 //else Snooped a coherence request, just return
123 return true;
124}
125
126template<class Impl>
127void
128DefaultFetch<Impl>::IcachePort::recvRetry()
129{
130 fetch->recvRetry();
131}
132
133template<class Impl>
134DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
135 : cpu(_cpu),
136 branchPred(params),
137 predecoder(NULL),
138 numInst(0),
139 decodeToFetchDelay(params->decodeToFetchDelay),
140 renameToFetchDelay(params->renameToFetchDelay),
141 iewToFetchDelay(params->iewToFetchDelay),
142 commitToFetchDelay(params->commitToFetchDelay),
143 fetchWidth(params->fetchWidth),
144 cacheBlocked(false),
145 retryPkt(NULL),
146 retryTid(InvalidThreadID),
147 numThreads(params->numThreads),
148 numFetchingThreads(params->smtNumFetchingThreads),
149 interruptPending(false),
150 drainPending(false),
151 switchedOut(false),
152 finishTranslationEvent(this)
153{
154 if (numThreads > Impl::MaxThreads)
155 fatal("numThreads (%d) is larger than compiled limit (%d),\n"
156 "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
157 numThreads, static_cast<int>(Impl::MaxThreads));
158
159 // Set fetch stage's status to inactive.
160 _status = Inactive;
161
162 std::string policy = params->smtFetchPolicy;
163
164 // Convert string to lowercase
165 std::transform(policy.begin(), policy.end(), policy.begin(),
166 (int(*)(int)) tolower);
167
168 // Figure out fetch policy
169 if (policy == "singlethread") {
170 fetchPolicy = SingleThread;
171 if (numThreads > 1)
172 panic("Invalid Fetch Policy for a SMT workload.");
173 } else if (policy == "roundrobin") {
174 fetchPolicy = RoundRobin;
175 DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
176 } else if (policy == "branch") {
177 fetchPolicy = Branch;
178 DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
179 } else if (policy == "iqcount") {
180 fetchPolicy = IQ;
181 DPRINTF(Fetch, "Fetch policy set to IQ count\n");
182 } else if (policy == "lsqcount") {
183 fetchPolicy = LSQ;
184 DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
185 } else {
186 fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
187 " RoundRobin,LSQcount,IQcount}\n");
188 }
189
190 // Get the size of an instruction.
191 instSize = sizeof(TheISA::MachInst);
192
193 // Name is finally available, so create the port.
194 icachePort = new IcachePort(this);
195
196 icachePort->snoopRangeSent = false;
197
198#if USE_CHECKER
199 if (cpu->checker) {
200 cpu->checker->setIcachePort(icachePort);
201 }
202#endif
203}
204
205template <class Impl>
206std::string
207DefaultFetch<Impl>::name() const
208{
209 return cpu->name() + ".fetch";
210}
211
212template <class Impl>
213void
214DefaultFetch<Impl>::regStats()
215{
216 icacheStallCycles
217 .name(name() + ".icacheStallCycles")
218 .desc("Number of cycles fetch is stalled on an Icache miss")
219 .prereq(icacheStallCycles);
220
221 fetchedInsts
222 .name(name() + ".Insts")
223 .desc("Number of instructions fetch has processed")
224 .prereq(fetchedInsts);
225
226 fetchedBranches
227 .name(name() + ".Branches")
228 .desc("Number of branches that fetch encountered")
229 .prereq(fetchedBranches);
230
231 predictedBranches
232 .name(name() + ".predictedBranches")
233 .desc("Number of branches that fetch has predicted taken")
234 .prereq(predictedBranches);
235
236 fetchCycles
237 .name(name() + ".Cycles")
238 .desc("Number of cycles fetch has run and was not squashing or"
239 " blocked")
240 .prereq(fetchCycles);
241
242 fetchSquashCycles
243 .name(name() + ".SquashCycles")
244 .desc("Number of cycles fetch has spent squashing")
245 .prereq(fetchSquashCycles);
246
247 fetchTlbCycles
248 .name(name() + ".TlbCycles")
249 .desc("Number of cycles fetch has spent waiting for tlb")
250 .prereq(fetchTlbCycles);
251
252 fetchIdleCycles
253 .name(name() + ".IdleCycles")
254 .desc("Number of cycles fetch was idle")
255 .prereq(fetchIdleCycles);
256
257 fetchBlockedCycles
258 .name(name() + ".BlockedCycles")
259 .desc("Number of cycles fetch has spent blocked")
260 .prereq(fetchBlockedCycles);
261
262 fetchedCacheLines
263 .name(name() + ".CacheLines")
264 .desc("Number of cache lines fetched")
265 .prereq(fetchedCacheLines);
266
267 fetchMiscStallCycles
268 .name(name() + ".MiscStallCycles")
269 .desc("Number of cycles fetch has spent waiting on interrupts, or "
270 "bad addresses, or out of MSHRs")
271 .prereq(fetchMiscStallCycles);
272
273 fetchPendingDrainCycles
274 .name(name() + ".PendingDrainCycles")
275 .desc("Number of cycles fetch has spent waiting on pipes to drain")
276 .prereq(fetchPendingDrainCycles);
277
278 fetchNoActiveThreadStallCycles
279 .name(name() + ".NoActiveThreadStallCycles")
280 .desc("Number of stall cycles due to no active thread to fetch from")
281 .prereq(fetchNoActiveThreadStallCycles);
282
283 fetchPendingTrapStallCycles
284 .name(name() + ".PendingTrapStallCycles")
285 .desc("Number of stall cycles due to pending traps")
286 .prereq(fetchPendingTrapStallCycles);
287
288 fetchPendingQuiesceStallCycles
289 .name(name() + ".PendingQuiesceStallCycles")
290 .desc("Number of stall cycles due to pending quiesce instructions")
291 .prereq(fetchPendingQuiesceStallCycles);
292
293 fetchIcacheWaitRetryStallCycles
294 .name(name() + ".IcacheWaitRetryStallCycles")
295 .desc("Number of stall cycles due to full MSHR")
296 .prereq(fetchIcacheWaitRetryStallCycles);
297
298 fetchIcacheSquashes
299 .name(name() + ".IcacheSquashes")
300 .desc("Number of outstanding Icache misses that were squashed")
301 .prereq(fetchIcacheSquashes);
302
303 fetchTlbSquashes
304 .name(name() + ".ItlbSquashes")
305 .desc("Number of outstanding ITLB misses that were squashed")
306 .prereq(fetchTlbSquashes);
307
308 fetchNisnDist
309 .init(/* base value */ 0,
310 /* last value */ fetchWidth,
311 /* bucket size */ 1)
312 .name(name() + ".rateDist")
313 .desc("Number of instructions fetched each cycle (Total)")
314 .flags(Stats::pdf);
315
316 idleRate
317 .name(name() + ".idleRate")
318 .desc("Percent of cycles fetch was idle")
319 .prereq(idleRate);
320 idleRate = fetchIdleCycles * 100 / cpu->numCycles;
321
322 branchRate
323 .name(name() + ".branchRate")
324 .desc("Number of branch fetches per cycle")
325 .flags(Stats::total);
326 branchRate = fetchedBranches / cpu->numCycles;
327
328 fetchRate
329 .name(name() + ".rate")
330 .desc("Number of inst fetches per cycle")
331 .flags(Stats::total);
332 fetchRate = fetchedInsts / cpu->numCycles;
333
334 branchPred.regStats();
335}
336
337template<class Impl>
338void
339DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
340{
341 timeBuffer = time_buffer;
342
343 // Create wires to get information from proper places in time buffer.
344 fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
345 fromRename = timeBuffer->getWire(-renameToFetchDelay);
346 fromIEW = timeBuffer->getWire(-iewToFetchDelay);
347 fromCommit = timeBuffer->getWire(-commitToFetchDelay);
348}
349
350template<class Impl>
351void
352DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
353{
354 activeThreads = at_ptr;
355}
356
357template<class Impl>
358void
359DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
360{
361 fetchQueue = fq_ptr;
362
363 // Create wire to write information to proper place in fetch queue.
364 toDecode = fetchQueue->getWire(0);
365}
366
367template<class Impl>
368void
369DefaultFetch<Impl>::initStage()
370{
371 // Setup PC and nextPC with initial state.
372 for (ThreadID tid = 0; tid < numThreads; tid++) {
373 pc[tid] = cpu->pcState(tid);
374 fetchOffset[tid] = 0;
375 macroop[tid] = NULL;
376 delayedCommit[tid] = false;
377 }
378
379 for (ThreadID tid = 0; tid < numThreads; tid++) {
380
381 fetchStatus[tid] = Running;
382
383 priorityList.push_back(tid);
384
385 memReq[tid] = NULL;
386
387 stalls[tid].decode = false;
388 stalls[tid].rename = false;
389 stalls[tid].iew = false;
390 stalls[tid].commit = false;
391 }
392
393 // Schedule fetch to get the correct PC from the CPU
394 // scheduleFetchStartupEvent(1);
395
396 // Fetch needs to start fetching instructions at the very beginning,
397 // so it must start up in active state.
398 switchToActive();
399}
400
401template<class Impl>
402void
403DefaultFetch<Impl>::setIcache()
404{
405 // Size of cache block.
406 cacheBlkSize = icachePort->peerBlockSize();
407
408 // Create mask to get rid of offset bits.
409 cacheBlkMask = (cacheBlkSize - 1);
410
411 for (ThreadID tid = 0; tid < numThreads; tid++) {
412 // Create space to store a cache line.
413 cacheData[tid] = new uint8_t[cacheBlkSize];
414 cacheDataPC[tid] = 0;
415 cacheDataValid[tid] = false;
416 }
417}
418
419template<class Impl>
420void
421DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
422{
423 ThreadID tid = pkt->req->threadId();
424
425 DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
426
427 assert(!pkt->wasNacked());
428
429 // Only change the status if it's still waiting on the icache access
430 // to return.
431 if (fetchStatus[tid] != IcacheWaitResponse ||
432 pkt->req != memReq[tid] ||
433 isSwitchedOut()) {
434 ++fetchIcacheSquashes;
435 delete pkt->req;
436 delete pkt;
437 return;
438 }
439
440 memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize);
441 cacheDataValid[tid] = true;
442
443 if (!drainPending) {
444 // Wake up the CPU (if it went to sleep and was waiting on
445 // this completion event).
446 cpu->wakeCPU();
447
448 DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
449 tid);
450
451 switchToActive();
452 }
453
454 // Only switch to IcacheAccessComplete if we're not stalled as well.
455 if (checkStall(tid)) {
456 fetchStatus[tid] = Blocked;
457 } else {
458 fetchStatus[tid] = IcacheAccessComplete;
459 }
460
461 // Reset the mem req to NULL.
462 delete pkt->req;
463 delete pkt;
464 memReq[tid] = NULL;
465}
466
467template <class Impl>
468bool
469DefaultFetch<Impl>::drain()
470{
471 // Fetch is ready to drain at any time.
472 cpu->signalDrained();
473 drainPending = true;
474 return true;
475}
476
477template <class Impl>
478void
479DefaultFetch<Impl>::resume()
480{
481 drainPending = false;
482}
483
484template <class Impl>
485void
486DefaultFetch<Impl>::switchOut()
487{
488 switchedOut = true;
489 // Branch predictor needs to have its state cleared.
490 branchPred.switchOut();
491}
492
493template <class Impl>
494void
495DefaultFetch<Impl>::takeOverFrom()
496{
497 // Reset all state
498 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
499 stalls[i].decode = 0;
500 stalls[i].rename = 0;
501 stalls[i].iew = 0;
502 stalls[i].commit = 0;
503 pc[i] = cpu->pcState(i);
504 fetchStatus[i] = Running;
505 }
506 numInst = 0;
507 wroteToTimeBuffer = false;
508 _status = Inactive;
509 switchedOut = false;
510 interruptPending = false;
511 branchPred.takeOverFrom();
512}
513
514template <class Impl>
515void
516DefaultFetch<Impl>::wakeFromQuiesce()
517{
518 DPRINTF(Fetch, "Waking up from quiesce\n");
519 // Hopefully this is safe
520 // @todo: Allow other threads to wake from quiesce.
521 fetchStatus[0] = Running;
522}
523
524template <class Impl>
525inline void
526DefaultFetch<Impl>::switchToActive()
527{
528 if (_status == Inactive) {
529 DPRINTF(Activity, "Activating stage.\n");
530
531 cpu->activateStage(O3CPU::FetchIdx);
532
533 _status = Active;
534 }
535}
536
537template <class Impl>
538inline void
539DefaultFetch<Impl>::switchToInactive()
540{
541 if (_status == Active) {
542 DPRINTF(Activity, "Deactivating stage.\n");
543
544 cpu->deactivateStage(O3CPU::FetchIdx);
545
546 _status = Inactive;
547 }
548}
549
550template <class Impl>
551bool
552DefaultFetch<Impl>::lookupAndUpdateNextPC(
553 DynInstPtr &inst, TheISA::PCState &nextPC)
554{
555 // Do branch prediction check here.
556 // A bit of a misnomer...next_PC is actually the current PC until
557 // this function updates it.
558 bool predict_taken;
559
560 if (!inst->isControl()) {
561 TheISA::advancePC(nextPC, inst->staticInst);
562 inst->setPredTarg(nextPC);
563 inst->setPredTaken(false);
564 return false;
565 }
566
567 ThreadID tid = inst->threadNumber;
568 predict_taken = branchPred.predict(inst, nextPC, tid);
569
570 if (predict_taken) {
571 DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n",
572 tid, inst->seqNum, nextPC);
573 } else {
574 DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
575 tid, inst->seqNum);
576 }
577
578 DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
579 tid, inst->seqNum, nextPC);
580 inst->setPredTarg(nextPC);
581 inst->setPredTaken(predict_taken);
582
583 ++fetchedBranches;
584
585 if (predict_taken) {
586 ++predictedBranches;
587 }
588
589 return predict_taken;
590}
591
592template <class Impl>
593bool
594DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
595{
596 Fault fault = NoFault;
597
598 // @todo: not sure if these should block translation.
599 //AlphaDep
600 if (cacheBlocked) {
601 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
602 tid);
603 return false;
604 } else if (isSwitchedOut()) {
605 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n",
606 tid);
607 return false;
608 } else if (checkInterrupt(pc)) {
609 // Hold off fetch from getting new instructions when:
610 // Cache is blocked, or
611 // while an interrupt is pending and we're not in PAL mode, or
612 // fetch is switched out.
613 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
614 tid);
615 return false;
616 }
617
618 // Align the fetch address so it's at the start of a cache block.
619 Addr block_PC = icacheBlockAlignPC(vaddr);
620
621 DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
622 tid, block_PC, vaddr);
623
624 // Setup the memReq to do a read of the first instruction's address.
625 // Set the appropriate read size and flags as well.
626 // Build request here.
627 RequestPtr mem_req =
628 new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH,
629 pc, cpu->thread[tid]->contextId(), tid);
630
631 memReq[tid] = mem_req;
632
633 // Initiate translation of the icache block
634 fetchStatus[tid] = ItlbWait;
635 FetchTranslation *trans = new FetchTranslation(this);
636 cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
637 trans, BaseTLB::Execute);
638 return true;
639}
640
641template <class Impl>
642void
643DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
644{
645 ThreadID tid = mem_req->threadId();
646 Addr block_PC = mem_req->getVaddr();
647
648 // Wake up CPU if it was idle
649 cpu->wakeCPU();
650
651 if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
652 mem_req->getVaddr() != memReq[tid]->getVaddr() || isSwitchedOut()) {
653 DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
654 tid);
655 ++fetchTlbSquashes;
656 delete mem_req;
657 return;
658 }
659
660
661 // If translation was successful, attempt to read the icache block.
662 if (fault == NoFault) {
663 // Check that we're not going off into random memory
664 // If we have, just wait around for commit to squash something and put
665 // us on the right track
666 if (!cpu->system->isMemory(mem_req->getPaddr())) {
667 warn("Address %#x is outside of physical memory, stopping fetch\n",
668 mem_req->getPaddr());
669 fetchStatus[tid] = NoGoodAddr;
670 delete mem_req;
671 memReq[tid] = NULL;
672 return;
673 }
674
675 // Build packet here.
676 PacketPtr data_pkt = new Packet(mem_req,
677 MemCmd::ReadReq, Packet::Broadcast);
678 data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
679
680 cacheDataPC[tid] = block_PC;
681 cacheDataValid[tid] = false;
682 DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
683
684 fetchedCacheLines++;
685
686 // Access the cache.
687 if (!icachePort->sendTiming(data_pkt)) {
688 assert(retryPkt == NULL);
689 assert(retryTid == InvalidThreadID);
690 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
691
692 fetchStatus[tid] = IcacheWaitRetry;
693 retryPkt = data_pkt;
694 retryTid = tid;
695 cacheBlocked = true;
696 } else {
697 DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
698 DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
699 "response.\n", tid);
700
701 lastIcacheStall[tid] = curTick();
702 fetchStatus[tid] = IcacheWaitResponse;
703 }
704 } else {
705 if (!(numInst < fetchWidth)) {
706 assert(!finishTranslationEvent.scheduled());
707 finishTranslationEvent.setFault(fault);
708 finishTranslationEvent.setReq(mem_req);
709 cpu->schedule(finishTranslationEvent, cpu->nextCycle(curTick() + cpu->ticks(1)));
710 return;
711 }
712 DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
713 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
714 // Translation faulted, icache request won't be sent.
715 delete mem_req;
716 memReq[tid] = NULL;
717
718 // Send the fault to commit. This thread will not do anything
719 // until commit handles the fault. The only other way it can
720 // wake up is if a squash comes along and changes the PC.
721 TheISA::PCState fetchPC = pc[tid];
722
723 DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
724 // We will use a nop in ordier to carry the fault.
725 DynInstPtr instruction = buildInst(tid,
726 decoder.decode(TheISA::NoopMachInst, fetchPC.instAddr()),
727 NULL, fetchPC, fetchPC, false);
728
729 instruction->setPredTarg(fetchPC);
730 instruction->fault = fault;
731 wroteToTimeBuffer = true;
732
733 DPRINTF(Activity, "Activity this cycle.\n");
734 cpu->activityThisCycle();
735
736 fetchStatus[tid] = TrapPending;
737
738 DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
739 DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
740 tid, fault->name(), pc[tid]);
741 }
742 _status = updateFetchStatus();
743}
744
745template <class Impl>
746inline void
747DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
748 const DynInstPtr squashInst, ThreadID tid)
749{
750 DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
751 tid, newPC);
752
753 pc[tid] = newPC;
754 fetchOffset[tid] = 0;
755 if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
756 macroop[tid] = squashInst->macroop;
757 else
758 macroop[tid] = NULL;
759 predecoder.reset();
760
761 // Clear the icache miss if it's outstanding.
762 if (fetchStatus[tid] == IcacheWaitResponse) {
763 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
764 tid);
765 memReq[tid] = NULL;
766 } else if (fetchStatus[tid] == ItlbWait) {
767 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
768 tid);
769 memReq[tid] = NULL;
770 }
771
772 // Get rid of the retrying packet if it was from this thread.
773 if (retryTid == tid) {
774 assert(cacheBlocked);
775 if (retryPkt) {
776 delete retryPkt->req;
777 delete retryPkt;
778 }
779 retryPkt = NULL;
780 retryTid = InvalidThreadID;
781 }
782
783 fetchStatus[tid] = Squashing;
784
785 ++fetchSquashCycles;
786}
787
788template<class Impl>
789void
790DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
791 const DynInstPtr squashInst,
792 const InstSeqNum seq_num, ThreadID tid)
793{
794 DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);
795
796 doSquash(newPC, squashInst, tid);
797
798 // Tell the CPU to remove any instructions that are in flight between
799 // fetch and decode.
800 cpu->removeInstsUntil(seq_num, tid);
801}
802
803template<class Impl>
804bool
805DefaultFetch<Impl>::checkStall(ThreadID tid) const
806{
807 bool ret_val = false;
808
809 if (cpu->contextSwitch) {
810 DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
811 ret_val = true;
812 } else if (stalls[tid].decode) {
813 DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
814 ret_val = true;
815 } else if (stalls[tid].rename) {
816 DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
817 ret_val = true;
818 } else if (stalls[tid].iew) {
819 DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
820 ret_val = true;
821 } else if (stalls[tid].commit) {
822 DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
823 ret_val = true;
824 }
825
826 return ret_val;
827}
828
829template<class Impl>
830typename DefaultFetch<Impl>::FetchStatus
831DefaultFetch<Impl>::updateFetchStatus()
832{
833 //Check Running
834 list<ThreadID>::iterator threads = activeThreads->begin();
835 list<ThreadID>::iterator end = activeThreads->end();
836
837 while (threads != end) {
838 ThreadID tid = *threads++;
839
840 if (fetchStatus[tid] == Running ||
841 fetchStatus[tid] == Squashing ||
842 fetchStatus[tid] == IcacheAccessComplete) {
843
844 if (_status == Inactive) {
845 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
846
847 if (fetchStatus[tid] == IcacheAccessComplete) {
848 DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
849 "completion\n",tid);
850 }
851
852 cpu->activateStage(O3CPU::FetchIdx);
853 }
854
855 return Active;
856 }
857 }
858
859 // Stage is switching from active to inactive, notify CPU of it.
860 if (_status == Active) {
861 DPRINTF(Activity, "Deactivating stage.\n");
862
863 cpu->deactivateStage(O3CPU::FetchIdx);
864 }
865
866 return Inactive;
867}
868
869template <class Impl>
870void
871DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
872 const InstSeqNum seq_num, DynInstPtr squashInst,
873 ThreadID tid)
874{
875 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);
876
877 doSquash(newPC, squashInst, tid);
878
879 // Tell the CPU to remove any instructions that are not in the ROB.
880 cpu->removeInstsNotInROB(tid);
881}
882
883template <class Impl>
884void
885DefaultFetch<Impl>::tick()
886{
887 list<ThreadID>::iterator threads = activeThreads->begin();
888 list<ThreadID>::iterator end = activeThreads->end();
889 bool status_change = false;
890
891 wroteToTimeBuffer = false;
892
893 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
894 issuePipelinedIfetch[i] = false;
895 }
896
897 while (threads != end) {
898 ThreadID tid = *threads++;
899
900 // Check the signals for each thread to determine the proper status
901 // for each thread.
902 bool updated_status = checkSignalsAndUpdate(tid);
903 status_change = status_change || updated_status;
904 }
905
906 DPRINTF(Fetch, "Running stage.\n");
907
908 if (FullSystem) {
909 if (fromCommit->commitInfo[0].interruptPending) {
910 interruptPending = true;
911 }
912
913 if (fromCommit->commitInfo[0].clearInterrupt) {
914 interruptPending = false;
915 }
916 }
917
918 for (threadFetched = 0; threadFetched < numFetchingThreads;
919 threadFetched++) {
920 // Fetch each of the actively fetching threads.
921 fetch(status_change);
922 }
923
924 // Record number of instructions fetched this cycle for distribution.
925 fetchNisnDist.sample(numInst);
926
927 if (status_change) {
928 // Change the fetch stage status if there was a status change.
929 _status = updateFetchStatus();
930 }
931
932 // If there was activity this cycle, inform the CPU of it.
933 if (wroteToTimeBuffer || cpu->contextSwitch) {
934 DPRINTF(Activity, "Activity this cycle.\n");
935
936 cpu->activityThisCycle();
937 }
938
939 // Issue the next I-cache request if possible.
940 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
941 if (issuePipelinedIfetch[i]) {
942 pipelineIcacheAccesses(i);
943 }
944 }
945
946    // Reset the count of instructions fetched this cycle.
947 numInst = 0;
948}
949
950template <class Impl>
951bool
952DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
953{
954 // Update the per thread stall statuses.
955 if (fromDecode->decodeBlock[tid]) {
956 stalls[tid].decode = true;
957 }
958
959 if (fromDecode->decodeUnblock[tid]) {
960 assert(stalls[tid].decode);
961 assert(!fromDecode->decodeBlock[tid]);
962 stalls[tid].decode = false;
963 }
964
965 if (fromRename->renameBlock[tid]) {
966 stalls[tid].rename = true;
967 }
968
969 if (fromRename->renameUnblock[tid]) {
970 assert(stalls[tid].rename);
971 assert(!fromRename->renameBlock[tid]);
972 stalls[tid].rename = false;
973 }
974
975 if (fromIEW->iewBlock[tid]) {
976 stalls[tid].iew = true;
977 }
978
979 if (fromIEW->iewUnblock[tid]) {
980 assert(stalls[tid].iew);
981 assert(!fromIEW->iewBlock[tid]);
982 stalls[tid].iew = false;
983 }
984
985 if (fromCommit->commitBlock[tid]) {
986 stalls[tid].commit = true;
987 }
988
989 if (fromCommit->commitUnblock[tid]) {
990 assert(stalls[tid].commit);
991 assert(!fromCommit->commitBlock[tid]);
992 stalls[tid].commit = false;
993 }
994
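    // Squash signals are handled in pipeline order: a squash from
    // commit overrides everything else, continued ROB squashing keeps
    // fetch in the Squashing state, and a squash from decode is only
    // acted on if fetch isn't already squashing.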
995 // Check squash signals from commit.
996 if (fromCommit->commitInfo[tid].squash) {
997
998 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
999 "from commit.\n",tid);
1000 // In any case, squash.
1001 squash(fromCommit->commitInfo[tid].pc,
1002 fromCommit->commitInfo[tid].doneSeqNum,
1003 fromCommit->commitInfo[tid].squashInst, tid);
1004
1005        // If it was a branch mispredict on a control instruction, update the
1006        // branch predictor with that instruction; otherwise just squash the
1007        // invalid state generated after the given sequence number.
1008 if (fromCommit->commitInfo[tid].mispredictInst &&
1009 fromCommit->commitInfo[tid].mispredictInst->isControl()) {
1010 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
1011 fromCommit->commitInfo[tid].pc,
1012 fromCommit->commitInfo[tid].branchTaken,
1013 tid);
1014 } else {
1015 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
1016 tid);
1017 }
1018
1019 return true;
1020 } else if (fromCommit->commitInfo[tid].doneSeqNum) {
1021        // Update the branch predictor if it wasn't a squashed instruction
1022        // that was broadcast.
1023 branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
1024 }
1025
1026 // Check ROB squash signals from commit.
1027 if (fromCommit->commitInfo[tid].robSquashing) {
1028 DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);
1029
1030 // Continue to squash.
1031 fetchStatus[tid] = Squashing;
1032
1033 return true;
1034 }
1035
1036 // Check squash signals from decode.
1037 if (fromDecode->decodeInfo[tid].squash) {
1038 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1039 "from decode.\n",tid);
1040
1041 // Update the branch predictor.
1042 if (fromDecode->decodeInfo[tid].branchMispredict) {
1043 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
1044 fromDecode->decodeInfo[tid].nextPC,
1045 fromDecode->decodeInfo[tid].branchTaken,
1046 tid);
1047 } else {
1048 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
1049 tid);
1050 }
1051
1052 if (fetchStatus[tid] != Squashing) {
1053
1054            DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
1055                fromDecode->decodeInfo[tid].nextPC);
1056 // Squash unless we're already squashing
1057 squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
1058 fromDecode->decodeInfo[tid].squashInst,
1059 fromDecode->decodeInfo[tid].doneSeqNum,
1060 tid);
1061
1062 return true;
1063 }
1064 }
1065
1066 if (checkStall(tid) &&
1067 fetchStatus[tid] != IcacheWaitResponse &&
1068 fetchStatus[tid] != IcacheWaitRetry) {
1069 DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
1070
1071 fetchStatus[tid] = Blocked;
1072
1073 return true;
1074 }
1075
1076 if (fetchStatus[tid] == Blocked ||
1077 fetchStatus[tid] == Squashing) {
1078 // Switch status to running if fetch isn't being told to block or
1079 // squash this cycle.
1080 DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
1081 tid);
1082
1083 fetchStatus[tid] = Running;
1084
1085 return true;
1086 }
1087
1088 // If we've reached this point, we have not gotten any signals that
1089 // cause fetch to change its status. Fetch remains the same as before.
1090 return false;
1091}
1092
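// buildInst() wraps a decoded StaticInst in a new DynInst: it assigns
// a global sequence number, binds the instruction to its thread's
// state, attaches trace data when tracing is enabled, registers it on
// the CPU's in-flight instruction list, and queues it toward decode.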
1093template<class Impl>
1094typename Impl::DynInstPtr
1095DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
1096 StaticInstPtr curMacroop, TheISA::PCState thisPC,
1097 TheISA::PCState nextPC, bool trace)
1098{
1099 // Get a sequence number.
1100 InstSeqNum seq = cpu->getAndIncrementInstSeq();
1101
1102 // Create a new DynInst from the instruction fetched.
1103 DynInstPtr instruction =
1104 new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
1105 instruction->setTid(tid);
1106
1107 instruction->setASID(tid);
1108
1109 instruction->setThreadState(cpu->thread[tid]);
1110
1111 DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
1112 "[sn:%lli].\n", tid, thisPC.instAddr(),
1113 thisPC.microPC(), seq);
1114
1115 DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
1116 instruction->staticInst->
1117 disassemble(thisPC.instAddr()));
1118
1119#if TRACING_ON
1120 if (trace) {
1121 instruction->traceData =
1122 cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
1123 instruction->staticInst, thisPC, curMacroop);
1124 }
1125#else
1126 instruction->traceData = NULL;
1127#endif
1128
1129 // Add instruction to the CPU's list of instructions.
1130 instruction->setInstListIt(cpu->addInst(instruction));
1131
1132 // Write the instruction to the first slot in the queue
1133 // that heads to decode.
1134 assert(numInst < fetchWidth);
1135 toDecode->insts[toDecode->size++] = instruction;
1136
1137    // Keep track of whether we can take an interrupt at this boundary
1138 delayedCommit[tid] = instruction->isDelayedCommit();
1139
1140 return instruction;
1141}
1142
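// fetch() performs one cycle of fetch for the thread selected by the
// SMT fetch policy: it (re)fills the cache-line buffer if necessary,
// then extracts instructions and microops from the buffer until it
// predicts a taken branch, exhausts fetchWidth, or runs out of
// buffered bytes.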
1143template<class Impl>
1144void
1145DefaultFetch<Impl>::fetch(bool &status_change)
1146{
1147 //////////////////////////////////////////
1148 // Start actual fetch
1149 //////////////////////////////////////////
1150 ThreadID tid = getFetchingThread(fetchPolicy);
1151
1152 if (tid == InvalidThreadID || drainPending) {
1153 // Breaks looping condition in tick()
1154 threadFetched = numFetchingThreads;
1155
1156 if (numThreads == 1) { // @todo Per-thread stats
1157 profileStall(0);
1158 }
1159
1160 return;
1161 }
1162
1163 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1164
1165 // The current PC.
1166 TheISA::PCState thisPC = pc[tid];
1167
1168 Addr pcOffset = fetchOffset[tid];
1169 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1170
1171 bool inRom = isRomMicroPC(thisPC.microPC());
1172
1173    // If returning from the delay of a cache miss, then update the status
1174    // to running; otherwise do the cache access. Possibly move this up
1175    // into the tick() function.
1176 if (fetchStatus[tid] == IcacheAccessComplete) {
1177 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);
1178
1179 fetchStatus[tid] = Running;
1180 status_change = true;
1181 } else if (fetchStatus[tid] == Running) {
1182        // Align the fetch PC so it's at the start of a cache block.
1183 Addr block_PC = icacheBlockAlignPC(fetchAddr);
1184
1185 // If buffer is no longer valid or fetchAddr has moved to point
1186 // to the next cache block, AND we have no remaining ucode
1187 // from a macro-op, then start fetch from icache.
1188 if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])
1189 && !inRom && !macroop[tid]) {
1190 DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
1191 "instruction, starting at PC %s.\n", tid, thisPC);
1192
1193 fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1194
1195 if (fetchStatus[tid] == IcacheWaitResponse)
1196 ++icacheStallCycles;
1197 else if (fetchStatus[tid] == ItlbWait)
1198 ++fetchTlbCycles;
1199 else
1200 ++fetchMiscStallCycles;
1201 return;
1202 } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])
1203 || isSwitchedOut()) {
1204            // Stall CPU if an interrupt is posted and we're not currently
1205            // issuing a delayed-commit micro-op (delayed-commit instructions
1206            // are not interruptible by interrupts, only by faults)
1207 ++fetchMiscStallCycles;
1208 return;
1209 }
1210 } else {
1211 if (fetchStatus[tid] == Idle) {
1212 ++fetchIdleCycles;
1213 DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
1214 }
1215
1216 // Status is Idle, so fetch should do nothing.
1217 return;
1218 }
1219
1220 ++fetchCycles;
1221
1222 TheISA::PCState nextPC = thisPC;
1223
1224 StaticInstPtr staticInst = NULL;
1225 StaticInstPtr curMacroop = macroop[tid];
1226
1227 // If the read of the first instruction was successful, then grab the
1228 // instructions from the rest of the cache line and put them into the
1229 // queue heading to decode.
1230
1231 DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
1232 "decode.\n", tid);
1233
1234 // Need to keep track of whether or not a predicted branch
1235 // ended this fetch block.
1236 bool predictedBranch = false;
1237
1238 TheISA::MachInst *cacheInsts =
1239 reinterpret_cast<TheISA::MachInst *>(cacheData[tid]);
1240
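    // Treat the buffered cache block as an array of fixed-size machine
    // instruction chunks; blkOffset indexes into it in instSize units,
    // relative to the block-aligned address the buffer was filled from.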
1241 const unsigned numInsts = cacheBlkSize / instSize;
1242 unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize;
1243
1244 // Loop through instruction memory from the cache.
1245 // Keep issuing while fetchWidth is available and branch is not
1246 // predicted taken
1247 while (numInst < fetchWidth && !predictedBranch) {
1248
1249 // We need to process more memory if we aren't going to get a
1250 // StaticInst from the rom, the current macroop, or what's already
1251 // in the predecoder.
1252 bool needMem = !inRom && !curMacroop && !predecoder.extMachInstReady();
1253
1254 if (needMem) {
1255 if (blkOffset >= numInsts) {
1256 // We need to process more memory, but we've run out of the
1257 // current block.
1258 break;
1259 }
1260
1261 if (ISA_HAS_DELAY_SLOT && pcOffset == 0) {
1262 // Walk past any annulled delay slot instructions.
1263 Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask;
1264 while (fetchAddr != pcAddr && blkOffset < numInsts) {
1265 blkOffset++;
1266 fetchAddr += instSize;
1267 }
1268 if (blkOffset >= numInsts)
1269 break;
1270 }
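            // Feed the predecoder the next chunk of instruction memory;
            // on variable-length ISAs it may consume several chunks (or
            // only part of one) before a full ExtMachInst is ready.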
1271 MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
1272
1273 predecoder.setTC(cpu->thread[tid]->getTC());
1274 predecoder.moreBytes(thisPC, fetchAddr, inst);
1275
1276 if (predecoder.needMoreBytes()) {
1277 blkOffset++;
1278 fetchAddr += instSize;
1279 pcOffset += instSize;
1280 }
1281 }
1282
1283 // Extract as many instructions and/or microops as we can from
1284 // the memory we've processed so far.
1285 do {
1286 if (!(curMacroop || inRom)) {
1287 if (predecoder.extMachInstReady()) {
1288 ExtMachInst extMachInst =
1289 predecoder.getExtMachInst(thisPC);
1290 staticInst =
1291 decoder.decode(extMachInst, thisPC.instAddr());
1292
1293 // Increment stat of fetched instructions.
1294 ++fetchedInsts;
1295
1296 if (staticInst->isMacroop()) {
1297 curMacroop = staticInst;
1298 } else {
1299 pcOffset = 0;
1300 }
1301 } else {
1302 // We need more bytes for this instruction so blkOffset and
1303 // pcOffset will be updated
1304 break;
1305 }
1306 }
1307 // Whether we're moving to a new macroop because we're at the
1308 // end of the current one, or the branch predictor incorrectly
1309 // thinks we are...
1310 bool newMacro = false;
1311 if (curMacroop || inRom) {
1312 if (inRom) {
1313 staticInst = cpu->microcodeRom.fetchMicroop(
1314 thisPC.microPC(), curMacroop);
1315 } else {
1316 staticInst = curMacroop->fetchMicroop(thisPC.microPC());
1317 }
1318 newMacro |= staticInst->isLastMicroop();
1319 }
1320
1321 DynInstPtr instruction =
1322 buildInst(tid, staticInst, curMacroop,
1323 thisPC, nextPC, true);
1324
1325 numInst++;
1326
1327#if TRACING_ON
1328 instruction->fetchTick = curTick();
1329#endif
1330
1331 nextPC = thisPC;
1332
1333            // If we're branching after this instruction, quit fetching
1334            // from the same block.
1335 predictedBranch |= thisPC.branching();
1336 predictedBranch |=
1337 lookupAndUpdateNextPC(instruction, nextPC);
1338 if (predictedBranch) {
1339 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
1340 }
1341
1342 newMacro |= thisPC.instAddr() != nextPC.instAddr();
1343
1344 // Move to the next instruction, unless we have a branch.
1345 thisPC = nextPC;
1346
1347 if (newMacro) {
1348 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
1349 blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize;
1350 pcOffset = 0;
1351 curMacroop = NULL;
1352 }
1353
1354 if (instruction->isQuiesce()) {
1355                DPRINTF(Fetch,
1356                        "Quiesce instruction encountered, halting fetch!\n");
1357 fetchStatus[tid] = QuiescePending;
1358 status_change = true;
1359 break;
1360 }
1361 } while ((curMacroop || predecoder.extMachInstReady()) &&
1362 numInst < fetchWidth);
1363 }
1364
1365 if (predictedBranch) {
1366 DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
1367 "instruction encountered.\n", tid);
1368 } else if (numInst >= fetchWidth) {
1369 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
1370 "for this cycle.\n", tid);
1371    } else if (blkOffset >= numInsts) {
1372        DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the "
1373                "cache block.\n", tid);
1374 }
1375
1376 macroop[tid] = curMacroop;
1377 fetchOffset[tid] = pcOffset;
1378
1379 if (numInst > 0) {
1380 wroteToTimeBuffer = true;
1381 }
1382
1383 pc[tid] = thisPC;
1384
1385 // pipeline a fetch if we're crossing a cache boundary and not in
1386 // a state that would preclude fetching
1387 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1388 Addr block_PC = icacheBlockAlignPC(fetchAddr);
1389 issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] &&
1390 fetchStatus[tid] != IcacheWaitResponse &&
1391 fetchStatus[tid] != ItlbWait &&
1392 fetchStatus[tid] != IcacheWaitRetry &&
1393 fetchStatus[tid] != QuiescePending &&
1394 !curMacroop;
1395}
1396
1397template<class Impl>
1398void
1399DefaultFetch<Impl>::recvRetry()
1400{
1401 if (retryPkt != NULL) {
1402 assert(cacheBlocked);
1403 assert(retryTid != InvalidThreadID);
1404 assert(fetchStatus[retryTid] == IcacheWaitRetry);
1405
1406 if (icachePort->sendTiming(retryPkt)) {
1407 fetchStatus[retryTid] = IcacheWaitResponse;
1408 retryPkt = NULL;
1409 retryTid = InvalidThreadID;
1410 cacheBlocked = false;
1411 }
1412 } else {
1413 assert(retryTid == InvalidThreadID);
1414        // The access has been squashed since it was sent out. Just clear
1415        // the flag marking the cache as blocked.
1416 cacheBlocked = false;
1417 }
1418}
1419
1420///////////////////////////////////////
1421// //
1422// SMT FETCH POLICY MAINTAINED HERE //
1423// //
1424///////////////////////////////////////
1425template<class Impl>
1426ThreadID
1427DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
1428{
1429 if (numThreads > 1) {
1430 switch (fetch_priority) {
1431
1432 case SingleThread:
1433 return 0;
1434
1435 case RoundRobin:
1436 return roundRobin();
1437
1438 case IQ:
1439 return iqCount();
1440
1441 case LSQ:
1442 return lsqCount();
1443
1444 case Branch:
1445 return branchCount();
1446
1447 default:
1448 return InvalidThreadID;
1449 }
1450 } else {
1451 list<ThreadID>::iterator thread = activeThreads->begin();
1452 if (thread == activeThreads->end()) {
1453 return InvalidThreadID;
1454 }
1455
1456 ThreadID tid = *thread;
1457
1458 if (fetchStatus[tid] == Running ||
1459 fetchStatus[tid] == IcacheAccessComplete ||
1460 fetchStatus[tid] == Idle) {
1461 return tid;
1462 } else {
1463 return InvalidThreadID;
1464 }
1465 }
1466}
1467
1468
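// roundRobin() returns the first fetchable thread on the priority list
// and rotates it to the back so that threads take turns at fetch.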
1469template<class Impl>
1470ThreadID
1471DefaultFetch<Impl>::roundRobin()
1472{
1473 list<ThreadID>::iterator pri_iter = priorityList.begin();
1474 list<ThreadID>::iterator end = priorityList.end();
1475
1476 ThreadID high_pri;
1477
1478 while (pri_iter != end) {
1479 high_pri = *pri_iter;
1480
1481 assert(high_pri <= numThreads);
1482
1483 if (fetchStatus[high_pri] == Running ||
1484 fetchStatus[high_pri] == IcacheAccessComplete ||
1485 fetchStatus[high_pri] == Idle) {
1486
1487 priorityList.erase(pri_iter);
1488 priorityList.push_back(high_pri);
1489
1490 return high_pri;
1491 }
1492
1493 pri_iter++;
1494 }
1495
1496 return InvalidThreadID;
1497}
1498
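// ICOUNT-style policy: fetch from the active thread with the fewest
// instructions in the IQ. Note that threadMap is keyed by count, so
// threads with identical counts collapse onto a single map entry.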
1499template<class Impl>
1500ThreadID
1501DefaultFetch<Impl>::iqCount()
1502{
1503    std::priority_queue<unsigned, std::vector<unsigned>,
                            std::greater<unsigned> > PQ;
1504 std::map<unsigned, ThreadID> threadMap;
1505
1506 list<ThreadID>::iterator threads = activeThreads->begin();
1507 list<ThreadID>::iterator end = activeThreads->end();
1508
1509 while (threads != end) {
1510 ThreadID tid = *threads++;
1511 unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1512
1513 PQ.push(iqCount);
1514 threadMap[iqCount] = tid;
1515 }
1516
1517 while (!PQ.empty()) {
1518 ThreadID high_pri = threadMap[PQ.top()];
1519
1520 if (fetchStatus[high_pri] == Running ||
1521 fetchStatus[high_pri] == IcacheAccessComplete ||
1522 fetchStatus[high_pri] == Idle)
1523 return high_pri;
1524 else
1525 PQ.pop();
1526
1527 }
1528
1529 return InvalidThreadID;
1530}
1531
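// Same scheme as iqCount(), but prioritizing the active thread with
// the fewest entries in the load/store queue.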
1532template<class Impl>
1533ThreadID
1534DefaultFetch<Impl>::lsqCount()
1535{
1536    std::priority_queue<unsigned, std::vector<unsigned>,
                            std::greater<unsigned> > PQ;
1537 std::map<unsigned, ThreadID> threadMap;
1538
1539 list<ThreadID>::iterator threads = activeThreads->begin();
1540 list<ThreadID>::iterator end = activeThreads->end();
1541
1542 while (threads != end) {
1543 ThreadID tid = *threads++;
1544 unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1545
1546 PQ.push(ldstqCount);
1547 threadMap[ldstqCount] = tid;
1548 }
1549
1550 while (!PQ.empty()) {
1551 ThreadID high_pri = threadMap[PQ.top()];
1552
1553 if (fetchStatus[high_pri] == Running ||
1554 fetchStatus[high_pri] == IcacheAccessComplete ||
1555 fetchStatus[high_pri] == Idle)
1556 return high_pri;
1557 else
1558 PQ.pop();
1559 }
1560
1561 return InvalidThreadID;
1562}
1563
1564template<class Impl>
1565ThreadID
1566DefaultFetch<Impl>::branchCount()
1567{
1568#if 0
1569 list<ThreadID>::iterator thread = activeThreads->begin();
1570 assert(thread != activeThreads->end());
1571 ThreadID tid = *thread;
1572#endif
1573
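    // Unimplemented. A branch-count policy would presumably mirror
    // iqCount(), giving priority to the thread with the fewest
    // outstanding branches, but no per-thread branch count is plumbed
    // through the time buffer here, hence the panic below.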
1574 panic("Branch Count Fetch policy unimplemented\n");
1575 return InvalidThreadID;
1576}
1577
1578template<class Impl>
1579void
1580DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
1581{
1582 if (!issuePipelinedIfetch[tid]) {
1583 return;
1584 }
1585
1586 // The next PC to access.
1587 TheISA::PCState thisPC = pc[tid];
1588
1589 if (isRomMicroPC(thisPC.microPC())) {
1590 return;
1591 }
1592
1593 Addr pcOffset = fetchOffset[tid];
1594 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1595
1596    // Align the fetch PC so it's at the start of a cache block.
1597 Addr block_PC = icacheBlockAlignPC(fetchAddr);
1598
1599    // Unless the buffer already holds the block, fetch it from the icache.
1600 if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])) {
1601 DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
1602 "starting at PC %s.\n", tid, thisPC);
1603
1604 fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1605 }
1606}
1607
1608template<class Impl>
1609void
1610DefaultFetch<Impl>::profileStall(ThreadID tid)
{
1611 DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1612
1613 // @todo Per-thread stats
1614
1615 if (drainPending) {
1616 ++fetchPendingDrainCycles;
1617 DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1618 } else if (activeThreads->empty()) {
1619 ++fetchNoActiveThreadStallCycles;
1620 DPRINTF(Fetch, "Fetch has no active thread!\n");
1621 } else if (fetchStatus[tid] == Blocked) {
1622 ++fetchBlockedCycles;
1623 DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
1624 } else if (fetchStatus[tid] == Squashing) {
1625 ++fetchSquashCycles;
1626 DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
1627 } else if (fetchStatus[tid] == IcacheWaitResponse) {
1628 ++icacheStallCycles;
1629        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a cache response!\n",
1630                tid);
1631 } else if (fetchStatus[tid] == ItlbWait) {
1632 ++fetchTlbCycles;
1633        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an ITLB walk to "
1634                "finish!\n", tid);
1635 } else if (fetchStatus[tid] == TrapPending) {
1636 ++fetchPendingTrapStallCycles;
1637 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
1638 tid);
1639 } else if (fetchStatus[tid] == QuiescePending) {
1640 ++fetchPendingQuiesceStallCycles;
1641 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
1642 "instruction!\n", tid);
1643 } else if (fetchStatus[tid] == IcacheWaitRetry) {
1644 ++fetchIcacheWaitRetryStallCycles;
1645 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
1646 tid);
1647 } else if (fetchStatus[tid] == NoGoodAddr) {
1648 DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
1649 tid);
1650 } else {
1651 DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
1652 tid, fetchStatus[tid]);
1653 }
1654}