fetch_impl.hh revision 13559:e9983a972327
14776Sgblack@eecs.umich.edu/* 26365Sgblack@eecs.umich.edu * Copyright (c) 2010-2014 ARM Limited 34776Sgblack@eecs.umich.edu * Copyright (c) 2012-2013 AMD 44776Sgblack@eecs.umich.edu * All rights reserved. 54776Sgblack@eecs.umich.edu * 64776Sgblack@eecs.umich.edu * The license below extends only to copyright in the software and shall 74776Sgblack@eecs.umich.edu * not be construed as granting a license to any other intellectual 84776Sgblack@eecs.umich.edu * property including but not limited to intellectual property relating 94776Sgblack@eecs.umich.edu * to a hardware implementation of the functionality of the software 104776Sgblack@eecs.umich.edu * licensed hereunder. You may use the software subject to the license 114776Sgblack@eecs.umich.edu * terms below provided that you ensure that this notice is replicated 124776Sgblack@eecs.umich.edu * unmodified and in its entirety in all distributions of the software, 134776Sgblack@eecs.umich.edu * modified or unmodified, in source code or in binary form. 144776Sgblack@eecs.umich.edu * 154776Sgblack@eecs.umich.edu * Copyright (c) 2004-2006 The Regents of The University of Michigan 164776Sgblack@eecs.umich.edu * All rights reserved. 174776Sgblack@eecs.umich.edu * 184776Sgblack@eecs.umich.edu * Redistribution and use in source and binary forms, with or without 194776Sgblack@eecs.umich.edu * modification, are permitted provided that the following conditions are 204776Sgblack@eecs.umich.edu * met: redistributions of source code must retain the above copyright 214776Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer; 224776Sgblack@eecs.umich.edu * redistributions in binary form must reproduce the above copyright 234776Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer in the 244776Sgblack@eecs.umich.edu * documentation and/or other materials provided with the distribution; 254776Sgblack@eecs.umich.edu * neither the name of the copyright holders nor the names of its 264776Sgblack@eecs.umich.edu * contributors may be used to endorse or promote products derived from 274776Sgblack@eecs.umich.edu * this software without specific prior written permission. 286365Sgblack@eecs.umich.edu * 294776Sgblack@eecs.umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 304776Sgblack@eecs.umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 314776Sgblack@eecs.umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 324776Sgblack@eecs.umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 334776Sgblack@eecs.umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 344776Sgblack@eecs.umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 354776Sgblack@eecs.umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 364776Sgblack@eecs.umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 374776Sgblack@eecs.umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 384776Sgblack@eecs.umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 394776Sgblack@eecs.umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 405523Snate@binkert.org * 416409Sgblack@eecs.umich.edu * Authors: Kevin Lim 424776Sgblack@eecs.umich.edu * Korey Sewell 435523Snate@binkert.org */ 445523Snate@binkert.org 455523Snate@binkert.org#ifndef __CPU_O3_FETCH_IMPL_HH__ 464776Sgblack@eecs.umich.edu#define __CPU_O3_FETCH_IMPL_HH__ 474776Sgblack@eecs.umich.edu 484776Sgblack@eecs.umich.edu#include <algorithm> 494776Sgblack@eecs.umich.edu#include <cstring> 504776Sgblack@eecs.umich.edu#include <list> 514776Sgblack@eecs.umich.edu#include <map> 524776Sgblack@eecs.umich.edu#include <queue> 534776Sgblack@eecs.umich.edu 545049Sgblack@eecs.umich.edu#include "arch/generic/tlb.hh" 555049Sgblack@eecs.umich.edu#include "arch/isa_traits.hh" 564776Sgblack@eecs.umich.edu#include "arch/utility.hh" 574776Sgblack@eecs.umich.edu#include "arch/vtophys.hh" 584776Sgblack@eecs.umich.edu#include "base/random.hh" 594776Sgblack@eecs.umich.edu#include "base/types.hh" 604776Sgblack@eecs.umich.edu#include "config/the_isa.hh" 616365Sgblack@eecs.umich.edu#include "cpu/base.hh" 626365Sgblack@eecs.umich.edu//#include "cpu/checker/cpu.hh" 634830Sgblack@eecs.umich.edu#include "cpu/o3/fetch.hh" 644830Sgblack@eecs.umich.edu#include "cpu/exetrace.hh" 657811Ssteve.reinhardt@amd.com#include "debug/Activity.hh" 66#include "debug/Drain.hh" 67#include "debug/Fetch.hh" 68#include "debug/O3PipeView.hh" 69#include "mem/packet.hh" 70#include "params/DerivO3CPU.hh" 71#include "sim/byteswap.hh" 72#include "sim/core.hh" 73#include "sim/eventq.hh" 74#include "sim/full_system.hh" 75#include "sim/system.hh" 76#include "cpu/o3/isa_specific.hh" 77 78using namespace std; 79 80template<class Impl> 81DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) 82 : fetchPolicy(params->smtFetchPolicy), 83 cpu(_cpu), 84 branchPred(nullptr), 85 decodeToFetchDelay(params->decodeToFetchDelay), 86 renameToFetchDelay(params->renameToFetchDelay), 87 iewToFetchDelay(params->iewToFetchDelay), 88 commitToFetchDelay(params->commitToFetchDelay), 89 fetchWidth(params->fetchWidth), 90 decodeWidth(params->decodeWidth), 91 retryPkt(NULL), 92 retryTid(InvalidThreadID), 93 cacheBlkSize(cpu->cacheLineSize()), 94 fetchBufferSize(params->fetchBufferSize), 95 fetchBufferMask(fetchBufferSize - 1), 96 fetchQueueSize(params->fetchQueueSize), 97 numThreads(params->numThreads), 98 numFetchingThreads(params->smtNumFetchingThreads), 99 finishTranslationEvent(this) 100{ 101 if (numThreads > Impl::MaxThreads) 102 fatal("numThreads (%d) is larger than compiled limit (%d),\n" 103 "\tincrease MaxThreads in src/cpu/o3/impl.hh\n", 104 numThreads, static_cast<int>(Impl::MaxThreads)); 105 if (fetchWidth > Impl::MaxWidth) 106 fatal("fetchWidth (%d) is larger than compiled limit (%d),\n" 107 "\tincrease MaxWidth in src/cpu/o3/impl.hh\n", 108 fetchWidth, static_cast<int>(Impl::MaxWidth)); 109 if (fetchBufferSize > cacheBlkSize) 110 fatal("fetch buffer size (%u bytes) is greater than the cache " 111 "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize); 112 if (cacheBlkSize % fetchBufferSize) 113 fatal("cache block (%u bytes) is not a multiple of the " 114 "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize); 115 116 // Figure out fetch policy 117 panic_if(fetchPolicy == FetchPolicy::SingleThread && numThreads > 1, 118 "Invalid Fetch Policy for a SMT workload."); 119 120 // Get the size of an instruction. 121 instSize = sizeof(TheISA::MachInst); 122 123 for (int i = 0; i < Impl::MaxThreads; i++) { 124 fetchStatus[i] = Idle; 125 decoder[i] = nullptr; 126 pc[i] = 0; 127 fetchOffset[i] = 0; 128 macroop[i] = nullptr; 129 delayedCommit[i] = false; 130 memReq[i] = nullptr; 131 stalls[i] = {false, false}; 132 fetchBuffer[i] = NULL; 133 fetchBufferPC[i] = 0; 134 fetchBufferValid[i] = false; 135 lastIcacheStall[i] = 0; 136 issuePipelinedIfetch[i] = false; 137 } 138 139 branchPred = params->branchPred; 140 141 for (ThreadID tid = 0; tid < numThreads; tid++) { 142 decoder[tid] = new TheISA::Decoder(params->isa[tid]); 143 // Create space to buffer the cache line data, 144 // which may not hold the entire cache line. 145 fetchBuffer[tid] = new uint8_t[fetchBufferSize]; 146 } 147} 148 149template <class Impl> 150std::string 151DefaultFetch<Impl>::name() const 152{ 153 return cpu->name() + ".fetch"; 154} 155 156template <class Impl> 157void 158DefaultFetch<Impl>::regProbePoints() 159{ 160 ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch"); 161 ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(), 162 "FetchRequest"); 163 164} 165 166template <class Impl> 167void 168DefaultFetch<Impl>::regStats() 169{ 170 icacheStallCycles 171 .name(name() + ".icacheStallCycles") 172 .desc("Number of cycles fetch is stalled on an Icache miss") 173 .prereq(icacheStallCycles); 174 175 fetchedInsts 176 .name(name() + ".Insts") 177 .desc("Number of instructions fetch has processed") 178 .prereq(fetchedInsts); 179 180 fetchedBranches 181 .name(name() + ".Branches") 182 .desc("Number of branches that fetch encountered") 183 .prereq(fetchedBranches); 184 185 predictedBranches 186 .name(name() + ".predictedBranches") 187 .desc("Number of branches that fetch has predicted taken") 188 .prereq(predictedBranches); 189 190 fetchCycles 191 .name(name() + ".Cycles") 192 .desc("Number of cycles fetch has run and was not squashing or" 193 " blocked") 194 .prereq(fetchCycles); 195 196 fetchSquashCycles 197 .name(name() + ".SquashCycles") 198 .desc("Number of cycles fetch has spent squashing") 199 .prereq(fetchSquashCycles); 200 201 fetchTlbCycles 202 .name(name() + ".TlbCycles") 203 .desc("Number of cycles fetch has spent waiting for tlb") 204 .prereq(fetchTlbCycles); 205 206 fetchIdleCycles 207 .name(name() + ".IdleCycles") 208 .desc("Number of cycles fetch was idle") 209 .prereq(fetchIdleCycles); 210 211 fetchBlockedCycles 212 .name(name() + ".BlockedCycles") 213 .desc("Number of cycles fetch has spent blocked") 214 .prereq(fetchBlockedCycles); 215 216 fetchedCacheLines 217 .name(name() + ".CacheLines") 218 .desc("Number of cache lines fetched") 219 .prereq(fetchedCacheLines); 220 221 fetchMiscStallCycles 222 .name(name() + ".MiscStallCycles") 223 .desc("Number of cycles fetch has spent waiting on interrupts, or " 224 "bad addresses, or out of MSHRs") 225 .prereq(fetchMiscStallCycles); 226 227 fetchPendingDrainCycles 228 .name(name() + ".PendingDrainCycles") 229 .desc("Number of cycles fetch has spent waiting on pipes to drain") 230 .prereq(fetchPendingDrainCycles); 231 232 fetchNoActiveThreadStallCycles 233 .name(name() + ".NoActiveThreadStallCycles") 234 .desc("Number of stall cycles due to no active thread to fetch from") 235 .prereq(fetchNoActiveThreadStallCycles); 236 237 fetchPendingTrapStallCycles 238 .name(name() + ".PendingTrapStallCycles") 239 .desc("Number of stall cycles due to pending traps") 240 .prereq(fetchPendingTrapStallCycles); 241 242 fetchPendingQuiesceStallCycles 243 .name(name() + ".PendingQuiesceStallCycles") 244 .desc("Number of stall cycles due to pending quiesce instructions") 245 .prereq(fetchPendingQuiesceStallCycles); 246 247 fetchIcacheWaitRetryStallCycles 248 .name(name() + ".IcacheWaitRetryStallCycles") 249 .desc("Number of stall cycles due to full MSHR") 250 .prereq(fetchIcacheWaitRetryStallCycles); 251 252 fetchIcacheSquashes 253 .name(name() + ".IcacheSquashes") 254 .desc("Number of outstanding Icache misses that were squashed") 255 .prereq(fetchIcacheSquashes); 256 257 fetchTlbSquashes 258 .name(name() + ".ItlbSquashes") 259 .desc("Number of outstanding ITLB misses that were squashed") 260 .prereq(fetchTlbSquashes); 261 262 fetchNisnDist 263 .init(/* base value */ 0, 264 /* last value */ fetchWidth, 265 /* bucket size */ 1) 266 .name(name() + ".rateDist") 267 .desc("Number of instructions fetched each cycle (Total)") 268 .flags(Stats::pdf); 269 270 idleRate 271 .name(name() + ".idleRate") 272 .desc("Percent of cycles fetch was idle") 273 .prereq(idleRate); 274 idleRate = fetchIdleCycles * 100 / cpu->numCycles; 275 276 branchRate 277 .name(name() + ".branchRate") 278 .desc("Number of branch fetches per cycle") 279 .flags(Stats::total); 280 branchRate = fetchedBranches / cpu->numCycles; 281 282 fetchRate 283 .name(name() + ".rate") 284 .desc("Number of inst fetches per cycle") 285 .flags(Stats::total); 286 fetchRate = fetchedInsts / cpu->numCycles; 287} 288 289template<class Impl> 290void 291DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) 292{ 293 timeBuffer = time_buffer; 294 295 // Create wires to get information from proper places in time buffer. 296 fromDecode = timeBuffer->getWire(-decodeToFetchDelay); 297 fromRename = timeBuffer->getWire(-renameToFetchDelay); 298 fromIEW = timeBuffer->getWire(-iewToFetchDelay); 299 fromCommit = timeBuffer->getWire(-commitToFetchDelay); 300} 301 302template<class Impl> 303void 304DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr) 305{ 306 activeThreads = at_ptr; 307} 308 309template<class Impl> 310void 311DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr) 312{ 313 // Create wire to write information to proper place in fetch time buf. 314 toDecode = ftb_ptr->getWire(0); 315} 316 317template<class Impl> 318void 319DefaultFetch<Impl>::startupStage() 320{ 321 assert(priorityList.empty()); 322 resetStage(); 323 324 // Fetch needs to start fetching instructions at the very beginning, 325 // so it must start up in active state. 326 switchToActive(); 327} 328 329template<class Impl> 330void 331DefaultFetch<Impl>::resetStage() 332{ 333 numInst = 0; 334 interruptPending = false; 335 cacheBlocked = false; 336 337 priorityList.clear(); 338 339 // Setup PC and nextPC with initial state. 340 for (ThreadID tid = 0; tid < numThreads; ++tid) { 341 fetchStatus[tid] = Running; 342 pc[tid] = cpu->pcState(tid); 343 fetchOffset[tid] = 0; 344 macroop[tid] = NULL; 345 346 delayedCommit[tid] = false; 347 memReq[tid] = NULL; 348 349 stalls[tid].decode = false; 350 stalls[tid].drain = false; 351 352 fetchBufferPC[tid] = 0; 353 fetchBufferValid[tid] = false; 354 355 fetchQueue[tid].clear(); 356 357 priorityList.push_back(tid); 358 } 359 360 wroteToTimeBuffer = false; 361 _status = Inactive; 362} 363 364template<class Impl> 365void 366DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) 367{ 368 ThreadID tid = cpu->contextToThread(pkt->req->contextId()); 369 370 DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid); 371 assert(!cpu->switchedOut()); 372 373 // Only change the status if it's still waiting on the icache access 374 // to return. 375 if (fetchStatus[tid] != IcacheWaitResponse || 376 pkt->req != memReq[tid]) { 377 ++fetchIcacheSquashes; 378 delete pkt; 379 return; 380 } 381 382 memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize); 383 fetchBufferValid[tid] = true; 384 385 // Wake up the CPU (if it went to sleep and was waiting on 386 // this completion event). 387 cpu->wakeCPU(); 388 389 DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", 390 tid); 391 392 switchToActive(); 393 394 // Only switch to IcacheAccessComplete if we're not stalled as well. 395 if (checkStall(tid)) { 396 fetchStatus[tid] = Blocked; 397 } else { 398 fetchStatus[tid] = IcacheAccessComplete; 399 } 400 401 pkt->req->setAccessLatency(); 402 cpu->ppInstAccessComplete->notify(pkt); 403 // Reset the mem req to NULL. 404 delete pkt; 405 memReq[tid] = NULL; 406} 407 408template <class Impl> 409void 410DefaultFetch<Impl>::drainResume() 411{ 412 for (ThreadID i = 0; i < numThreads; ++i) { 413 stalls[i].decode = false; 414 stalls[i].drain = false; 415 } 416} 417 418template <class Impl> 419void 420DefaultFetch<Impl>::drainSanityCheck() const 421{ 422 assert(isDrained()); 423 assert(retryPkt == NULL); 424 assert(retryTid == InvalidThreadID); 425 assert(!cacheBlocked); 426 assert(!interruptPending); 427 428 for (ThreadID i = 0; i < numThreads; ++i) { 429 assert(!memReq[i]); 430 assert(fetchStatus[i] == Idle || stalls[i].drain); 431 } 432 433 branchPred->drainSanityCheck(); 434} 435 436template <class Impl> 437bool 438DefaultFetch<Impl>::isDrained() const 439{ 440 /* Make sure that threads are either idle of that the commit stage 441 * has signaled that draining has completed by setting the drain 442 * stall flag. This effectively forces the pipeline to be disabled 443 * until the whole system is drained (simulation may continue to 444 * drain other components). 445 */ 446 for (ThreadID i = 0; i < numThreads; ++i) { 447 // Verify fetch queues are drained 448 if (!fetchQueue[i].empty()) 449 return false; 450 451 // Return false if not idle or drain stalled 452 if (fetchStatus[i] != Idle) { 453 if (fetchStatus[i] == Blocked && stalls[i].drain) 454 continue; 455 else 456 return false; 457 } 458 } 459 460 /* The pipeline might start up again in the middle of the drain 461 * cycle if the finish translation event is scheduled, so make 462 * sure that's not the case. 463 */ 464 return !finishTranslationEvent.scheduled(); 465} 466 467template <class Impl> 468void 469DefaultFetch<Impl>::takeOverFrom() 470{ 471 assert(cpu->getInstPort().isConnected()); 472 resetStage(); 473 474} 475 476template <class Impl> 477void 478DefaultFetch<Impl>::drainStall(ThreadID tid) 479{ 480 assert(cpu->isDraining()); 481 assert(!stalls[tid].drain); 482 DPRINTF(Drain, "%i: Thread drained.\n", tid); 483 stalls[tid].drain = true; 484} 485 486template <class Impl> 487void 488DefaultFetch<Impl>::wakeFromQuiesce() 489{ 490 DPRINTF(Fetch, "Waking up from quiesce\n"); 491 // Hopefully this is safe 492 // @todo: Allow other threads to wake from quiesce. 493 fetchStatus[0] = Running; 494} 495 496template <class Impl> 497inline void 498DefaultFetch<Impl>::switchToActive() 499{ 500 if (_status == Inactive) { 501 DPRINTF(Activity, "Activating stage.\n"); 502 503 cpu->activateStage(O3CPU::FetchIdx); 504 505 _status = Active; 506 } 507} 508 509template <class Impl> 510inline void 511DefaultFetch<Impl>::switchToInactive() 512{ 513 if (_status == Active) { 514 DPRINTF(Activity, "Deactivating stage.\n"); 515 516 cpu->deactivateStage(O3CPU::FetchIdx); 517 518 _status = Inactive; 519 } 520} 521 522template <class Impl> 523void 524DefaultFetch<Impl>::deactivateThread(ThreadID tid) 525{ 526 // Update priority list 527 auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid); 528 if (thread_it != priorityList.end()) { 529 priorityList.erase(thread_it); 530 } 531} 532 533template <class Impl> 534bool 535DefaultFetch<Impl>::lookupAndUpdateNextPC( 536 const DynInstPtr &inst, TheISA::PCState &nextPC) 537{ 538 // Do branch prediction check here. 539 // A bit of a misnomer...next_PC is actually the current PC until 540 // this function updates it. 541 bool predict_taken; 542 543 if (!inst->isControl()) { 544 TheISA::advancePC(nextPC, inst->staticInst); 545 inst->setPredTarg(nextPC); 546 inst->setPredTaken(false); 547 return false; 548 } 549 550 ThreadID tid = inst->threadNumber; 551 predict_taken = branchPred->predict(inst->staticInst, inst->seqNum, 552 nextPC, tid); 553 554 if (predict_taken) { 555 DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n", 556 tid, inst->seqNum, nextPC); 557 } else { 558 DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n", 559 tid, inst->seqNum); 560 } 561 562 DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n", 563 tid, inst->seqNum, nextPC); 564 inst->setPredTarg(nextPC); 565 inst->setPredTaken(predict_taken); 566 567 ++fetchedBranches; 568 569 if (predict_taken) { 570 ++predictedBranches; 571 } 572 573 return predict_taken; 574} 575 576template <class Impl> 577bool 578DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) 579{ 580 Fault fault = NoFault; 581 582 assert(!cpu->switchedOut()); 583 584 // @todo: not sure if these should block translation. 585 //AlphaDep 586 if (cacheBlocked) { 587 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", 588 tid); 589 return false; 590 } else if (checkInterrupt(pc) && !delayedCommit[tid]) { 591 // Hold off fetch from getting new instructions when: 592 // Cache is blocked, or 593 // while an interrupt is pending and we're not in PAL mode, or 594 // fetch is switched out. 595 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", 596 tid); 597 return false; 598 } 599 600 // Align the fetch address to the start of a fetch buffer segment. 601 Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr); 602 603 DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n", 604 tid, fetchBufferBlockPC, vaddr); 605 606 // Setup the memReq to do a read of the first instruction's address. 607 // Set the appropriate read size and flags as well. 608 // Build request here. 609 RequestPtr mem_req = std::make_shared<Request>( 610 tid, fetchBufferBlockPC, fetchBufferSize, 611 Request::INST_FETCH, cpu->instMasterId(), pc, 612 cpu->thread[tid]->contextId()); 613 614 mem_req->taskId(cpu->taskId()); 615 616 memReq[tid] = mem_req; 617 618 // Initiate translation of the icache block 619 fetchStatus[tid] = ItlbWait; 620 FetchTranslation *trans = new FetchTranslation(this); 621 cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(), 622 trans, BaseTLB::Execute); 623 return true; 624} 625 626template <class Impl> 627void 628DefaultFetch<Impl>::finishTranslation(const Fault &fault, 629 const RequestPtr &mem_req) 630{ 631 ThreadID tid = cpu->contextToThread(mem_req->contextId()); 632 Addr fetchBufferBlockPC = mem_req->getVaddr(); 633 634 assert(!cpu->switchedOut()); 635 636 // Wake up CPU if it was idle 637 cpu->wakeCPU(); 638 639 if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] || 640 mem_req->getVaddr() != memReq[tid]->getVaddr()) { 641 DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", 642 tid); 643 ++fetchTlbSquashes; 644 return; 645 } 646 647 648 // If translation was successful, attempt to read the icache block. 649 if (fault == NoFault) { 650 // Check that we're not going off into random memory 651 // If we have, just wait around for commit to squash something and put 652 // us on the right track 653 if (!cpu->system->isMemAddr(mem_req->getPaddr())) { 654 warn("Address %#x is outside of physical memory, stopping fetch\n", 655 mem_req->getPaddr()); 656 fetchStatus[tid] = NoGoodAddr; 657 memReq[tid] = NULL; 658 return; 659 } 660 661 // Build packet here. 662 PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq); 663 data_pkt->dataDynamic(new uint8_t[fetchBufferSize]); 664 665 fetchBufferPC[tid] = fetchBufferBlockPC; 666 fetchBufferValid[tid] = false; 667 DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); 668 669 fetchedCacheLines++; 670 671 // Access the cache. 672 if (!cpu->getInstPort().sendTimingReq(data_pkt)) { 673 assert(retryPkt == NULL); 674 assert(retryTid == InvalidThreadID); 675 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); 676 677 fetchStatus[tid] = IcacheWaitRetry; 678 retryPkt = data_pkt; 679 retryTid = tid; 680 cacheBlocked = true; 681 } else { 682 DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid); 683 DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " 684 "response.\n", tid); 685 lastIcacheStall[tid] = curTick(); 686 fetchStatus[tid] = IcacheWaitResponse; 687 // Notify Fetch Request probe when a packet containing a fetch 688 // request is successfully sent 689 ppFetchRequestSent->notify(mem_req); 690 } 691 } else { 692 // Don't send an instruction to decode if we can't handle it. 693 if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) { 694 assert(!finishTranslationEvent.scheduled()); 695 finishTranslationEvent.setFault(fault); 696 finishTranslationEvent.setReq(mem_req); 697 cpu->schedule(finishTranslationEvent, 698 cpu->clockEdge(Cycles(1))); 699 return; 700 } 701 DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n", 702 tid, mem_req->getVaddr(), memReq[tid]->getVaddr()); 703 // Translation faulted, icache request won't be sent. 704 memReq[tid] = NULL; 705 706 // Send the fault to commit. This thread will not do anything 707 // until commit handles the fault. The only other way it can 708 // wake up is if a squash comes along and changes the PC. 709 TheISA::PCState fetchPC = pc[tid]; 710 711 DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid); 712 // We will use a nop in ordier to carry the fault. 713 DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr, 714 NULL, fetchPC, fetchPC, false); 715 instruction->setNotAnInst(); 716 717 instruction->setPredTarg(fetchPC); 718 instruction->fault = fault; 719 wroteToTimeBuffer = true; 720 721 DPRINTF(Activity, "Activity this cycle.\n"); 722 cpu->activityThisCycle(); 723 724 fetchStatus[tid] = TrapPending; 725 726 DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid); 727 DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n", 728 tid, fault->name(), pc[tid]); 729 } 730 _status = updateFetchStatus(); 731} 732 733template <class Impl> 734inline void 735DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC, 736 const DynInstPtr squashInst, ThreadID tid) 737{ 738 DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n", 739 tid, newPC); 740 741 pc[tid] = newPC; 742 fetchOffset[tid] = 0; 743 if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr()) 744 macroop[tid] = squashInst->macroop; 745 else 746 macroop[tid] = NULL; 747 decoder[tid]->reset(); 748 749 // Clear the icache miss if it's outstanding. 750 if (fetchStatus[tid] == IcacheWaitResponse) { 751 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", 752 tid); 753 memReq[tid] = NULL; 754 } else if (fetchStatus[tid] == ItlbWait) { 755 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n", 756 tid); 757 memReq[tid] = NULL; 758 } 759 760 // Get rid of the retrying packet if it was from this thread. 761 if (retryTid == tid) { 762 assert(cacheBlocked); 763 if (retryPkt) { 764 delete retryPkt; 765 } 766 retryPkt = NULL; 767 retryTid = InvalidThreadID; 768 } 769 770 fetchStatus[tid] = Squashing; 771 772 // Empty fetch queue 773 fetchQueue[tid].clear(); 774 775 // microops are being squashed, it is not known wheather the 776 // youngest non-squashed microop was marked delayed commit 777 // or not. Setting the flag to true ensures that the 778 // interrupts are not handled when they cannot be, though 779 // some opportunities to handle interrupts may be missed. 780 delayedCommit[tid] = true; 781 782 ++fetchSquashCycles; 783} 784 785template<class Impl> 786void 787DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC, 788 const DynInstPtr squashInst, 789 const InstSeqNum seq_num, ThreadID tid) 790{ 791 DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid); 792 793 doSquash(newPC, squashInst, tid); 794 795 // Tell the CPU to remove any instructions that are in flight between 796 // fetch and decode. 797 cpu->removeInstsUntil(seq_num, tid); 798} 799 800template<class Impl> 801bool 802DefaultFetch<Impl>::checkStall(ThreadID tid) const 803{ 804 bool ret_val = false; 805 806 if (stalls[tid].drain) { 807 assert(cpu->isDraining()); 808 DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid); 809 ret_val = true; 810 } 811 812 return ret_val; 813} 814 815template<class Impl> 816typename DefaultFetch<Impl>::FetchStatus 817DefaultFetch<Impl>::updateFetchStatus() 818{ 819 //Check Running 820 list<ThreadID>::iterator threads = activeThreads->begin(); 821 list<ThreadID>::iterator end = activeThreads->end(); 822 823 while (threads != end) { 824 ThreadID tid = *threads++; 825 826 if (fetchStatus[tid] == Running || 827 fetchStatus[tid] == Squashing || 828 fetchStatus[tid] == IcacheAccessComplete) { 829 830 if (_status == Inactive) { 831 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid); 832 833 if (fetchStatus[tid] == IcacheAccessComplete) { 834 DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache" 835 "completion\n",tid); 836 } 837 838 cpu->activateStage(O3CPU::FetchIdx); 839 } 840 841 return Active; 842 } 843 } 844 845 // Stage is switching from active to inactive, notify CPU of it. 846 if (_status == Active) { 847 DPRINTF(Activity, "Deactivating stage.\n"); 848 849 cpu->deactivateStage(O3CPU::FetchIdx); 850 } 851 852 return Inactive; 853} 854 855template <class Impl> 856void 857DefaultFetch<Impl>::squash(const TheISA::PCState &newPC, 858 const InstSeqNum seq_num, DynInstPtr squashInst, 859 ThreadID tid) 860{ 861 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid); 862 863 doSquash(newPC, squashInst, tid); 864 865 // Tell the CPU to remove any instructions that are not in the ROB. 866 cpu->removeInstsNotInROB(tid); 867} 868 869template <class Impl> 870void 871DefaultFetch<Impl>::tick() 872{ 873 list<ThreadID>::iterator threads = activeThreads->begin(); 874 list<ThreadID>::iterator end = activeThreads->end(); 875 bool status_change = false; 876 877 wroteToTimeBuffer = false; 878 879 for (ThreadID i = 0; i < numThreads; ++i) { 880 issuePipelinedIfetch[i] = false; 881 } 882 883 while (threads != end) { 884 ThreadID tid = *threads++; 885 886 // Check the signals for each thread to determine the proper status 887 // for each thread. 888 bool updated_status = checkSignalsAndUpdate(tid); 889 status_change = status_change || updated_status; 890 } 891 892 DPRINTF(Fetch, "Running stage.\n"); 893 894 if (FullSystem) { 895 if (fromCommit->commitInfo[0].interruptPending) { 896 interruptPending = true; 897 } 898 899 if (fromCommit->commitInfo[0].clearInterrupt) { 900 interruptPending = false; 901 } 902 } 903 904 for (threadFetched = 0; threadFetched < numFetchingThreads; 905 threadFetched++) { 906 // Fetch each of the actively fetching threads. 907 fetch(status_change); 908 } 909 910 // Record number of instructions fetched this cycle for distribution. 911 fetchNisnDist.sample(numInst); 912 913 if (status_change) { 914 // Change the fetch stage status if there was a status change. 915 _status = updateFetchStatus(); 916 } 917 918 // Issue the next I-cache request if possible. 919 for (ThreadID i = 0; i < numThreads; ++i) { 920 if (issuePipelinedIfetch[i]) { 921 pipelineIcacheAccesses(i); 922 } 923 } 924 925 // Send instructions enqueued into the fetch queue to decode. 926 // Limit rate by fetchWidth. Stall if decode is stalled. 927 unsigned insts_to_decode = 0; 928 unsigned available_insts = 0; 929 930 for (auto tid : *activeThreads) { 931 if (!stalls[tid].decode) { 932 available_insts += fetchQueue[tid].size(); 933 } 934 } 935 936 // Pick a random thread to start trying to grab instructions from 937 auto tid_itr = activeThreads->begin(); 938 std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1)); 939 940 while (available_insts != 0 && insts_to_decode < decodeWidth) { 941 ThreadID tid = *tid_itr; 942 if (!stalls[tid].decode && !fetchQueue[tid].empty()) { 943 const auto& inst = fetchQueue[tid].front(); 944 toDecode->insts[toDecode->size++] = inst; 945 DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from " 946 "fetch queue. Fetch queue size: %i.\n", 947 tid, inst->seqNum, fetchQueue[tid].size()); 948 949 wroteToTimeBuffer = true; 950 fetchQueue[tid].pop_front(); 951 insts_to_decode++; 952 available_insts--; 953 } 954 955 tid_itr++; 956 // Wrap around if at end of active threads list 957 if (tid_itr == activeThreads->end()) 958 tid_itr = activeThreads->begin(); 959 } 960 961 // If there was activity this cycle, inform the CPU of it. 962 if (wroteToTimeBuffer) { 963 DPRINTF(Activity, "Activity this cycle.\n"); 964 cpu->activityThisCycle(); 965 } 966 967 // Reset the number of the instruction we've fetched. 968 numInst = 0; 969} 970 971template <class Impl> 972bool 973DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid) 974{ 975 // Update the per thread stall statuses. 976 if (fromDecode->decodeBlock[tid]) { 977 stalls[tid].decode = true; 978 } 979 980 if (fromDecode->decodeUnblock[tid]) { 981 assert(stalls[tid].decode); 982 assert(!fromDecode->decodeBlock[tid]); 983 stalls[tid].decode = false; 984 } 985 986 // Check squash signals from commit. 987 if (fromCommit->commitInfo[tid].squash) { 988 989 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " 990 "from commit.\n",tid); 991 // In any case, squash. 992 squash(fromCommit->commitInfo[tid].pc, 993 fromCommit->commitInfo[tid].doneSeqNum, 994 fromCommit->commitInfo[tid].squashInst, tid); 995 996 // If it was a branch mispredict on a control instruction, update the 997 // branch predictor with that instruction, otherwise just kill the 998 // invalid state we generated in after sequence number 999 if (fromCommit->commitInfo[tid].mispredictInst && 1000 fromCommit->commitInfo[tid].mispredictInst->isControl()) { 1001 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, 1002 fromCommit->commitInfo[tid].pc, 1003 fromCommit->commitInfo[tid].branchTaken, 1004 tid); 1005 } else { 1006 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, 1007 tid); 1008 } 1009 1010 return true; 1011 } else if (fromCommit->commitInfo[tid].doneSeqNum) { 1012 // Update the branch predictor if it wasn't a squashed instruction 1013 // that was broadcasted. 1014 branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid); 1015 } 1016 1017 // Check squash signals from decode. 1018 if (fromDecode->decodeInfo[tid].squash) { 1019 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " 1020 "from decode.\n",tid); 1021 1022 // Update the branch predictor. 1023 if (fromDecode->decodeInfo[tid].branchMispredict) { 1024 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, 1025 fromDecode->decodeInfo[tid].nextPC, 1026 fromDecode->decodeInfo[tid].branchTaken, 1027 tid); 1028 } else { 1029 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, 1030 tid); 1031 } 1032 1033 if (fetchStatus[tid] != Squashing) { 1034 1035 DPRINTF(Fetch, "Squashing from decode with PC = %s\n", 1036 fromDecode->decodeInfo[tid].nextPC); 1037 // Squash unless we're already squashing 1038 squashFromDecode(fromDecode->decodeInfo[tid].nextPC, 1039 fromDecode->decodeInfo[tid].squashInst, 1040 fromDecode->decodeInfo[tid].doneSeqNum, 1041 tid); 1042 1043 return true; 1044 } 1045 } 1046 1047 if (checkStall(tid) && 1048 fetchStatus[tid] != IcacheWaitResponse && 1049 fetchStatus[tid] != IcacheWaitRetry && 1050 fetchStatus[tid] != ItlbWait && 1051 fetchStatus[tid] != QuiescePending) { 1052 DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); 1053 1054 fetchStatus[tid] = Blocked; 1055 1056 return true; 1057 } 1058 1059 if (fetchStatus[tid] == Blocked || 1060 fetchStatus[tid] == Squashing) { 1061 // Switch status to running if fetch isn't being told to block or 1062 // squash this cycle. 1063 DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n", 1064 tid); 1065 1066 fetchStatus[tid] = Running; 1067 1068 return true; 1069 } 1070 1071 // If we've reached this point, we have not gotten any signals that 1072 // cause fetch to change its status. Fetch remains the same as before. 1073 return false; 1074} 1075 1076template<class Impl> 1077typename Impl::DynInstPtr 1078DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst, 1079 StaticInstPtr curMacroop, TheISA::PCState thisPC, 1080 TheISA::PCState nextPC, bool trace) 1081{ 1082 // Get a sequence number. 1083 InstSeqNum seq = cpu->getAndIncrementInstSeq(); 1084 1085 // Create a new DynInst from the instruction fetched. 1086 DynInstPtr instruction = 1087 new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); 1088 instruction->setTid(tid); 1089 1090 instruction->setASID(tid); 1091 1092 instruction->setThreadState(cpu->thread[tid]); 1093 1094 DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created " 1095 "[sn:%lli].\n", tid, thisPC.instAddr(), 1096 thisPC.microPC(), seq); 1097 1098 DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid, 1099 instruction->staticInst-> 1100 disassemble(thisPC.instAddr())); 1101 1102#if TRACING_ON 1103 if (trace) { 1104 instruction->traceData = 1105 cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid), 1106 instruction->staticInst, thisPC, curMacroop); 1107 } 1108#else 1109 instruction->traceData = NULL; 1110#endif 1111 1112 // Add instruction to the CPU's list of instructions. 1113 instruction->setInstListIt(cpu->addInst(instruction)); 1114 1115 // Write the instruction to the first slot in the queue 1116 // that heads to decode. 1117 assert(numInst < fetchWidth); 1118 fetchQueue[tid].push_back(instruction); 1119 assert(fetchQueue[tid].size() <= fetchQueueSize); 1120 DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n", 1121 tid, fetchQueue[tid].size(), fetchQueueSize); 1122 //toDecode->insts[toDecode->size++] = instruction; 1123 1124 // Keep track of if we can take an interrupt at this boundary 1125 delayedCommit[tid] = instruction->isDelayedCommit(); 1126 1127 return instruction; 1128} 1129 1130template<class Impl> 1131void 1132DefaultFetch<Impl>::fetch(bool &status_change) 1133{ 1134 ////////////////////////////////////////// 1135 // Start actual fetch 1136 ////////////////////////////////////////// 1137 ThreadID tid = getFetchingThread(); 1138 1139 assert(!cpu->switchedOut()); 1140 1141 if (tid == InvalidThreadID) { 1142 // Breaks looping condition in tick() 1143 threadFetched = numFetchingThreads; 1144 1145 if (numThreads == 1) { // @todo Per-thread stats 1146 profileStall(0); 1147 } 1148 1149 return; 1150 } 1151 1152 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); 1153 1154 // The current PC. 1155 TheISA::PCState thisPC = pc[tid]; 1156 1157 Addr pcOffset = fetchOffset[tid]; 1158 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1159 1160 bool inRom = isRomMicroPC(thisPC.microPC()); 1161 1162 // If returning from the delay of a cache miss, then update the status 1163 // to running, otherwise do the cache access. Possibly move this up 1164 // to tick() function. 1165 if (fetchStatus[tid] == IcacheAccessComplete) { 1166 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid); 1167 1168 fetchStatus[tid] = Running; 1169 status_change = true; 1170 } else if (fetchStatus[tid] == Running) { 1171 // Align the fetch PC so its at the start of a fetch buffer segment. 1172 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); 1173 1174 // If buffer is no longer valid or fetchAddr has moved to point 1175 // to the next cache block, AND we have no remaining ucode 1176 // from a macro-op, then start fetch from icache. 1177 if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid]) 1178 && !inRom && !macroop[tid]) { 1179 DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " 1180 "instruction, starting at PC %s.\n", tid, thisPC); 1181 1182 fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); 1183 1184 if (fetchStatus[tid] == IcacheWaitResponse) 1185 ++icacheStallCycles; 1186 else if (fetchStatus[tid] == ItlbWait) 1187 ++fetchTlbCycles; 1188 else 1189 ++fetchMiscStallCycles; 1190 return; 1191 } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) { 1192 // Stall CPU if an interrupt is posted and we're not issuing 1193 // an delayed commit micro-op currently (delayed commit instructions 1194 // are not interruptable by interrupts, only faults) 1195 ++fetchMiscStallCycles; 1196 DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid); 1197 return; 1198 } 1199 } else { 1200 if (fetchStatus[tid] == Idle) { 1201 ++fetchIdleCycles; 1202 DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid); 1203 } 1204 1205 // Status is Idle, so fetch should do nothing. 1206 return; 1207 } 1208 1209 ++fetchCycles; 1210 1211 TheISA::PCState nextPC = thisPC; 1212 1213 StaticInstPtr staticInst = NULL; 1214 StaticInstPtr curMacroop = macroop[tid]; 1215 1216 // If the read of the first instruction was successful, then grab the 1217 // instructions from the rest of the cache line and put them into the 1218 // queue heading to decode. 1219 1220 DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to " 1221 "decode.\n", tid); 1222 1223 // Need to keep track of whether or not a predicted branch 1224 // ended this fetch block. 1225 bool predictedBranch = false; 1226 1227 // Need to halt fetch if quiesce instruction detected 1228 bool quiesce = false; 1229 1230 TheISA::MachInst *cacheInsts = 1231 reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]); 1232 1233 const unsigned numInsts = fetchBufferSize / instSize; 1234 unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; 1235 1236 // Loop through instruction memory from the cache. 1237 // Keep issuing while fetchWidth is available and branch is not 1238 // predicted taken 1239 while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize 1240 && !predictedBranch && !quiesce) { 1241 // We need to process more memory if we aren't going to get a 1242 // StaticInst from the rom, the current macroop, or what's already 1243 // in the decoder. 1244 bool needMem = !inRom && !curMacroop && 1245 !decoder[tid]->instReady(); 1246 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1247 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); 1248 1249 if (needMem) { 1250 // If buffer is no longer valid or fetchAddr has moved to point 1251 // to the next cache block then start fetch from icache. 1252 if (!fetchBufferValid[tid] || 1253 fetchBufferBlockPC != fetchBufferPC[tid]) 1254 break; 1255 1256 if (blkOffset >= numInsts) { 1257 // We need to process more memory, but we've run out of the 1258 // current block. 1259 break; 1260 } 1261 1262 MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]); 1263 decoder[tid]->moreBytes(thisPC, fetchAddr, inst); 1264 1265 if (decoder[tid]->needMoreBytes()) { 1266 blkOffset++; 1267 fetchAddr += instSize; 1268 pcOffset += instSize; 1269 } 1270 } 1271 1272 // Extract as many instructions and/or microops as we can from 1273 // the memory we've processed so far. 1274 do { 1275 if (!(curMacroop || inRom)) { 1276 if (decoder[tid]->instReady()) { 1277 staticInst = decoder[tid]->decode(thisPC); 1278 1279 // Increment stat of fetched instructions. 1280 ++fetchedInsts; 1281 1282 if (staticInst->isMacroop()) { 1283 curMacroop = staticInst; 1284 } else { 1285 pcOffset = 0; 1286 } 1287 } else { 1288 // We need more bytes for this instruction so blkOffset and 1289 // pcOffset will be updated 1290 break; 1291 } 1292 } 1293 // Whether we're moving to a new macroop because we're at the 1294 // end of the current one, or the branch predictor incorrectly 1295 // thinks we are... 1296 bool newMacro = false; 1297 if (curMacroop || inRom) { 1298 if (inRom) { 1299 staticInst = cpu->microcodeRom.fetchMicroop( 1300 thisPC.microPC(), curMacroop); 1301 } else { 1302 staticInst = curMacroop->fetchMicroop(thisPC.microPC()); 1303 } 1304 newMacro |= staticInst->isLastMicroop(); 1305 } 1306 1307 DynInstPtr instruction = 1308 buildInst(tid, staticInst, curMacroop, 1309 thisPC, nextPC, true); 1310 1311 ppFetch->notify(instruction); 1312 numInst++; 1313 1314#if TRACING_ON 1315 if (DTRACE(O3PipeView)) { 1316 instruction->fetchTick = curTick(); 1317 } 1318#endif 1319 1320 nextPC = thisPC; 1321 1322 // If we're branching after this instruction, quit fetching 1323 // from the same block. 1324 predictedBranch |= thisPC.branching(); 1325 predictedBranch |= 1326 lookupAndUpdateNextPC(instruction, nextPC); 1327 if (predictedBranch) { 1328 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); 1329 } 1330 1331 newMacro |= thisPC.instAddr() != nextPC.instAddr(); 1332 1333 // Move to the next instruction, unless we have a branch. 1334 thisPC = nextPC; 1335 inRom = isRomMicroPC(thisPC.microPC()); 1336 1337 if (newMacro) { 1338 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask; 1339 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; 1340 pcOffset = 0; 1341 curMacroop = NULL; 1342 } 1343 1344 if (instruction->isQuiesce()) { 1345 DPRINTF(Fetch, 1346 "Quiesce instruction encountered, halting fetch!\n"); 1347 fetchStatus[tid] = QuiescePending; 1348 status_change = true; 1349 quiesce = true; 1350 break; 1351 } 1352 } while ((curMacroop || decoder[tid]->instReady()) && 1353 numInst < fetchWidth && 1354 fetchQueue[tid].size() < fetchQueueSize); 1355 1356 // Re-evaluate whether the next instruction to fetch is in micro-op ROM 1357 // or not. 1358 inRom = isRomMicroPC(thisPC.microPC()); 1359 } 1360 1361 if (predictedBranch) { 1362 DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " 1363 "instruction encountered.\n", tid); 1364 } else if (numInst >= fetchWidth) { 1365 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " 1366 "for this cycle.\n", tid); 1367 } else if (blkOffset >= fetchBufferSize) { 1368 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the" 1369 "fetch buffer.\n", tid); 1370 } 1371 1372 macroop[tid] = curMacroop; 1373 fetchOffset[tid] = pcOffset; 1374 1375 if (numInst > 0) { 1376 wroteToTimeBuffer = true; 1377 } 1378 1379 pc[tid] = thisPC; 1380 1381 // pipeline a fetch if we're crossing a fetch buffer boundary and not in 1382 // a state that would preclude fetching 1383 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1384 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); 1385 issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] && 1386 fetchStatus[tid] != IcacheWaitResponse && 1387 fetchStatus[tid] != ItlbWait && 1388 fetchStatus[tid] != IcacheWaitRetry && 1389 fetchStatus[tid] != QuiescePending && 1390 !curMacroop; 1391} 1392 1393template<class Impl> 1394void 1395DefaultFetch<Impl>::recvReqRetry() 1396{ 1397 if (retryPkt != NULL) { 1398 assert(cacheBlocked); 1399 assert(retryTid != InvalidThreadID); 1400 assert(fetchStatus[retryTid] == IcacheWaitRetry); 1401 1402 if (cpu->getInstPort().sendTimingReq(retryPkt)) { 1403 fetchStatus[retryTid] = IcacheWaitResponse; 1404 // Notify Fetch Request probe when a retryPkt is successfully sent. 1405 // Note that notify must be called before retryPkt is set to NULL. 1406 ppFetchRequestSent->notify(retryPkt->req); 1407 retryPkt = NULL; 1408 retryTid = InvalidThreadID; 1409 cacheBlocked = false; 1410 } 1411 } else { 1412 assert(retryTid == InvalidThreadID); 1413 // Access has been squashed since it was sent out. Just clear 1414 // the cache being blocked. 1415 cacheBlocked = false; 1416 } 1417} 1418 1419/////////////////////////////////////// 1420// // 1421// SMT FETCH POLICY MAINTAINED HERE // 1422// // 1423/////////////////////////////////////// 1424template<class Impl> 1425ThreadID 1426DefaultFetch<Impl>::getFetchingThread() 1427{ 1428 if (numThreads > 1) { 1429 switch (fetchPolicy) { 1430 case FetchPolicy::RoundRobin: 1431 return roundRobin(); 1432 case FetchPolicy::IQCount: 1433 return iqCount(); 1434 case FetchPolicy::LSQCount: 1435 return lsqCount(); 1436 case FetchPolicy::Branch: 1437 return branchCount(); 1438 default: 1439 return InvalidThreadID; 1440 } 1441 } else { 1442 list<ThreadID>::iterator thread = activeThreads->begin(); 1443 if (thread == activeThreads->end()) { 1444 return InvalidThreadID; 1445 } 1446 1447 ThreadID tid = *thread; 1448 1449 if (fetchStatus[tid] == Running || 1450 fetchStatus[tid] == IcacheAccessComplete || 1451 fetchStatus[tid] == Idle) { 1452 return tid; 1453 } else { 1454 return InvalidThreadID; 1455 } 1456 } 1457} 1458 1459 1460template<class Impl> 1461ThreadID 1462DefaultFetch<Impl>::roundRobin() 1463{ 1464 list<ThreadID>::iterator pri_iter = priorityList.begin(); 1465 list<ThreadID>::iterator end = priorityList.end(); 1466 1467 ThreadID high_pri; 1468 1469 while (pri_iter != end) { 1470 high_pri = *pri_iter; 1471 1472 assert(high_pri <= numThreads); 1473 1474 if (fetchStatus[high_pri] == Running || 1475 fetchStatus[high_pri] == IcacheAccessComplete || 1476 fetchStatus[high_pri] == Idle) { 1477 1478 priorityList.erase(pri_iter); 1479 priorityList.push_back(high_pri); 1480 1481 return high_pri; 1482 } 1483 1484 pri_iter++; 1485 } 1486 1487 return InvalidThreadID; 1488} 1489 1490template<class Impl> 1491ThreadID 1492DefaultFetch<Impl>::iqCount() 1493{ 1494 //sorted from lowest->highest 1495 std::priority_queue<unsigned,vector<unsigned>, 1496 std::greater<unsigned> > PQ; 1497 std::map<unsigned, ThreadID> threadMap; 1498 1499 list<ThreadID>::iterator threads = activeThreads->begin(); 1500 list<ThreadID>::iterator end = activeThreads->end(); 1501 1502 while (threads != end) { 1503 ThreadID tid = *threads++; 1504 unsigned iqCount = fromIEW->iewInfo[tid].iqCount; 1505 1506 //we can potentially get tid collisions if two threads 1507 //have the same iqCount, but this should be rare. 1508 PQ.push(iqCount); 1509 threadMap[iqCount] = tid; 1510 } 1511 1512 while (!PQ.empty()) { 1513 ThreadID high_pri = threadMap[PQ.top()]; 1514 1515 if (fetchStatus[high_pri] == Running || 1516 fetchStatus[high_pri] == IcacheAccessComplete || 1517 fetchStatus[high_pri] == Idle) 1518 return high_pri; 1519 else 1520 PQ.pop(); 1521 1522 } 1523 1524 return InvalidThreadID; 1525} 1526 1527template<class Impl> 1528ThreadID 1529DefaultFetch<Impl>::lsqCount() 1530{ 1531 //sorted from lowest->highest 1532 std::priority_queue<unsigned,vector<unsigned>, 1533 std::greater<unsigned> > PQ; 1534 std::map<unsigned, ThreadID> threadMap; 1535 1536 list<ThreadID>::iterator threads = activeThreads->begin(); 1537 list<ThreadID>::iterator end = activeThreads->end(); 1538 1539 while (threads != end) { 1540 ThreadID tid = *threads++; 1541 unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount; 1542 1543 //we can potentially get tid collisions if two threads 1544 //have the same iqCount, but this should be rare. 1545 PQ.push(ldstqCount); 1546 threadMap[ldstqCount] = tid; 1547 } 1548 1549 while (!PQ.empty()) { 1550 ThreadID high_pri = threadMap[PQ.top()]; 1551 1552 if (fetchStatus[high_pri] == Running || 1553 fetchStatus[high_pri] == IcacheAccessComplete || 1554 fetchStatus[high_pri] == Idle) 1555 return high_pri; 1556 else 1557 PQ.pop(); 1558 } 1559 1560 return InvalidThreadID; 1561} 1562 1563template<class Impl> 1564ThreadID 1565DefaultFetch<Impl>::branchCount() 1566{ 1567#if 0 1568 list<ThreadID>::iterator thread = activeThreads->begin(); 1569 assert(thread != activeThreads->end()); 1570 ThreadID tid = *thread; 1571#endif 1572 1573 panic("Branch Count Fetch policy unimplemented\n"); 1574 return InvalidThreadID; 1575} 1576 1577template<class Impl> 1578void 1579DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid) 1580{ 1581 if (!issuePipelinedIfetch[tid]) { 1582 return; 1583 } 1584 1585 // The next PC to access. 1586 TheISA::PCState thisPC = pc[tid]; 1587 1588 if (isRomMicroPC(thisPC.microPC())) { 1589 return; 1590 } 1591 1592 Addr pcOffset = fetchOffset[tid]; 1593 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1594 1595 // Align the fetch PC so its at the start of a fetch buffer segment. 1596 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); 1597 1598 // Unless buffer already got the block, fetch it from icache. 1599 if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) { 1600 DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, " 1601 "starting at PC %s.\n", tid, thisPC); 1602 1603 fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); 1604 } 1605} 1606 1607template<class Impl> 1608void 1609DefaultFetch<Impl>::profileStall(ThreadID tid) { 1610 DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); 1611 1612 // @todo Per-thread stats 1613 1614 if (stalls[tid].drain) { 1615 ++fetchPendingDrainCycles; 1616 DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); 1617 } else if (activeThreads->empty()) { 1618 ++fetchNoActiveThreadStallCycles; 1619 DPRINTF(Fetch, "Fetch has no active thread!\n"); 1620 } else if (fetchStatus[tid] == Blocked) { 1621 ++fetchBlockedCycles; 1622 DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid); 1623 } else if (fetchStatus[tid] == Squashing) { 1624 ++fetchSquashCycles; 1625 DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); 1626 } else if (fetchStatus[tid] == IcacheWaitResponse) { 1627 ++icacheStallCycles; 1628 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", 1629 tid); 1630 } else if (fetchStatus[tid] == ItlbWait) { 1631 ++fetchTlbCycles; 1632 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to " 1633 "finish!\n", tid); 1634 } else if (fetchStatus[tid] == TrapPending) { 1635 ++fetchPendingTrapStallCycles; 1636 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n", 1637 tid); 1638 } else if (fetchStatus[tid] == QuiescePending) { 1639 ++fetchPendingQuiesceStallCycles; 1640 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce " 1641 "instruction!\n", tid); 1642 } else if (fetchStatus[tid] == IcacheWaitRetry) { 1643 ++fetchIcacheWaitRetryStallCycles; 1644 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n", 1645 tid); 1646 } else if (fetchStatus[tid] == NoGoodAddr) { 1647 DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n", 1648 tid); 1649 } else { 1650 DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n", 1651 tid, fetchStatus[tid]); 1652 } 1653} 1654 1655#endif//__CPU_O3_FETCH_IMPL_HH__ 1656