Cross Reference: /gem5/src/cpu/o3/fetch

Deleted Added

sdiff udiff text old ( 8949:3fa1ee293096 ) new ( 8975:7f36d4436074 )

full compact

fetch_impl.hh (8949:3fa1ee293096)	fetch_impl.hh (8975:7f36d4436074)
1/* 2 * Copyright (c) 2010-2011 ARM Limited 3 * All rights reserved. 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2004-2006 The Regents of The University of Michigan 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Kevin Lim 41 * Korey Sewell 42 / 43 44#include <algorithm> 45#include <cstring> 46#include <list> 47#include <map> 48#include <queue> 49 50#include "arch/isa_traits.hh" 51#include "arch/tlb.hh" 52#include "arch/utility.hh" 53#include "arch/vtophys.hh" 54#include "base/types.hh" 55#include "config/the_isa.hh" 56#include "cpu/base.hh" 57//#include "cpu/checker/cpu.hh" 58#include "cpu/o3/fetch.hh" 59#include "cpu/exetrace.hh" 60#include "debug/Activity.hh" 61#include "debug/Fetch.hh" 62#include "mem/packet.hh" 63#include "params/DerivO3CPU.hh" 64#include "sim/byteswap.hh" 65#include "sim/core.hh" 66#include "sim/eventq.hh" 67#include "sim/full_system.hh" 68#include "sim/system.hh" 69 70using namespace std; 71 72template<class Impl> 73DefaultFetch<Impl>::DefaultFetch(O3CPU _cpu, DerivO3CPUParams params) 74 : cpu(_cpu), 75 branchPred(params), 76 predecoder(NULL), 77 numInst(0), 78 decodeToFetchDelay(params->decodeToFetchDelay), 79 renameToFetchDelay(params->renameToFetchDelay), 80 iewToFetchDelay(params->iewToFetchDelay), 81 commitToFetchDelay(params->commitToFetchDelay), 82 fetchWidth(params->fetchWidth), 83 cacheBlocked(false), 84 retryPkt(NULL), 85 retryTid(InvalidThreadID), 86 numThreads(params->numThreads), 87 numFetchingThreads(params->smtNumFetchingThreads), 88 interruptPending(false), 89 drainPending(false), 90 switchedOut(false), 91 finishTranslationEvent(this) 92{ 93 if (numThreads > Impl::MaxThreads) 94 fatal("numThreads (%d) is larger than compiled limit (%d),\n" 95 "\tincrease MaxThreads in src/cpu/o3/impl.hh\n", 96 numThreads, static_cast<int>(Impl::MaxThreads)); 97 if (fetchWidth > Impl::MaxWidth) 98 fatal("fetchWidth (%d) is larger than compiled limit (%d),\n" 99 "\tincrease MaxWidth in src/cpu/o3/impl.hh\n", 100* fetchWidth, static_cast<int>(Impl::MaxWidth)); 101 102 // Set fetch stage's status to inactive. 103 _status = Inactive; 104 105 std::string policy = params->smtFetchPolicy; 106 107 // Convert string to lowercase 108 std::transform(policy.begin(), policy.end(), policy.begin(), 109 (int()(int)) tolower); 110* 111 // Figure out fetch policy 112 if (policy == "singlethread") { 113 fetchPolicy = SingleThread; 114 if (numThreads > 1) 115 panic("Invalid Fetch Policy for a SMT workload."); 116 } else if (policy == "roundrobin") { 117 fetchPolicy = RoundRobin; 118 DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); 119 } else if (policy == "branch") { 120 fetchPolicy = Branch; 121 DPRINTF(Fetch, "Fetch policy set to Branch Count\n"); 122 } else if (policy == "iqcount") { 123 fetchPolicy = IQ; 124 DPRINTF(Fetch, "Fetch policy set to IQ count\n"); 125 } else if (policy == "lsqcount") { 126 fetchPolicy = LSQ; 127 DPRINTF(Fetch, "Fetch policy set to LSQ count\n"); 128 } else { 129 fatal("Invalid Fetch Policy. Options Are: {SingleThread," 130 " RoundRobin,LSQcount,IQcount}\n"); 131 } 132 133 // Get the size of an instruction. 134 instSize = sizeof(TheISA::MachInst); 135} 136 137template <class Impl> 138std::string 139DefaultFetch<Impl>::name() const 140{ 141 return cpu->name() + ".fetch"; 142} 143 144template <class Impl> 145void 146DefaultFetch<Impl>::regStats() 147{ 148 icacheStallCycles 149 .name(name() + ".icacheStallCycles") 150 .desc("Number of cycles fetch is stalled on an Icache miss") 151 .prereq(icacheStallCycles); 152 153 fetchedInsts 154 .name(name() + ".Insts") 155 .desc("Number of instructions fetch has processed") 156 .prereq(fetchedInsts); 157 158 fetchedBranches 159 .name(name() + ".Branches") 160 .desc("Number of branches that fetch encountered") 161 .prereq(fetchedBranches); 162 163 predictedBranches 164 .name(name() + ".predictedBranches") 165 .desc("Number of branches that fetch has predicted taken") 166 .prereq(predictedBranches); 167 168 fetchCycles 169 .name(name() + ".Cycles") 170 .desc("Number of cycles fetch has run and was not squashing or" 171 " blocked") 172 .prereq(fetchCycles); 173 174 fetchSquashCycles 175 .name(name() + ".SquashCycles") 176 .desc("Number of cycles fetch has spent squashing") 177 .prereq(fetchSquashCycles); 178 179 fetchTlbCycles 180 .name(name() + ".TlbCycles") 181 .desc("Number of cycles fetch has spent waiting for tlb") 182 .prereq(fetchTlbCycles); 183 184 fetchIdleCycles 185 .name(name() + ".IdleCycles") 186 .desc("Number of cycles fetch was idle") 187 .prereq(fetchIdleCycles); 188 189 fetchBlockedCycles 190 .name(name() + ".BlockedCycles") 191 .desc("Number of cycles fetch has spent blocked") 192 .prereq(fetchBlockedCycles); 193 194 fetchedCacheLines 195 .name(name() + ".CacheLines") 196 .desc("Number of cache lines fetched") 197 .prereq(fetchedCacheLines); 198 199 fetchMiscStallCycles 200 .name(name() + ".MiscStallCycles") 201 .desc("Number of cycles fetch has spent waiting on interrupts, or " 202 "bad addresses, or out of MSHRs") 203 .prereq(fetchMiscStallCycles); 204 205 fetchPendingDrainCycles 206 .name(name() + ".PendingDrainCycles") 207 .desc("Number of cycles fetch has spent waiting on pipes to drain") 208 .prereq(fetchPendingDrainCycles); 209 210 fetchNoActiveThreadStallCycles 211 .name(name() + ".NoActiveThreadStallCycles") 212 .desc("Number of stall cycles due to no active thread to fetch from") 213 .prereq(fetchNoActiveThreadStallCycles); 214 215 fetchPendingTrapStallCycles 216 .name(name() + ".PendingTrapStallCycles") 217 .desc("Number of stall cycles due to pending traps") 218 .prereq(fetchPendingTrapStallCycles); 219 220 fetchPendingQuiesceStallCycles 221 .name(name() + ".PendingQuiesceStallCycles") 222 .desc("Number of stall cycles due to pending quiesce instructions") 223 .prereq(fetchPendingQuiesceStallCycles); 224 225 fetchIcacheWaitRetryStallCycles 226 .name(name() + ".IcacheWaitRetryStallCycles") 227 .desc("Number of stall cycles due to full MSHR") 228 .prereq(fetchIcacheWaitRetryStallCycles); 229 230 fetchIcacheSquashes 231 .name(name() + ".IcacheSquashes") 232 .desc("Number of outstanding Icache misses that were squashed") 233 .prereq(fetchIcacheSquashes); 234 235 fetchTlbSquashes 236 .name(name() + ".ItlbSquashes") 237 .desc("Number of outstanding ITLB misses that were squashed") 238 .prereq(fetchTlbSquashes); 239 240 fetchNisnDist 241 .init(/* base value / 0, 242* /* last value / fetchWidth, 243* /* bucket size / 1) 244* .name(name() + ".rateDist") 245 .desc("Number of instructions fetched each cycle (Total)") 246 .flags(Stats::pdf); 247 248 idleRate 249 .name(name() + ".idleRate") 250 .desc("Percent of cycles fetch was idle") 251 .prereq(idleRate); 252 idleRate = fetchIdleCycles * 100 / cpu->numCycles; 253 254 branchRate 255 .name(name() + ".branchRate") 256 .desc("Number of branch fetches per cycle") 257 .flags(Stats::total); 258 branchRate = fetchedBranches / cpu->numCycles; 259 260 fetchRate 261 .name(name() + ".rate") 262 .desc("Number of inst fetches per cycle") 263 .flags(Stats::total); 264 fetchRate = fetchedInsts / cpu->numCycles; 265 266 branchPred.regStats(); 267} 268 269template<class Impl> 270void 271DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> time_buffer) 272{ 273* timeBuffer = time_buffer; 274 275 // Create wires to get information from proper places in time buffer. 276 fromDecode = timeBuffer->getWire(-decodeToFetchDelay); 277 fromRename = timeBuffer->getWire(-renameToFetchDelay); 278 fromIEW = timeBuffer->getWire(-iewToFetchDelay); 279 fromCommit = timeBuffer->getWire(-commitToFetchDelay); 280} 281 282template<class Impl> 283void 284DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> at_ptr) 285{ 286* activeThreads = at_ptr; 287} 288 289template<class Impl> 290void 291DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> fq_ptr) 292{ 293* fetchQueue = fq_ptr; 294 295 // Create wire to write information to proper place in fetch queue. 296 toDecode = fetchQueue->getWire(0); 297} 298 299template<class Impl> 300void 301DefaultFetch<Impl>::initStage() 302{ 303 // Setup PC and nextPC with initial state. 304 for (ThreadID tid = 0; tid < numThreads; tid++) { 305 pc[tid] = cpu->pcState(tid); 306 fetchOffset[tid] = 0; 307 macroop[tid] = NULL; 308 delayedCommit[tid] = false; 309 } 310 311 for (ThreadID tid = 0; tid < numThreads; tid++) { 312 313 fetchStatus[tid] = Running; 314 315 priorityList.push_back(tid); 316 317 memReq[tid] = NULL; 318 319 stalls[tid].decode = false; 320 stalls[tid].rename = false; 321 stalls[tid].iew = false; 322 stalls[tid].commit = false; 323 } 324 325 // Schedule fetch to get the correct PC from the CPU 326 // scheduleFetchStartupEvent(1); 327 328 // Fetch needs to start fetching instructions at the very beginning, 329 // so it must start up in active state. 330 switchToActive(); 331} 332 333template<class Impl> 334void 335DefaultFetch<Impl>::setIcache() 336{ 337 assert(cpu->getInstPort().isConnected()); 338 339 // Size of cache block. 340 cacheBlkSize = cpu->getInstPort().peerBlockSize(); 341 342 // Create mask to get rid of offset bits. 343 cacheBlkMask = (cacheBlkSize - 1); 344 345 for (ThreadID tid = 0; tid < numThreads; tid++) { 346 // Create space to store a cache line. 347 cacheData[tid] = new uint8_t[cacheBlkSize]; 348 cacheDataPC[tid] = 0; 349 cacheDataValid[tid] = false; 350 } 351} 352 353template<class Impl> 354void 355DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) 356{ 357 ThreadID tid = pkt->req->threadId(); 358 359 DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid); 360 361 assert(!pkt->wasNacked()); 362 363 // Only change the status if it's still waiting on the icache access 364 // to return. 365 if (fetchStatus[tid] != IcacheWaitResponse \|\| 366 pkt->req != memReq[tid] \|\| 367 isSwitchedOut()) { 368 ++fetchIcacheSquashes; 369 delete pkt->req; 370 delete pkt; 371 return; 372 } 373 374 memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize); 375 cacheDataValid[tid] = true; 376 377 if (!drainPending) { 378 // Wake up the CPU (if it went to sleep and was waiting on 379 // this completion event). 380 cpu->wakeCPU(); 381 382 DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", 383 tid); 384 385 switchToActive(); 386 } 387 388 // Only switch to IcacheAccessComplete if we're not stalled as well. 389 if (checkStall(tid)) { 390 fetchStatus[tid] = Blocked; 391 } else { 392 fetchStatus[tid] = IcacheAccessComplete; 393 } 394 395 // Reset the mem req to NULL. 396 delete pkt->req; 397 delete pkt; 398 memReq[tid] = NULL; 399} 400 401template <class Impl> 402bool 403DefaultFetch<Impl>::drain() 404{ 405 // Fetch is ready to drain at any time. 406 cpu->signalDrained(); 407 drainPending = true; 408 return true; 409} 410 411template <class Impl> 412void 413DefaultFetch<Impl>::resume() 414{ 415 drainPending = false; 416} 417 418template <class Impl> 419void 420DefaultFetch<Impl>::switchOut() 421{ 422 switchedOut = true; 423 // Branch predictor needs to have its state cleared. 424 branchPred.switchOut(); 425} 426 427template <class Impl> 428void 429DefaultFetch<Impl>::takeOverFrom() 430{ 431 // the instruction port is now connected so we can get the block 432 // size 433 setIcache(); 434 435 // Reset all state 436 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { 437 stalls[i].decode = 0; 438 stalls[i].rename = 0; 439 stalls[i].iew = 0; 440 stalls[i].commit = 0; 441 pc[i] = cpu->pcState(i); 442 fetchStatus[i] = Running; 443 } 444 numInst = 0; 445 wroteToTimeBuffer = false; 446 _status = Inactive; 447 switchedOut = false; 448 interruptPending = false; 449 branchPred.takeOverFrom(); 450} 451 452template <class Impl> 453void 454DefaultFetch<Impl>::wakeFromQuiesce() 455{ 456 DPRINTF(Fetch, "Waking up from quiesce\n"); 457 // Hopefully this is safe 458 // @todo: Allow other threads to wake from quiesce. 459 fetchStatus[0] = Running; 460} 461 462template <class Impl> 463inline void 464DefaultFetch<Impl>::switchToActive() 465{ 466 if (_status == Inactive) { 467 DPRINTF(Activity, "Activating stage.\n"); 468 469 cpu->activateStage(O3CPU::FetchIdx); 470 471 _status = Active; 472 } 473} 474 475template <class Impl> 476inline void 477DefaultFetch<Impl>::switchToInactive() 478{ 479 if (_status == Active) { 480 DPRINTF(Activity, "Deactivating stage.\n"); 481 482 cpu->deactivateStage(O3CPU::FetchIdx); 483 484 _status = Inactive; 485 } 486} 487 488template <class Impl> 489bool 490DefaultFetch<Impl>::lookupAndUpdateNextPC( 491 DynInstPtr &inst, TheISA::PCState &nextPC) 492{ 493 // Do branch prediction check here. 494 // A bit of a misnomer...next_PC is actually the current PC until 495 // this function updates it. 496 bool predict_taken; 497 498 if (!inst->isControl()) { 499 TheISA::advancePC(nextPC, inst->staticInst); 500 inst->setPredTarg(nextPC); 501 inst->setPredTaken(false); 502 return false; 503 } 504 505 ThreadID tid = inst->threadNumber; 506 predict_taken = branchPred.predict(inst, nextPC, tid); 507 508 if (predict_taken) { 509 DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n", 510 tid, inst->seqNum, nextPC); 511 } else { 512 DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n", 513 tid, inst->seqNum); 514 } 515 516 DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n", 517 tid, inst->seqNum, nextPC); 518 inst->setPredTarg(nextPC); 519 inst->setPredTaken(predict_taken); 520 521 ++fetchedBranches; 522 523 if (predict_taken) { 524 ++predictedBranches; 525 } 526 527 return predict_taken; 528} 529 530template <class Impl> 531bool 532DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) 533{ 534 Fault fault = NoFault; 535 536 // @todo: not sure if these should block translation. 537 //AlphaDep 538 if (cacheBlocked) { 539 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", 540 tid); 541 return false; 542 } else if (isSwitchedOut()) { 543 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n", 544 tid); 545 return false; 546 } else if (checkInterrupt(pc) && !delayedCommit[tid]) { 547 // Hold off fetch from getting new instructions when: 548 // Cache is blocked, or 549 // while an interrupt is pending and we're not in PAL mode, or 550 // fetch is switched out. 551 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", 552 tid); 553 return false; 554 } 555 556 // Align the fetch address so it's at the start of a cache block. 557 Addr block_PC = icacheBlockAlignPC(vaddr); 558 559 DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n", 560 tid, block_PC, vaddr); 561 562 // Setup the memReq to do a read of the first instruction's address. 563 // Set the appropriate read size and flags as well. 564 // Build request here. 565 RequestPtr mem_req = 566 new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH, 567 cpu->instMasterId(), pc, cpu->thread[tid]->contextId(), tid); 568 569 memReq[tid] = mem_req; 570 571 // Initiate translation of the icache block 572 fetchStatus[tid] = ItlbWait; 573 FetchTranslation trans = new FetchTranslation(this); 574* cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(), 575 trans, BaseTLB::Execute); 576 return true; 577} 578 579template <class Impl> 580void 581DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req) 582{ 583 ThreadID tid = mem_req->threadId(); 584 Addr block_PC = mem_req->getVaddr(); 585 586 // Wake up CPU if it was idle 587 cpu->wakeCPU(); 588 589 if (fetchStatus[tid] != ItlbWait \|\| mem_req != memReq[tid] \|\| 590 mem_req->getVaddr() != memReq[tid]->getVaddr() \|\| isSwitchedOut()) { 591 DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", 592 tid); 593 ++fetchTlbSquashes; 594 delete mem_req; 595 return; 596 } 597 598 599 // If translation was successful, attempt to read the icache block. 600 if (fault == NoFault) { 601 // Check that we're not going off into random memory 602 // If we have, just wait around for commit to squash something and put 603 // us on the right track 604 if (!cpu->system->isMemAddr(mem_req->getPaddr())) { 605 warn("Address %#x is outside of physical memory, stopping fetch\n", 606 mem_req->getPaddr()); 607 fetchStatus[tid] = NoGoodAddr; 608 delete mem_req; 609 memReq[tid] = NULL; 610 return; 611 } 612 613 // Build packet here. 614 PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq); 615 data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); 616 617 cacheDataPC[tid] = block_PC; 618 cacheDataValid[tid] = false; 619 DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); 620 621 fetchedCacheLines++; 622 623 // Access the cache.	1/* 2 * Copyright (c) 2010-2011 ARM Limited 3 * All rights reserved. 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2004-2006 The Regents of The University of Michigan 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Kevin Lim 41 * Korey Sewell 42 / 43 44#include <algorithm> 45#include <cstring> 46#include <list> 47#include <map> 48#include <queue> 49 50#include "arch/isa_traits.hh" 51#include "arch/tlb.hh" 52#include "arch/utility.hh" 53#include "arch/vtophys.hh" 54#include "base/types.hh" 55#include "config/the_isa.hh" 56#include "cpu/base.hh" 57//#include "cpu/checker/cpu.hh" 58#include "cpu/o3/fetch.hh" 59#include "cpu/exetrace.hh" 60#include "debug/Activity.hh" 61#include "debug/Fetch.hh" 62#include "mem/packet.hh" 63#include "params/DerivO3CPU.hh" 64#include "sim/byteswap.hh" 65#include "sim/core.hh" 66#include "sim/eventq.hh" 67#include "sim/full_system.hh" 68#include "sim/system.hh" 69 70using namespace std; 71 72template<class Impl> 73DefaultFetch<Impl>::DefaultFetch(O3CPU _cpu, DerivO3CPUParams params) 74 : cpu(_cpu), 75 branchPred(params), 76 predecoder(NULL), 77 numInst(0), 78 decodeToFetchDelay(params->decodeToFetchDelay), 79 renameToFetchDelay(params->renameToFetchDelay), 80 iewToFetchDelay(params->iewToFetchDelay), 81 commitToFetchDelay(params->commitToFetchDelay), 82 fetchWidth(params->fetchWidth), 83 cacheBlocked(false), 84 retryPkt(NULL), 85 retryTid(InvalidThreadID), 86 numThreads(params->numThreads), 87 numFetchingThreads(params->smtNumFetchingThreads), 88 interruptPending(false), 89 drainPending(false), 90 switchedOut(false), 91 finishTranslationEvent(this) 92{ 93 if (numThreads > Impl::MaxThreads) 94 fatal("numThreads (%d) is larger than compiled limit (%d),\n" 95 "\tincrease MaxThreads in src/cpu/o3/impl.hh\n", 96 numThreads, static_cast<int>(Impl::MaxThreads)); 97 if (fetchWidth > Impl::MaxWidth) 98 fatal("fetchWidth (%d) is larger than compiled limit (%d),\n" 99 "\tincrease MaxWidth in src/cpu/o3/impl.hh\n", 100* fetchWidth, static_cast<int>(Impl::MaxWidth)); 101 102 // Set fetch stage's status to inactive. 103 _status = Inactive; 104 105 std::string policy = params->smtFetchPolicy; 106 107 // Convert string to lowercase 108 std::transform(policy.begin(), policy.end(), policy.begin(), 109 (int()(int)) tolower); 110* 111 // Figure out fetch policy 112 if (policy == "singlethread") { 113 fetchPolicy = SingleThread; 114 if (numThreads > 1) 115 panic("Invalid Fetch Policy for a SMT workload."); 116 } else if (policy == "roundrobin") { 117 fetchPolicy = RoundRobin; 118 DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); 119 } else if (policy == "branch") { 120 fetchPolicy = Branch; 121 DPRINTF(Fetch, "Fetch policy set to Branch Count\n"); 122 } else if (policy == "iqcount") { 123 fetchPolicy = IQ; 124 DPRINTF(Fetch, "Fetch policy set to IQ count\n"); 125 } else if (policy == "lsqcount") { 126 fetchPolicy = LSQ; 127 DPRINTF(Fetch, "Fetch policy set to LSQ count\n"); 128 } else { 129 fatal("Invalid Fetch Policy. Options Are: {SingleThread," 130 " RoundRobin,LSQcount,IQcount}\n"); 131 } 132 133 // Get the size of an instruction. 134 instSize = sizeof(TheISA::MachInst); 135} 136 137template <class Impl> 138std::string 139DefaultFetch<Impl>::name() const 140{ 141 return cpu->name() + ".fetch"; 142} 143 144template <class Impl> 145void 146DefaultFetch<Impl>::regStats() 147{ 148 icacheStallCycles 149 .name(name() + ".icacheStallCycles") 150 .desc("Number of cycles fetch is stalled on an Icache miss") 151 .prereq(icacheStallCycles); 152 153 fetchedInsts 154 .name(name() + ".Insts") 155 .desc("Number of instructions fetch has processed") 156 .prereq(fetchedInsts); 157 158 fetchedBranches 159 .name(name() + ".Branches") 160 .desc("Number of branches that fetch encountered") 161 .prereq(fetchedBranches); 162 163 predictedBranches 164 .name(name() + ".predictedBranches") 165 .desc("Number of branches that fetch has predicted taken") 166 .prereq(predictedBranches); 167 168 fetchCycles 169 .name(name() + ".Cycles") 170 .desc("Number of cycles fetch has run and was not squashing or" 171 " blocked") 172 .prereq(fetchCycles); 173 174 fetchSquashCycles 175 .name(name() + ".SquashCycles") 176 .desc("Number of cycles fetch has spent squashing") 177 .prereq(fetchSquashCycles); 178 179 fetchTlbCycles 180 .name(name() + ".TlbCycles") 181 .desc("Number of cycles fetch has spent waiting for tlb") 182 .prereq(fetchTlbCycles); 183 184 fetchIdleCycles 185 .name(name() + ".IdleCycles") 186 .desc("Number of cycles fetch was idle") 187 .prereq(fetchIdleCycles); 188 189 fetchBlockedCycles 190 .name(name() + ".BlockedCycles") 191 .desc("Number of cycles fetch has spent blocked") 192 .prereq(fetchBlockedCycles); 193 194 fetchedCacheLines 195 .name(name() + ".CacheLines") 196 .desc("Number of cache lines fetched") 197 .prereq(fetchedCacheLines); 198 199 fetchMiscStallCycles 200 .name(name() + ".MiscStallCycles") 201 .desc("Number of cycles fetch has spent waiting on interrupts, or " 202 "bad addresses, or out of MSHRs") 203 .prereq(fetchMiscStallCycles); 204 205 fetchPendingDrainCycles 206 .name(name() + ".PendingDrainCycles") 207 .desc("Number of cycles fetch has spent waiting on pipes to drain") 208 .prereq(fetchPendingDrainCycles); 209 210 fetchNoActiveThreadStallCycles 211 .name(name() + ".NoActiveThreadStallCycles") 212 .desc("Number of stall cycles due to no active thread to fetch from") 213 .prereq(fetchNoActiveThreadStallCycles); 214 215 fetchPendingTrapStallCycles 216 .name(name() + ".PendingTrapStallCycles") 217 .desc("Number of stall cycles due to pending traps") 218 .prereq(fetchPendingTrapStallCycles); 219 220 fetchPendingQuiesceStallCycles 221 .name(name() + ".PendingQuiesceStallCycles") 222 .desc("Number of stall cycles due to pending quiesce instructions") 223 .prereq(fetchPendingQuiesceStallCycles); 224 225 fetchIcacheWaitRetryStallCycles 226 .name(name() + ".IcacheWaitRetryStallCycles") 227 .desc("Number of stall cycles due to full MSHR") 228 .prereq(fetchIcacheWaitRetryStallCycles); 229 230 fetchIcacheSquashes 231 .name(name() + ".IcacheSquashes") 232 .desc("Number of outstanding Icache misses that were squashed") 233 .prereq(fetchIcacheSquashes); 234 235 fetchTlbSquashes 236 .name(name() + ".ItlbSquashes") 237 .desc("Number of outstanding ITLB misses that were squashed") 238 .prereq(fetchTlbSquashes); 239 240 fetchNisnDist 241 .init(/* base value / 0, 242* /* last value / fetchWidth, 243* /* bucket size / 1) 244* .name(name() + ".rateDist") 245 .desc("Number of instructions fetched each cycle (Total)") 246 .flags(Stats::pdf); 247 248 idleRate 249 .name(name() + ".idleRate") 250 .desc("Percent of cycles fetch was idle") 251 .prereq(idleRate); 252 idleRate = fetchIdleCycles * 100 / cpu->numCycles; 253 254 branchRate 255 .name(name() + ".branchRate") 256 .desc("Number of branch fetches per cycle") 257 .flags(Stats::total); 258 branchRate = fetchedBranches / cpu->numCycles; 259 260 fetchRate 261 .name(name() + ".rate") 262 .desc("Number of inst fetches per cycle") 263 .flags(Stats::total); 264 fetchRate = fetchedInsts / cpu->numCycles; 265 266 branchPred.regStats(); 267} 268 269template<class Impl> 270void 271DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> time_buffer) 272{ 273* timeBuffer = time_buffer; 274 275 // Create wires to get information from proper places in time buffer. 276 fromDecode = timeBuffer->getWire(-decodeToFetchDelay); 277 fromRename = timeBuffer->getWire(-renameToFetchDelay); 278 fromIEW = timeBuffer->getWire(-iewToFetchDelay); 279 fromCommit = timeBuffer->getWire(-commitToFetchDelay); 280} 281 282template<class Impl> 283void 284DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> at_ptr) 285{ 286* activeThreads = at_ptr; 287} 288 289template<class Impl> 290void 291DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> fq_ptr) 292{ 293* fetchQueue = fq_ptr; 294 295 // Create wire to write information to proper place in fetch queue. 296 toDecode = fetchQueue->getWire(0); 297} 298 299template<class Impl> 300void 301DefaultFetch<Impl>::initStage() 302{ 303 // Setup PC and nextPC with initial state. 304 for (ThreadID tid = 0; tid < numThreads; tid++) { 305 pc[tid] = cpu->pcState(tid); 306 fetchOffset[tid] = 0; 307 macroop[tid] = NULL; 308 delayedCommit[tid] = false; 309 } 310 311 for (ThreadID tid = 0; tid < numThreads; tid++) { 312 313 fetchStatus[tid] = Running; 314 315 priorityList.push_back(tid); 316 317 memReq[tid] = NULL; 318 319 stalls[tid].decode = false; 320 stalls[tid].rename = false; 321 stalls[tid].iew = false; 322 stalls[tid].commit = false; 323 } 324 325 // Schedule fetch to get the correct PC from the CPU 326 // scheduleFetchStartupEvent(1); 327 328 // Fetch needs to start fetching instructions at the very beginning, 329 // so it must start up in active state. 330 switchToActive(); 331} 332 333template<class Impl> 334void 335DefaultFetch<Impl>::setIcache() 336{ 337 assert(cpu->getInstPort().isConnected()); 338 339 // Size of cache block. 340 cacheBlkSize = cpu->getInstPort().peerBlockSize(); 341 342 // Create mask to get rid of offset bits. 343 cacheBlkMask = (cacheBlkSize - 1); 344 345 for (ThreadID tid = 0; tid < numThreads; tid++) { 346 // Create space to store a cache line. 347 cacheData[tid] = new uint8_t[cacheBlkSize]; 348 cacheDataPC[tid] = 0; 349 cacheDataValid[tid] = false; 350 } 351} 352 353template<class Impl> 354void 355DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) 356{ 357 ThreadID tid = pkt->req->threadId(); 358 359 DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid); 360 361 assert(!pkt->wasNacked()); 362 363 // Only change the status if it's still waiting on the icache access 364 // to return. 365 if (fetchStatus[tid] != IcacheWaitResponse \|\| 366 pkt->req != memReq[tid] \|\| 367 isSwitchedOut()) { 368 ++fetchIcacheSquashes; 369 delete pkt->req; 370 delete pkt; 371 return; 372 } 373 374 memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize); 375 cacheDataValid[tid] = true; 376 377 if (!drainPending) { 378 // Wake up the CPU (if it went to sleep and was waiting on 379 // this completion event). 380 cpu->wakeCPU(); 381 382 DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", 383 tid); 384 385 switchToActive(); 386 } 387 388 // Only switch to IcacheAccessComplete if we're not stalled as well. 389 if (checkStall(tid)) { 390 fetchStatus[tid] = Blocked; 391 } else { 392 fetchStatus[tid] = IcacheAccessComplete; 393 } 394 395 // Reset the mem req to NULL. 396 delete pkt->req; 397 delete pkt; 398 memReq[tid] = NULL; 399} 400 401template <class Impl> 402bool 403DefaultFetch<Impl>::drain() 404{ 405 // Fetch is ready to drain at any time. 406 cpu->signalDrained(); 407 drainPending = true; 408 return true; 409} 410 411template <class Impl> 412void 413DefaultFetch<Impl>::resume() 414{ 415 drainPending = false; 416} 417 418template <class Impl> 419void 420DefaultFetch<Impl>::switchOut() 421{ 422 switchedOut = true; 423 // Branch predictor needs to have its state cleared. 424 branchPred.switchOut(); 425} 426 427template <class Impl> 428void 429DefaultFetch<Impl>::takeOverFrom() 430{ 431 // the instruction port is now connected so we can get the block 432 // size 433 setIcache(); 434 435 // Reset all state 436 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { 437 stalls[i].decode = 0; 438 stalls[i].rename = 0; 439 stalls[i].iew = 0; 440 stalls[i].commit = 0; 441 pc[i] = cpu->pcState(i); 442 fetchStatus[i] = Running; 443 } 444 numInst = 0; 445 wroteToTimeBuffer = false; 446 _status = Inactive; 447 switchedOut = false; 448 interruptPending = false; 449 branchPred.takeOverFrom(); 450} 451 452template <class Impl> 453void 454DefaultFetch<Impl>::wakeFromQuiesce() 455{ 456 DPRINTF(Fetch, "Waking up from quiesce\n"); 457 // Hopefully this is safe 458 // @todo: Allow other threads to wake from quiesce. 459 fetchStatus[0] = Running; 460} 461 462template <class Impl> 463inline void 464DefaultFetch<Impl>::switchToActive() 465{ 466 if (_status == Inactive) { 467 DPRINTF(Activity, "Activating stage.\n"); 468 469 cpu->activateStage(O3CPU::FetchIdx); 470 471 _status = Active; 472 } 473} 474 475template <class Impl> 476inline void 477DefaultFetch<Impl>::switchToInactive() 478{ 479 if (_status == Active) { 480 DPRINTF(Activity, "Deactivating stage.\n"); 481 482 cpu->deactivateStage(O3CPU::FetchIdx); 483 484 _status = Inactive; 485 } 486} 487 488template <class Impl> 489bool 490DefaultFetch<Impl>::lookupAndUpdateNextPC( 491 DynInstPtr &inst, TheISA::PCState &nextPC) 492{ 493 // Do branch prediction check here. 494 // A bit of a misnomer...next_PC is actually the current PC until 495 // this function updates it. 496 bool predict_taken; 497 498 if (!inst->isControl()) { 499 TheISA::advancePC(nextPC, inst->staticInst); 500 inst->setPredTarg(nextPC); 501 inst->setPredTaken(false); 502 return false; 503 } 504 505 ThreadID tid = inst->threadNumber; 506 predict_taken = branchPred.predict(inst, nextPC, tid); 507 508 if (predict_taken) { 509 DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n", 510 tid, inst->seqNum, nextPC); 511 } else { 512 DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n", 513 tid, inst->seqNum); 514 } 515 516 DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n", 517 tid, inst->seqNum, nextPC); 518 inst->setPredTarg(nextPC); 519 inst->setPredTaken(predict_taken); 520 521 ++fetchedBranches; 522 523 if (predict_taken) { 524 ++predictedBranches; 525 } 526 527 return predict_taken; 528} 529 530template <class Impl> 531bool 532DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) 533{ 534 Fault fault = NoFault; 535 536 // @todo: not sure if these should block translation. 537 //AlphaDep 538 if (cacheBlocked) { 539 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", 540 tid); 541 return false; 542 } else if (isSwitchedOut()) { 543 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n", 544 tid); 545 return false; 546 } else if (checkInterrupt(pc) && !delayedCommit[tid]) { 547 // Hold off fetch from getting new instructions when: 548 // Cache is blocked, or 549 // while an interrupt is pending and we're not in PAL mode, or 550 // fetch is switched out. 551 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", 552 tid); 553 return false; 554 } 555 556 // Align the fetch address so it's at the start of a cache block. 557 Addr block_PC = icacheBlockAlignPC(vaddr); 558 559 DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n", 560 tid, block_PC, vaddr); 561 562 // Setup the memReq to do a read of the first instruction's address. 563 // Set the appropriate read size and flags as well. 564 // Build request here. 565 RequestPtr mem_req = 566 new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH, 567 cpu->instMasterId(), pc, cpu->thread[tid]->contextId(), tid); 568 569 memReq[tid] = mem_req; 570 571 // Initiate translation of the icache block 572 fetchStatus[tid] = ItlbWait; 573 FetchTranslation trans = new FetchTranslation(this); 574* cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(), 575 trans, BaseTLB::Execute); 576 return true; 577} 578 579template <class Impl> 580void 581DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req) 582{ 583 ThreadID tid = mem_req->threadId(); 584 Addr block_PC = mem_req->getVaddr(); 585 586 // Wake up CPU if it was idle 587 cpu->wakeCPU(); 588 589 if (fetchStatus[tid] != ItlbWait \|\| mem_req != memReq[tid] \|\| 590 mem_req->getVaddr() != memReq[tid]->getVaddr() \|\| isSwitchedOut()) { 591 DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", 592 tid); 593 ++fetchTlbSquashes; 594 delete mem_req; 595 return; 596 } 597 598 599 // If translation was successful, attempt to read the icache block. 600 if (fault == NoFault) { 601 // Check that we're not going off into random memory 602 // If we have, just wait around for commit to squash something and put 603 // us on the right track 604 if (!cpu->system->isMemAddr(mem_req->getPaddr())) { 605 warn("Address %#x is outside of physical memory, stopping fetch\n", 606 mem_req->getPaddr()); 607 fetchStatus[tid] = NoGoodAddr; 608 delete mem_req; 609 memReq[tid] = NULL; 610 return; 611 } 612 613 // Build packet here. 614 PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq); 615 data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); 616 617 cacheDataPC[tid] = block_PC; 618 cacheDataValid[tid] = false; 619 DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); 620 621 fetchedCacheLines++; 622 623 // Access the cache.
624 if (!cpu->getInstPort().sendTiming(data_pkt)) {	624 if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
625 assert(retryPkt == NULL); 626 assert(retryTid == InvalidThreadID); 627 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); 628 629 fetchStatus[tid] = IcacheWaitRetry; 630 retryPkt = data_pkt; 631 retryTid = tid; 632 cacheBlocked = true; 633 } else { 634 DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid); 635 DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " 636 "response.\n", tid); 637 638 lastIcacheStall[tid] = curTick(); 639 fetchStatus[tid] = IcacheWaitResponse; 640 } 641 } else { 642 if (!(numInst < fetchWidth)) { 643 assert(!finishTranslationEvent.scheduled()); 644 finishTranslationEvent.setFault(fault); 645 finishTranslationEvent.setReq(mem_req); 646 cpu->schedule(finishTranslationEvent, cpu->nextCycle(curTick() + cpu->ticks(1))); 647 return; 648 } 649 DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n", 650 tid, mem_req->getVaddr(), memReq[tid]->getVaddr()); 651 // Translation faulted, icache request won't be sent. 652 delete mem_req; 653 memReq[tid] = NULL; 654 655 // Send the fault to commit. This thread will not do anything 656 // until commit handles the fault. The only other way it can 657 // wake up is if a squash comes along and changes the PC. 658 TheISA::PCState fetchPC = pc[tid]; 659 660 DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid); 661 // We will use a nop in ordier to carry the fault. 662 DynInstPtr instruction = buildInst(tid, 663 decoder.decode(TheISA::NoopMachInst, fetchPC.instAddr()), 664 NULL, fetchPC, fetchPC, false); 665 666 instruction->setPredTarg(fetchPC); 667 instruction->fault = fault; 668 wroteToTimeBuffer = true; 669 670 DPRINTF(Activity, "Activity this cycle.\n"); 671 cpu->activityThisCycle(); 672 673 fetchStatus[tid] = TrapPending; 674 675 DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid); 676 DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n", 677 tid, fault->name(), pc[tid]); 678 } 679 _status = updateFetchStatus(); 680} 681 682template <class Impl> 683inline void 684DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC, 685 const DynInstPtr squashInst, ThreadID tid) 686{ 687 DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n", 688 tid, newPC); 689 690 pc[tid] = newPC; 691 fetchOffset[tid] = 0; 692 if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr()) 693 macroop[tid] = squashInst->macroop; 694 else 695 macroop[tid] = NULL; 696 predecoder.reset(); 697 698 // Clear the icache miss if it's outstanding. 699 if (fetchStatus[tid] == IcacheWaitResponse) { 700 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", 701 tid); 702 memReq[tid] = NULL; 703 } else if (fetchStatus[tid] == ItlbWait) { 704 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n", 705 tid); 706 memReq[tid] = NULL; 707 } 708 709 // Get rid of the retrying packet if it was from this thread. 710 if (retryTid == tid) { 711 assert(cacheBlocked); 712 if (retryPkt) { 713 delete retryPkt->req; 714 delete retryPkt; 715 } 716 retryPkt = NULL; 717 retryTid = InvalidThreadID; 718 } 719 720 fetchStatus[tid] = Squashing; 721 722 // microops are being squashed, it is not known wheather the 723 // youngest non-squashed microop was marked delayed commit 724 // or not. Setting the flag to true ensures that the 725 // interrupts are not handled when they cannot be, though 726 // some opportunities to handle interrupts may be missed. 727 delayedCommit[tid] = true; 728 729 ++fetchSquashCycles; 730} 731 732template<class Impl> 733void 734DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC, 735 const DynInstPtr squashInst, 736 const InstSeqNum seq_num, ThreadID tid) 737{ 738 DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid); 739 740 doSquash(newPC, squashInst, tid); 741 742 // Tell the CPU to remove any instructions that are in flight between 743 // fetch and decode. 744 cpu->removeInstsUntil(seq_num, tid); 745} 746 747template<class Impl> 748bool 749DefaultFetch<Impl>::checkStall(ThreadID tid) const 750{ 751 bool ret_val = false; 752 753 if (cpu->contextSwitch) { 754 DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid); 755 ret_val = true; 756 } else if (stalls[tid].decode) { 757 DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid); 758 ret_val = true; 759 } else if (stalls[tid].rename) { 760 DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid); 761 ret_val = true; 762 } else if (stalls[tid].iew) { 763 DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid); 764 ret_val = true; 765 } else if (stalls[tid].commit) { 766 DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid); 767 ret_val = true; 768 } 769 770 return ret_val; 771} 772 773template<class Impl> 774typename DefaultFetch<Impl>::FetchStatus 775DefaultFetch<Impl>::updateFetchStatus() 776{ 777 //Check Running 778 list<ThreadID>::iterator threads = activeThreads->begin(); 779 list<ThreadID>::iterator end = activeThreads->end(); 780 781 while (threads != end) { 782 ThreadID tid = threads++; 783* 784 if (fetchStatus[tid] == Running \|\| 785 fetchStatus[tid] == Squashing \|\| 786 fetchStatus[tid] == IcacheAccessComplete) { 787 788 if (_status == Inactive) { 789 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid); 790 791 if (fetchStatus[tid] == IcacheAccessComplete) { 792 DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache" 793 "completion\n",tid); 794 } 795 796 cpu->activateStage(O3CPU::FetchIdx); 797 } 798 799 return Active; 800 } 801 } 802 803 // Stage is switching from active to inactive, notify CPU of it. 804 if (_status == Active) { 805 DPRINTF(Activity, "Deactivating stage.\n"); 806 807 cpu->deactivateStage(O3CPU::FetchIdx); 808 } 809 810 return Inactive; 811} 812 813template <class Impl> 814void 815DefaultFetch<Impl>::squash(const TheISA::PCState &newPC, 816 const InstSeqNum seq_num, DynInstPtr squashInst, 817 ThreadID tid) 818{ 819 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid); 820 821 doSquash(newPC, squashInst, tid); 822 823 // Tell the CPU to remove any instructions that are not in the ROB. 824 cpu->removeInstsNotInROB(tid); 825} 826 827template <class Impl> 828void 829DefaultFetch<Impl>::tick() 830{ 831 list<ThreadID>::iterator threads = activeThreads->begin(); 832 list<ThreadID>::iterator end = activeThreads->end(); 833 bool status_change = false; 834 835 wroteToTimeBuffer = false; 836 837 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { 838 issuePipelinedIfetch[i] = false; 839 } 840 841 while (threads != end) { 842 ThreadID tid = threads++; 843* 844 // Check the signals for each thread to determine the proper status 845 // for each thread. 846 bool updated_status = checkSignalsAndUpdate(tid); 847 status_change = status_change \|\| updated_status; 848 } 849 850 DPRINTF(Fetch, "Running stage.\n"); 851 852 if (FullSystem) { 853 if (fromCommit->commitInfo[0].interruptPending) { 854 interruptPending = true; 855 } 856 857 if (fromCommit->commitInfo[0].clearInterrupt) { 858 interruptPending = false; 859 } 860 } 861 862 for (threadFetched = 0; threadFetched < numFetchingThreads; 863 threadFetched++) { 864 // Fetch each of the actively fetching threads. 865 fetch(status_change); 866 } 867 868 // Record number of instructions fetched this cycle for distribution. 869 fetchNisnDist.sample(numInst); 870 871 if (status_change) { 872 // Change the fetch stage status if there was a status change. 873 _status = updateFetchStatus(); 874 } 875 876 // If there was activity this cycle, inform the CPU of it. 877 if (wroteToTimeBuffer \|\| cpu->contextSwitch) { 878 DPRINTF(Activity, "Activity this cycle.\n"); 879 880 cpu->activityThisCycle(); 881 } 882 883 // Issue the next I-cache request if possible. 884 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { 885 if (issuePipelinedIfetch[i]) { 886 pipelineIcacheAccesses(i); 887 } 888 } 889 890 // Reset the number of the instruction we've fetched. 891 numInst = 0; 892} 893 894template <class Impl> 895bool 896DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid) 897{ 898 // Update the per thread stall statuses. 899 if (fromDecode->decodeBlock[tid]) { 900 stalls[tid].decode = true; 901 } 902 903 if (fromDecode->decodeUnblock[tid]) { 904 assert(stalls[tid].decode); 905 assert(!fromDecode->decodeBlock[tid]); 906 stalls[tid].decode = false; 907 } 908 909 if (fromRename->renameBlock[tid]) { 910 stalls[tid].rename = true; 911 } 912 913 if (fromRename->renameUnblock[tid]) { 914 assert(stalls[tid].rename); 915 assert(!fromRename->renameBlock[tid]); 916 stalls[tid].rename = false; 917 } 918 919 if (fromIEW->iewBlock[tid]) { 920 stalls[tid].iew = true; 921 } 922 923 if (fromIEW->iewUnblock[tid]) { 924 assert(stalls[tid].iew); 925 assert(!fromIEW->iewBlock[tid]); 926 stalls[tid].iew = false; 927 } 928 929 if (fromCommit->commitBlock[tid]) { 930 stalls[tid].commit = true; 931 } 932 933 if (fromCommit->commitUnblock[tid]) { 934 assert(stalls[tid].commit); 935 assert(!fromCommit->commitBlock[tid]); 936 stalls[tid].commit = false; 937 } 938 939 // Check squash signals from commit. 940 if (fromCommit->commitInfo[tid].squash) { 941 942 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " 943 "from commit.\n",tid); 944 // In any case, squash. 945 squash(fromCommit->commitInfo[tid].pc, 946 fromCommit->commitInfo[tid].doneSeqNum, 947 fromCommit->commitInfo[tid].squashInst, tid); 948 949 // If it was a branch mispredict on a control instruction, update the 950 // branch predictor with that instruction, otherwise just kill the 951 // invalid state we generated in after sequence number 952 if (fromCommit->commitInfo[tid].mispredictInst && 953 fromCommit->commitInfo[tid].mispredictInst->isControl()) { 954 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, 955 fromCommit->commitInfo[tid].pc, 956 fromCommit->commitInfo[tid].branchTaken, 957 tid); 958 } else { 959 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, 960 tid); 961 } 962 963 return true; 964 } else if (fromCommit->commitInfo[tid].doneSeqNum) { 965 // Update the branch predictor if it wasn't a squashed instruction 966 // that was broadcasted. 967 branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid); 968 } 969 970 // Check ROB squash signals from commit. 971 if (fromCommit->commitInfo[tid].robSquashing) { 972 DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid); 973 974 // Continue to squash. 975 fetchStatus[tid] = Squashing; 976 977 return true; 978 } 979 980 // Check squash signals from decode. 981 if (fromDecode->decodeInfo[tid].squash) { 982 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " 983 "from decode.\n",tid); 984 985 // Update the branch predictor. 986 if (fromDecode->decodeInfo[tid].branchMispredict) { 987 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, 988 fromDecode->decodeInfo[tid].nextPC, 989 fromDecode->decodeInfo[tid].branchTaken, 990 tid); 991 } else { 992 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, 993 tid); 994 } 995 996 if (fetchStatus[tid] != Squashing) { 997 998 DPRINTF(Fetch, "Squashing from decode with PC = %s\n", 999 fromDecode->decodeInfo[tid].nextPC); 1000 // Squash unless we're already squashing 1001 squashFromDecode(fromDecode->decodeInfo[tid].nextPC, 1002 fromDecode->decodeInfo[tid].squashInst, 1003 fromDecode->decodeInfo[tid].doneSeqNum, 1004 tid); 1005 1006 return true; 1007 } 1008 } 1009 1010 if (checkStall(tid) && 1011 fetchStatus[tid] != IcacheWaitResponse && 1012 fetchStatus[tid] != IcacheWaitRetry) { 1013 DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); 1014 1015 fetchStatus[tid] = Blocked; 1016 1017 return true; 1018 } 1019 1020 if (fetchStatus[tid] == Blocked \|\| 1021 fetchStatus[tid] == Squashing) { 1022 // Switch status to running if fetch isn't being told to block or 1023 // squash this cycle. 1024 DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n", 1025 tid); 1026 1027 fetchStatus[tid] = Running; 1028 1029 return true; 1030 } 1031 1032 // If we've reached this point, we have not gotten any signals that 1033 // cause fetch to change its status. Fetch remains the same as before. 1034 return false; 1035} 1036 1037template<class Impl> 1038typename Impl::DynInstPtr 1039DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst, 1040 StaticInstPtr curMacroop, TheISA::PCState thisPC, 1041 TheISA::PCState nextPC, bool trace) 1042{ 1043 // Get a sequence number. 1044 InstSeqNum seq = cpu->getAndIncrementInstSeq(); 1045 1046 // Create a new DynInst from the instruction fetched. 1047 DynInstPtr instruction = 1048 new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); 1049 instruction->setTid(tid); 1050 1051 instruction->setASID(tid); 1052 1053 instruction->setThreadState(cpu->thread[tid]); 1054 1055 DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created " 1056 "[sn:%lli].\n", tid, thisPC.instAddr(), 1057 thisPC.microPC(), seq); 1058 1059 DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid, 1060 instruction->staticInst-> 1061 disassemble(thisPC.instAddr())); 1062 1063#if TRACING_ON 1064 if (trace) { 1065 instruction->traceData = 1066 cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid), 1067 instruction->staticInst, thisPC, curMacroop); 1068 } 1069#else 1070 instruction->traceData = NULL; 1071#endif 1072 1073 // Add instruction to the CPU's list of instructions. 1074 instruction->setInstListIt(cpu->addInst(instruction)); 1075 1076 // Write the instruction to the first slot in the queue 1077 // that heads to decode. 1078 assert(numInst < fetchWidth); 1079 toDecode->insts[toDecode->size++] = instruction; 1080 1081 // Keep track of if we can take an interrupt at this boundary 1082 delayedCommit[tid] = instruction->isDelayedCommit(); 1083 1084 return instruction; 1085} 1086 1087template<class Impl> 1088void 1089DefaultFetch<Impl>::fetch(bool &status_change) 1090{ 1091 ////////////////////////////////////////// 1092 // Start actual fetch 1093 ////////////////////////////////////////// 1094 ThreadID tid = getFetchingThread(fetchPolicy); 1095 1096 if (tid == InvalidThreadID \|\| drainPending) { 1097 // Breaks looping condition in tick() 1098 threadFetched = numFetchingThreads; 1099 1100 if (numThreads == 1) { // @todo Per-thread stats 1101 profileStall(0); 1102 } 1103 1104 return; 1105 } 1106 1107 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); 1108 1109 // The current PC. 1110 TheISA::PCState thisPC = pc[tid]; 1111 1112 Addr pcOffset = fetchOffset[tid]; 1113 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1114 1115 bool inRom = isRomMicroPC(thisPC.microPC()); 1116 1117 // If returning from the delay of a cache miss, then update the status 1118 // to running, otherwise do the cache access. Possibly move this up 1119 // to tick() function. 1120 if (fetchStatus[tid] == IcacheAccessComplete) { 1121 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid); 1122 1123 fetchStatus[tid] = Running; 1124 status_change = true; 1125 } else if (fetchStatus[tid] == Running) { 1126 // Align the fetch PC so its at the start of a cache block. 1127 Addr block_PC = icacheBlockAlignPC(fetchAddr); 1128 1129 // If buffer is no longer valid or fetchAddr has moved to point 1130 // to the next cache block, AND we have no remaining ucode 1131 // from a macro-op, then start fetch from icache. 1132 if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid]) 1133 && !inRom && !macroop[tid]) { 1134 DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " 1135 "instruction, starting at PC %s.\n", tid, thisPC); 1136 1137 fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); 1138 1139 if (fetchStatus[tid] == IcacheWaitResponse) 1140 ++icacheStallCycles; 1141 else if (fetchStatus[tid] == ItlbWait) 1142 ++fetchTlbCycles; 1143 else 1144 ++fetchMiscStallCycles; 1145 return; 1146 } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid]) 1147 \|\| isSwitchedOut()) { 1148 // Stall CPU if an interrupt is posted and we're not issuing 1149 // an delayed commit micro-op currently (delayed commit instructions 1150 // are not interruptable by interrupts, only faults) 1151 ++fetchMiscStallCycles; 1152 DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid); 1153 return; 1154 } 1155 } else { 1156 if (fetchStatus[tid] == Idle) { 1157 ++fetchIdleCycles; 1158 DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid); 1159 } 1160 1161 // Status is Idle, so fetch should do nothing. 1162 return; 1163 } 1164 1165 ++fetchCycles; 1166 1167 TheISA::PCState nextPC = thisPC; 1168 1169 StaticInstPtr staticInst = NULL; 1170 StaticInstPtr curMacroop = macroop[tid]; 1171 1172 // If the read of the first instruction was successful, then grab the 1173 // instructions from the rest of the cache line and put them into the 1174 // queue heading to decode. 1175 1176 DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to " 1177 "decode.\n", tid); 1178 1179 // Need to keep track of whether or not a predicted branch 1180 // ended this fetch block. 1181 bool predictedBranch = false; 1182 1183 TheISA::MachInst cacheInsts = 1184* reinterpret_cast<TheISA::MachInst >(cacheData[tid]); 1185* 1186 const unsigned numInsts = cacheBlkSize / instSize; 1187 unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; 1188 1189 // Loop through instruction memory from the cache. 1190 // Keep issuing while fetchWidth is available and branch is not 1191 // predicted taken 1192 while (numInst < fetchWidth && !predictedBranch) { 1193 1194 // We need to process more memory if we aren't going to get a 1195 // StaticInst from the rom, the current macroop, or what's already 1196 // in the predecoder. 1197 bool needMem = !inRom && !curMacroop && !predecoder.extMachInstReady(); 1198 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1199 Addr block_PC = icacheBlockAlignPC(fetchAddr); 1200 1201 if (needMem) { 1202 // If buffer is no longer valid or fetchAddr has moved to point 1203 // to the next cache block then start fetch from icache. 1204 if (!cacheDataValid[tid] \|\| block_PC != cacheDataPC[tid]) 1205 break; 1206 1207 if (blkOffset >= numInsts) { 1208 // We need to process more memory, but we've run out of the 1209 // current block. 1210 break; 1211 } 1212 1213 if (ISA_HAS_DELAY_SLOT && pcOffset == 0) { 1214 // Walk past any annulled delay slot instructions. 1215 Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask; 1216 while (fetchAddr != pcAddr && blkOffset < numInsts) { 1217 blkOffset++; 1218 fetchAddr += instSize; 1219 } 1220 if (blkOffset >= numInsts) 1221 break; 1222 } 1223 MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]); 1224 1225 predecoder.setTC(cpu->thread[tid]->getTC()); 1226 predecoder.moreBytes(thisPC, fetchAddr, inst); 1227 1228 if (predecoder.needMoreBytes()) { 1229 blkOffset++; 1230 fetchAddr += instSize; 1231 pcOffset += instSize; 1232 } 1233 } 1234 1235 // Extract as many instructions and/or microops as we can from 1236 // the memory we've processed so far. 1237 do { 1238 if (!(curMacroop \|\| inRom)) { 1239 if (predecoder.extMachInstReady()) { 1240 ExtMachInst extMachInst = 1241 predecoder.getExtMachInst(thisPC); 1242 staticInst = 1243 decoder.decode(extMachInst, thisPC.instAddr()); 1244 1245 // Increment stat of fetched instructions. 1246 ++fetchedInsts; 1247 1248 if (staticInst->isMacroop()) { 1249 curMacroop = staticInst; 1250 } else { 1251 pcOffset = 0; 1252 } 1253 } else { 1254 // We need more bytes for this instruction so blkOffset and 1255 // pcOffset will be updated 1256 break; 1257 } 1258 } 1259 // Whether we're moving to a new macroop because we're at the 1260 // end of the current one, or the branch predictor incorrectly 1261 // thinks we are... 1262 bool newMacro = false; 1263 if (curMacroop \|\| inRom) { 1264 if (inRom) { 1265 staticInst = cpu->microcodeRom.fetchMicroop( 1266 thisPC.microPC(), curMacroop); 1267 } else { 1268 staticInst = curMacroop->fetchMicroop(thisPC.microPC()); 1269 } 1270 newMacro \|= staticInst->isLastMicroop(); 1271 } 1272 1273 DynInstPtr instruction = 1274 buildInst(tid, staticInst, curMacroop, 1275 thisPC, nextPC, true); 1276 1277 numInst++; 1278 1279#if TRACING_ON 1280 instruction->fetchTick = curTick(); 1281#endif 1282 1283 nextPC = thisPC; 1284 1285 // If we're branching after this instruction, quite fetching 1286 // from the same block then. 1287 predictedBranch \|= thisPC.branching(); 1288 predictedBranch \|= 1289 lookupAndUpdateNextPC(instruction, nextPC); 1290 if (predictedBranch) { 1291 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); 1292 } 1293 1294 newMacro \|= thisPC.instAddr() != nextPC.instAddr(); 1295 1296 // Move to the next instruction, unless we have a branch. 1297 thisPC = nextPC; 1298 inRom = isRomMicroPC(thisPC.microPC()); 1299 1300 if (newMacro) { 1301 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask; 1302 blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; 1303 pcOffset = 0; 1304 curMacroop = NULL; 1305 } 1306 1307 if (instruction->isQuiesce()) { 1308 DPRINTF(Fetch, 1309 "Quiesce instruction encountered, halting fetch!"); 1310 fetchStatus[tid] = QuiescePending; 1311 status_change = true; 1312 break; 1313 } 1314 } while ((curMacroop \|\| predecoder.extMachInstReady()) && 1315 numInst < fetchWidth); 1316 } 1317 1318 if (predictedBranch) { 1319 DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " 1320 "instruction encountered.\n", tid); 1321 } else if (numInst >= fetchWidth) { 1322 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " 1323 "for this cycle.\n", tid); 1324 } else if (blkOffset >= cacheBlkSize) { 1325 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " 1326 "block.\n", tid); 1327 } 1328 1329 macroop[tid] = curMacroop; 1330 fetchOffset[tid] = pcOffset; 1331 1332 if (numInst > 0) { 1333 wroteToTimeBuffer = true; 1334 } 1335 1336 pc[tid] = thisPC; 1337 1338 // pipeline a fetch if we're crossing a cache boundary and not in 1339 // a state that would preclude fetching 1340 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1341 Addr block_PC = icacheBlockAlignPC(fetchAddr); 1342 issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] && 1343 fetchStatus[tid] != IcacheWaitResponse && 1344 fetchStatus[tid] != ItlbWait && 1345 fetchStatus[tid] != IcacheWaitRetry && 1346 fetchStatus[tid] != QuiescePending && 1347 !curMacroop; 1348} 1349 1350template<class Impl> 1351void 1352DefaultFetch<Impl>::recvRetry() 1353{ 1354 if (retryPkt != NULL) { 1355 assert(cacheBlocked); 1356 assert(retryTid != InvalidThreadID); 1357 assert(fetchStatus[retryTid] == IcacheWaitRetry); 1358	625 assert(retryPkt == NULL); 626 assert(retryTid == InvalidThreadID); 627 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); 628 629 fetchStatus[tid] = IcacheWaitRetry; 630 retryPkt = data_pkt; 631 retryTid = tid; 632 cacheBlocked = true; 633 } else { 634 DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid); 635 DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " 636 "response.\n", tid); 637 638 lastIcacheStall[tid] = curTick(); 639 fetchStatus[tid] = IcacheWaitResponse; 640 } 641 } else { 642 if (!(numInst < fetchWidth)) { 643 assert(!finishTranslationEvent.scheduled()); 644 finishTranslationEvent.setFault(fault); 645 finishTranslationEvent.setReq(mem_req); 646 cpu->schedule(finishTranslationEvent, cpu->nextCycle(curTick() + cpu->ticks(1))); 647 return; 648 } 649 DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n", 650 tid, mem_req->getVaddr(), memReq[tid]->getVaddr()); 651 // Translation faulted, icache request won't be sent. 652 delete mem_req; 653 memReq[tid] = NULL; 654 655 // Send the fault to commit. This thread will not do anything 656 // until commit handles the fault. The only other way it can 657 // wake up is if a squash comes along and changes the PC. 658 TheISA::PCState fetchPC = pc[tid]; 659 660 DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid); 661 // We will use a nop in ordier to carry the fault. 662 DynInstPtr instruction = buildInst(tid, 663 decoder.decode(TheISA::NoopMachInst, fetchPC.instAddr()), 664 NULL, fetchPC, fetchPC, false); 665 666 instruction->setPredTarg(fetchPC); 667 instruction->fault = fault; 668 wroteToTimeBuffer = true; 669 670 DPRINTF(Activity, "Activity this cycle.\n"); 671 cpu->activityThisCycle(); 672 673 fetchStatus[tid] = TrapPending; 674 675 DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid); 676 DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n", 677 tid, fault->name(), pc[tid]); 678 } 679 _status = updateFetchStatus(); 680} 681 682template <class Impl> 683inline void 684DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC, 685 const DynInstPtr squashInst, ThreadID tid) 686{ 687 DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n", 688 tid, newPC); 689 690 pc[tid] = newPC; 691 fetchOffset[tid] = 0; 692 if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr()) 693 macroop[tid] = squashInst->macroop; 694 else 695 macroop[tid] = NULL; 696 predecoder.reset(); 697 698 // Clear the icache miss if it's outstanding. 699 if (fetchStatus[tid] == IcacheWaitResponse) { 700 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", 701 tid); 702 memReq[tid] = NULL; 703 } else if (fetchStatus[tid] == ItlbWait) { 704 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n", 705 tid); 706 memReq[tid] = NULL; 707 } 708 709 // Get rid of the retrying packet if it was from this thread. 710 if (retryTid == tid) { 711 assert(cacheBlocked); 712 if (retryPkt) { 713 delete retryPkt->req; 714 delete retryPkt; 715 } 716 retryPkt = NULL; 717 retryTid = InvalidThreadID; 718 } 719 720 fetchStatus[tid] = Squashing; 721 722 // microops are being squashed, it is not known wheather the 723 // youngest non-squashed microop was marked delayed commit 724 // or not. Setting the flag to true ensures that the 725 // interrupts are not handled when they cannot be, though 726 // some opportunities to handle interrupts may be missed. 727 delayedCommit[tid] = true; 728 729 ++fetchSquashCycles; 730} 731 732template<class Impl> 733void 734DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC, 735 const DynInstPtr squashInst, 736 const InstSeqNum seq_num, ThreadID tid) 737{ 738 DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid); 739 740 doSquash(newPC, squashInst, tid); 741 742 // Tell the CPU to remove any instructions that are in flight between 743 // fetch and decode. 744 cpu->removeInstsUntil(seq_num, tid); 745} 746 747template<class Impl> 748bool 749DefaultFetch<Impl>::checkStall(ThreadID tid) const 750{ 751 bool ret_val = false; 752 753 if (cpu->contextSwitch) { 754 DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid); 755 ret_val = true; 756 } else if (stalls[tid].decode) { 757 DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid); 758 ret_val = true; 759 } else if (stalls[tid].rename) { 760 DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid); 761 ret_val = true; 762 } else if (stalls[tid].iew) { 763 DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid); 764 ret_val = true; 765 } else if (stalls[tid].commit) { 766 DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid); 767 ret_val = true; 768 } 769 770 return ret_val; 771} 772 773template<class Impl> 774typename DefaultFetch<Impl>::FetchStatus 775DefaultFetch<Impl>::updateFetchStatus() 776{ 777 //Check Running 778 list<ThreadID>::iterator threads = activeThreads->begin(); 779 list<ThreadID>::iterator end = activeThreads->end(); 780 781 while (threads != end) { 782 ThreadID tid = threads++; 783* 784 if (fetchStatus[tid] == Running \|\| 785 fetchStatus[tid] == Squashing \|\| 786 fetchStatus[tid] == IcacheAccessComplete) { 787 788 if (_status == Inactive) { 789 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid); 790 791 if (fetchStatus[tid] == IcacheAccessComplete) { 792 DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache" 793 "completion\n",tid); 794 } 795 796 cpu->activateStage(O3CPU::FetchIdx); 797 } 798 799 return Active; 800 } 801 } 802 803 // Stage is switching from active to inactive, notify CPU of it. 804 if (_status == Active) { 805 DPRINTF(Activity, "Deactivating stage.\n"); 806 807 cpu->deactivateStage(O3CPU::FetchIdx); 808 } 809 810 return Inactive; 811} 812 813template <class Impl> 814void 815DefaultFetch<Impl>::squash(const TheISA::PCState &newPC, 816 const InstSeqNum seq_num, DynInstPtr squashInst, 817 ThreadID tid) 818{ 819 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid); 820 821 doSquash(newPC, squashInst, tid); 822 823 // Tell the CPU to remove any instructions that are not in the ROB. 824 cpu->removeInstsNotInROB(tid); 825} 826 827template <class Impl> 828void 829DefaultFetch<Impl>::tick() 830{ 831 list<ThreadID>::iterator threads = activeThreads->begin(); 832 list<ThreadID>::iterator end = activeThreads->end(); 833 bool status_change = false; 834 835 wroteToTimeBuffer = false; 836 837 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { 838 issuePipelinedIfetch[i] = false; 839 } 840 841 while (threads != end) { 842 ThreadID tid = threads++; 843* 844 // Check the signals for each thread to determine the proper status 845 // for each thread. 846 bool updated_status = checkSignalsAndUpdate(tid); 847 status_change = status_change \|\| updated_status; 848 } 849 850 DPRINTF(Fetch, "Running stage.\n"); 851 852 if (FullSystem) { 853 if (fromCommit->commitInfo[0].interruptPending) { 854 interruptPending = true; 855 } 856 857 if (fromCommit->commitInfo[0].clearInterrupt) { 858 interruptPending = false; 859 } 860 } 861 862 for (threadFetched = 0; threadFetched < numFetchingThreads; 863 threadFetched++) { 864 // Fetch each of the actively fetching threads. 865 fetch(status_change); 866 } 867 868 // Record number of instructions fetched this cycle for distribution. 869 fetchNisnDist.sample(numInst); 870 871 if (status_change) { 872 // Change the fetch stage status if there was a status change. 873 _status = updateFetchStatus(); 874 } 875 876 // If there was activity this cycle, inform the CPU of it. 877 if (wroteToTimeBuffer \|\| cpu->contextSwitch) { 878 DPRINTF(Activity, "Activity this cycle.\n"); 879 880 cpu->activityThisCycle(); 881 } 882 883 // Issue the next I-cache request if possible. 884 for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { 885 if (issuePipelinedIfetch[i]) { 886 pipelineIcacheAccesses(i); 887 } 888 } 889 890 // Reset the number of the instruction we've fetched. 891 numInst = 0; 892} 893 894template <class Impl> 895bool 896DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid) 897{ 898 // Update the per thread stall statuses. 899 if (fromDecode->decodeBlock[tid]) { 900 stalls[tid].decode = true; 901 } 902 903 if (fromDecode->decodeUnblock[tid]) { 904 assert(stalls[tid].decode); 905 assert(!fromDecode->decodeBlock[tid]); 906 stalls[tid].decode = false; 907 } 908 909 if (fromRename->renameBlock[tid]) { 910 stalls[tid].rename = true; 911 } 912 913 if (fromRename->renameUnblock[tid]) { 914 assert(stalls[tid].rename); 915 assert(!fromRename->renameBlock[tid]); 916 stalls[tid].rename = false; 917 } 918 919 if (fromIEW->iewBlock[tid]) { 920 stalls[tid].iew = true; 921 } 922 923 if (fromIEW->iewUnblock[tid]) { 924 assert(stalls[tid].iew); 925 assert(!fromIEW->iewBlock[tid]); 926 stalls[tid].iew = false; 927 } 928 929 if (fromCommit->commitBlock[tid]) { 930 stalls[tid].commit = true; 931 } 932 933 if (fromCommit->commitUnblock[tid]) { 934 assert(stalls[tid].commit); 935 assert(!fromCommit->commitBlock[tid]); 936 stalls[tid].commit = false; 937 } 938 939 // Check squash signals from commit. 940 if (fromCommit->commitInfo[tid].squash) { 941 942 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " 943 "from commit.\n",tid); 944 // In any case, squash. 945 squash(fromCommit->commitInfo[tid].pc, 946 fromCommit->commitInfo[tid].doneSeqNum, 947 fromCommit->commitInfo[tid].squashInst, tid); 948 949 // If it was a branch mispredict on a control instruction, update the 950 // branch predictor with that instruction, otherwise just kill the 951 // invalid state we generated in after sequence number 952 if (fromCommit->commitInfo[tid].mispredictInst && 953 fromCommit->commitInfo[tid].mispredictInst->isControl()) { 954 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, 955 fromCommit->commitInfo[tid].pc, 956 fromCommit->commitInfo[tid].branchTaken, 957 tid); 958 } else { 959 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, 960 tid); 961 } 962 963 return true; 964 } else if (fromCommit->commitInfo[tid].doneSeqNum) { 965 // Update the branch predictor if it wasn't a squashed instruction 966 // that was broadcasted. 967 branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid); 968 } 969 970 // Check ROB squash signals from commit. 971 if (fromCommit->commitInfo[tid].robSquashing) { 972 DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid); 973 974 // Continue to squash. 975 fetchStatus[tid] = Squashing; 976 977 return true; 978 } 979 980 // Check squash signals from decode. 981 if (fromDecode->decodeInfo[tid].squash) { 982 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " 983 "from decode.\n",tid); 984 985 // Update the branch predictor. 986 if (fromDecode->decodeInfo[tid].branchMispredict) { 987 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, 988 fromDecode->decodeInfo[tid].nextPC, 989 fromDecode->decodeInfo[tid].branchTaken, 990 tid); 991 } else { 992 branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, 993 tid); 994 } 995 996 if (fetchStatus[tid] != Squashing) { 997 998 DPRINTF(Fetch, "Squashing from decode with PC = %s\n", 999 fromDecode->decodeInfo[tid].nextPC); 1000 // Squash unless we're already squashing 1001 squashFromDecode(fromDecode->decodeInfo[tid].nextPC, 1002 fromDecode->decodeInfo[tid].squashInst, 1003 fromDecode->decodeInfo[tid].doneSeqNum, 1004 tid); 1005 1006 return true; 1007 } 1008 } 1009 1010 if (checkStall(tid) && 1011 fetchStatus[tid] != IcacheWaitResponse && 1012 fetchStatus[tid] != IcacheWaitRetry) { 1013 DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); 1014 1015 fetchStatus[tid] = Blocked; 1016 1017 return true; 1018 } 1019 1020 if (fetchStatus[tid] == Blocked \|\| 1021 fetchStatus[tid] == Squashing) { 1022 // Switch status to running if fetch isn't being told to block or 1023 // squash this cycle. 1024 DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n", 1025 tid); 1026 1027 fetchStatus[tid] = Running; 1028 1029 return true; 1030 } 1031 1032 // If we've reached this point, we have not gotten any signals that 1033 // cause fetch to change its status. Fetch remains the same as before. 1034 return false; 1035} 1036 1037template<class Impl> 1038typename Impl::DynInstPtr 1039DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst, 1040 StaticInstPtr curMacroop, TheISA::PCState thisPC, 1041 TheISA::PCState nextPC, bool trace) 1042{ 1043 // Get a sequence number. 1044 InstSeqNum seq = cpu->getAndIncrementInstSeq(); 1045 1046 // Create a new DynInst from the instruction fetched. 1047 DynInstPtr instruction = 1048 new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); 1049 instruction->setTid(tid); 1050 1051 instruction->setASID(tid); 1052 1053 instruction->setThreadState(cpu->thread[tid]); 1054 1055 DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created " 1056 "[sn:%lli].\n", tid, thisPC.instAddr(), 1057 thisPC.microPC(), seq); 1058 1059 DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid, 1060 instruction->staticInst-> 1061 disassemble(thisPC.instAddr())); 1062 1063#if TRACING_ON 1064 if (trace) { 1065 instruction->traceData = 1066 cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid), 1067 instruction->staticInst, thisPC, curMacroop); 1068 } 1069#else 1070 instruction->traceData = NULL; 1071#endif 1072 1073 // Add instruction to the CPU's list of instructions. 1074 instruction->setInstListIt(cpu->addInst(instruction)); 1075 1076 // Write the instruction to the first slot in the queue 1077 // that heads to decode. 1078 assert(numInst < fetchWidth); 1079 toDecode->insts[toDecode->size++] = instruction; 1080 1081 // Keep track of if we can take an interrupt at this boundary 1082 delayedCommit[tid] = instruction->isDelayedCommit(); 1083 1084 return instruction; 1085} 1086 1087template<class Impl> 1088void 1089DefaultFetch<Impl>::fetch(bool &status_change) 1090{ 1091 ////////////////////////////////////////// 1092 // Start actual fetch 1093 ////////////////////////////////////////// 1094 ThreadID tid = getFetchingThread(fetchPolicy); 1095 1096 if (tid == InvalidThreadID \|\| drainPending) { 1097 // Breaks looping condition in tick() 1098 threadFetched = numFetchingThreads; 1099 1100 if (numThreads == 1) { // @todo Per-thread stats 1101 profileStall(0); 1102 } 1103 1104 return; 1105 } 1106 1107 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); 1108 1109 // The current PC. 1110 TheISA::PCState thisPC = pc[tid]; 1111 1112 Addr pcOffset = fetchOffset[tid]; 1113 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1114 1115 bool inRom = isRomMicroPC(thisPC.microPC()); 1116 1117 // If returning from the delay of a cache miss, then update the status 1118 // to running, otherwise do the cache access. Possibly move this up 1119 // to tick() function. 1120 if (fetchStatus[tid] == IcacheAccessComplete) { 1121 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid); 1122 1123 fetchStatus[tid] = Running; 1124 status_change = true; 1125 } else if (fetchStatus[tid] == Running) { 1126 // Align the fetch PC so its at the start of a cache block. 1127 Addr block_PC = icacheBlockAlignPC(fetchAddr); 1128 1129 // If buffer is no longer valid or fetchAddr has moved to point 1130 // to the next cache block, AND we have no remaining ucode 1131 // from a macro-op, then start fetch from icache. 1132 if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid]) 1133 && !inRom && !macroop[tid]) { 1134 DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " 1135 "instruction, starting at PC %s.\n", tid, thisPC); 1136 1137 fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); 1138 1139 if (fetchStatus[tid] == IcacheWaitResponse) 1140 ++icacheStallCycles; 1141 else if (fetchStatus[tid] == ItlbWait) 1142 ++fetchTlbCycles; 1143 else 1144 ++fetchMiscStallCycles; 1145 return; 1146 } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid]) 1147 \|\| isSwitchedOut()) { 1148 // Stall CPU if an interrupt is posted and we're not issuing 1149 // an delayed commit micro-op currently (delayed commit instructions 1150 // are not interruptable by interrupts, only faults) 1151 ++fetchMiscStallCycles; 1152 DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid); 1153 return; 1154 } 1155 } else { 1156 if (fetchStatus[tid] == Idle) { 1157 ++fetchIdleCycles; 1158 DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid); 1159 } 1160 1161 // Status is Idle, so fetch should do nothing. 1162 return; 1163 } 1164 1165 ++fetchCycles; 1166 1167 TheISA::PCState nextPC = thisPC; 1168 1169 StaticInstPtr staticInst = NULL; 1170 StaticInstPtr curMacroop = macroop[tid]; 1171 1172 // If the read of the first instruction was successful, then grab the 1173 // instructions from the rest of the cache line and put them into the 1174 // queue heading to decode. 1175 1176 DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to " 1177 "decode.\n", tid); 1178 1179 // Need to keep track of whether or not a predicted branch 1180 // ended this fetch block. 1181 bool predictedBranch = false; 1182 1183 TheISA::MachInst cacheInsts = 1184* reinterpret_cast<TheISA::MachInst >(cacheData[tid]); 1185* 1186 const unsigned numInsts = cacheBlkSize / instSize; 1187 unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; 1188 1189 // Loop through instruction memory from the cache. 1190 // Keep issuing while fetchWidth is available and branch is not 1191 // predicted taken 1192 while (numInst < fetchWidth && !predictedBranch) { 1193 1194 // We need to process more memory if we aren't going to get a 1195 // StaticInst from the rom, the current macroop, or what's already 1196 // in the predecoder. 1197 bool needMem = !inRom && !curMacroop && !predecoder.extMachInstReady(); 1198 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1199 Addr block_PC = icacheBlockAlignPC(fetchAddr); 1200 1201 if (needMem) { 1202 // If buffer is no longer valid or fetchAddr has moved to point 1203 // to the next cache block then start fetch from icache. 1204 if (!cacheDataValid[tid] \|\| block_PC != cacheDataPC[tid]) 1205 break; 1206 1207 if (blkOffset >= numInsts) { 1208 // We need to process more memory, but we've run out of the 1209 // current block. 1210 break; 1211 } 1212 1213 if (ISA_HAS_DELAY_SLOT && pcOffset == 0) { 1214 // Walk past any annulled delay slot instructions. 1215 Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask; 1216 while (fetchAddr != pcAddr && blkOffset < numInsts) { 1217 blkOffset++; 1218 fetchAddr += instSize; 1219 } 1220 if (blkOffset >= numInsts) 1221 break; 1222 } 1223 MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]); 1224 1225 predecoder.setTC(cpu->thread[tid]->getTC()); 1226 predecoder.moreBytes(thisPC, fetchAddr, inst); 1227 1228 if (predecoder.needMoreBytes()) { 1229 blkOffset++; 1230 fetchAddr += instSize; 1231 pcOffset += instSize; 1232 } 1233 } 1234 1235 // Extract as many instructions and/or microops as we can from 1236 // the memory we've processed so far. 1237 do { 1238 if (!(curMacroop \|\| inRom)) { 1239 if (predecoder.extMachInstReady()) { 1240 ExtMachInst extMachInst = 1241 predecoder.getExtMachInst(thisPC); 1242 staticInst = 1243 decoder.decode(extMachInst, thisPC.instAddr()); 1244 1245 // Increment stat of fetched instructions. 1246 ++fetchedInsts; 1247 1248 if (staticInst->isMacroop()) { 1249 curMacroop = staticInst; 1250 } else { 1251 pcOffset = 0; 1252 } 1253 } else { 1254 // We need more bytes for this instruction so blkOffset and 1255 // pcOffset will be updated 1256 break; 1257 } 1258 } 1259 // Whether we're moving to a new macroop because we're at the 1260 // end of the current one, or the branch predictor incorrectly 1261 // thinks we are... 1262 bool newMacro = false; 1263 if (curMacroop \|\| inRom) { 1264 if (inRom) { 1265 staticInst = cpu->microcodeRom.fetchMicroop( 1266 thisPC.microPC(), curMacroop); 1267 } else { 1268 staticInst = curMacroop->fetchMicroop(thisPC.microPC()); 1269 } 1270 newMacro \|= staticInst->isLastMicroop(); 1271 } 1272 1273 DynInstPtr instruction = 1274 buildInst(tid, staticInst, curMacroop, 1275 thisPC, nextPC, true); 1276 1277 numInst++; 1278 1279#if TRACING_ON 1280 instruction->fetchTick = curTick(); 1281#endif 1282 1283 nextPC = thisPC; 1284 1285 // If we're branching after this instruction, quite fetching 1286 // from the same block then. 1287 predictedBranch \|= thisPC.branching(); 1288 predictedBranch \|= 1289 lookupAndUpdateNextPC(instruction, nextPC); 1290 if (predictedBranch) { 1291 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); 1292 } 1293 1294 newMacro \|= thisPC.instAddr() != nextPC.instAddr(); 1295 1296 // Move to the next instruction, unless we have a branch. 1297 thisPC = nextPC; 1298 inRom = isRomMicroPC(thisPC.microPC()); 1299 1300 if (newMacro) { 1301 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask; 1302 blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; 1303 pcOffset = 0; 1304 curMacroop = NULL; 1305 } 1306 1307 if (instruction->isQuiesce()) { 1308 DPRINTF(Fetch, 1309 "Quiesce instruction encountered, halting fetch!"); 1310 fetchStatus[tid] = QuiescePending; 1311 status_change = true; 1312 break; 1313 } 1314 } while ((curMacroop \|\| predecoder.extMachInstReady()) && 1315 numInst < fetchWidth); 1316 } 1317 1318 if (predictedBranch) { 1319 DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " 1320 "instruction encountered.\n", tid); 1321 } else if (numInst >= fetchWidth) { 1322 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " 1323 "for this cycle.\n", tid); 1324 } else if (blkOffset >= cacheBlkSize) { 1325 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " 1326 "block.\n", tid); 1327 } 1328 1329 macroop[tid] = curMacroop; 1330 fetchOffset[tid] = pcOffset; 1331 1332 if (numInst > 0) { 1333 wroteToTimeBuffer = true; 1334 } 1335 1336 pc[tid] = thisPC; 1337 1338 // pipeline a fetch if we're crossing a cache boundary and not in 1339 // a state that would preclude fetching 1340 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1341 Addr block_PC = icacheBlockAlignPC(fetchAddr); 1342 issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] && 1343 fetchStatus[tid] != IcacheWaitResponse && 1344 fetchStatus[tid] != ItlbWait && 1345 fetchStatus[tid] != IcacheWaitRetry && 1346 fetchStatus[tid] != QuiescePending && 1347 !curMacroop; 1348} 1349 1350template<class Impl> 1351void 1352DefaultFetch<Impl>::recvRetry() 1353{ 1354 if (retryPkt != NULL) { 1355 assert(cacheBlocked); 1356 assert(retryTid != InvalidThreadID); 1357 assert(fetchStatus[retryTid] == IcacheWaitRetry); 1358
1359 if (cpu->getInstPort().sendTiming(retryPkt)) {	1359 if (cpu->getInstPort().sendTimingReq(retryPkt)) {
1360 fetchStatus[retryTid] = IcacheWaitResponse; 1361 retryPkt = NULL; 1362 retryTid = InvalidThreadID; 1363 cacheBlocked = false; 1364 } 1365 } else { 1366 assert(retryTid == InvalidThreadID); 1367 // Access has been squashed since it was sent out. Just clear 1368 // the cache being blocked. 1369 cacheBlocked = false; 1370 } 1371} 1372 1373/////////////////////////////////////// 1374// // 1375// SMT FETCH POLICY MAINTAINED HERE // 1376// // 1377/////////////////////////////////////// 1378template<class Impl> 1379ThreadID 1380DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority) 1381{ 1382 if (numThreads > 1) { 1383 switch (fetch_priority) { 1384 1385 case SingleThread: 1386 return 0; 1387 1388 case RoundRobin: 1389 return roundRobin(); 1390 1391 case IQ: 1392 return iqCount(); 1393 1394 case LSQ: 1395 return lsqCount(); 1396 1397 case Branch: 1398 return branchCount(); 1399 1400 default: 1401 return InvalidThreadID; 1402 } 1403 } else { 1404 list<ThreadID>::iterator thread = activeThreads->begin(); 1405 if (thread == activeThreads->end()) { 1406 return InvalidThreadID; 1407 } 1408 1409 ThreadID tid = thread; 1410* 1411 if (fetchStatus[tid] == Running \|\| 1412 fetchStatus[tid] == IcacheAccessComplete \|\| 1413 fetchStatus[tid] == Idle) { 1414 return tid; 1415 } else { 1416 return InvalidThreadID; 1417 } 1418 } 1419} 1420 1421 1422template<class Impl> 1423ThreadID 1424DefaultFetch<Impl>::roundRobin() 1425{ 1426 list<ThreadID>::iterator pri_iter = priorityList.begin(); 1427 list<ThreadID>::iterator end = priorityList.end(); 1428 1429 ThreadID high_pri; 1430 1431 while (pri_iter != end) { 1432 high_pri = pri_iter; 1433* 1434 assert(high_pri <= numThreads); 1435 1436 if (fetchStatus[high_pri] == Running \|\| 1437 fetchStatus[high_pri] == IcacheAccessComplete \|\| 1438 fetchStatus[high_pri] == Idle) { 1439 1440 priorityList.erase(pri_iter); 1441 priorityList.push_back(high_pri); 1442 1443 return high_pri; 1444 } 1445 1446 pri_iter++; 1447 } 1448 1449 return InvalidThreadID; 1450} 1451 1452template<class Impl> 1453ThreadID 1454DefaultFetch<Impl>::iqCount() 1455{ 1456 std::priority_queue<unsigned> PQ; 1457 std::map<unsigned, ThreadID> threadMap; 1458 1459 list<ThreadID>::iterator threads = activeThreads->begin(); 1460 list<ThreadID>::iterator end = activeThreads->end(); 1461 1462 while (threads != end) { 1463 ThreadID tid = threads++; 1464* unsigned iqCount = fromIEW->iewInfo[tid].iqCount; 1465 1466 PQ.push(iqCount); 1467 threadMap[iqCount] = tid; 1468 } 1469 1470 while (!PQ.empty()) { 1471 ThreadID high_pri = threadMap[PQ.top()]; 1472 1473 if (fetchStatus[high_pri] == Running \|\| 1474 fetchStatus[high_pri] == IcacheAccessComplete \|\| 1475 fetchStatus[high_pri] == Idle) 1476 return high_pri; 1477 else 1478 PQ.pop(); 1479 1480 } 1481 1482 return InvalidThreadID; 1483} 1484 1485template<class Impl> 1486ThreadID 1487DefaultFetch<Impl>::lsqCount() 1488{ 1489 std::priority_queue<unsigned> PQ; 1490 std::map<unsigned, ThreadID> threadMap; 1491 1492 list<ThreadID>::iterator threads = activeThreads->begin(); 1493 list<ThreadID>::iterator end = activeThreads->end(); 1494 1495 while (threads != end) { 1496 ThreadID tid = threads++; 1497* unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount; 1498 1499 PQ.push(ldstqCount); 1500 threadMap[ldstqCount] = tid; 1501 } 1502 1503 while (!PQ.empty()) { 1504 ThreadID high_pri = threadMap[PQ.top()]; 1505 1506 if (fetchStatus[high_pri] == Running \|\| 1507 fetchStatus[high_pri] == IcacheAccessComplete \|\| 1508 fetchStatus[high_pri] == Idle) 1509 return high_pri; 1510 else 1511 PQ.pop(); 1512 } 1513 1514 return InvalidThreadID; 1515} 1516 1517template<class Impl> 1518ThreadID 1519DefaultFetch<Impl>::branchCount() 1520{ 1521#if 0 1522 list<ThreadID>::iterator thread = activeThreads->begin(); 1523 assert(thread != activeThreads->end()); 1524 ThreadID tid = thread; 1525#endif 1526* 1527 panic("Branch Count Fetch policy unimplemented\n"); 1528 return InvalidThreadID; 1529} 1530 1531template<class Impl> 1532void 1533DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid) 1534{ 1535 if (!issuePipelinedIfetch[tid]) { 1536 return; 1537 } 1538 1539 // The next PC to access. 1540 TheISA::PCState thisPC = pc[tid]; 1541 1542 if (isRomMicroPC(thisPC.microPC())) { 1543 return; 1544 } 1545 1546 Addr pcOffset = fetchOffset[tid]; 1547 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1548 1549 // Align the fetch PC so its at the start of a cache block. 1550 Addr block_PC = icacheBlockAlignPC(fetchAddr); 1551 1552 // Unless buffer already got the block, fetch it from icache. 1553 if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])) { 1554 DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, " 1555 "starting at PC %s.\n", tid, thisPC); 1556 1557 fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); 1558 } 1559} 1560 1561template<class Impl> 1562void 1563DefaultFetch<Impl>::profileStall(ThreadID tid) { 1564 DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); 1565 1566 // @todo Per-thread stats 1567 1568 if (drainPending) { 1569 ++fetchPendingDrainCycles; 1570 DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); 1571 } else if (activeThreads->empty()) { 1572 ++fetchNoActiveThreadStallCycles; 1573 DPRINTF(Fetch, "Fetch has no active thread!\n"); 1574 } else if (fetchStatus[tid] == Blocked) { 1575 ++fetchBlockedCycles; 1576 DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid); 1577 } else if (fetchStatus[tid] == Squashing) { 1578 ++fetchSquashCycles; 1579 DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); 1580 } else if (fetchStatus[tid] == IcacheWaitResponse) { 1581 ++icacheStallCycles; 1582 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", 1583 tid); 1584 } else if (fetchStatus[tid] == ItlbWait) { 1585 ++fetchTlbCycles; 1586 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to " 1587 "finish!\n", tid); 1588 } else if (fetchStatus[tid] == TrapPending) { 1589 ++fetchPendingTrapStallCycles; 1590 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n", 1591 tid); 1592 } else if (fetchStatus[tid] == QuiescePending) { 1593 ++fetchPendingQuiesceStallCycles; 1594 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce " 1595 "instruction!\n", tid); 1596 } else if (fetchStatus[tid] == IcacheWaitRetry) { 1597 ++fetchIcacheWaitRetryStallCycles; 1598 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n", 1599 tid); 1600 } else if (fetchStatus[tid] == NoGoodAddr) { 1601 DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n", 1602 tid); 1603 } else { 1604 DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n", 1605 tid, fetchStatus[tid]); 1606 } 1607}	1360 fetchStatus[retryTid] = IcacheWaitResponse; 1361 retryPkt = NULL; 1362 retryTid = InvalidThreadID; 1363 cacheBlocked = false; 1364 } 1365 } else { 1366 assert(retryTid == InvalidThreadID); 1367 // Access has been squashed since it was sent out. Just clear 1368 // the cache being blocked. 1369 cacheBlocked = false; 1370 } 1371} 1372 1373/////////////////////////////////////// 1374// // 1375// SMT FETCH POLICY MAINTAINED HERE // 1376// // 1377/////////////////////////////////////// 1378template<class Impl> 1379ThreadID 1380DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority) 1381{ 1382 if (numThreads > 1) { 1383 switch (fetch_priority) { 1384 1385 case SingleThread: 1386 return 0; 1387 1388 case RoundRobin: 1389 return roundRobin(); 1390 1391 case IQ: 1392 return iqCount(); 1393 1394 case LSQ: 1395 return lsqCount(); 1396 1397 case Branch: 1398 return branchCount(); 1399 1400 default: 1401 return InvalidThreadID; 1402 } 1403 } else { 1404 list<ThreadID>::iterator thread = activeThreads->begin(); 1405 if (thread == activeThreads->end()) { 1406 return InvalidThreadID; 1407 } 1408 1409 ThreadID tid = thread; 1410* 1411 if (fetchStatus[tid] == Running \|\| 1412 fetchStatus[tid] == IcacheAccessComplete \|\| 1413 fetchStatus[tid] == Idle) { 1414 return tid; 1415 } else { 1416 return InvalidThreadID; 1417 } 1418 } 1419} 1420 1421 1422template<class Impl> 1423ThreadID 1424DefaultFetch<Impl>::roundRobin() 1425{ 1426 list<ThreadID>::iterator pri_iter = priorityList.begin(); 1427 list<ThreadID>::iterator end = priorityList.end(); 1428 1429 ThreadID high_pri; 1430 1431 while (pri_iter != end) { 1432 high_pri = pri_iter; 1433* 1434 assert(high_pri <= numThreads); 1435 1436 if (fetchStatus[high_pri] == Running \|\| 1437 fetchStatus[high_pri] == IcacheAccessComplete \|\| 1438 fetchStatus[high_pri] == Idle) { 1439 1440 priorityList.erase(pri_iter); 1441 priorityList.push_back(high_pri); 1442 1443 return high_pri; 1444 } 1445 1446 pri_iter++; 1447 } 1448 1449 return InvalidThreadID; 1450} 1451 1452template<class Impl> 1453ThreadID 1454DefaultFetch<Impl>::iqCount() 1455{ 1456 std::priority_queue<unsigned> PQ; 1457 std::map<unsigned, ThreadID> threadMap; 1458 1459 list<ThreadID>::iterator threads = activeThreads->begin(); 1460 list<ThreadID>::iterator end = activeThreads->end(); 1461 1462 while (threads != end) { 1463 ThreadID tid = threads++; 1464* unsigned iqCount = fromIEW->iewInfo[tid].iqCount; 1465 1466 PQ.push(iqCount); 1467 threadMap[iqCount] = tid; 1468 } 1469 1470 while (!PQ.empty()) { 1471 ThreadID high_pri = threadMap[PQ.top()]; 1472 1473 if (fetchStatus[high_pri] == Running \|\| 1474 fetchStatus[high_pri] == IcacheAccessComplete \|\| 1475 fetchStatus[high_pri] == Idle) 1476 return high_pri; 1477 else 1478 PQ.pop(); 1479 1480 } 1481 1482 return InvalidThreadID; 1483} 1484 1485template<class Impl> 1486ThreadID 1487DefaultFetch<Impl>::lsqCount() 1488{ 1489 std::priority_queue<unsigned> PQ; 1490 std::map<unsigned, ThreadID> threadMap; 1491 1492 list<ThreadID>::iterator threads = activeThreads->begin(); 1493 list<ThreadID>::iterator end = activeThreads->end(); 1494 1495 while (threads != end) { 1496 ThreadID tid = threads++; 1497* unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount; 1498 1499 PQ.push(ldstqCount); 1500 threadMap[ldstqCount] = tid; 1501 } 1502 1503 while (!PQ.empty()) { 1504 ThreadID high_pri = threadMap[PQ.top()]; 1505 1506 if (fetchStatus[high_pri] == Running \|\| 1507 fetchStatus[high_pri] == IcacheAccessComplete \|\| 1508 fetchStatus[high_pri] == Idle) 1509 return high_pri; 1510 else 1511 PQ.pop(); 1512 } 1513 1514 return InvalidThreadID; 1515} 1516 1517template<class Impl> 1518ThreadID 1519DefaultFetch<Impl>::branchCount() 1520{ 1521#if 0 1522 list<ThreadID>::iterator thread = activeThreads->begin(); 1523 assert(thread != activeThreads->end()); 1524 ThreadID tid = thread; 1525#endif 1526* 1527 panic("Branch Count Fetch policy unimplemented\n"); 1528 return InvalidThreadID; 1529} 1530 1531template<class Impl> 1532void 1533DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid) 1534{ 1535 if (!issuePipelinedIfetch[tid]) { 1536 return; 1537 } 1538 1539 // The next PC to access. 1540 TheISA::PCState thisPC = pc[tid]; 1541 1542 if (isRomMicroPC(thisPC.microPC())) { 1543 return; 1544 } 1545 1546 Addr pcOffset = fetchOffset[tid]; 1547 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; 1548 1549 // Align the fetch PC so its at the start of a cache block. 1550 Addr block_PC = icacheBlockAlignPC(fetchAddr); 1551 1552 // Unless buffer already got the block, fetch it from icache. 1553 if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])) { 1554 DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, " 1555 "starting at PC %s.\n", tid, thisPC); 1556 1557 fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); 1558 } 1559} 1560 1561template<class Impl> 1562void 1563DefaultFetch<Impl>::profileStall(ThreadID tid) { 1564 DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); 1565 1566 // @todo Per-thread stats 1567 1568 if (drainPending) { 1569 ++fetchPendingDrainCycles; 1570 DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); 1571 } else if (activeThreads->empty()) { 1572 ++fetchNoActiveThreadStallCycles; 1573 DPRINTF(Fetch, "Fetch has no active thread!\n"); 1574 } else if (fetchStatus[tid] == Blocked) { 1575 ++fetchBlockedCycles; 1576 DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid); 1577 } else if (fetchStatus[tid] == Squashing) { 1578 ++fetchSquashCycles; 1579 DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); 1580 } else if (fetchStatus[tid] == IcacheWaitResponse) { 1581 ++icacheStallCycles; 1582 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", 1583 tid); 1584 } else if (fetchStatus[tid] == ItlbWait) { 1585 ++fetchTlbCycles; 1586 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to " 1587 "finish!\n", tid); 1588 } else if (fetchStatus[tid] == TrapPending) { 1589 ++fetchPendingTrapStallCycles; 1590 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n", 1591 tid); 1592 } else if (fetchStatus[tid] == QuiescePending) { 1593 ++fetchPendingQuiesceStallCycles; 1594 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce " 1595 "instruction!\n", tid); 1596 } else if (fetchStatus[tid] == IcacheWaitRetry) { 1597 ++fetchIcacheWaitRetryStallCycles; 1598 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n", 1599 tid); 1600 } else if (fetchStatus[tid] == NoGoodAddr) { 1601 DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n", 1602 tid); 1603 } else { 1604 DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n", 1605 tid, fetchStatus[tid]); 1606 } 1607}