1/* 2 * Copyright (c) 2010-2012, 2014 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2004-2006 The Regents of The University of Michigan 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Kevin Lim 41 * Korey Sewell 42 */ 43 44#ifndef __CPU_O3_FETCH_HH__ 45#define __CPU_O3_FETCH_HH__ 46 47#include "arch/decoder.hh" 48#include "arch/utility.hh" 49#include "base/statistics.hh" 50#include "config/the_isa.hh" 51#include "cpu/pc_event.hh" 52#include "cpu/pred/bpred_unit.hh" 53#include "cpu/timebuf.hh" 54#include "cpu/translation.hh" 55#include "enums/FetchPolicy.hh" 56#include "mem/packet.hh" 57#include "mem/port.hh" 58#include "sim/eventq.hh" 59#include "sim/probe/probe.hh" 60 61struct DerivO3CPUParams; 62template <class Impl> 63class FullO3CPU; 64 65/** 66 * DefaultFetch class handles both single threaded and SMT fetch. Its 67 * width is specified by the parameters; each cycle it tries to fetch 68 * that many instructions. It supports using a branch predictor to 69 * predict direction and targets. 70 * It supports the idling functionality of the CPU by indicating to 71 * the CPU when it is active and inactive. 72 */ 73template <class Impl> 74class DefaultFetch 75{ 76 public: 77 /** Typedefs from Impl. */ 78 typedef typename Impl::CPUPol CPUPol; 79 typedef typename Impl::DynInst DynInst; 80 typedef typename Impl::DynInstPtr DynInstPtr; 81 typedef typename Impl::O3CPU O3CPU; 82 83 /** Typedefs from the CPU policy. */ 84 typedef typename CPUPol::FetchStruct FetchStruct; 85 typedef typename CPUPol::TimeStruct TimeStruct; 86 87 /** Typedefs from ISA. */ 88 typedef TheISA::MachInst MachInst; 89 90 /** 91 * IcachePort class for instruction fetch. 92 */ 93 class IcachePort : public MasterPort 94 { 95 protected: 96 /** Pointer to fetch. */ 97 DefaultFetch<Impl> *fetch; 98 99 public: 100 /** Default constructor. */ 101 IcachePort(DefaultFetch<Impl> *_fetch, FullO3CPU<Impl>* _cpu) 102 : MasterPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch) 103 { } 104 105 protected: 106 107 /** Timing version of receive. Handles setting fetch to the 108 * proper status to start fetching. */ 109 virtual bool recvTimingResp(PacketPtr pkt); 110 111 /** Handles doing a retry of a failed fetch. */ 112 virtual void recvReqRetry(); 113 }; 114 115 class FetchTranslation : public BaseTLB::Translation 116 { 117 protected: 118 DefaultFetch<Impl> *fetch; 119 120 public: 121 FetchTranslation(DefaultFetch<Impl> *_fetch) 122 : fetch(_fetch) 123 {} 124 125 void 126 markDelayed() 127 {} 128 129 void 130 finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc, 131 BaseTLB::Mode mode) 132 { 133 assert(mode == BaseTLB::Execute); 134 fetch->finishTranslation(fault, req); 135 delete this; 136 } 137 }; 138 139 private: 140 /* Event to delay delivery of a fetch translation result in case of 141 * a fault and the nop to carry the fault cannot be generated 142 * immediately */ 143 class FinishTranslationEvent : public Event 144 { 145 private: 146 DefaultFetch<Impl> *fetch; 147 Fault fault; 148 RequestPtr req; 149 150 public: 151 FinishTranslationEvent(DefaultFetch<Impl> *_fetch) 152 : fetch(_fetch), req(nullptr) 153 {} 154 155 void setFault(Fault _fault) 156 { 157 fault = _fault; 158 } 159 160 void setReq(const RequestPtr &_req) 161 { 162 req = _req; 163 } 164 165 /** Process the delayed finish translation */ 166 void process() 167 { 168 assert(fetch->numInst < fetch->fetchWidth); 169 fetch->finishTranslation(fault, req); 170 } 171 172 const char *description() const 173 { 174 return "FullO3CPU FetchFinishTranslation"; 175 } 176 }; 177 178 public: 179 /** Overall fetch status. Used to determine if the CPU can 180 * deschedule itsef due to a lack of activity. 181 */ 182 enum FetchStatus { 183 Active, 184 Inactive 185 }; 186 187 /** Individual thread status. */ 188 enum ThreadStatus { 189 Running, 190 Idle, 191 Squashing, 192 Blocked, 193 Fetching, 194 TrapPending, 195 QuiescePending, 196 ItlbWait, 197 IcacheWaitResponse, 198 IcacheWaitRetry, 199 IcacheAccessComplete, 200 NoGoodAddr 201 }; 202 203 private: 204 /** Fetch status. */ 205 FetchStatus _status; 206 207 /** Per-thread status. */ 208 ThreadStatus fetchStatus[Impl::MaxThreads]; 209 210 /** Fetch policy. */ 211 FetchPolicy fetchPolicy; 212 213 /** List that has the threads organized by priority. */ 214 std::list<ThreadID> priorityList; 215 216 /** Probe points. */ 217 ProbePointArg<DynInstPtr> *ppFetch; 218 /** To probe when a fetch request is successfully sent. */ 219 ProbePointArg<RequestPtr> *ppFetchRequestSent; 220 221 public: 222 /** DefaultFetch constructor. */ 223 DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params); 224 225 /** Returns the name of fetch. */ 226 std::string name() const; 227 228 /** Registers statistics. */ 229 void regStats(); 230 231 /** Registers probes. */ 232 void regProbePoints(); 233 234 /** Sets the main backwards communication time buffer pointer. */ 235 void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer); 236 237 /** Sets pointer to list of active threads. */ 238 void setActiveThreads(std::list<ThreadID> *at_ptr); 239 240 /** Sets pointer to time buffer used to communicate to the next stage. */ 241 void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr); 242 243 /** Initialize stage. */ 244 void startupStage(); 245 246 /** Clear all thread-specific states*/ 247 void clearStates(ThreadID tid); 248 249 /** Handles retrying the fetch access. */ 250 void recvReqRetry(); 251 252 /** Processes cache completion event. */ 253 void processCacheCompletion(PacketPtr pkt); 254 255 /** Resume after a drain. */ 256 void drainResume(); 257 258 /** Perform sanity checks after a drain. */ 259 void drainSanityCheck() const; 260 261 /** Has the stage drained? */ 262 bool isDrained() const; 263 264 /** Takes over from another CPU's thread. */ 265 void takeOverFrom(); 266 267 /** 268 * Stall the fetch stage after reaching a safe drain point. 269 * 270 * The CPU uses this method to stop fetching instructions from a 271 * thread that has been drained. The drain stall is different from 272 * all other stalls in that it is signaled instantly from the 273 * commit stage (without the normal communication delay) when it 274 * has reached a safe point to drain from. 275 */ 276 void drainStall(ThreadID tid); 277 278 /** Tells fetch to wake up from a quiesce instruction. */ 279 void wakeFromQuiesce(); 280 281 /** For priority-based fetch policies, need to keep update priorityList */ 282 void deactivateThread(ThreadID tid); 283 private: 284 /** Reset this pipeline stage */ 285 void resetStage(); 286 287 /** Changes the status of this stage to active, and indicates this 288 * to the CPU. 289 */ 290 inline void switchToActive(); 291 292 /** Changes the status of this stage to inactive, and indicates 293 * this to the CPU. 294 */ 295 inline void switchToInactive(); 296 297 /** 298 * Looks up in the branch predictor to see if the next PC should be 299 * either next PC+=MachInst or a branch target. 300 * @param next_PC Next PC variable passed in by reference. It is 301 * expected to be set to the current PC; it will be updated with what 302 * the next PC will be. 303 * @param next_NPC Used for ISAs which use delay slots. 304 * @return Whether or not a branch was predicted as taken. 305 */ 306 bool lookupAndUpdateNextPC(const DynInstPtr &inst, TheISA::PCState &pc); 307 308 /** 309 * Fetches the cache line that contains the fetch PC. Returns any 310 * fault that happened. Puts the data into the class variable 311 * fetchBuffer, which may not hold the entire fetched cache line. 312 * @param vaddr The memory address that is being fetched from. 313 * @param ret_fault The fault reference that will be set to the result of 314 * the icache access. 315 * @param tid Thread id. 316 * @param pc The actual PC of the current instruction. 317 * @return Any fault that occured. 318 */ 319 bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc); 320 void finishTranslation(const Fault &fault, const RequestPtr &mem_req); 321 322 323 /** Check if an interrupt is pending and that we need to handle 324 */ 325 bool 326 checkInterrupt(Addr pc) 327 { 328 return (interruptPending && (THE_ISA != ALPHA_ISA || !(pc & 0x3))); 329 } 330 331 /** Squashes a specific thread and resets the PC. */ 332 inline void doSquash(const TheISA::PCState &newPC, 333 const DynInstPtr squashInst, ThreadID tid); 334 335 /** Squashes a specific thread and resets the PC. Also tells the CPU to 336 * remove any instructions between fetch and decode that should be sqaushed. 337 */ 338 void squashFromDecode(const TheISA::PCState &newPC, 339 const DynInstPtr squashInst, 340 const InstSeqNum seq_num, ThreadID tid); 341 342 /** Checks if a thread is stalled. */ 343 bool checkStall(ThreadID tid) const; 344 345 /** Updates overall fetch stage status; to be called at the end of each 346 * cycle. */ 347 FetchStatus updateFetchStatus(); 348 349 public: 350 /** Squashes a specific thread and resets the PC. Also tells the CPU to 351 * remove any instructions that are not in the ROB. The source of this 352 * squash should be the commit stage. 353 */ 354 void squash(const TheISA::PCState &newPC, const InstSeqNum seq_num, 355 DynInstPtr squashInst, ThreadID tid); 356 357 /** Ticks the fetch stage, processing all inputs signals and fetching 358 * as many instructions as possible. 359 */ 360 void tick(); 361 362 /** Checks all input signals and updates the status as necessary. 363 * @return: Returns if the status has changed due to input signals. 364 */ 365 bool checkSignalsAndUpdate(ThreadID tid); 366 367 /** Does the actual fetching of instructions and passing them on to the 368 * next stage. 369 * @param status_change fetch() sets this variable if there was a status 370 * change (ie switching to IcacheMissStall). 371 */ 372 void fetch(bool &status_change); 373 374 /** Align a PC to the start of a fetch buffer block. */ 375 Addr fetchBufferAlignPC(Addr addr) 376 { 377 return (addr & ~(fetchBufferMask)); 378 } 379 380 /** The decoder. */ 381 TheISA::Decoder *decoder[Impl::MaxThreads]; 382 383 MasterPort &getInstPort() { return icachePort; } 384 385 private: 386 DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst, 387 StaticInstPtr curMacroop, TheISA::PCState thisPC, 388 TheISA::PCState nextPC, bool trace); 389 390 /** Returns the appropriate thread to fetch, given the fetch policy. */ 391 ThreadID getFetchingThread(); 392 393 /** Returns the appropriate thread to fetch using a round robin policy. */ 394 ThreadID roundRobin(); 395 396 /** Returns the appropriate thread to fetch using the IQ count policy. */ 397 ThreadID iqCount(); 398 399 /** Returns the appropriate thread to fetch using the LSQ count policy. */ 400 ThreadID lsqCount(); 401 402 /** Returns the appropriate thread to fetch using the branch count 403 * policy. */ 404 ThreadID branchCount(); 405 406 /** Pipeline the next I-cache access to the current one. */ 407 void pipelineIcacheAccesses(ThreadID tid); 408 409 /** Profile the reasons of fetch stall. */ 410 void profileStall(ThreadID tid); 411 412 private: 413 /** Pointer to the O3CPU. */ 414 O3CPU *cpu; 415 416 /** Time buffer interface. */ 417 TimeBuffer<TimeStruct> *timeBuffer; 418 419 /** Wire to get decode's information from backwards time buffer. */ 420 typename TimeBuffer<TimeStruct>::wire fromDecode; 421 422 /** Wire to get rename's information from backwards time buffer. */ 423 typename TimeBuffer<TimeStruct>::wire fromRename; 424 425 /** Wire to get iew's information from backwards time buffer. */ 426 typename TimeBuffer<TimeStruct>::wire fromIEW; 427 428 /** Wire to get commit's information from backwards time buffer. */ 429 typename TimeBuffer<TimeStruct>::wire fromCommit; 430 431 //Might be annoying how this name is different than the queue. 432 /** Wire used to write any information heading to decode. */ 433 typename TimeBuffer<FetchStruct>::wire toDecode; 434 435 /** BPredUnit. */ 436 BPredUnit *branchPred; 437 438 TheISA::PCState pc[Impl::MaxThreads]; 439 440 Addr fetchOffset[Impl::MaxThreads]; 441 442 StaticInstPtr macroop[Impl::MaxThreads]; 443 444 /** Can the fetch stage redirect from an interrupt on this instruction? */ 445 bool delayedCommit[Impl::MaxThreads]; 446 447 /** Memory request used to access cache. */ 448 RequestPtr memReq[Impl::MaxThreads]; 449 450 /** Variable that tracks if fetch has written to the time buffer this 451 * cycle. Used to tell CPU if there is activity this cycle. 452 */ 453 bool wroteToTimeBuffer; 454 455 /** Tracks how many instructions has been fetched this cycle. */ 456 int numInst; 457 458 /** Source of possible stalls. */ 459 struct Stalls { 460 bool decode; 461 bool drain; 462 }; 463 464 /** Tracks which stages are telling fetch to stall. */ 465 Stalls stalls[Impl::MaxThreads]; 466 467 /** Decode to fetch delay. */ 468 Cycles decodeToFetchDelay; 469 470 /** Rename to fetch delay. */ 471 Cycles renameToFetchDelay; 472 473 /** IEW to fetch delay. */ 474 Cycles iewToFetchDelay; 475 476 /** Commit to fetch delay. */ 477 Cycles commitToFetchDelay; 478 479 /** The width of fetch in instructions. */ 480 unsigned fetchWidth; 481 482 /** The width of decode in instructions. */ 483 unsigned decodeWidth; 484 485 /** Is the cache blocked? If so no threads can access it. */ 486 bool cacheBlocked; 487 488 /** The packet that is waiting to be retried. */ 489 PacketPtr retryPkt; 490 491 /** The thread that is waiting on the cache to tell fetch to retry. */ 492 ThreadID retryTid; 493 494 /** Cache block size. */ 495 unsigned int cacheBlkSize; 496 497 /** The size of the fetch buffer in bytes. The fetch buffer 498 * itself may be smaller than a cache line. 499 */ 500 unsigned fetchBufferSize; 501 502 /** Mask to align a fetch address to a fetch buffer boundary. */ 503 Addr fetchBufferMask; 504 505 /** The fetch data that is being fetched and buffered. */ 506 uint8_t *fetchBuffer[Impl::MaxThreads]; 507 508 /** The PC of the first instruction loaded into the fetch buffer. */ 509 Addr fetchBufferPC[Impl::MaxThreads]; 510 511 /** The size of the fetch queue in micro-ops */ 512 unsigned fetchQueueSize; 513 514 /** Queue of fetched instructions. Per-thread to prevent HoL blocking. */ 515 std::deque<DynInstPtr> fetchQueue[Impl::MaxThreads]; 516 517 /** Whether or not the fetch buffer data is valid. */ 518 bool fetchBufferValid[Impl::MaxThreads]; 519 520 /** Size of instructions. */ 521 int instSize; 522 523 /** Icache stall statistics. */ 524 Counter lastIcacheStall[Impl::MaxThreads]; 525 526 /** List of Active Threads */ 527 std::list<ThreadID> *activeThreads; 528 529 /** Number of threads. */ 530 ThreadID numThreads; 531 532 /** Number of threads that are actively fetching. */ 533 ThreadID numFetchingThreads; 534 535 /** Thread ID being fetched. */ 536 ThreadID threadFetched; 537 538 /** Checks if there is an interrupt pending. If there is, fetch 539 * must stop once it is not fetching PAL instructions. 540 */ 541 bool interruptPending; 542 543 /** Instruction port. Note that it has to appear after the fetch stage. */ 544 IcachePort icachePort; 545 546 /** Set to true if a pipelined I-cache request should be issued. */ 547 bool issuePipelinedIfetch[Impl::MaxThreads]; 548 549 /** Event used to delay fault generation of translation faults */ 550 FinishTranslationEvent finishTranslationEvent; 551 552 // @todo: Consider making these vectors and tracking on a per thread basis. 553 /** Stat for total number of cycles stalled due to an icache miss. */ 554 Stats::Scalar icacheStallCycles; 555 /** Stat for total number of fetched instructions. */ 556 Stats::Scalar fetchedInsts; 557 /** Total number of fetched branches. */ 558 Stats::Scalar fetchedBranches; 559 /** Stat for total number of predicted branches. */ 560 Stats::Scalar predictedBranches; 561 /** Stat for total number of cycles spent fetching. */ 562 Stats::Scalar fetchCycles; 563 /** Stat for total number of cycles spent squashing. */ 564 Stats::Scalar fetchSquashCycles; 565 /** Stat for total number of cycles spent waiting for translation */ 566 Stats::Scalar fetchTlbCycles; 567 /** Stat for total number of cycles spent blocked due to other stages in 568 * the pipeline. 569 */ 570 Stats::Scalar fetchIdleCycles; 571 /** Total number of cycles spent blocked. */ 572 Stats::Scalar fetchBlockedCycles; 573 /** Total number of cycles spent in any other state. */ 574 Stats::Scalar fetchMiscStallCycles; 575 /** Total number of cycles spent in waiting for drains. */ 576 Stats::Scalar fetchPendingDrainCycles; 577 /** Total number of stall cycles caused by no active threads to run. */ 578 Stats::Scalar fetchNoActiveThreadStallCycles; 579 /** Total number of stall cycles caused by pending traps. */ 580 Stats::Scalar fetchPendingTrapStallCycles; 581 /** Total number of stall cycles caused by pending quiesce instructions. */ 582 Stats::Scalar fetchPendingQuiesceStallCycles; 583 /** Total number of stall cycles caused by I-cache wait retrys. */ 584 Stats::Scalar fetchIcacheWaitRetryStallCycles; 585 /** Stat for total number of fetched cache lines. */ 586 Stats::Scalar fetchedCacheLines; 587 /** Total number of outstanding icache accesses that were dropped 588 * due to a squash. 589 */ 590 Stats::Scalar fetchIcacheSquashes; 591 /** Total number of outstanding tlb accesses that were dropped 592 * due to a squash. 593 */ 594 Stats::Scalar fetchTlbSquashes; 595 /** Distribution of number of instructions fetched each cycle. */ 596 Stats::Distribution fetchNisnDist; 597 /** Rate of how often fetch was idle. */ 598 Stats::Formula idleRate; 599 /** Number of branch fetches per cycle. */ 600 Stats::Formula branchRate; 601 /** Number of instruction fetched per cycle. */ 602 Stats::Formula fetchRate; 603}; 604 605#endif //__CPU_O3_FETCH_HH__ 606