fetch_impl.hh revision 2632:1bb2f91485ea
1/* 2 * Copyright (c) 2004-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29// Remove this later; used only for debugging. 30#define OPCODE(X) (X >> 26) & 0x3f 31 32#include "arch/isa_traits.hh" 33#include "sim/byteswap.hh" 34#include "cpu/exetrace.hh" 35#include "mem/base_mem.hh" 36#include "mem/mem_interface.hh" 37#include "mem/mem_req.hh" 38#include "cpu/o3/fetch.hh" 39 40#include "sim/root.hh" 41 42template<class Impl> 43SimpleFetch<Impl>::CacheCompletionEvent 44::CacheCompletionEvent(SimpleFetch *_fetch) 45 : Event(&mainEventQueue), 46 fetch(_fetch) 47{ 48} 49 50template<class Impl> 51void 52SimpleFetch<Impl>::CacheCompletionEvent::process() 53{ 54 fetch->processCacheCompletion(); 55} 56 57template<class Impl> 58const char * 59SimpleFetch<Impl>::CacheCompletionEvent::description() 60{ 61 return "SimpleFetch cache completion event"; 62} 63 64template<class Impl> 65SimpleFetch<Impl>::SimpleFetch(Params ¶ms) 66 : icacheInterface(params.icacheInterface), 67 branchPred(params), 68 decodeToFetchDelay(params.decodeToFetchDelay), 69 renameToFetchDelay(params.renameToFetchDelay), 70 iewToFetchDelay(params.iewToFetchDelay), 71 commitToFetchDelay(params.commitToFetchDelay), 72 fetchWidth(params.fetchWidth) 73{ 74 DPRINTF(Fetch, "Fetch: Fetch constructor called\n"); 75 76 // Set status to idle. 77 _status = Idle; 78 79 // Create a new memory request. 80 memReq = new MemReq(); 81 // Not sure of this parameter. I think it should be based on the 82 // thread number. 83#if !FULL_SYSTEM 84 memReq->asid = 0; 85#else 86 memReq->asid = 0; 87#endif // FULL_SYSTEM 88 memReq->data = new uint8_t[64]; 89 90 // Size of cache block. 91 cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; 92 93 // Create mask to get rid of offset bits. 94 cacheBlkMask = (cacheBlkSize - 1); 95 96 // Get the size of an instruction. 97 instSize = sizeof(MachInst); 98 99 // Create space to store a cache line. 100 cacheData = new uint8_t[cacheBlkSize]; 101} 102 103template <class Impl> 104void 105SimpleFetch<Impl>::regStats() 106{ 107 icacheStallCycles 108 .name(name() + ".icacheStallCycles") 109 .desc("Number of cycles fetch is stalled on an Icache miss") 110 .prereq(icacheStallCycles); 111 112 fetchedInsts 113 .name(name() + ".fetchedInsts") 114 .desc("Number of instructions fetch has processed") 115 .prereq(fetchedInsts); 116 predictedBranches 117 .name(name() + ".predictedBranches") 118 .desc("Number of branches that fetch has predicted taken") 119 .prereq(predictedBranches); 120 fetchCycles 121 .name(name() + ".fetchCycles") 122 .desc("Number of cycles fetch has run and was not squashing or" 123 " blocked") 124 .prereq(fetchCycles); 125 fetchSquashCycles 126 .name(name() + ".fetchSquashCycles") 127 .desc("Number of cycles fetch has spent squashing") 128 .prereq(fetchSquashCycles); 129 fetchBlockedCycles 130 .name(name() + ".fetchBlockedCycles") 131 .desc("Number of cycles fetch has spent blocked") 132 .prereq(fetchBlockedCycles); 133 fetchedCacheLines 134 .name(name() + ".fetchedCacheLines") 135 .desc("Number of cache lines fetched") 136 .prereq(fetchedCacheLines); 137 138 fetch_nisn_dist 139 .init(/* base value */ 0, 140 /* last value */ fetchWidth, 141 /* bucket size */ 1) 142 .name(name() + ".FETCH:rate_dist") 143 .desc("Number of instructions fetched each cycle (Total)") 144 .flags(Stats::pdf) 145 ; 146 147 branchPred.regStats(); 148} 149 150template<class Impl> 151void 152SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr) 153{ 154 DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n"); 155 cpu = cpu_ptr; 156 // This line will be removed eventually. 157 memReq->xc = cpu->xcBase(); 158} 159 160template<class Impl> 161void 162SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) 163{ 164 DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n"); 165 timeBuffer = time_buffer; 166 167 // Create wires to get information from proper places in time buffer. 168 fromDecode = timeBuffer->getWire(-decodeToFetchDelay); 169 fromRename = timeBuffer->getWire(-renameToFetchDelay); 170 fromIEW = timeBuffer->getWire(-iewToFetchDelay); 171 fromCommit = timeBuffer->getWire(-commitToFetchDelay); 172} 173 174template<class Impl> 175void 176SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) 177{ 178 DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n"); 179 fetchQueue = fq_ptr; 180 181 // Create wire to write information to proper place in fetch queue. 182 toDecode = fetchQueue->getWire(0); 183} 184 185template<class Impl> 186void 187SimpleFetch<Impl>::processCacheCompletion() 188{ 189 DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n"); 190 191 // Only change the status if it's still waiting on the icache access 192 // to return. 193 // Can keep track of how many cache accesses go unused due to 194 // misspeculation here. 195 if (_status == IcacheMissStall) 196 _status = IcacheMissComplete; 197} 198 199template <class Impl> 200bool 201SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) 202{ 203 // Do branch prediction check here. 204 // A bit of a misnomer...next_PC is actually the current PC until 205 // this function updates it. 206 bool predict_taken; 207 208 if (!inst->isControl()) { 209 next_PC = next_PC + instSize; 210 inst->setPredTarg(next_PC); 211 return false; 212 } 213 214 predict_taken = branchPred.predict(inst, next_PC); 215 216 if (predict_taken) { 217 ++predictedBranches; 218 } 219 220 return predict_taken; 221} 222 223template <class Impl> 224Fault 225SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC) 226{ 227 // Check if the instruction exists within the cache. 228 // If it does, then proceed on to read the instruction and the rest 229 // of the instructions in the cache line until either the end of the 230 // cache line or a predicted taken branch is encountered. 231 232#if FULL_SYSTEM 233 // Flag to say whether or not address is physical addr. 234 unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; 235#else 236 unsigned flags = 0; 237#endif // FULL_SYSTEM 238 239 Fault fault = NoFault; 240 241 // Align the fetch PC so it's at the start of a cache block. 242 fetch_PC = icacheBlockAlignPC(fetch_PC); 243 244 // Setup the memReq to do a read of the first isntruction's address. 245 // Set the appropriate read size and flags as well. 246 memReq->cmd = Read; 247 memReq->reset(fetch_PC, cacheBlkSize, flags); 248 249 // Translate the instruction request. 250 // Should this function be 251 // in the CPU class ? Probably...ITB/DTB should exist within the 252 // CPU. 253 254 fault = cpu->translateInstReq(memReq); 255 256 // In the case of faults, the fetch stage may need to stall and wait 257 // on what caused the fetch (ITB or Icache miss). 258 259 // If translation was successful, attempt to read the first 260 // instruction. 261 if (fault == NoFault) { 262 DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); 263 fault = cpu->mem->read(memReq, cacheData); 264 // This read may change when the mem interface changes. 265 266 fetchedCacheLines++; 267 } 268 269 // Now do the timing access to see whether or not the instruction 270 // exists within the cache. 271 if (icacheInterface && fault == NoFault) { 272 DPRINTF(Fetch, "Fetch: Doing timing memory access.\n"); 273 memReq->completionEvent = NULL; 274 275 memReq->time = curTick; 276 277 MemAccessResult result = icacheInterface->access(memReq); 278 279 // If the cache missed (in this model functional and timing 280 // memories are different), then schedule an event to wake 281 // up this stage once the cache miss completes. 282 if (result != MA_HIT && icacheInterface->doEvents()) { 283 memReq->completionEvent = new CacheCompletionEvent(this); 284 285 // How does current model work as far as individual 286 // stages scheduling/unscheduling? 287 // Perhaps have only the main CPU scheduled/unscheduled, 288 // and have it choose what stages to run appropriately. 289 290 DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n"); 291 _status = IcacheMissStall; 292 } 293 } 294 295 return fault; 296} 297 298template <class Impl> 299inline void 300SimpleFetch<Impl>::doSquash(const Addr &new_PC) 301{ 302 DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); 303 304 cpu->setNextPC(new_PC + instSize); 305 cpu->setPC(new_PC); 306 307 // Clear the icache miss if it's outstanding. 308 if (_status == IcacheMissStall && icacheInterface) { 309 DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n"); 310 // @todo: Use an actual thread number here. 311 icacheInterface->squash(0); 312 } 313 314 _status = Squashing; 315 316 ++fetchSquashCycles; 317} 318 319template<class Impl> 320void 321SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC, 322 const InstSeqNum &seq_num) 323{ 324 DPRINTF(Fetch, "Fetch: Squashing from decode.\n"); 325 326 doSquash(new_PC); 327 328 // Tell the CPU to remove any instructions that are in flight between 329 // fetch and decode. 330 cpu->removeInstsUntil(seq_num); 331} 332 333template <class Impl> 334void 335SimpleFetch<Impl>::squash(const Addr &new_PC) 336{ 337 DPRINTF(Fetch, "Fetch: Squash from commit.\n"); 338 339 doSquash(new_PC); 340 341 // Tell the CPU to remove any instructions that are not in the ROB. 342 cpu->removeInstsNotInROB(); 343} 344 345template<class Impl> 346void 347SimpleFetch<Impl>::tick() 348{ 349 // Check squash signals from commit. 350 if (fromCommit->commitInfo.squash) { 351 DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " 352 "from commit.\n"); 353 354 // In any case, squash. 355 squash(fromCommit->commitInfo.nextPC); 356 357 // Also check if there's a mispredict that happened. 358 if (fromCommit->commitInfo.branchMispredict) { 359 branchPred.squash(fromCommit->commitInfo.doneSeqNum, 360 fromCommit->commitInfo.nextPC, 361 fromCommit->commitInfo.branchTaken); 362 } else { 363 branchPred.squash(fromCommit->commitInfo.doneSeqNum); 364 } 365 366 return; 367 } else if (fromCommit->commitInfo.doneSeqNum) { 368 // Update the branch predictor if it wasn't a squashed instruction 369 // that was braodcasted. 370 branchPred.update(fromCommit->commitInfo.doneSeqNum); 371 } 372 373 // Check ROB squash signals from commit. 374 if (fromCommit->commitInfo.robSquashing) { 375 DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); 376 377 // Continue to squash. 378 _status = Squashing; 379 380 ++fetchSquashCycles; 381 return; 382 } 383 384 // Check squash signals from decode. 385 if (fromDecode->decodeInfo.squash) { 386 DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " 387 "from decode.\n"); 388 389 // Update the branch predictor. 390 if (fromDecode->decodeInfo.branchMispredict) { 391 branchPred.squash(fromDecode->decodeInfo.doneSeqNum, 392 fromDecode->decodeInfo.nextPC, 393 fromDecode->decodeInfo.branchTaken); 394 } else { 395 branchPred.squash(fromDecode->decodeInfo.doneSeqNum); 396 } 397 398 if (_status != Squashing) { 399 // Squash unless we're already squashing? 400 squashFromDecode(fromDecode->decodeInfo.nextPC, 401 fromDecode->decodeInfo.doneSeqNum); 402 return; 403 } 404 } 405 406 // Check if any of the stall signals are high. 407 if (fromDecode->decodeInfo.stall || 408 fromRename->renameInfo.stall || 409 fromIEW->iewInfo.stall || 410 fromCommit->commitInfo.stall) 411 { 412 // Block stage, regardless of current status. 413 414 DPRINTF(Fetch, "Fetch: Stalling stage.\n"); 415 DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " 416 "Commit: %i\n", 417 fromDecode->decodeInfo.stall, 418 fromRename->renameInfo.stall, 419 fromIEW->iewInfo.stall, 420 fromCommit->commitInfo.stall); 421 422 _status = Blocked; 423 424 ++fetchBlockedCycles; 425 return; 426 } else if (_status == Blocked) { 427 // Unblock stage if status is currently blocked and none of the 428 // stall signals are being held high. 429 _status = Running; 430 431 ++fetchBlockedCycles; 432 return; 433 } 434 435 // If fetch has reached this point, then there are no squash signals 436 // still being held high. Check if fetch is in the squashing state; 437 // if so, fetch can switch to running. 438 // Similarly, there are no blocked signals still being held high. 439 // Check if fetch is in the blocked state; if so, fetch can switch to 440 // running. 441 if (_status == Squashing) { 442 DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n"); 443 444 // Switch status to running 445 _status = Running; 446 447 ++fetchCycles; 448 449 fetch(); 450 } else if (_status != IcacheMissStall) { 451 DPRINTF(Fetch, "Fetch: Running stage.\n"); 452 453 ++fetchCycles; 454 455 fetch(); 456 } 457} 458 459template<class Impl> 460void 461SimpleFetch<Impl>::fetch() 462{ 463 ////////////////////////////////////////// 464 // Start actual fetch 465 ////////////////////////////////////////// 466 467 // The current PC. 468 Addr fetch_PC = cpu->readPC(); 469 470 // Fault code for memory access. 471 Fault fault = NoFault; 472 473 // If returning from the delay of a cache miss, then update the status 474 // to running, otherwise do the cache access. Possibly move this up 475 // to tick() function. 476 if (_status == IcacheMissComplete) { 477 DPRINTF(Fetch, "Fetch: Icache miss is complete.\n"); 478 479 // Reset the completion event to NULL. 480 memReq->completionEvent = NULL; 481 482 _status = Running; 483 } else { 484 DPRINTF(Fetch, "Fetch: Attempting to translate and read " 485 "instruction, starting at PC %08p.\n", 486 fetch_PC); 487 488 fault = fetchCacheLine(fetch_PC); 489 } 490 491 // If we had a stall due to an icache miss, then return. It'd 492 // be nicer if this were handled through the kind of fault that 493 // is returned by the function. 494 if (_status == IcacheMissStall) { 495 return; 496 } 497 498 // As far as timing goes, the CPU will need to send an event through 499 // the MemReq in order to be woken up once the memory access completes. 500 // Probably have a status on a per thread basis so each thread can 501 // block independently and be woken up independently. 502 503 Addr next_PC = fetch_PC; 504 InstSeqNum inst_seq; 505 MachInst inst; 506 unsigned offset = fetch_PC & cacheBlkMask; 507 unsigned fetched; 508 509 if (fault == NoFault) { 510 // If the read of the first instruction was successful, then grab the 511 // instructions from the rest of the cache line and put them into the 512 // queue heading to decode. 513 514 DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); 515 516 ////////////////////////// 517 // Fetch first instruction 518 ////////////////////////// 519 520 // Need to keep track of whether or not a predicted branch 521 // ended this fetch block. 522 bool predicted_branch = false; 523 524 for (fetched = 0; 525 offset < cacheBlkSize && 526 fetched < fetchWidth && 527 !predicted_branch; 528 ++fetched) 529 { 530 531 // Get a sequence number. 532 inst_seq = cpu->getAndIncrementInstSeq(); 533 534 // Make sure this is a valid index. 535 assert(offset <= cacheBlkSize - instSize); 536 537 // Get the instruction from the array of the cache line. 538 inst = gtoh(*reinterpret_cast<MachInst *> 539 (&cacheData[offset])); 540 541 // Create a new DynInst from the instruction fetched. 542 DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, 543 inst_seq, cpu); 544 545 DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", 546 inst_seq, instruction->readPC()); 547 548 DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", 549 OPCODE(inst)); 550 551 instruction->traceData = 552 Trace::getInstRecord(curTick, cpu->xcBase(), cpu, 553 instruction->staticInst, 554 instruction->readPC(), 0); 555 556 predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); 557 558 // Add instruction to the CPU's list of instructions. 559 cpu->addInst(instruction); 560 561 // Write the instruction to the first slot in the queue 562 // that heads to decode. 563 toDecode->insts[fetched] = instruction; 564 565 toDecode->size++; 566 567 // Increment stat of fetched instructions. 568 ++fetchedInsts; 569 570 // Move to the next instruction, unless we have a branch. 571 fetch_PC = next_PC; 572 573 offset+= instSize; 574 } 575 576 fetch_nisn_dist.sample(fetched); 577 } 578 579 // Now that fetching is completed, update the PC to signify what the next 580 // cycle will be. Might want to move this to the beginning of this 581 // function so that the PC updates at the beginning of everything. 582 // Or might want to leave setting the PC to the main CPU, with fetch 583 // only changing the nextPC (will require correct determination of 584 // next PC). 585 if (fault == NoFault) { 586 DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC); 587 cpu->setPC(next_PC); 588 cpu->setNextPC(next_PC + instSize); 589 } else { 590 // If the issue was an icache miss, then we can just return and 591 // wait until it is handled. 592 if (_status == IcacheMissStall) { 593 return; 594 } 595 596 // Handle the fault. 597 // This stage will not be able to continue until all the ROB 598 // slots are empty, at which point the fault can be handled. 599 // The only other way it can wake up is if a squash comes along 600 // and changes the PC. Not sure how to handle that case...perhaps 601 // have it handled by the upper level CPU class which peeks into the 602 // time buffer and sees if a squash comes along, in which case it 603 // changes the status. 604 605 DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n"); 606 607 _status = Blocked; 608#if FULL_SYSTEM 609// cpu->trap(fault); 610 // Send a signal to the ROB indicating that there's a trap from the 611 // fetch stage that needs to be handled. Need to indicate that 612 // there's a fault, and the fault type. 613#else // !FULL_SYSTEM 614 fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); 615#endif // FULL_SYSTEM 616 } 617} 618