// lsq_unit.hh revision 2292
1/* 2 * Copyright (c) 2004-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#ifndef __CPU_O3_LSQ_UNIT_HH__ 30#define __CPU_O3_LSQ_UNIT_HH__ 31 32#include <map> 33#include <queue> 34#include <algorithm> 35 36#include "config/full_system.hh" 37#include "base/hashmap.hh" 38#include "cpu/inst_seq.hh" 39#include "mem/mem_interface.hh" 40//#include "mem/page_table.hh" 41#include "sim/sim_object.hh" 42#include "arch/faults.hh" 43 44/** 45 * Class that implements the actual LQ and SQ for each specific thread. 46 * Both are circular queues; load entries are freed upon committing, while 47 * store entries are freed once they writeback. The LSQUnit tracks if there 48 * are memory ordering violations, and also detects partial load to store 49 * forwarding cases (a store only has part of a load's data) that requires 50 * the load to wait until the store writes back. In the former case it 51 * holds onto the instruction until the dependence unit looks at it, and 52 * in the latter it stalls the LSQ until the store writes back. At that 53 * point the load is replayed. 54 */ 55template <class Impl> 56class LSQUnit { 57 protected: 58 typedef TheISA::IntReg IntReg; 59 public: 60 typedef typename Impl::Params Params; 61 typedef typename Impl::FullCPU FullCPU; 62 typedef typename Impl::DynInstPtr DynInstPtr; 63 typedef typename Impl::CPUPol::IEW IEW; 64 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 65 66 private: 67 class StoreCompletionEvent : public Event { 68 public: 69 /** Constructs a store completion event. */ 70 StoreCompletionEvent(int store_idx, Event *wb_event, LSQUnit *lsq_ptr); 71 72 /** Processes the store completion event. */ 73 void process(); 74 75 /** Returns the description of this event. */ 76 const char *description(); 77 78 private: 79 /** The store index of the store being written back. */ 80 int storeIdx; 81 /** The writeback event for the store. Needed for store 82 * conditionals. 83 */ 84 Event *wbEvent; 85 /** The pointer to the LSQ unit that issued the store. 
*/ 86 LSQUnit<Impl> *lsqPtr; 87 }; 88 89 friend class StoreCompletionEvent; 90 91 public: 92 /** Constructs an LSQ unit. init() must be called prior to use. */ 93 LSQUnit(); 94 95 /** Initializes the LSQ unit with the specified number of entries. */ 96 void init(Params *params, unsigned maxLQEntries, 97 unsigned maxSQEntries, unsigned id); 98 99 /** Returns the name of the LSQ unit. */ 100 std::string name() const; 101 102 /** Sets the CPU pointer. */ 103 void setCPU(FullCPU *cpu_ptr) 104 { cpu = cpu_ptr; } 105 106 /** Sets the IEW stage pointer. */ 107 void setIEW(IEW *iew_ptr) 108 { iewStage = iew_ptr; } 109 110 /** Sets the page table pointer. */ 111// void setPageTable(PageTable *pt_ptr); 112 113 /** Ticks the LSQ unit, which in this case only resets the number of 114 * used cache ports. 115 * @todo: Move the number of used ports up to the LSQ level so it can 116 * be shared by all LSQ units. 117 */ 118 void tick() { usedPorts = 0; } 119 120 /** Inserts an instruction. */ 121 void insert(DynInstPtr &inst); 122 /** Inserts a load instruction. */ 123 void insertLoad(DynInstPtr &load_inst); 124 /** Inserts a store instruction. */ 125 void insertStore(DynInstPtr &store_inst); 126 127 /** Executes a load instruction. */ 128 Fault executeLoad(DynInstPtr &inst); 129 130 Fault executeLoad(int lq_idx); 131 /** Executes a store instruction. */ 132 Fault executeStore(DynInstPtr &inst); 133 134 /** Commits the head load. */ 135 void commitLoad(); 136 /** Commits a specific load, given by the sequence number. */ 137 void commitLoad(InstSeqNum &inst); 138 /** Commits loads older than a specific sequence number. */ 139 void commitLoads(InstSeqNum &youngest_inst); 140 141 /** Commits stores older than a specific sequence number. */ 142 void commitStores(InstSeqNum &youngest_inst); 143 144 /** Writes back stores. */ 145 void writebackStores(); 146 147 // @todo: Include stats in the LSQ unit. 148 //void regStats(); 149 150 /** Clears all the entries in the LQ. 
*/ 151 void clearLQ(); 152 153 /** Clears all the entries in the SQ. */ 154 void clearSQ(); 155 156 /** Resizes the LQ to a given size. */ 157 void resizeLQ(unsigned size); 158 159 /** Resizes the SQ to a given size. */ 160 void resizeSQ(unsigned size); 161 162 /** Squashes all instructions younger than a specific sequence number. */ 163 void squash(const InstSeqNum &squashed_num); 164 165 /** Returns if there is a memory ordering violation. Value is reset upon 166 * call to getMemDepViolator(). 167 */ 168 bool violation() { return memDepViolator; } 169 170 /** Returns the memory ordering violator. */ 171 DynInstPtr getMemDepViolator(); 172 173 /** Returns if a load became blocked due to the memory system. It clears 174 * the bool's value upon this being called. 175 */ 176 bool loadBlocked() 177 { return isLoadBlocked; } 178 179 void clearLoadBlocked() 180 { isLoadBlocked = false; } 181 182 bool isLoadBlockedHandled() 183 { return loadBlockedHandled; } 184 185 void setLoadBlockedHandled() 186 { loadBlockedHandled = true; } 187 188 /** Returns the number of free entries (min of free LQ and SQ entries). */ 189 unsigned numFreeEntries(); 190 191 /** Returns the number of loads ready to execute. */ 192 int numLoadsReady(); 193 194 /** Returns the number of loads in the LQ. */ 195 int numLoads() { return loads; } 196 197 /** Returns the number of stores in the SQ. */ 198 int numStores() { return stores; } 199 200 /** Returns if either the LQ or SQ is full. */ 201 bool isFull() { return lqFull() || sqFull(); } 202 203 /** Returns if the LQ is full. */ 204 bool lqFull() { return loads >= (LQEntries - 1); } 205 206 /** Returns if the SQ is full. */ 207 bool sqFull() { return stores >= (SQEntries - 1); } 208 209 /** Debugging function to dump instructions in the LSQ. */ 210 void dumpInsts(); 211 212 /** Returns the number of instructions in the LSQ. */ 213 unsigned getCount() { return loads + stores; } 214 215 /** Returns if there are any stores to writeback. 
*/ 216 bool hasStoresToWB() { return storesToWB; } 217 218 /** Returns the number of stores to writeback. */ 219 int numStoresToWB() { return storesToWB; } 220 221 /** Returns if the LSQ unit will writeback on this cycle. */ 222 bool willWB() { return storeQueue[storeWBIdx].canWB && 223 !storeQueue[storeWBIdx].completed && 224 !dcacheInterface->isBlocked(); } 225 226 private: 227 /** Completes the store at the specified index. */ 228 void completeStore(int store_idx); 229 230 /** Increments the given store index (circular queue). */ 231 inline void incrStIdx(int &store_idx); 232 /** Decrements the given store index (circular queue). */ 233 inline void decrStIdx(int &store_idx); 234 /** Increments the given load index (circular queue). */ 235 inline void incrLdIdx(int &load_idx); 236 /** Decrements the given load index (circular queue). */ 237 inline void decrLdIdx(int &load_idx); 238 239 private: 240 /** Pointer to the CPU. */ 241 FullCPU *cpu; 242 243 /** Pointer to the IEW stage. */ 244 IEW *iewStage; 245 246 /** Pointer to the D-cache. */ 247 MemInterface *dcacheInterface; 248 249 /** Pointer to the page table. */ 250// PageTable *pTable; 251 252 public: 253 struct SQEntry { 254 /** Constructs an empty store queue entry. */ 255 SQEntry() 256 : inst(NULL), req(NULL), size(0), data(0), 257 canWB(0), committed(0), completed(0) 258 { } 259 260 /** Constructs a store queue entry for a given instruction. */ 261 SQEntry(DynInstPtr &_inst) 262 : inst(_inst), req(NULL), size(0), data(0), 263 canWB(0), committed(0), completed(0) 264 { } 265 266 /** The store instruction. */ 267 DynInstPtr inst; 268 /** The memory request for the store. */ 269 MemReqPtr req; 270 /** The size of the store. */ 271 int size; 272 /** The store data. */ 273 IntReg data; 274 /** Whether or not the store can writeback. */ 275 bool canWB; 276 /** Whether or not the store is committed. */ 277 bool committed; 278 /** Whether or not the store is completed. 
*/ 279 bool completed; 280 }; 281 282 enum Status { 283 Running, 284 Idle, 285 DcacheMissStall, 286 DcacheMissSwitch 287 }; 288 289 private: 290 /** The LSQUnit thread id. */ 291 unsigned lsqID; 292 293 /** The status of the LSQ unit. */ 294 Status _status; 295 296 /** The store queue. */ 297 std::vector<SQEntry> storeQueue; 298 299 /** The load queue. */ 300 std::vector<DynInstPtr> loadQueue; 301 302 // Consider making these 16 bits 303 /** The number of LQ entries. */ 304 unsigned LQEntries; 305 /** The number of SQ entries. */ 306 unsigned SQEntries; 307 308 /** The number of load instructions in the LQ. */ 309 int loads; 310 /** The number of store instructions in the SQ (excludes those waiting to 311 * writeback). 312 */ 313 int stores; 314 /** The number of store instructions in the SQ waiting to writeback. */ 315 int storesToWB; 316 317 /** The index of the head instruction in the LQ. */ 318 int loadHead; 319 /** The index of the tail instruction in the LQ. */ 320 int loadTail; 321 322 /** The index of the head instruction in the SQ. */ 323 int storeHead; 324 /** The index of the first instruction that is ready to be written back, 325 * and has not yet been written back. 326 */ 327 int storeWBIdx; 328 /** The index of the tail instruction in the SQ. */ 329 int storeTail; 330 331 /// @todo Consider moving to a more advanced model with write vs read ports 332 /** The number of cache ports available each cycle. */ 333 int cachePorts; 334 335 /** The number of used cache ports in this cycle. */ 336 int usedPorts; 337 338 //list<InstSeqNum> mshrSeqNums; 339 340 //Stats::Scalar<> dcacheStallCycles; 341 Counter lastDcacheStall; 342 343 /** Wire to read information from the issue stage time queue. */ 344 typename TimeBuffer<IssueStruct>::wire fromIssue; 345 346 // Make these per thread? 347 /** Whether or not the LSQ is stalled. */ 348 bool stalled; 349 /** The store that causes the stall due to partial store to load 350 * forwarding. 
351 */ 352 InstSeqNum stallingStoreIsn; 353 /** The index of the above store. */ 354 int stallingLoadIdx; 355 356 /** Whether or not a load is blocked due to the memory system. It is 357 * cleared when this value is checked via loadBlocked(). 358 */ 359 bool isLoadBlocked; 360 361 bool loadBlockedHandled; 362 363 InstSeqNum blockedLoadSeqNum; 364 365 /** The oldest faulting load instruction. */ 366 DynInstPtr loadFaultInst; 367 /** The oldest faulting store instruction. */ 368 DynInstPtr storeFaultInst; 369 370 /** The oldest load that caused a memory ordering violation. */ 371 DynInstPtr memDepViolator; 372 373 // Will also need how many read/write ports the Dcache has. Or keep track 374 // of that in stage that is one level up, and only call executeLoad/Store 375 // the appropriate number of times. 376 377 public: 378 /** Executes the load at the given index. */ 379 template <class T> 380 Fault read(MemReqPtr &req, T &data, int load_idx); 381 382 /** Executes the store at the given index. */ 383 template <class T> 384 Fault write(MemReqPtr &req, T &data, int store_idx); 385 386 /** Returns the index of the head load instruction. */ 387 int getLoadHead() { return loadHead; } 388 /** Returns the sequence number of the head load instruction. */ 389 InstSeqNum getLoadHeadSeqNum() 390 { 391 if (loadQueue[loadHead]) { 392 return loadQueue[loadHead]->seqNum; 393 } else { 394 return 0; 395 } 396 397 } 398 399 /** Returns the index of the head store instruction. */ 400 int getStoreHead() { return storeHead; } 401 /** Returns the sequence number of the head store instruction. */ 402 InstSeqNum getStoreHeadSeqNum() 403 { 404 if (storeQueue[storeHead].inst) { 405 return storeQueue[storeHead].inst->seqNum; 406 } else { 407 return 0; 408 } 409 410 } 411 412 /** Returns whether or not the LSQ unit is stalled. 
*/ 413 bool isStalled() { return stalled; } 414}; 415 416template <class Impl> 417template <class T> 418Fault 419LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) 420{ 421 //Depending on issue2execute delay a squashed load could 422 //execute if it is found to be squashed in the same 423 //cycle it is scheduled to execute 424 assert(loadQueue[load_idx]); 425 426 if (loadQueue[load_idx]->isExecuted()) { 427 panic("Should not reach this point with split ops!"); 428 memcpy(&data,req->data,req->size); 429 430 return NoFault; 431 } 432 433 // Make sure this isn't an uncacheable access 434 // A bit of a hackish way to get uncached accesses to work only if they're 435 // at the head of the LSQ and are ready to commit (at the head of the ROB 436 // too). 437 // @todo: Fix uncached accesses. 438 if (req->flags & UNCACHEABLE && 439 (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) { 440 iewStage->rescheduleMemInst(loadQueue[load_idx]); 441 return TheISA::genMachineCheckFault(); 442 } 443 444 // Check the SQ for any previous stores that might lead to forwarding 445 int store_idx = loadQueue[load_idx]->sqIdx; 446 447 int store_size = 0; 448 449 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " 450 "storeHead: %i addr: %#x\n", 451 load_idx, store_idx, storeHead, req->paddr); 452 453#ifdef FULL_SYSTEM 454 if (req->flags & LOCKED) { 455 cpu->lockAddr = req->paddr; 456 cpu->lockFlag = true; 457 } 458#endif 459 460 while (store_idx != -1) { 461 // End once we've reached the top of the LSQ 462 if (store_idx == storeWBIdx) { 463 break; 464 } 465 466 // Move the index to one younger 467 if (--store_idx < 0) 468 store_idx += SQEntries; 469 470 assert(storeQueue[store_idx].inst); 471 472 store_size = storeQueue[store_idx].size; 473 474 if (store_size == 0) 475 continue; 476 477 // Check if the store data is within the lower and upper bounds of 478 // addresses that the request needs. 
479 bool store_has_lower_limit = 480 req->vaddr >= storeQueue[store_idx].inst->effAddr; 481 bool store_has_upper_limit = 482 (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr + 483 store_size); 484 bool lower_load_has_store_part = 485 req->vaddr < (storeQueue[store_idx].inst->effAddr + 486 store_size); 487 bool upper_load_has_store_part = 488 (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr; 489 490 // If the store's data has all of the data needed, we can forward. 491 if (store_has_lower_limit && store_has_upper_limit) { 492 493 int shift_amt = req->vaddr & (store_size - 1); 494 // Assumes byte addressing 495 shift_amt = shift_amt << 3; 496 497 // Cast this to type T? 498 data = storeQueue[store_idx].data >> shift_amt; 499 500 req->cmd = Read; 501 assert(!req->completionEvent); 502 req->completionEvent = NULL; 503 req->time = curTick; 504 assert(!req->data); 505 req->data = new uint8_t[64]; 506 507 memcpy(req->data, &data, req->size); 508 509 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " 510 "addr %#x, data %#x\n", 511 store_idx, req->vaddr, *(req->data)); 512 513 typename IEW::LdWritebackEvent *wb = 514 new typename IEW::LdWritebackEvent(loadQueue[load_idx], 515 iewStage); 516 517 // We'll say this has a 1 cycle load-store forwarding latency 518 // for now. 519 // @todo: Need to make this a parameter. 520 wb->schedule(curTick); 521 522 // Should keep track of stat for forwarded data 523 return NoFault; 524 } else if ((store_has_lower_limit && lower_load_has_store_part) || 525 (store_has_upper_limit && upper_load_has_store_part) || 526 (lower_load_has_store_part && upper_load_has_store_part)) { 527 // This is the partial store-load forwarding case where a store 528 // has only part of the load's data. 529 530 // If it's already been written back, then don't worry about 531 // stalling on it. 
532 if (storeQueue[store_idx].completed) { 533 continue; 534 } 535 536 // Must stall load and force it to retry, so long as it's the oldest 537 // load that needs to do so. 538 if (!stalled || 539 (stalled && 540 loadQueue[load_idx]->seqNum < 541 loadQueue[stallingLoadIdx]->seqNum)) { 542 stalled = true; 543 stallingStoreIsn = storeQueue[store_idx].inst->seqNum; 544 stallingLoadIdx = load_idx; 545 } 546 547 // Tell IQ/mem dep unit that this instruction will need to be 548 // rescheduled eventually 549 iewStage->rescheduleMemInst(loadQueue[load_idx]); 550 551 // Do not generate a writeback event as this instruction is not 552 // complete. 553 554 DPRINTF(LSQUnit, "Load-store forwarding mis-match. " 555 "Store idx %i to load addr %#x\n", 556 store_idx, req->vaddr); 557 558 return NoFault; 559 } 560 } 561 562 563 // If there's no forwarding case, then go access memory 564 DynInstPtr inst = loadQueue[load_idx]; 565 566 DPRINTF(LSQUnit, "Doing functional access for inst PC %#x\n", 567 loadQueue[load_idx]->readPC()); 568 assert(!req->data); 569 req->data = new uint8_t[64]; 570 Fault fault = cpu->read(req, data); 571 memcpy(req->data, &data, sizeof(T)); 572 573 ++usedPorts; 574 575 // if we have a cache, do cache access too 576 if (fault == NoFault && dcacheInterface) { 577 if (dcacheInterface->isBlocked()) { 578 // There's an older load that's already going to squash. 579 if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) 580 return NoFault; 581 582 isLoadBlocked = true; 583 loadBlockedHandled = false; 584 blockedLoadSeqNum = inst->seqNum; 585 // No fault occurred, even though the interface is blocked. 
586 return NoFault; 587 } 588 DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", 589 loadQueue[load_idx]->readPC()); 590 req->cmd = Read; 591 req->completionEvent = NULL; 592 req->time = curTick; 593 594 assert(!req->completionEvent); 595 req->completionEvent = 596 new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage); 597 MemAccessResult result = dcacheInterface->access(req); 598 599 assert(dcacheInterface->doEvents()); 600 601 // Ugly hack to get an event scheduled *only* if the access is 602 // a miss. We really should add first-class support for this 603 // at some point. 604 if (result != MA_HIT) { 605 DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); 606 DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", 607 inst->seqNum); 608 609 lastDcacheStall = curTick; 610 611 _status = DcacheMissStall; 612 613 } else { 614 DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", 615 inst->seqNum); 616 617 DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); 618 } 619 } 620#if 0 621 // if we have a cache, do cache access too 622 if (dcacheInterface) { 623 if (dcacheInterface->isBlocked()) { 624 isLoadBlocked = true; 625 // No fault occurred, even though the interface is blocked. 626 return NoFault; 627 } 628 629 DPRINTF(LSQUnit, "LSQUnit: D-cache: PC:%#x reading from paddr:%#x " 630 "vaddr:%#x flags:%i\n", 631 inst->readPC(), req->paddr, req->vaddr, req->flags); 632 633 // Setup MemReq pointer 634 req->cmd = Read; 635 req->completionEvent = NULL; 636 req->time = curTick; 637 assert(!req->data); 638 req->data = new uint8_t[64]; 639 640 assert(!req->completionEvent); 641 req->completionEvent = 642 new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage); 643 644 // Do Cache Access 645 MemAccessResult result = dcacheInterface->access(req); 646 647 // Ugly hack to get an event scheduled *only* if the access is 648 // a miss. We really should add first-class support for this 649 // at some point. 
650 // @todo: Probably should support having no events 651 if (result != MA_HIT) { 652 DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); 653 DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", 654 inst->seqNum); 655 656 lastDcacheStall = curTick; 657 658 _status = DcacheMissStall; 659 660 } else { 661 DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", 662 inst->seqNum); 663 664 DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); 665 } 666 } else { 667 fatal("Must use D-cache with new memory system"); 668 } 669#endif 670 671 return fault; 672} 673 674template <class Impl> 675template <class T> 676Fault 677LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx) 678{ 679 assert(storeQueue[store_idx].inst); 680 681 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" 682 " | storeHead:%i [sn:%i]\n", 683 store_idx, req->paddr, data, storeHead, 684 storeQueue[store_idx].inst->seqNum); 685/* 686 if (req->flags & LOCKED) { 687 if (req->flags & UNCACHEABLE) { 688 req->result = 2; 689 } else { 690 req->result = 1; 691 } 692 } 693*/ 694 storeQueue[store_idx].req = req; 695 storeQueue[store_idx].size = sizeof(T); 696 storeQueue[store_idx].data = data; 697 698 // This function only writes the data to the store queue, so no fault 699 // can happen here. 700 return NoFault; 701} 702 703#endif // __CPU_O3_LSQ_UNIT_HH__ 704