lsq_unit.hh revision 2348
1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#ifndef __CPU_O3_LSQ_UNIT_HH__
#define __CPU_O3_LSQ_UNIT_HH__

#include <algorithm>
#include <map>
#include <queue>

#include "arch/faults.hh"
#include "config/full_system.hh"
#include "base/hashmap.hh"
#include "cpu/inst_seq.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
//#include "sim/debug.hh"
//#include "sim/sim_object.hh"

/**
 * Class that implements the actual LQ and SQ for each specific
 * thread.  Both are circular queues; load entries are freed upon
 * committing, while store entries are freed once they writeback. The
 * LSQUnit tracks if there are memory ordering violations, and also
 * detects partial load to store forwarding cases (a store only has
 * part of a load's data) that requires the load to wait until the
 * store writes back. In the former case it holds onto the instruction
 * until the dependence unit looks at it, and in the latter it stalls
 * the LSQ until the store writes back. At that point the load is
 * replayed.
 */
template <class Impl>
class LSQUnit {
  protected:
    typedef TheISA::IntReg IntReg;
  public:
    typedef typename Impl::Params Params;
    typedef typename Impl::FullCPU FullCPU;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::CPUPol::IEW IEW;
    typedef typename Impl::CPUPol::IssueStruct IssueStruct;

  private:
    /** Event scheduled when a store finishes its cache access; marks
     * the store-queue entry complete and (for store conditionals)
     * fires the associated writeback event.
     */
    class StoreCompletionEvent : public Event {
      public:
        /** Constructs a store completion event for the SQ entry at
         * store_idx, owned by lsq_ptr.  wb_event may be NULL.
         */
        StoreCompletionEvent(int store_idx, Event *wb_event, LSQUnit *lsq_ptr);

        /** Processes the store completion event. */
        void process();

        /** Returns the description of this event. */
        const char *description();

        /** The writeback event for the store. Needed for store
         * conditionals.
         */
        Event *wbEvent;

      private:
        /** The store index of the store being written back. */
        int storeIdx;

      private:
        /** The pointer to the LSQ unit that issued the store. */
        LSQUnit<Impl> *lsqPtr;
    };

  public:
    /** Constructs an LSQ unit. init() must be called prior to use. */
    LSQUnit();

    /** Initializes the LSQ unit with the specified number of entries. */
    void init(Params *params, unsigned maxLQEntries,
              unsigned maxSQEntries, unsigned id);

    /** Returns the name of the LSQ unit. */
    std::string name() const;

    /** Sets the CPU pointer. */
    void setCPU(FullCPU *cpu_ptr)
    { cpu = cpu_ptr; }

    /** Sets the IEW stage pointer. */
    void setIEW(IEW *iew_ptr)
    { iewStage = iew_ptr; }

    /** Sets the page table pointer. */
//  void setPageTable(PageTable *pt_ptr);

    /** Switches out LSQ unit. */
    void switchOut();

    /** Takes over from another CPU's thread. */
    void takeOverFrom();

    /** Returns if the LSQ is switched out. */
    bool isSwitchedOut() { return switchedOut; }

    /** Ticks the LSQ unit, which in this case only resets the number of
     * used cache ports.
     * @todo: Move the number of used ports up to the LSQ level so it can
     * be shared by all LSQ units.
     */
    void tick() { usedPorts = 0; }

    /** Inserts an instruction. */
    void insert(DynInstPtr &inst);
    /** Inserts a load instruction. */
    void insertLoad(DynInstPtr &load_inst);
    /** Inserts a store instruction. */
    void insertStore(DynInstPtr &store_inst);

    /** Executes a load instruction. */
    Fault executeLoad(DynInstPtr &inst);

    /** Execute-by-index variant; not supported by this unit. */
    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
    /** Executes a store instruction. */
    Fault executeStore(DynInstPtr &inst);

    /** Commits the head load. */
    void commitLoad();
    /** Commits loads older than a specific sequence number. */
    void commitLoads(InstSeqNum &youngest_inst);

    /** Commits stores older than a specific sequence number. */
    void commitStores(InstSeqNum &youngest_inst);

    /** Writes back stores. */
    void writebackStores();

    // @todo: Include stats in the LSQ unit.
    //void regStats();

    /** Clears all the entries in the LQ. */
    void clearLQ();

    /** Clears all the entries in the SQ. */
    void clearSQ();

    /** Resizes the LQ to a given size. */
    void resizeLQ(unsigned size);

    /** Resizes the SQ to a given size. */
    void resizeSQ(unsigned size);

    /** Squashes all instructions younger than a specific sequence number. */
    void squash(const InstSeqNum &squashed_num);

    /** Returns if there is a memory ordering violation. Value is reset upon
     * call to getMemDepViolator().  (True whenever memDepViolator holds a
     * non-null instruction pointer.)
     */
    bool violation() { return memDepViolator; }

    /** Returns the memory ordering violator. */
    DynInstPtr getMemDepViolator();

    /** Returns if a load became blocked due to the memory system. */
    bool loadBlocked()
    { return isLoadBlocked; }

    /** Clears the signal that a load became blocked. */
    void clearLoadBlocked()
    { isLoadBlocked = false; }

    /** Returns if the blocked load was handled. */
    bool isLoadBlockedHandled()
    { return loadBlockedHandled; }

    /** Records the blocked load as being handled. */
    void setLoadBlockedHandled()
    { loadBlockedHandled = true; }

    /** Returns the number of free entries (min of free LQ and SQ entries). */
    unsigned numFreeEntries();

    /** Returns the number of loads ready to execute. */
    int numLoadsReady();

    /** Returns the number of loads in the LQ. */
    int numLoads() { return loads; }

    /** Returns the number of stores in the SQ. */
    int numStores() { return stores; }

    /** Returns if either the LQ or SQ is full. */
    bool isFull() { return lqFull() || sqFull(); }

    /** Returns if the LQ is full.  One entry is reserved as the circular
     * queue's sentinel, hence the -1.
     */
    bool lqFull() { return loads >= (LQEntries - 1); }

    /** Returns if the SQ is full.  One entry is reserved as the circular
     * queue's sentinel, hence the -1.
     */
    bool sqFull() { return stores >= (SQEntries - 1); }

    /** Returns the number of instructions in the LSQ. */
    unsigned getCount() { return loads + stores; }

    /** Returns if there are any stores to writeback. */
    bool hasStoresToWB() { return storesToWB; }

    /** Returns the number of stores to writeback. */
    int numStoresToWB() { return storesToWB; }

    /** Returns if the LSQ unit will writeback on this cycle: the entry at
     * storeWBIdx is ready but not yet completed, and the D-cache can
     * accept the access.
     */
    bool willWB() { return storeQueue[storeWBIdx].canWB &&
                        !storeQueue[storeWBIdx].completed &&
                        !dcacheInterface->isBlocked(); }

  private:
    /** Completes the store at the specified index. */
    void completeStore(int store_idx);

    /** Increments the given store index (circular queue). */
    inline void incrStIdx(int &store_idx);
    /** Decrements the given store index (circular queue). */
    inline void decrStIdx(int &store_idx);
    /** Increments the given load index (circular queue). */
    inline void incrLdIdx(int &load_idx);
    /** Decrements the given load index (circular queue). */
    inline void decrLdIdx(int &load_idx);

  public:
    /** Debugging function to dump instructions in the LSQ. */
    void dumpInsts();

  private:
    /** Pointer to the CPU. */
    FullCPU *cpu;

    /** Pointer to the IEW stage. */
    IEW *iewStage;

    /** Pointer to the D-cache. */
    MemInterface *dcacheInterface;

    /** Pointer to the page table. */
//  PageTable *pTable;

  public:
    /** An entry in the store queue: the store instruction plus the
     * request, size, and data it will write, and writeback progress flags.
     */
    struct SQEntry {
        /** Constructs an empty store queue entry. */
        SQEntry()
            : inst(NULL), req(NULL), size(0), data(0),
              canWB(0), committed(0), completed(0)
        { }

        /** Constructs a store queue entry for a given instruction. */
        SQEntry(DynInstPtr &_inst)
            : inst(_inst), req(NULL), size(0), data(0),
              canWB(0), committed(0), completed(0)
        { }

        /** The store instruction. */
        DynInstPtr inst;
        /** The memory request for the store. */
        MemReqPtr req;
        /** The size of the store. */
        int size;
        /** The store data. */
        IntReg data;
        /** Whether or not the store can writeback. */
        bool canWB;
        /** Whether or not the store is committed. */
        bool committed;
        /** Whether or not the store is completed. */
        bool completed;
    };

  private:
    /** The LSQUnit thread id. */
    unsigned lsqID;

    /** The store queue. */
    std::vector<SQEntry> storeQueue;

    /** The load queue. */
    std::vector<DynInstPtr> loadQueue;

    /** The number of LQ entries, plus a sentinel entry (circular queue).
     * @todo: Consider having var that records the true number of LQ entries.
     */
    unsigned LQEntries;
    /** The number of SQ entries, plus a sentinel entry (circular queue).
     * @todo: Consider having var that records the true number of SQ entries.
     */
    unsigned SQEntries;

    /** The number of load instructions in the LQ. */
    int loads;
    /** The number of store instructions in the SQ. */
    int stores;
    /** The number of store instructions in the SQ waiting to writeback. */
    int storesToWB;

    /** The index of the head instruction in the LQ. */
    int loadHead;
    /** The index of the tail instruction in the LQ. */
    int loadTail;

    /** The index of the head instruction in the SQ. */
    int storeHead;
    /** The index of the first instruction that may be ready to be
     * written back, and has not yet been written back.
     */
    int storeWBIdx;
    /** The index of the tail instruction in the SQ. */
    int storeTail;

    /// @todo Consider moving to a more advanced model with write vs read ports
    /** The number of cache ports available each cycle. */
    int cachePorts;

    /** The number of used cache ports in this cycle. */
    int usedPorts;

    /** Is the LSQ switched out. */
    bool switchedOut;

    //list<InstSeqNum> mshrSeqNums;

    /** Wire to read information from the issue stage time queue. */
    typename TimeBuffer<IssueStruct>::wire fromIssue;

    /** Whether or not the LSQ is stalled. */
    bool stalled;
    /** The store that causes the stall due to partial store to load
     * forwarding.
     */
    InstSeqNum stallingStoreIsn;
    /** The index of the load stalled on the above store. */
    int stallingLoadIdx;

    /** Whether or not a load is blocked due to the memory system. */
    bool isLoadBlocked;

    /** Has the blocked load been handled. */
    bool loadBlockedHandled;

    /** The sequence number of the blocked load. */
    InstSeqNum blockedLoadSeqNum;

    /** The oldest load that caused a memory ordering violation. */
    DynInstPtr memDepViolator;

    // Will also need how many read/write ports the Dcache has.  Or keep track
    // of that in stage that is one level up, and only call executeLoad/Store
    // the appropriate number of times.
/*
    // total number of loads forwaded from LSQ stores
    Stats::Vector<> lsq_forw_loads;

    // total number of loads ignored due to invalid addresses
    Stats::Vector<> inv_addr_loads;

    // total number of software prefetches ignored due to invalid addresses
    Stats::Vector<> inv_addr_swpfs;

    // total non-speculative bogus addresses seen (debug var)
    Counter sim_invalid_addrs;
    Stats::Vector<> fu_busy;  //cumulative fu busy

    // ready loads blocked due to memory disambiguation
    Stats::Vector<> lsq_blocked_loads;

    Stats::Scalar<> lsqInversion;
*/
  public:
    /** Executes the load at the given index. */
    template <class T>
    Fault read(MemReqPtr &req, T &data, int load_idx);

    /** Executes the store at the given index. */
    template <class T>
    Fault write(MemReqPtr &req, T &data, int store_idx);

    /** Returns the index of the head load instruction. */
    int getLoadHead() { return loadHead; }
    /** Returns the sequence number of the head load instruction, or 0 if
     * the LQ head entry is empty.
     */
    InstSeqNum getLoadHeadSeqNum()
    {
        if (loadQueue[loadHead]) {
            return loadQueue[loadHead]->seqNum;
        } else {
            return 0;
        }

    }

    /** Returns the index of the head store instruction. */
    int getStoreHead() { return storeHead; }
    /** Returns the sequence number of the head store instruction, or 0 if
     * the SQ head entry is empty.
     */
    InstSeqNum getStoreHeadSeqNum()
    {
        if (storeQueue[storeHead].inst) {
            return storeQueue[storeHead].inst->seqNum;
        } else {
            return 0;
        }

    }

    /** Returns whether or not the LSQ unit is stalled. */
    bool isStalled() { return stalled; }
};

/** Executes the load at load_idx: checks the store queue for full or
 * partial store-to-load forwarding, and otherwise performs the memory
 * access (functional read, plus a timing D-cache access when a cache
 * interface is present).
 * @return NoFault on forward/success/blocked-load; a machine check fault
 * for an uncacheable access that is not yet at the head of the LQ;
 * otherwise the fault from the functional read.
 */
template <class Impl>
template <class T>
Fault
LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
{
    assert(loadQueue[load_idx]);

    assert(!loadQueue[load_idx]->isExecuted());

    // Make sure this isn't an uncacheable access
    // A bit of a hackish way to get uncached accesses to work only if they're
    // at the head of the LSQ and are ready to commit (at the head of the ROB
    // too).  Otherwise reschedule the load and fault so it is retried later.
    if (req->flags & UNCACHEABLE &&
        (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
        iewStage->rescheduleMemInst(loadQueue[load_idx]);
        return TheISA::genMachineCheckFault();
    }

    // Check the SQ for any previous stores that might lead to forwarding.
    // sqIdx is the SQ position recorded when this load was inserted.
    int store_idx = loadQueue[load_idx]->sqIdx;

    int store_size = 0;

    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
            "storeHead: %i addr: %#x\n",
            load_idx, store_idx, storeHead, req->paddr);

#if 0
    if (req->flags & LOCKED) {
        cpu->lockAddr = req->paddr;
        cpu->lockFlag = true;
    }
#endif
    req->cmd = Read;
    assert(!req->completionEvent);
    req->completionEvent = NULL;
    req->time = curTick;

    // Walk the SQ from the load's recorded position back toward
    // storeWBIdx, comparing address ranges against each valid entry.
    while (store_idx != -1) {
        // End once we've reached the top of the LSQ
        if (store_idx == storeWBIdx) {
            break;
        }

        // Step the index back one entry, wrapping around the circular queue
        if (--store_idx < 0)
            store_idx += SQEntries;

        assert(storeQueue[store_idx].inst);

        store_size = storeQueue[store_idx].size;

        // size == 0 means the store has not executed its write() yet
        if (store_size == 0)
            continue;

        // Check if the store data is within the lower and upper bounds of
        // addresses that the request needs.
        bool store_has_lower_limit =
            req->vaddr >= storeQueue[store_idx].inst->effAddr;
        bool store_has_upper_limit =
            (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr +
                                         store_size);
        bool lower_load_has_store_part =
            req->vaddr < (storeQueue[store_idx].inst->effAddr +
                          store_size);
        bool upper_load_has_store_part =
            (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr;

        // If the store's data has all of the data needed, we can forward.
        if (store_has_lower_limit && store_has_upper_limit) {
            // Get shift amount for offset into the store's data.
            int shift_amt = req->vaddr & (store_size - 1);
            // @todo: Magic number, assumes byte addressing
            shift_amt = shift_amt << 3;

            // Cast this to type T?
            data = storeQueue[store_idx].data >> shift_amt;

            assert(!req->data);
            req->data = new uint8_t[64];

            memcpy(req->data, &data, req->size);

            DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
                    "addr %#x, data %#x\n",
                    store_idx, req->vaddr, *(req->data));

            typename IEW::LdWritebackEvent *wb =
                new typename IEW::LdWritebackEvent(loadQueue[load_idx],
                                                   iewStage);

            // We'll say this has a 1 cycle load-store forwarding latency
            // for now.
            // @todo: Need to make this a parameter.
            wb->schedule(curTick);

            // Should keep track of stat for forwarded data
            return NoFault;
        } else if ((store_has_lower_limit && lower_load_has_store_part) ||
                   (store_has_upper_limit && upper_load_has_store_part) ||
                   (lower_load_has_store_part && upper_load_has_store_part)) {
            // This is the partial store-load forwarding case where a store
            // has only part of the load's data.

            // If it's already been written back, then don't worry about
            // stalling on it.
            if (storeQueue[store_idx].completed) {
                continue;
            }

            // Must stall load and force it to retry, so long as it's the oldest
            // load that needs to do so.
            if (!stalled ||
                (stalled &&
                 loadQueue[load_idx]->seqNum <
                 loadQueue[stallingLoadIdx]->seqNum)) {
                stalled = true;
                stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
                stallingLoadIdx = load_idx;
            }

            // Tell IQ/mem dep unit that this instruction will need to be
            // rescheduled eventually
            iewStage->rescheduleMemInst(loadQueue[load_idx]);

            // Do not generate a writeback event as this instruction is not
            // complete.
            DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
                    "Store idx %i to load addr %#x\n",
                    store_idx, req->vaddr);

            return NoFault;
        }
    }

    // If there's no forwarding case, then go access memory
    DynInstPtr inst = loadQueue[load_idx];

    DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
            loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC());

    assert(!req->data);
    req->data = new uint8_t[64];
    Fault fault = cpu->read(req, data);
    memcpy(req->data, &data, sizeof(T));

    ++usedPorts;

    // if we have a cache, do cache access too
    if (fault == NoFault && dcacheInterface) {
        if (dcacheInterface->isBlocked()) {
            // There's an older load that's already going to squash.
            if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
                return NoFault;

            // Record that the load was blocked due to memory.  This
            // load will squash all instructions after it, be
            // refetched, and re-executed.
            isLoadBlocked = true;
            loadBlockedHandled = false;
            blockedLoadSeqNum = inst->seqNum;
            // No fault occurred, even though the interface is blocked.
            return NoFault;
        }

        DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
                loadQueue[load_idx]->readPC());

        assert(!req->completionEvent);
        req->completionEvent =
            new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
        MemAccessResult result = dcacheInterface->access(req);

        assert(dcacheInterface->doEvents());

        if (result != MA_HIT) {
            DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
            DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
                    inst->seqNum);
        } else {
            DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
            DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
                    inst->seqNum);
        }
    }

    return fault;
}

/** Records the store's request, size, and data into its SQ entry at
 * store_idx.  The actual memory access happens later, during store
 * writeback, so no fault can be generated here.
 */
template <class Impl>
template <class T>
Fault
LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
{
    assert(storeQueue[store_idx].inst);

    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
            " | storeHead:%i [sn:%i]\n",
            store_idx, req->paddr, data, storeHead,
            storeQueue[store_idx].inst->seqNum);

    storeQueue[store_idx].req = req;
    storeQueue[store_idx].size = sizeof(T);
    storeQueue[store_idx].data = data;

    // This function only writes the data to the store queue, so no fault
    // can happen here.
    return NoFault;
}

#endif // __CPU_O3_LSQ_UNIT_HH__