// lsq_unit.hh revision 2669
/*
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29#ifndef __CPU_O3_LSQ_UNIT_HH__ 30#define __CPU_O3_LSQ_UNIT_HH__ 31 32#include <algorithm> 33#include <map> 34#include <queue> 35 36#include "arch/faults.hh" 37#include "config/full_system.hh" 38#include "base/hashmap.hh" 39#include "cpu/inst_seq.hh" 40#include "mem/packet.hh" 41#include "mem/port.hh" 42//#include "mem/page_table.hh" 43//#include "sim/debug.hh" 44//#include "sim/sim_object.hh" 45 46/** 47 * Class that implements the actual LQ and SQ for each specific 48 * thread. Both are circular queues; load entries are freed upon 49 * committing, while store entries are freed once they writeback. The 50 * LSQUnit tracks if there are memory ordering violations, and also 51 * detects partial load to store forwarding cases (a store only has 52 * part of a load's data) that requires the load to wait until the 53 * store writes back. In the former case it holds onto the instruction 54 * until the dependence unit looks at it, and in the latter it stalls 55 * the LSQ until the store writes back. At that point the load is 56 * replayed. 57 */ 58template <class Impl> 59class LSQUnit { 60 protected: 61 typedef TheISA::IntReg IntReg; 62 public: 63 typedef typename Impl::Params Params; 64 typedef typename Impl::FullCPU FullCPU; 65 typedef typename Impl::DynInstPtr DynInstPtr; 66 typedef typename Impl::CPUPol::IEW IEW; 67 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 68 69 public: 70 /** Constructs an LSQ unit. init() must be called prior to use. */ 71 LSQUnit(); 72 73 /** Initializes the LSQ unit with the specified number of entries. */ 74 void init(Params *params, unsigned maxLQEntries, 75 unsigned maxSQEntries, unsigned id); 76 77 /** Returns the name of the LSQ unit. */ 78 std::string name() const; 79 80 /** Sets the CPU pointer. */ 81 void setCPU(FullCPU *cpu_ptr); 82 83 /** Sets the IEW stage pointer. */ 84 void setIEW(IEW *iew_ptr) 85 { iewStage = iew_ptr; } 86 87 /** Sets the page table pointer. 
*/ 88// void setPageTable(PageTable *pt_ptr); 89 90 void switchOut(); 91 92 void takeOverFrom(); 93 94 bool isSwitchedOut() { return switchedOut; } 95 96 /** Ticks the LSQ unit, which in this case only resets the number of 97 * used cache ports. 98 * @todo: Move the number of used ports up to the LSQ level so it can 99 * be shared by all LSQ units. 100 */ 101 void tick() { usedPorts = 0; } 102 103 /** Inserts an instruction. */ 104 void insert(DynInstPtr &inst); 105 /** Inserts a load instruction. */ 106 void insertLoad(DynInstPtr &load_inst); 107 /** Inserts a store instruction. */ 108 void insertStore(DynInstPtr &store_inst); 109 110 /** Executes a load instruction. */ 111 Fault executeLoad(DynInstPtr &inst); 112 113 Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } 114 /** Executes a store instruction. */ 115 Fault executeStore(DynInstPtr &inst); 116 117 /** Commits the head load. */ 118 void commitLoad(); 119 /** Commits loads older than a specific sequence number. */ 120 void commitLoads(InstSeqNum &youngest_inst); 121 122 /** Commits stores older than a specific sequence number. */ 123 void commitStores(InstSeqNum &youngest_inst); 124 125 /** Writes back stores. */ 126 void writebackStores(); 127 128 void completeDataAccess(PacketPtr pkt); 129 130 void completeStoreDataAccess(DynInstPtr &inst); 131 132 // @todo: Include stats in the LSQ unit. 133 //void regStats(); 134 135 /** Clears all the entries in the LQ. */ 136 void clearLQ(); 137 138 /** Clears all the entries in the SQ. */ 139 void clearSQ(); 140 141 /** Resizes the LQ to a given size. */ 142 void resizeLQ(unsigned size); 143 144 /** Resizes the SQ to a given size. */ 145 void resizeSQ(unsigned size); 146 147 /** Squashes all instructions younger than a specific sequence number. */ 148 void squash(const InstSeqNum &squashed_num); 149 150 /** Returns if there is a memory ordering violation. Value is reset upon 151 * call to getMemDepViolator(). 
152 */ 153 bool violation() { return memDepViolator; } 154 155 /** Returns the memory ordering violator. */ 156 DynInstPtr getMemDepViolator(); 157 158 /** Returns if a load became blocked due to the memory system. */ 159 bool loadBlocked() 160 { return isLoadBlocked; } 161 162 void clearLoadBlocked() 163 { isLoadBlocked = false; } 164 165 bool isLoadBlockedHandled() 166 { return loadBlockedHandled; } 167 168 void setLoadBlockedHandled() 169 { loadBlockedHandled = true; } 170 171 /** Returns the number of free entries (min of free LQ and SQ entries). */ 172 unsigned numFreeEntries(); 173 174 /** Returns the number of loads ready to execute. */ 175 int numLoadsReady(); 176 177 /** Returns the number of loads in the LQ. */ 178 int numLoads() { return loads; } 179 180 /** Returns the number of stores in the SQ. */ 181 int numStores() { return stores; } 182 183 /** Returns if either the LQ or SQ is full. */ 184 bool isFull() { return lqFull() || sqFull(); } 185 186 /** Returns if the LQ is full. */ 187 bool lqFull() { return loads >= (LQEntries - 1); } 188 189 /** Returns if the SQ is full. */ 190 bool sqFull() { return stores >= (SQEntries - 1); } 191 192 /** Returns the number of instructions in the LSQ. */ 193 unsigned getCount() { return loads + stores; } 194 195 /** Returns if there are any stores to writeback. */ 196 bool hasStoresToWB() { return storesToWB; } 197 198 /** Returns the number of stores to writeback. */ 199 int numStoresToWB() { return storesToWB; } 200 201 /** Returns if the LSQ unit will writeback on this cycle. */ 202 bool willWB() { return storeQueue[storeWBIdx].canWB && 203 !storeQueue[storeWBIdx].completed/* && 204 !dcacheInterface->isBlocked()*/; } 205 206 private: 207 /** Completes the store at the specified index. */ 208 void completeStore(int store_idx); 209 210 /** Increments the given store index (circular queue). */ 211 inline void incrStIdx(int &store_idx); 212 /** Decrements the given store index (circular queue). 
*/ 213 inline void decrStIdx(int &store_idx); 214 /** Increments the given load index (circular queue). */ 215 inline void incrLdIdx(int &load_idx); 216 /** Decrements the given load index (circular queue). */ 217 inline void decrLdIdx(int &load_idx); 218 219 public: 220 /** Debugging function to dump instructions in the LSQ. */ 221 void dumpInsts(); 222 223 private: 224 /** Pointer to the CPU. */ 225 FullCPU *cpu; 226 227 /** Pointer to the IEW stage. */ 228 IEW *iewStage; 229 230 MemObject *mem; 231 232 class DcachePort : public Port 233 { 234 protected: 235 FullCPU *cpu; 236 LSQUnit *lsq; 237 238 public: 239 DcachePort(FullCPU *_cpu, LSQUnit *_lsq) 240 : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) 241 { } 242 243 protected: 244 virtual Tick recvAtomic(PacketPtr pkt); 245 246 virtual void recvFunctional(PacketPtr pkt); 247 248 virtual void recvStatusChange(Status status); 249 250 virtual void getDeviceAddressRanges(AddrRangeList &resp, 251 AddrRangeList &snoop) 252 { resp.clear(); snoop.clear(); } 253 254 virtual bool recvTiming(PacketPtr pkt); 255 256 virtual void recvRetry(); 257 }; 258 259 /** Pointer to the D-cache. */ 260 DcachePort *dcachePort; 261 262 /** Pointer to the page table. */ 263// PageTable *pTable; 264 265 public: 266 struct SQEntry { 267 /** Constructs an empty store queue entry. */ 268 SQEntry() 269 : inst(NULL), req(NULL), size(0), data(0), 270 canWB(0), committed(0), completed(0) 271 { } 272 273 /** Constructs a store queue entry for a given instruction. */ 274 SQEntry(DynInstPtr &_inst) 275 : inst(_inst), req(NULL), size(0), data(0), 276 canWB(0), committed(0), completed(0) 277 { } 278 279 /** The store instruction. */ 280 DynInstPtr inst; 281 /** The request for the store. */ 282 RequestPtr req; 283 /** The size of the store. */ 284 int size; 285 /** The store data. */ 286 IntReg data; 287 /** Whether or not the store can writeback. */ 288 bool canWB; 289 /** Whether or not the store is committed. 
*/ 290 bool committed; 291 /** Whether or not the store is completed. */ 292 bool completed; 293 }; 294 295 private: 296 /** The LSQUnit thread id. */ 297 unsigned lsqID; 298 299 /** The store queue. */ 300 std::vector<SQEntry> storeQueue; 301 302 /** The load queue. */ 303 std::vector<DynInstPtr> loadQueue; 304 305 /** The number of LQ entries, plus a sentinel entry (circular queue). 306 * @todo: Consider having var that records the true number of LQ entries. 307 */ 308 unsigned LQEntries; 309 /** The number of SQ entries, plus a sentinel entry (circular queue). 310 * @todo: Consider having var that records the true number of SQ entries. 311 */ 312 unsigned SQEntries; 313 314 /** The number of load instructions in the LQ. */ 315 int loads; 316 /** The number of store instructions in the SQ. */ 317 int stores; 318 /** The number of store instructions in the SQ waiting to writeback. */ 319 int storesToWB; 320 321 /** The index of the head instruction in the LQ. */ 322 int loadHead; 323 /** The index of the tail instruction in the LQ. */ 324 int loadTail; 325 326 /** The index of the head instruction in the SQ. */ 327 int storeHead; 328 /** The index of the first instruction that may be ready to be 329 * written back, and has not yet been written back. 330 */ 331 int storeWBIdx; 332 /** The index of the tail instruction in the SQ. */ 333 int storeTail; 334 335 /// @todo Consider moving to a more advanced model with write vs read ports 336 /** The number of cache ports available each cycle. */ 337 int cachePorts; 338 339 /** The number of used cache ports in this cycle. */ 340 int usedPorts; 341 342 bool switchedOut; 343 344 //list<InstSeqNum> mshrSeqNums; 345 346 /** Wire to read information from the issue stage time queue. */ 347 typename TimeBuffer<IssueStruct>::wire fromIssue; 348 349 /** Whether or not the LSQ is stalled. */ 350 bool stalled; 351 /** The store that causes the stall due to partial store to load 352 * forwarding. 
353 */ 354 InstSeqNum stallingStoreIsn; 355 /** The index of the above store. */ 356 int stallingLoadIdx; 357 358 /** Whether or not a load is blocked due to the memory system. */ 359 bool isLoadBlocked; 360 361 bool loadBlockedHandled; 362 363 InstSeqNum blockedLoadSeqNum; 364 365 /** The oldest load that caused a memory ordering violation. */ 366 DynInstPtr memDepViolator; 367 368 // Will also need how many read/write ports the Dcache has. Or keep track 369 // of that in stage that is one level up, and only call executeLoad/Store 370 // the appropriate number of times. 371/* 372 // total number of loads forwaded from LSQ stores 373 Stats::Vector<> lsq_forw_loads; 374 375 // total number of loads ignored due to invalid addresses 376 Stats::Vector<> inv_addr_loads; 377 378 // total number of software prefetches ignored due to invalid addresses 379 Stats::Vector<> inv_addr_swpfs; 380 381 // total non-speculative bogus addresses seen (debug var) 382 Counter sim_invalid_addrs; 383 Stats::Vector<> fu_busy; //cumulative fu busy 384 385 // ready loads blocked due to memory disambiguation 386 Stats::Vector<> lsq_blocked_loads; 387 388 Stats::Scalar<> lsqInversion; 389*/ 390 public: 391 /** Executes the load at the given index. */ 392 template <class T> 393 Fault read(Request *req, T &data, int load_idx); 394 395 /** Executes the store at the given index. */ 396 template <class T> 397 Fault write(Request *req, T &data, int store_idx); 398 399 /** Returns the index of the head load instruction. */ 400 int getLoadHead() { return loadHead; } 401 /** Returns the sequence number of the head load instruction. */ 402 InstSeqNum getLoadHeadSeqNum() 403 { 404 if (loadQueue[loadHead]) { 405 return loadQueue[loadHead]->seqNum; 406 } else { 407 return 0; 408 } 409 410 } 411 412 /** Returns the index of the head store instruction. */ 413 int getStoreHead() { return storeHead; } 414 /** Returns the sequence number of the head store instruction. 
*/ 415 InstSeqNum getStoreHeadSeqNum() 416 { 417 if (storeQueue[storeHead].inst) { 418 return storeQueue[storeHead].inst->seqNum; 419 } else { 420 return 0; 421 } 422 423 } 424 425 /** Returns whether or not the LSQ unit is stalled. */ 426 bool isStalled() { return stalled; } 427}; 428 429template <class Impl> 430template <class T> 431Fault 432LSQUnit<Impl>::read(Request *req, T &data, int load_idx) 433{ 434 DynInstPtr load_inst = loadQueue[load_idx]; 435 436 assert(load_inst); 437 438 assert(!load_inst->isExecuted()); 439 440 // Make sure this isn't an uncacheable access 441 // A bit of a hackish way to get uncached accesses to work only if they're 442 // at the head of the LSQ and are ready to commit (at the head of the ROB 443 // too). 444 if (req->getFlags() & UNCACHEABLE && 445 (load_idx != loadHead || !load_inst->reachedCommit)) { 446 iewStage->rescheduleMemInst(load_inst); 447 return TheISA::genMachineCheckFault(); 448 } 449 450 // Check the SQ for any previous stores that might lead to forwarding 451 int store_idx = load_inst->sqIdx; 452 453 int store_size = 0; 454 455 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " 456 "storeHead: %i addr: %#x\n", 457 load_idx, store_idx, storeHead, req->getPaddr()); 458 459#if 0 460 if (req->getFlags() & LOCKED) { 461 cpu->lockAddr = req->getPaddr(); 462 cpu->lockFlag = true; 463 } 464#endif 465 466 while (store_idx != -1) { 467 // End once we've reached the top of the LSQ 468 if (store_idx == storeWBIdx) { 469 break; 470 } 471 472 // Move the index to one younger 473 if (--store_idx < 0) 474 store_idx += SQEntries; 475 476 assert(storeQueue[store_idx].inst); 477 478 store_size = storeQueue[store_idx].size; 479 480 if (store_size == 0) 481 continue; 482 483 // Check if the store data is within the lower and upper bounds of 484 // addresses that the request needs. 
485 bool store_has_lower_limit = 486 req->getVaddr() >= storeQueue[store_idx].inst->effAddr; 487 bool store_has_upper_limit = 488 (req->getVaddr() + req->getSize()) <= 489 (storeQueue[store_idx].inst->effAddr + store_size); 490 bool lower_load_has_store_part = 491 req->getVaddr() < (storeQueue[store_idx].inst->effAddr + 492 store_size); 493 bool upper_load_has_store_part = 494 (req->getVaddr() + req->getSize()) > 495 storeQueue[store_idx].inst->effAddr; 496 497 // If the store's data has all of the data needed, we can forward. 498 if (store_has_lower_limit && store_has_upper_limit) { 499 // Get shift amount for offset into the store's data. 500 int shift_amt = req->getVaddr() & (store_size - 1); 501 // @todo: Magic number, assumes byte addressing 502 shift_amt = shift_amt << 3; 503 504 // Cast this to type T? 505 data = storeQueue[store_idx].data >> shift_amt; 506 507 assert(!load_inst->memData); 508 load_inst->memData = new uint8_t[64]; 509 510 memcpy(load_inst->memData, &data, req->getSize()); 511 512 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " 513 "addr %#x, data %#x\n", 514 store_idx, req->getVaddr(), *(load_inst->memData)); 515/* 516 typename LdWritebackEvent *wb = 517 new typename LdWritebackEvent(load_inst, 518 iewStage); 519 520 // We'll say this has a 1 cycle load-store forwarding latency 521 // for now. 522 // @todo: Need to make this a parameter. 523 wb->schedule(curTick); 524*/ 525 // Should keep track of stat for forwarded data 526 return NoFault; 527 } else if ((store_has_lower_limit && lower_load_has_store_part) || 528 (store_has_upper_limit && upper_load_has_store_part) || 529 (lower_load_has_store_part && upper_load_has_store_part)) { 530 // This is the partial store-load forwarding case where a store 531 // has only part of the load's data. 532 533 // If it's already been written back, then don't worry about 534 // stalling on it. 
535 if (storeQueue[store_idx].completed) { 536 continue; 537 } 538 539 // Must stall load and force it to retry, so long as it's the oldest 540 // load that needs to do so. 541 if (!stalled || 542 (stalled && 543 load_inst->seqNum < 544 loadQueue[stallingLoadIdx]->seqNum)) { 545 stalled = true; 546 stallingStoreIsn = storeQueue[store_idx].inst->seqNum; 547 stallingLoadIdx = load_idx; 548 } 549 550 // Tell IQ/mem dep unit that this instruction will need to be 551 // rescheduled eventually 552 iewStage->rescheduleMemInst(load_inst); 553 554 // Do not generate a writeback event as this instruction is not 555 // complete. 556 DPRINTF(LSQUnit, "Load-store forwarding mis-match. " 557 "Store idx %i to load addr %#x\n", 558 store_idx, req->getVaddr()); 559 560 return NoFault; 561 } 562 } 563 564 // If there's no forwarding case, then go access memory 565 DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", 566 load_inst->seqNum, load_inst->readPC()); 567 568 assert(!load_inst->memData); 569 load_inst->memData = new uint8_t[64]; 570 571 ++usedPorts; 572 573 DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", 574 load_inst->readPC()); 575 576 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); 577 data_pkt->dataStatic(load_inst->memData); 578 579 // if we have a cache, do cache access too 580 if (!dcachePort->sendTiming(data_pkt)) { 581 // There's an older load that's already going to squash. 582 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) 583 return NoFault; 584 585 // Record that the load was blocked due to memory. This 586 // load will squash all instructions after it, be 587 // refetched, and re-executed. 588 isLoadBlocked = true; 589 loadBlockedHandled = false; 590 blockedLoadSeqNum = load_inst->seqNum; 591 // No fault occurred, even though the interface is blocked. 
592 return NoFault; 593 } 594 595 if (data_pkt->result != Packet::Success) { 596 DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); 597 DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", 598 load_inst->seqNum); 599 } else { 600 DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); 601 DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", 602 load_inst->seqNum); 603 } 604 605 return NoFault; 606} 607 608template <class Impl> 609template <class T> 610Fault 611LSQUnit<Impl>::write(Request *req, T &data, int store_idx) 612{ 613 assert(storeQueue[store_idx].inst); 614 615 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" 616 " | storeHead:%i [sn:%i]\n", 617 store_idx, req->getPaddr(), data, storeHead, 618 storeQueue[store_idx].inst->seqNum); 619 620 storeQueue[store_idx].req = req; 621 storeQueue[store_idx].size = sizeof(T); 622 storeQueue[store_idx].data = data; 623 624 // This function only writes the data to the store queue, so no fault 625 // can happen here. 626 return NoFault; 627} 628 629#endif // __CPU_O3_LSQ_UNIT_HH__ 630