lsq_unit.hh revision 2678
/*
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __CPU_O3_LSQ_UNIT_HH__
#define __CPU_O3_LSQ_UNIT_HH__

#include <algorithm>
#include <map>
#include <queue>

#include "arch/faults.hh"
#include "config/full_system.hh"
#include "base/hashmap.hh"
#include "cpu/inst_seq.hh"
#include "mem/packet.hh"
#include "mem/port.hh"
//#include "mem/page_table.hh"
//#include "sim/debug.hh"
//#include "sim/sim_object.hh"

/**
 * Class that implements the actual LQ and SQ for each specific
 * thread. Both are circular queues; load entries are freed upon
 * committing, while store entries are freed once they writeback. The
 * LSQUnit tracks memory ordering violations, and also detects partial
 * store to load forwarding cases (a store has only part of a load's
 * data) that require the load to wait until the store writes back. In
 * the former case it holds onto the instruction until the dependence
 * unit looks at it, and in the latter it stalls the LSQ until the
 * store writes back. At that point the load is replayed.
 */
template <class Impl>
class LSQUnit {
  protected:
    typedef TheISA::IntReg IntReg;
  public:
    typedef typename Impl::Params Params;
    typedef typename Impl::FullCPU FullCPU;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::CPUPol::IEW IEW;
    typedef typename Impl::CPUPol::IssueStruct IssueStruct;

  public:
    /** Constructs an LSQ unit. init() must be called prior to use. */
    LSQUnit();

    /** Initializes the LSQ unit with the specified number of entries. */
    void init(Params *params, unsigned maxLQEntries,
              unsigned maxSQEntries, unsigned id);

    /** Returns the name of the LSQ unit. */
    std::string name() const;

    /** Sets the CPU pointer. */
    void setCPU(FullCPU *cpu_ptr);

    /** Sets the IEW stage pointer. */
    void setIEW(IEW *iew_ptr)
    { iewStage = iew_ptr; }

    /** Sets the page table pointer. */
//    void setPageTable(PageTable *pt_ptr);

    /** Switches out LSQ unit. */
    void switchOut();

    /** Takes over from another CPU's thread. */
    void takeOverFrom();

    /** Returns if the LSQ is switched out. */
    bool isSwitchedOut() { return switchedOut; }

    /** Ticks the LSQ unit, which in this case only resets the number of
     * used cache ports.
     * @todo: Move the number of used ports up to the LSQ level so it can
     * be shared by all LSQ units.
     */
    void tick() { usedPorts = 0; }

    /** Inserts an instruction. */
    void insert(DynInstPtr &inst);
    /** Inserts a load instruction. */
    void insertLoad(DynInstPtr &load_inst);
    /** Inserts a store instruction. */
    void insertStore(DynInstPtr &store_inst);

    /** Executes a load instruction. */
    Fault executeLoad(DynInstPtr &inst);

    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
    /** Executes a store instruction. */
    Fault executeStore(DynInstPtr &inst);

    /** Commits the head load. */
    void commitLoad();
    /** Commits loads older than a specific sequence number. */
    void commitLoads(InstSeqNum &youngest_inst);

    /** Commits stores older than a specific sequence number. */
    void commitStores(InstSeqNum &youngest_inst);

    /** Writes back stores. */
    void writebackStores();

    /** Completes the data access for a memory instruction once the
     * memory system responds with the given packet.
     */
    void completeDataAccess(PacketPtr pkt);

    // @todo: Include stats in the LSQ unit.
    //void regStats();

    /** Clears all the entries in the LQ. */
    void clearLQ();

    /** Clears all the entries in the SQ. */
    void clearSQ();

    /** Resizes the LQ to a given size. */
    void resizeLQ(unsigned size);

    /** Resizes the SQ to a given size. */
    void resizeSQ(unsigned size);

    /** Squashes all instructions younger than a specific sequence number. */
    void squash(const InstSeqNum &squashed_num);

    /** Returns if there is a memory ordering violation. Value is reset upon
     * call to getMemDepViolator().
     */
    bool violation() { return memDepViolator; }

    /** Returns the memory ordering violator. */
    DynInstPtr getMemDepViolator();

    /** Returns if a load became blocked due to the memory system. */
    bool loadBlocked()
    { return isLoadBlocked; }

    /** Clears the signal that a load became blocked. */
    void clearLoadBlocked()
    { isLoadBlocked = false; }

    /** Returns if the blocked load was handled. */
    bool isLoadBlockedHandled()
    { return loadBlockedHandled; }

    /** Records the blocked load as being handled. */
    void setLoadBlockedHandled()
    { loadBlockedHandled = true; }

    /** Returns the number of free entries (min of free LQ and SQ entries). */
    unsigned numFreeEntries();

    /** Returns the number of loads ready to execute. */
    int numLoadsReady();

    /** Returns the number of loads in the LQ. */
    int numLoads() { return loads; }

    /** Returns the number of stores in the SQ. */
    int numStores() { return stores; }

    /** Returns if either the LQ or SQ is full. */
    bool isFull() { return lqFull() || sqFull(); }
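
    // Note: one slot in each circular queue is reserved as a sentinel to
    // distinguish a full queue from an empty one (see the LQEntries and
    // SQEntries comments below), hence the "- 1" in the checks that follow.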
    /** Returns if the LQ is full. */
    bool lqFull() { return loads >= (LQEntries - 1); }

    /** Returns if the SQ is full. */
    bool sqFull() { return stores >= (SQEntries - 1); }

    /** Returns the number of instructions in the LSQ. */
    unsigned getCount() { return loads + stores; }

    /** Returns if there are any stores to writeback. */
    bool hasStoresToWB() { return storesToWB; }

    /** Returns the number of stores to writeback. */
    int numStoresToWB() { return storesToWB; }

    /** Returns if the LSQ unit will writeback on this cycle. */
    bool willWB() { return storeQueue[storeWBIdx].canWB &&
                    !storeQueue[storeWBIdx].completed &&
                    !isStoreBlocked; }

  private:
    /** Writes back the instruction, using the data in the given packet. */
    void writeback(DynInstPtr &inst, PacketPtr pkt);

    /** Completes the store at the specified index. */
    void completeStore(int store_idx);

    /** Increments the given store index (circular queue). */
    inline void incrStIdx(int &store_idx);
    /** Decrements the given store index (circular queue). */
    inline void decrStIdx(int &store_idx);
    /** Increments the given load index (circular queue). */
    inline void incrLdIdx(int &load_idx);
    /** Decrements the given load index (circular queue). */
    inline void decrLdIdx(int &load_idx);

  public:
    /** Debugging function to dump instructions in the LSQ. */
    void dumpInsts();

  private:
    /** Pointer to the CPU. */
    FullCPU *cpu;

    /** Pointer to the IEW stage. */
    IEW *iewStage;

    /** Pointer to the memory object. */
    MemObject *mem;

    /** Port used by this LSQ unit to send its memory requests to the
     * data cache.
     */
    class DcachePort : public Port
    {
      protected:
        /** Pointer to the CPU. */
        FullCPU *cpu;
        /** Pointer to the owning LSQ unit. */
        LSQUnit *lsq;

      public:
        /** Constructs the D-cache port, naming it after its LSQ unit. */
        DcachePort(FullCPU *_cpu, LSQUnit *_lsq)
            : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq)
        { }

      protected:
        virtual Tick recvAtomic(PacketPtr pkt);

        virtual void recvFunctional(PacketPtr pkt);

        virtual void recvStatusChange(Status status);

        virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                            AddrRangeList &snoop)
        { resp.clear(); snoop.clear(); }

        virtual bool recvTiming(PacketPtr pkt);

        virtual void recvRetry();
    };

    /** Pointer to the D-cache. */
    DcachePort *dcachePort;
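
    /** Sender state attached to each packet this LSQ unit sends to memory.
     * It records the originating instruction, whether it was a load, and
     * its index in the LQ/SQ so the access can be matched back up when the
     * response returns; noWB marks responses whose data should not be
     * written back to the register file.
     */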
    class LSQSenderState : public Packet::SenderState
    {
      public:
        LSQSenderState()
            : noWB(false)
        { }

//      protected:
        DynInstPtr inst;
        bool isLoad;
        int idx;
        bool noWB;
    };

    /** Pointer to the page table. */
//    PageTable *pTable;

    class WritebackEvent : public Event {
      public:
        /** Constructs a writeback event. */
        WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);

        /** Processes the writeback event. */
        void process();

        /** Returns the description of this event. */
        const char *description();

      private:
        /** The instruction whose result is being written back. */
        DynInstPtr inst;

        /** The packet that holds the data being written back. */
        PacketPtr pkt;

        /** The pointer to the LSQ unit that issued the store. */
        LSQUnit<Impl> *lsqPtr;
    };

  public:
    struct SQEntry {
        /** Constructs an empty store queue entry. */
        SQEntry()
            : inst(NULL), req(NULL), size(0), data(0),
              canWB(0), committed(0), completed(0)
        { }

        /** Constructs a store queue entry for a given instruction. */
        SQEntry(DynInstPtr &_inst)
            : inst(_inst), req(NULL), size(0), data(0),
              canWB(0), committed(0), completed(0)
        { }

        /** The store instruction. */
        DynInstPtr inst;
        /** The request for the store. */
        RequestPtr req;
        /** The size of the store. */
        int size;
        /** The store data. */
        IntReg data;
        /** Whether or not the store can writeback. */
        bool canWB;
        /** Whether or not the store is committed. */
        bool committed;
        /** Whether or not the store is completed. */
        bool completed;
    };

  private:
    /** The LSQUnit thread id. */
    unsigned lsqID;

    /** The store queue. */
    std::vector<SQEntry> storeQueue;

    /** The load queue. */
    std::vector<DynInstPtr> loadQueue;

    /** The number of LQ entries, plus a sentinel entry (circular queue).
     * @todo: Consider having var that records the true number of LQ entries.
     */
    unsigned LQEntries;
    /** The number of SQ entries, plus a sentinel entry (circular queue).
     * @todo: Consider having var that records the true number of SQ entries.
     */
    unsigned SQEntries;

    /** The number of load instructions in the LQ. */
    int loads;
    /** The number of store instructions in the SQ. */
    int stores;
    /** The number of store instructions in the SQ waiting to writeback. */
    int storesToWB;

    /** The index of the head instruction in the LQ. */
    int loadHead;
    /** The index of the tail instruction in the LQ. */
    int loadTail;

    /** The index of the head instruction in the SQ. */
    int storeHead;
    /** The index of the first instruction that may be ready to be
     * written back, and has not yet been written back.
     */
    int storeWBIdx;
    /** The index of the tail instruction in the SQ. */
    int storeTail;

    /// @todo Consider moving to a more advanced model with write vs read ports
    /** The number of cache ports available each cycle. */
    int cachePorts;

    /** The number of used cache ports in this cycle. */
    int usedPorts;

    /** Is the LSQ switched out. */
    bool switchedOut;

    //list<InstSeqNum> mshrSeqNums;

    /** Wire to read information from the issue stage time queue. */
    typename TimeBuffer<IssueStruct>::wire fromIssue;

    /** Whether or not the LSQ is stalled. */
    bool stalled;
    /** The store that causes the stall due to partial store to load
     * forwarding.
     */
    InstSeqNum stallingStoreIsn;
    /** The index of the load being stalled by the above store. */
    int stallingLoadIdx;

    /** Whether or not a store is blocked due to the memory system. */
    bool isStoreBlocked;

    /** Whether or not a load is blocked due to the memory system. */
    bool isLoadBlocked;

    /** Has the blocked load been handled. */
    bool loadBlockedHandled;

    /** The sequence number of the blocked load. */
    InstSeqNum blockedLoadSeqNum;

    /** The oldest load that caused a memory ordering violation. */
    DynInstPtr memDepViolator;

    // Will also need how many read/write ports the Dcache has.  Or keep track
    // of that in stage that is one level up, and only call executeLoad/Store
    // the appropriate number of times.
/*
    // total number of loads forwarded from LSQ stores
    Stats::Vector<> lsq_forw_loads;

    // total number of loads ignored due to invalid addresses
    Stats::Vector<> inv_addr_loads;

    // total number of software prefetches ignored due to invalid addresses
    Stats::Vector<> inv_addr_swpfs;

    // total non-speculative bogus addresses seen (debug var)
    Counter sim_invalid_addrs;
    Stats::Vector<> fu_busy;  //cumulative fu busy

    // ready loads blocked due to memory disambiguation
    Stats::Vector<> lsq_blocked_loads;

    Stats::Scalar<> lsqInversion;
*/
  public:
    /** Executes the load at the given index. */
    template <class T>
    Fault read(Request *req, T &data, int load_idx);

    /** Executes the store at the given index. */
    template <class T>
    Fault write(Request *req, T &data, int store_idx);

    /** Returns the index of the head load instruction. */
    int getLoadHead() { return loadHead; }
    /** Returns the sequence number of the head load instruction. */
    InstSeqNum getLoadHeadSeqNum()
    {
        if (loadQueue[loadHead]) {
            return loadQueue[loadHead]->seqNum;
        } else {
            return 0;
        }
    }

    /** Returns the index of the head store instruction. */
    int getStoreHead() { return storeHead; }
    /** Returns the sequence number of the head store instruction. */
    InstSeqNum getStoreHeadSeqNum()
    {
        if (storeQueue[storeHead].inst) {
            return storeQueue[storeHead].inst->seqNum;
        } else {
            return 0;
        }
    }

    /** Returns whether or not the LSQ unit is stalled. */
    bool isStalled() { return stalled; }
};
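
// read() and write() are member function templates (parameterized on the
// access type T), so their definitions live here in the header where they
// are visible at each point of instantiation.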
template <class Impl>
template <class T>
Fault
LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
{
    DynInstPtr load_inst = loadQueue[load_idx];

    assert(load_inst);

    assert(!load_inst->isExecuted());

    // Make sure this isn't an uncacheable access
    // A bit of a hackish way to get uncached accesses to work only if they're
    // at the head of the LSQ and are ready to commit (at the head of the ROB
    // too).
    if (req->getFlags() & UNCACHEABLE &&
        (load_idx != loadHead || !load_inst->reachedCommit)) {
        iewStage->rescheduleMemInst(load_inst);
        return TheISA::genMachineCheckFault();
    }

    // Check the SQ for any previous stores that might lead to forwarding
    int store_idx = load_inst->sqIdx;

    int store_size = 0;

    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
            "storeHead: %i addr: %#x\n",
            load_idx, store_idx, storeHead, req->getPaddr());

#if 0
    if (req->getFlags() & LOCKED) {
        cpu->lockAddr = req->getPaddr();
        cpu->lockFlag = true;
    }
#endif

    while (store_idx != -1) {
        // End once we've reached the top of the LSQ
        if (store_idx == storeWBIdx) {
            break;
        }

        // Move the index to the next older store
        if (--store_idx < 0)
            store_idx += SQEntries;

        assert(storeQueue[store_idx].inst);

        store_size = storeQueue[store_idx].size;

        if (store_size == 0)
            continue;

        // Check if the store data is within the lower and upper bounds of
        // addresses that the request needs.
        bool store_has_lower_limit =
            req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
        bool store_has_upper_limit =
            (req->getVaddr() + req->getSize()) <=
            (storeQueue[store_idx].inst->effAddr + store_size);
        bool lower_load_has_store_part =
            req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
                               store_size);
        bool upper_load_has_store_part =
            (req->getVaddr() + req->getSize()) >
            storeQueue[store_idx].inst->effAddr;

        // If the store's data has all of the data needed, we can forward.
        if (store_has_lower_limit && store_has_upper_limit) {
            // Get shift amount for offset into the store's data.
            int shift_amt = req->getVaddr() & (store_size - 1);
            // @todo: Magic number, assumes byte addressing
            shift_amt = shift_amt << 3;

            // Cast this to type T?
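            // Shift the store's data right so the bytes the load needs land
            // in the low-order bits (this assumes the store data is held in
            // a little-endian register layout): e.g., a 4-byte load of the
            // upper half of an aligned 8-byte store gives
            // shift_amt = 4 << 3 = 32.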
            data = storeQueue[store_idx].data >> shift_amt;

            assert(!load_inst->memData);
            load_inst->memData = new uint8_t[64];

            memcpy(load_inst->memData, &data, req->getSize());

            DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
                    "addr %#x, data %#x\n",
                    store_idx, req->getVaddr(), *(load_inst->memData));

            PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
            data_pkt->dataStatic(load_inst->memData);

            WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);

            // We'll say this has a 1 cycle load-store forwarding latency
            // for now.
            // @todo: Need to make this a parameter.
            wb->schedule(curTick);

            // Should keep track of stat for forwarded data
            return NoFault;
        } else if ((store_has_lower_limit && lower_load_has_store_part) ||
                   (store_has_upper_limit && upper_load_has_store_part) ||
                   (lower_load_has_store_part && upper_load_has_store_part)) {
            // This is the partial store-load forwarding case where a store
            // has only part of the load's data.

            // If it's already been written back, then don't worry about
            // stalling on it.
            if (storeQueue[store_idx].completed) {
                continue;
            }

            // Must stall load and force it to retry, so long as it's the oldest
            // load that needs to do so.
            if (!stalled ||
                (stalled &&
                 load_inst->seqNum <
                 loadQueue[stallingLoadIdx]->seqNum)) {
                stalled = true;
                stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
                stallingLoadIdx = load_idx;
            }

            // Tell IQ/mem dep unit that this instruction will need to be
            // rescheduled eventually
            iewStage->rescheduleMemInst(load_inst);

            // Do not generate a writeback event as this instruction is not
            // complete.
            DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
                    "Store idx %i to load addr %#x\n",
                    store_idx, req->getVaddr());

            return NoFault;
        }
    }

    // If there's no forwarding case, then go access memory
    DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
            load_inst->seqNum, load_inst->readPC());

    assert(!load_inst->memData);
    load_inst->memData = new uint8_t[64];

    ++usedPorts;

    DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
            load_inst->readPC());

    PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
    data_pkt->dataStatic(load_inst->memData);

    LSQSenderState *state = new LSQSenderState;
    state->isLoad = true;
    state->idx = load_idx;
    state->inst = load_inst;
    data_pkt->senderState = state;

    // if we have a cache, do cache access too
    if (!dcachePort->sendTiming(data_pkt)) {
        // There's an older load that's already going to squash.
        if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
            return NoFault;

        // Record that the load was blocked due to memory.  This
        // load will squash all instructions after it, be
        // refetched, and re-executed.
        isLoadBlocked = true;
        loadBlockedHandled = false;
        blockedLoadSeqNum = load_inst->seqNum;
        // No fault occurred, even though the interface is blocked.
        return NoFault;
    }
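
    // The port accepted the access; the load's data arrives later and is
    // handled by completeDataAccess(), so the result check below only
    // affects debug/trace output.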
    if (data_pkt->result != Packet::Success) {
        DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
        DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
                load_inst->seqNum);
    } else {
        DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
        DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
                load_inst->seqNum);
    }

    return NoFault;
}

template <class Impl>
template <class T>
Fault
LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
{
    assert(storeQueue[store_idx].inst);

    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
            " | storeHead:%i [sn:%i]\n",
            store_idx, req->getPaddr(), data, storeHead,
            storeQueue[store_idx].inst->seqNum);

    storeQueue[store_idx].req = req;
    storeQueue[store_idx].size = sizeof(T);
    storeQueue[store_idx].data = data;

    // This function only writes the data to the store queue, so no fault
    // can happen here.
    return NoFault;
}

#endif // __CPU_O3_LSQ_UNIT_HH__