/*
 * Copyright (c) 2012-2014,2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */

#ifndef __CPU_O3_LSQ_UNIT_HH__
#define __CPU_O3_LSQ_UNIT_HH__

#include <algorithm>
#include <cstring>
#include <map>
#include <queue>

#include "arch/generic/debugfaults.hh"
#include "arch/generic/vec_reg.hh"
#include "arch/isa_traits.hh"
#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "base/circular_queue.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
#include "cpu/timebuf.hh"
#include "debug/LSQUnit.hh"
#include "mem/packet.hh"
#include "mem/port.hh"

struct DerivO3CPUParams;

/**
 * Class that implements the actual LQ and SQ for each specific
 * thread. Both are circular queues; load entries are freed upon
 * committing, while store entries are freed once they writeback. The
 * LSQUnit tracks if there are memory ordering violations, and also
 * detects partial store-to-load forwarding cases (a store only has
 * part of a load's data) that require the load to wait until the
 * store writes back. In the former case it holds onto the instruction
 * until the dependence unit looks at it, and in the latter it stalls
 * the LSQ until the store writes back. At that point the load is
 * replayed.
 */
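/*
 * To illustrate the two forwarding cases above with hypothetical
 * addresses (a sketch only; the real check lives in read() below): a
 * store covering bytes [0x100, 0x108) can fully forward to a load for
 * [0x102, 0x106), while a load for [0x104, 0x10c) only partially
 * overlaps it and must stall until the store writes back:
 *
 *     bool full_cover = load_start >= store_start && load_end <= store_end;
 *     bool overlap    = load_start <  store_end   && load_end >  store_start;
 *     // full_cover             -> forward data from the SQ entry
 *     // overlap && !full_cover -> stall the load; replay it later
 */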
template <class Impl>
class LSQUnit
{
  public:
    static constexpr auto MaxDataBytes = MaxVecRegLenInBytes;

    typedef typename Impl::O3CPU O3CPU;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::CPUPol::IEW IEW;
    typedef typename Impl::CPUPol::LSQ LSQ;
    typedef typename Impl::CPUPol::IssueStruct IssueStruct;

    using LSQSenderState = typename LSQ::LSQSenderState;
    using LSQRequest = typename Impl::CPUPol::LSQ::LSQRequest;
  private:
    class LSQEntry
    {
      private:
        /** The instruction. */
        DynInstPtr inst;
        /** The request. */
        LSQRequest* req;
        /** The size of the operation. */
        uint32_t _size;
        /** Valid entry. */
        bool _valid;
      public:
        /** Constructs an empty load/store queue entry. */
        LSQEntry()
            : inst(nullptr), req(nullptr), _size(0), _valid(false)
        {
        }

        ~LSQEntry()
        {
            inst = nullptr;
            if (req != nullptr) {
                req->freeLSQEntry();
                req = nullptr;
            }
        }

        void
        clear()
        {
            inst = nullptr;
            if (req != nullptr) {
                req->freeLSQEntry();
            }
            req = nullptr;
            _valid = false;
            _size = 0;
        }

        void
        set(const DynInstPtr& inst)
        {
            assert(!_valid);
            this->inst = inst;
            _valid = true;
            _size = 0;
        }
        LSQRequest* request() { return req; }
        void setRequest(LSQRequest* r) { req = r; }
        bool hasRequest() { return req != nullptr; }
        /** Member accessors. */
        /** @{ */
        bool valid() const { return _valid; }
        uint32_t& size() { return _size; }
        const uint32_t& size() const { return _size; }
        const DynInstPtr& instruction() const { return inst; }
        /** @} */
    };

    class SQEntry : public LSQEntry
    {
      private:
        /** The store data. */
        char _data[MaxDataBytes];
        /** Whether or not the store can writeback. */
        bool _canWB;
        /** Whether or not the store is committed. */
        bool _committed;
        /** Whether or not the store is completed. */
        bool _completed;
        /** Does this request write all zeros and thus doesn't
         * have any data attached to it. Used for cache block zero
         * style instructions (ARM DC ZVA; ALPHA WH64).
         */
        bool _isAllZeros;
      public:
        static constexpr size_t DataSize = sizeof(_data);
        /** Constructs an empty store queue entry. */
        SQEntry()
            : _canWB(false), _committed(false), _completed(false),
              _isAllZeros(false)
        {
            std::memset(_data, 0, DataSize);
        }

        ~SQEntry()
        {
        }

        void
        set(const DynInstPtr& inst)
        {
            LSQEntry::set(inst);
        }

        void
        clear()
        {
            LSQEntry::clear();
            _canWB = _completed = _committed = _isAllZeros = false;
        }
        /** Member accessors. */
        /** @{ */
        bool& canWB() { return _canWB; }
        const bool& canWB() const { return _canWB; }
        bool& completed() { return _completed; }
        const bool& completed() const { return _completed; }
        bool& committed() { return _committed; }
        const bool& committed() const { return _committed; }
        bool& isAllZeros() { return _isAllZeros; }
        const bool& isAllZeros() const { return _isAllZeros; }
        char* data() { return _data; }
        const char* data() const { return _data; }
        /** @} */
    };
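    /* A sketch of how an SQEntry's data is meant to be consumed
     * (mirroring the forwarding path in read() below): all-zero
     * entries carry no payload, so readers must special-case them.
     *
     *     if (entry.isAllZeros())
     *         std::memset(dst, 0, len);
     *     else
     *         std::memcpy(dst, entry.data() + offset, len);
     */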
    using LQEntry = LSQEntry;

  public:
    using LoadQueue = CircularQueue<LQEntry>;
    using StoreQueue = CircularQueue<SQEntry>;

  public:
    /** Constructs an LSQ unit. init() must be called prior to use. */
    LSQUnit(uint32_t lqEntries, uint32_t sqEntries);

    /** We cannot copy LSQUnit because it has stats for which the copy
     * constructor is deleted explicitly. However, STL vector requires
     * a valid copy constructor for the base type at compile time.
     */
    LSQUnit(const LSQUnit &l) { panic("LSQUnit is not copy-able"); }

    /** Initializes the LSQ unit with the specified number of entries. */
    void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
            LSQ *lsq_ptr, unsigned id);

    /** Returns the name of the LSQ unit. */
    std::string name() const;

    /** Registers statistics. */
    void regStats();

    /** Sets the pointer to the dcache port. */
    void setDcachePort(MasterPort *dcache_port);

    /** Perform sanity checks after a drain. */
    void drainSanityCheck() const;

    /** Takes over from another CPU's thread. */
    void takeOverFrom();

    /** Inserts an instruction. */
    void insert(const DynInstPtr &inst);
    /** Inserts a load instruction. */
    void insertLoad(const DynInstPtr &load_inst);
    /** Inserts a store instruction. */
    void insertStore(const DynInstPtr &store_inst);

    /** Check for ordering violations in the LSQ. For a store, squash if
     * we ever find a conflicting load. For a load, only squash if an
     * external snoop invalidate has been seen for that load address.
     * @param loadIt iterator to the load at which checking starts
     * @param inst the instruction to check
     */
    Fault checkViolations(typename LoadQueue::iterator& loadIt,
            const DynInstPtr& inst);

    /** Check if an incoming invalidate hits in the lsq on a load
     * that might have issued out of order wrt another load because
     * of the intermediate invalidate.
     */
    void checkSnoop(PacketPtr pkt);

    /** Executes a load instruction. */
    Fault executeLoad(const DynInstPtr &inst);

    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
    /** Executes a store instruction. */
    Fault executeStore(const DynInstPtr &inst);

    /** Commits the head load. */
    void commitLoad();
    /** Commits loads older than a specific sequence number. */
    void commitLoads(InstSeqNum &youngest_inst);

    /** Commits stores older than a specific sequence number. */
    void commitStores(InstSeqNum &youngest_inst);

    /** Writes back stores. */
    void writebackStores();

    /** Completes the data access that has been returned from the
     * memory system. */
    void completeDataAccess(PacketPtr pkt);

    /** Squashes all instructions younger than a specific sequence number. */
    void squash(const InstSeqNum &squashed_num);

    /** Returns if there is a memory ordering violation. Value is reset
     * upon call to getMemDepViolator().
     */
    bool violation() { return memDepViolator; }

    /** Returns the memory ordering violator. */
    DynInstPtr getMemDepViolator();
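    /* Sketch of the intended use of the two members above by a caller
     * such as IEW (hypothetical caller code; reading the violator
     * resets it, so violation() returns false afterwards):
     *
     *     if (lsq_unit.violation()) {
     *         DynInstPtr violator = lsq_unit.getMemDepViolator();
     *         // squash younger instructions from violator->seqNum on
     *     }
     */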
    /** Returns the number of free LQ entries. */
    unsigned numFreeLoadEntries();

    /** Returns the number of free SQ entries. */
    unsigned numFreeStoreEntries();

    /** Returns the number of loads in the LQ. */
    int numLoads() { return loads; }

    /** Returns the number of stores in the SQ. */
    int numStores() { return stores; }

    /** Returns if either the LQ or SQ is full. */
    bool isFull() { return lqFull() || sqFull(); }

    /** Returns if both the LQ and SQ are empty. */
    bool isEmpty() const { return lqEmpty() && sqEmpty(); }

    /** Returns if the LQ is full. */
    bool lqFull() { return loadQueue.full(); }

    /** Returns if the SQ is full. */
    bool sqFull() { return storeQueue.full(); }

    /** Returns if the LQ is empty. */
    bool lqEmpty() const { return loads == 0; }

    /** Returns if the SQ is empty. */
    bool sqEmpty() const { return stores == 0; }

    /** Returns the number of instructions in the LSQ. */
    unsigned getCount() { return loads + stores; }

    /** Returns if there are any stores to writeback. */
    bool hasStoresToWB() { return storesToWB; }

    /** Returns the number of stores to writeback. */
    int numStoresToWB() { return storesToWB; }

    /** Returns if the LSQ unit will writeback on this cycle. */
    bool
    willWB()
    {
        return storeWBIt.dereferenceable() &&
                storeWBIt->valid() &&
                storeWBIt->canWB() &&
                !storeWBIt->completed() &&
                !isStoreBlocked;
    }

    /** Handles doing the retry. */
    void recvRetry();

    unsigned int cacheLineSize();
  private:
    /** Reset the LSQ state. */
    void resetState();

    /** Writes back the instruction, sending it to IEW. */
    void writeback(const DynInstPtr &inst, PacketPtr pkt);

    /** Try to finish a previously blocked write back attempt. */
    void writebackBlockedStore();

    /** Completes the store at the specified index. */
    void completeStore(typename StoreQueue::iterator store_idx);

    /** Handles completing the send of a store to memory. */
    void storePostSend();

  public:
    /** Attempts to send a packet to the cache.
     * Check if there are ports available. Return true if
     * there are, false if there are not.
     */
    bool trySendPacket(bool isLoad, PacketPtr data_pkt);

    /** Debugging function to dump instructions in the LSQ. */
    void dumpInsts() const;

    /** Schedule event for the cpu. */
    void schedule(Event& ev, Tick when) { cpu->schedule(ev, when); }

    BaseTLB* dTLB() { return cpu->dtb; }

  private:
    /** Pointer to the CPU. */
    O3CPU *cpu;

    /** Pointer to the IEW stage. */
    IEW *iewStage;

    /** Pointer to the LSQ. */
    LSQ *lsq;

    /** Pointer to the dcache port. Used only for sending. */
    MasterPort *dcachePort;

    /** Particularisation of the LSQSenderState to the LQ. */
    class LQSenderState : public LSQSenderState
    {
        using LSQSenderState::alive;
      public:
        LQSenderState(typename LoadQueue::iterator idx_)
            : LSQSenderState(idx_->request(), true), idx(idx_) { }

        /** The LQ index of the instruction. */
        typename LoadQueue::iterator idx;
        //virtual LSQRequest* request() { return idx->request(); }
        virtual void
        complete()
        {
            //if (alive())
            //  idx->request()->senderState(nullptr);
        }
    };

    /** Particularisation of the LSQSenderState to the SQ. */
    class SQSenderState : public LSQSenderState
    {
        using LSQSenderState::alive;
      public:
        SQSenderState(typename StoreQueue::iterator idx_)
            : LSQSenderState(idx_->request(), false), idx(idx_) { }
        /** The SQ index of the instruction. */
        typename StoreQueue::iterator idx;
        //virtual LSQRequest* request() { return idx->request(); }
        virtual void
        complete()
        {
            //if (alive())
            //  idx->request()->senderState(nullptr);
        }
    };
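    /* A sketch of the sender-state round trip (assuming the usual gem5
     * Packet::senderState convention): the packets built for a request
     * carry one of the two states above, and when the memory system
     * responds, the state is recovered from the packet so that the
     * stored LQ/SQ iterator identifies the entry to complete, e.g.
     *
     *     auto *state = static_cast<LSQSenderState *>(pkt->senderState);
     */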
    /** Writeback event, specifically for when stores forward data to loads. */
    class WritebackEvent : public Event
    {
      public:
        /** Constructs a writeback event. */
        WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt,
                LSQUnit *lsq_ptr);

        /** Processes the writeback event. */
        void process();

        /** Returns the description of this event. */
        const char *description() const;

      private:
        /** Instruction whose results are being written back. */
        DynInstPtr inst;

        /** The packet that would have been sent to memory. */
        PacketPtr pkt;

        /** The pointer to the LSQ unit that issued the store. */
        LSQUnit<Impl> *lsqPtr;
    };

  public:
    /**
     * Handles writing back and completing the load or store that has
     * returned from memory.
     *
     * @param pkt Response packet from the memory sub-system
     */
    bool recvTimingResp(PacketPtr pkt);

  private:
    /** The LSQUnit thread id. */
    ThreadID lsqID;
  public:
    /** The store queue. */
    StoreQueue storeQueue;

    /** The load queue. */
    LoadQueue loadQueue;

  private:
    /** The number of places to shift addresses in the LSQ before checking
     * for dependency violations.
     */
    unsigned depCheckShift;
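    /* For example, with depCheckShift == 4, accesses to 0x1002 and
     * 0x100a fall into the same 16-byte window and are conservatively
     * treated as potentially dependent:
     *
     *     (0x1002 >> 4) == (0x100a >> 4)   // both yield 0x100
     */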
    /** Should loads be checked for dependency issues? */
    bool checkLoads;

    /** The number of load instructions in the LQ. */
    int loads;
    /** The number of store instructions in the SQ. */
    int stores;
    /** The number of store instructions in the SQ waiting to writeback. */
    int storesToWB;

    /** The index of the first instruction that may be ready to be
     * written back, and has not yet been written back.
     */
    typename StoreQueue::iterator storeWBIt;

    /** Address mask for a cache block (e.g. ~(cache_block_size-1)). */
    Addr cacheBlockMask;

    /** Wire to read information from the issue stage time queue. */
    typename TimeBuffer<IssueStruct>::wire fromIssue;

    /** Whether or not the LSQ is stalled. */
    bool stalled;
    /** The store that causes the stall due to partial store to load
     * forwarding.
     */
    InstSeqNum stallingStoreIsn;
    /** The index of the stalling load. */
    int stallingLoadIdx;

    /** The packet that needs to be retried. */
    PacketPtr retryPkt;

    /** Whether or not a store is blocked due to the memory system. */
    bool isStoreBlocked;

    /** Whether or not a store is in flight. */
    bool storeInFlight;

    /** The oldest load that caused a memory ordering violation. */
    DynInstPtr memDepViolator;

    /** Whether or not there is a packet that couldn't be sent because of
     * a lack of cache ports.
     */
    bool hasPendingRequest;

    /** The request that is pending free cache ports. */
    LSQRequest* pendingRequest;

    /** Flag for memory model. */
    bool needsTSO;

    // Will also need how many read/write ports the Dcache has. Or keep track
    // of that in stage that is one level up, and only call executeLoad/Store
    // the appropriate number of times.
    /** Total number of loads forwarded from LSQ stores. */
    Stats::Scalar lsqForwLoads;

    /** Total number of loads ignored due to invalid addresses. */
    Stats::Scalar invAddrLoads;

    /** Total number of squashed loads. */
    Stats::Scalar lsqSquashedLoads;

    /** Total number of responses from the memory system that are
     * ignored due to the instruction already being squashed. */
    Stats::Scalar lsqIgnoredResponses;

    /** Total number of memory ordering violations. */
    Stats::Scalar lsqMemOrderViolation;

    /** Total number of squashed stores. */
    Stats::Scalar lsqSquashedStores;

    /** Total number of software prefetches ignored due to invalid
     * addresses. */
    Stats::Scalar invAddrSwpfs;

    /** Ready loads blocked due to partial store-forwarding. */
    Stats::Scalar lsqBlockedLoads;

    /** Number of loads that were rescheduled. */
    Stats::Scalar lsqRescheduledLoads;

    /** Number of times the LSQ is blocked due to the cache. */
    Stats::Scalar lsqCacheBlocked;

  public:
    /** Executes the load at the given index. */
    Fault read(LSQRequest *req, int load_idx);

    /** Executes the store at the given index. */
    Fault write(LSQRequest *req, uint8_t *data, int store_idx);

    /** Returns the index of the head load instruction. */
    int getLoadHead() { return loadQueue.head(); }

    /** Returns the sequence number of the head load instruction. */
    InstSeqNum
    getLoadHeadSeqNum()
    {
        return loadQueue.front().valid()
            ? loadQueue.front().instruction()->seqNum
            : 0;
    }

    /** Returns the index of the head store instruction. */
    int getStoreHead() { return storeQueue.head(); }
    /** Returns the sequence number of the head store instruction. */
    InstSeqNum
    getStoreHeadSeqNum()
    {
        return storeQueue.front().valid()
            ? storeQueue.front().instruction()->seqNum
            : 0;
    }

    /** Returns whether or not the LSQ unit is stalled. */
    bool isStalled() { return stalled; }
  public:
    typedef typename CircularQueue<LQEntry>::iterator LQIterator;
    typedef typename CircularQueue<SQEntry>::iterator SQIterator;
    typedef CircularQueue<LQEntry> LQueue;
    typedef CircularQueue<SQEntry> SQueue;
};
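/* Overview of read() below (a descriptive summary, not new behavior):
 * strictly ordered loads that are not at the head of the LSQ/ROB are
 * rescheduled; LLSC loads update the lock state without recording a
 * result; memory-mapped IPR reads are handled locally and complete via
 * a WritebackEvent; otherwise the SQ is searched from the youngest
 * older store back to the writeback point for a forwarding match, and
 * only if no (partial) match is found does the load access memory.
 */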
template <class Impl>
Fault
LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
{
    LQEntry& load_req = loadQueue[load_idx];
    const DynInstPtr& load_inst = load_req.instruction();

    load_req.setRequest(req);
    assert(load_inst);

    assert(!load_inst->isExecuted());

    // Make sure this isn't a strictly ordered load.
    // A bit of a hackish way to get strictly ordered accesses to work
    // only if they're at the head of the LSQ and are ready to commit
    // (at the head of the ROB too).

    if (req->mainRequest()->isStrictlyOrdered() &&
        (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
        // Tell IQ/mem dep unit that this instruction will need to be
        // rescheduled eventually
        iewStage->rescheduleMemInst(load_inst);
        load_inst->clearIssued();
        load_inst->effAddrValid(false);
        ++lsqRescheduledLoads;
        DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
                load_inst->seqNum, load_inst->pcState());

        // Must delete request now that it wasn't handed off to
        // memory. This is quite ugly. @todo: Figure out the proper
        // place to really handle request deletes.
        load_req.setRequest(nullptr);
        req->discard();
        return std::make_shared<GenericISA::M5PanicFault>(
            "Strictly ordered load [sn:%llx] PC %s\n",
            load_inst->seqNum, load_inst->pcState());
    }

    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
            "storeHead: %i addr: %#x%s\n",
            load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
            req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");

    if (req->mainRequest()->isLLSC()) {
        // Disable recording the result temporarily. Writing to misc
        // regs normally updates the result, but this is not the
        // desired behavior when handling store conditionals.
        load_inst->recordResult(false);
        TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
        load_inst->recordResult(true);
    }

    if (req->mainRequest()->isMmappedIpr()) {
        assert(!load_inst->memData);
        load_inst->memData = new uint8_t[MaxDataBytes];

        ThreadContext *thread = cpu->tcBase(lsqID);
        PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);

        main_pkt->dataStatic(load_inst->memData);

        Cycles delay = req->handleIprRead(thread, main_pkt);

        WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
        cpu->schedule(wb, cpu->clockEdge(delay));
        return NoFault;
    }

    // Check the SQ for any previous stores that might lead to forwarding
    auto store_it = load_inst->sqIt;
    assert(store_it >= storeWBIt);
    // End once we've reached the top of the LSQ
    while (store_it != storeWBIt) {
        // Move the index to the next older store
        store_it--;
        assert(store_it->valid());
        assert(store_it->instruction()->seqNum < load_inst->seqNum);
        int store_size = store_it->size();

        // Cache maintenance instructions go down via the store
        // path but they carry no data and they shouldn't be
        // considered for forwarding
        if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
            !(store_it->request()->mainRequest() &&
              store_it->request()->mainRequest()->isCacheMaintenance())) {
            assert(store_it->instruction()->effAddrValid());

            // Check if the store data is within the lower and upper bounds of
            // addresses that the request needs.
            auto req_s = req->mainRequest()->getVaddr();
            auto req_e = req_s + req->mainRequest()->getSize();
            auto st_s = store_it->instruction()->effAddr;
            auto st_e = st_s + store_size;

            bool store_has_lower_limit = req_s >= st_s;
            bool store_has_upper_limit = req_e <= st_e;
            bool lower_load_has_store_part = req_s < st_e;
            bool upper_load_has_store_part = req_e > st_s;
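            // Worked example with hypothetical addresses: a store covering
            // [0x100, 0x108) and a load covering [0x104, 0x10c) give
            //     store_has_lower_limit     = (0x104 >= 0x100) -> true
            //     store_has_upper_limit     = (0x10c <= 0x108) -> false
            //     lower_load_has_store_part = (0x104 <  0x108) -> true
            //     upper_load_has_store_part = (0x10c >  0x100) -> true
            // i.e. a partial overlap, handled by the stall case below.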
            // If the store entry is not atomic (atomic does not have valid
            // data), the store has all of the data needed, and
            // the load is not LLSC, then
            // we can forward data from the store to the load
            if (!store_it->instruction()->isAtomic() &&
                store_has_lower_limit && store_has_upper_limit &&
                !req->mainRequest()->isLLSC()) {

                // Get shift amount for offset into the store's data.
                int shift_amt = req->mainRequest()->getVaddr() -
                    store_it->instruction()->effAddr;
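                // E.g. (hypothetical addresses) a store to 0x100 fully
                // covering a load from 0x104 gives shift_amt == 4: the
                // load's bytes begin four bytes into the store's data.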
                // Allocate memory if this is the first time a load is issued.
                if (!load_inst->memData) {
                    load_inst->memData =
                        new uint8_t[req->mainRequest()->getSize()];
                }
                if (store_it->isAllZeros())
                    memset(load_inst->memData, 0,
                            req->mainRequest()->getSize());
                else
                    memcpy(load_inst->memData,
                            store_it->data() + shift_amt,
                            req->mainRequest()->getSize());

                DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
                        "addr %#x\n", store_it._idx,
                        req->mainRequest()->getVaddr());

                PacketPtr data_pkt = new Packet(req->mainRequest(),
                        MemCmd::ReadReq);
                data_pkt->dataStatic(load_inst->memData);

                if (req->isAnyOutstandingRequest()) {
                    assert(req->_numOutstandingPackets > 0);
                    // There are memory request packets in flight already.
                    // This may happen if the store was not complete the
                    // first time this load got executed. Signal the
                    // senderState that response packets should be discarded.
                    req->discardSenderState();
                }

                WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
                        this);

                // We'll say this has a 1 cycle load-store forwarding latency
                // for now.
                // @todo: Need to make this a parameter.
                cpu->schedule(wb, curTick());

                // Don't need to do anything special for split loads.
                ++lsqForwLoads;

                return NoFault;
            } else if (
                // This is the partial store-load forwarding case where a store
                // has only part of the load's data and the load isn't LLSC
                (!req->mainRequest()->isLLSC() &&
                 ((store_has_lower_limit && lower_load_has_store_part) ||
                  (store_has_upper_limit && upper_load_has_store_part) ||
                  (lower_load_has_store_part && upper_load_has_store_part))) ||
                // The load is LLSC, and the store has all or part of the
                // load's data
                (req->mainRequest()->isLLSC() &&
                 ((store_has_lower_limit || upper_load_has_store_part) &&
                  (store_has_upper_limit || lower_load_has_store_part))) ||
                // The store entry is atomic and has all or part of the load's
                // data
                (store_it->instruction()->isAtomic() &&
                 ((store_has_lower_limit || upper_load_has_store_part) &&
                  (store_has_upper_limit || lower_load_has_store_part)))) {

                // If it's already been written back, then don't worry about
                // stalling on it.
                if (store_it->completed()) {
                    panic("Should not check one of these");
                    continue;
                }

                // Must stall load and force it to retry, so long as it's the
                // oldest load that needs to do so.
                if (!stalled ||
                    (stalled &&
                     load_inst->seqNum <
                     loadQueue[stallingLoadIdx].instruction()->seqNum)) {
                    stalled = true;
                    stallingStoreIsn = store_it->instruction()->seqNum;
                    stallingLoadIdx = load_idx;
                }

                // Tell IQ/mem dep unit that this instruction will need to be
                // rescheduled eventually
                iewStage->rescheduleMemInst(load_inst);
                load_inst->clearIssued();
                load_inst->effAddrValid(false);
                ++lsqRescheduledLoads;

                // Do not generate a writeback event as this instruction is not
                // complete.
                DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
                        "Store idx %i to load addr %#x\n",
                        store_it._idx, req->mainRequest()->getVaddr());

                // Must discard the request.
                req->discard();
                load_req.setRequest(nullptr);
                return NoFault;
            }
        }
    }

    // If there's no forwarding case, then go access memory
    DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
            load_inst->seqNum, load_inst->pcState());

    // Allocate memory if this is the first time a load is issued.
    if (!load_inst->memData) {
        load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
    }

    // For now, load throughput is constrained by the number of
    // load FUs only, and loads do not consume a cache port (only
    // stores do).
    // @todo We should account for cache port contention
    // and arbitrate between loads and stores.

    // If the cache is not blocked, do the cache access
    if (req->senderState() == nullptr) {
        LQSenderState *state = new LQSenderState(
                loadQueue.getIterator(load_idx));
        state->isLoad = true;
        state->inst = load_inst;
        state->isSplit = req->isSplit();
        req->senderState(state);
    }
    req->buildPackets();
    req->sendPacketToCache();
    if (!req->isSent())
        iewStage->blockMemInst(load_inst);

    return NoFault;
}

template <class Impl>
Fault
LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
{
    assert(storeQueue[store_idx].valid());

    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
            "[sn:%llu]\n",
            store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
            storeQueue[store_idx].instruction()->seqNum);

    storeQueue[store_idx].setRequest(req);
    unsigned size = req->_size;
    storeQueue[store_idx].size() = size;
    bool store_no_data =
        req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
    storeQueue[store_idx].isAllZeros() = store_no_data;
    assert(size <= SQEntry::DataSize || store_no_data);

    // Copy data into the storeQueue only if the store request has valid data.
    if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
        !req->request()->isCacheMaintenance() &&
        !req->request()->isAtomic())
        memcpy(storeQueue[store_idx].data(), data, size);

    // This function only writes the data to the store queue, so no fault
    // can happen here.
    return NoFault;
}

#endif // __CPU_O3_LSQ_UNIT_HH__