1/* 2 * Copyright (c) 2011-2012, 2014, 2018 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2006 The Regents of The University of Michigan 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions are 20 * met: redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer; 22 * redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution; 25 * neither the name of the copyright holders nor the names of its 26 * contributors may be used to endorse or promote products derived from 27 * this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 * 41 * Authors: Korey Sewell 42 */ 43 44#ifndef __CPU_O3_LSQ_HH__ 45#define __CPU_O3_LSQ_HH__ 46 47#include <map> 48#include <queue> 49 50#include "arch/generic/tlb.hh" 51#include "cpu/inst_seq.hh" 52#include "cpu/o3/lsq_unit.hh" 53#include "cpu/utils.hh" 54#include "enums/SMTQueuePolicy.hh" 55#include "mem/port.hh" 56#include "sim/sim_object.hh" 57 58struct DerivO3CPUParams; 59 60template <class Impl> 61class FullO3CPU; 62 63template <class Impl> 64class LSQ 65 66{ 67 public: 68 typedef typename Impl::O3CPU O3CPU; 69 typedef typename Impl::DynInstPtr DynInstPtr; 70 typedef typename Impl::CPUPol::IEW IEW; 71 typedef typename Impl::CPUPol::LSQUnit LSQUnit; 72 73 class LSQRequest; 74 /** Derived class to hold any sender state the LSQ needs. */ 75 class LSQSenderState : public Packet::SenderState 76 { 77 protected: 78 /** The senderState needs to know the LSQRequest who owns it. */ 79 LSQRequest* _request; 80 81 /** Default constructor. */ 82 LSQSenderState(LSQRequest* request, bool isLoad_) 83 : _request(request), mainPkt(nullptr), pendingPacket(nullptr), 84 outstanding(0), isLoad(isLoad_), needWB(isLoad_), isSplit(false), 85 pktToSend(false), deleted(false) 86 { } 87 public: 88 89 /** Instruction which initiated the access to memory. */ 90 DynInstPtr inst; 91 /** The main packet from a split load, used during writeback. */ 92 PacketPtr mainPkt; 93 /** A second packet from a split store that needs sending. */ 94 PacketPtr pendingPacket; 95 /** Number of outstanding packets to complete. */ 96 uint8_t outstanding; 97 /** Whether or not it is a load. */ 98 bool isLoad; 99 /** Whether or not the instruction will need to writeback. */ 100 bool needWB; 101 /** Whether or not this access is split in two. */ 102 bool isSplit; 103 /** Whether or not there is a packet that needs sending. */ 104 bool pktToSend; 105 /** Has the request been deleted? 106 * LSQ entries can be squashed before the response comes back. in that 107 * case the SenderState knows. 108 */ 109 bool deleted; 110 ContextID contextId() { return inst->contextId(); } 111 112 /** Completes a packet and returns whether the access is finished. */ 113 inline bool isComplete() { return outstanding == 0; } 114 inline void deleteRequest() { deleted = true; } 115 inline bool alive() { return !deleted; } 116 LSQRequest* request() { return _request; } 117 virtual void complete() = 0; 118 void writebackDone() { _request->writebackDone(); } 119 }; 120 121 /** 122 * DcachePort class for the load/store queue. 123 */ 124 class DcachePort : public MasterPort 125 { 126 protected: 127 128 /** Pointer to LSQ. */ 129 LSQ<Impl> *lsq; 130 FullO3CPU<Impl> *cpu; 131 132 public: 133 /** Default constructor. */ 134 DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu) 135 : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq), 136 cpu(_cpu) 137 { } 138 139 protected: 140 141 /** Timing version of receive. Handles writing back and 142 * completing the load or store that has returned from 143 * memory. */ 144 virtual bool recvTimingResp(PacketPtr pkt); 145 virtual void recvTimingSnoopReq(PacketPtr pkt); 146 147 virtual void recvFunctionalSnoop(PacketPtr pkt) 148 { 149 // @todo: Is there a need for potential invalidation here? 150 } 151 152 /** Handles doing a retry of the previous send. */ 153 virtual void recvReqRetry(); 154 155 /** 156 * As this CPU requires snooping to maintain the load store queue 157 * change the behaviour from the base CPU port. 158 * 159 * @return true since we have to snoop 160 */ 161 virtual bool isSnooping() const { return true; } 162 }; 163 164 /** Memory operation metadata. 165 * This class holds the information about a memory operation. It lives 166 * from initiateAcc to resource deallocation at commit or squash. 167 * LSQRequest objects are owned by the LQ/SQ Entry in the LSQUnit that 168 * holds the operation. It is also used by the LSQSenderState. In addition, 169 * the LSQRequest is a TranslationState, therefore, upon squash, there must 170 * be a defined ownership transferal in case the LSQ resources are 171 * deallocated before the TLB is done using the TranslationState. If that 172 * happens, the LSQRequest will be self-owned, and responsible to detect 173 * that its services are no longer required and self-destruct. 174 * 175 * Lifetime of a LSQRequest: 176 * +--------------------+ 177 * |LSQ creates and owns| 178 * +--------------------+ 179 * | 180 * +--------------------+ 181 * | Initate translation| 182 * +--------------------+ 183 * | 184 * ___^___ 185 * ___/ \___ 186 * ______/ Squashed? \ 187 * | \___ ___/ 188 * | \___ ___/ 189 * | v 190 * | | 191 * | +--------------------+ 192 * | | Translation done | 193 * | +--------------------+ 194 * | | 195 * | +--------------------+ 196 * | | Send packet |<------+ 197 * | +--------------------+ | 198 * | | | 199 * | ___^___ | 200 * | ___/ \___ | 201 * | ____/ Squashed? \ | 202 * | | \___ ___/ | 203 * | | \___ ___/ | 204 * | | v | 205 * | | | | 206 * | | ___^___ | 207 * | | ___/ \___ | 208 * | | / Done? \__________| 209 * | | \___ ___/ 210 * | | \___ ___/ 211 * | | v 212 * | | | 213 * | | +--------------------+ 214 * | | | Manage stuff | 215 * | | | Free resources | 216 * | | +--------------------+ 217 * | | 218 * | | +--------------------+ 219 * | | | senderState owns | 220 * | +->| onRecvTimingResp | 221 * | | free resources | 222 * | +--------------------+ 223 * | 224 * | +----------------------+ 225 * | | self owned (Trans) | 226 * +-->| on TranslationFinish | 227 * | free resources | 228 * +----------------------+ 229 * 230 * 231 */ 232 class LSQRequest : public BaseTLB::Translation 233 { 234 protected: 235 typedef uint32_t FlagsStorage; 236 typedef ::Flags<FlagsStorage> FlagsType; 237 238 enum Flag : FlagsStorage 239 { 240 IsLoad = 0x00000001, 241 /** True if this is a store/atomic that writes registers (SC). */ 242 WbStore = 0x00000002, 243 Delayed = 0x00000004, 244 IsSplit = 0x00000008, 245 /** True if any translation has been sent to TLB. */ 246 TranslationStarted = 0x00000010, 247 /** True if there are un-replied outbound translations.. */ 248 TranslationFinished = 0x00000020, 249 Sent = 0x00000040, 250 Retry = 0x00000080, 251 Complete = 0x00000100, 252 /** Ownership tracking flags. */ 253 /** Translation squashed. */ 254 TranslationSquashed = 0x00000200, 255 /** Request discarded */ 256 Discarded = 0x00000400, 257 /** LSQ resources freed. */ 258 LSQEntryFreed = 0x00000800, 259 /** Store written back. */ 260 WritebackScheduled = 0x00001000, 261 WritebackDone = 0x00002000, 262 /** True if this is an atomic request */ 263 IsAtomic = 0x00004000 264 }; 265 FlagsType flags; 266 267 enum class State 268 { 269 NotIssued, 270 Translation, 271 Request, 272 Fault, 273 PartialFault, 274 }; 275 State _state; 276 LSQSenderState* _senderState; 277 void setState(const State& newState) { _state = newState; } 278 279 uint32_t numTranslatedFragments; 280 uint32_t numInTranslationFragments; 281 282 /** LQ/SQ entry idx. */ 283 uint32_t _entryIdx; 284 285 void markDelayed() override { flags.set(Flag::Delayed); } 286 bool isDelayed() { return flags.isSet(Flag::Delayed); } 287 288 public: 289 LSQUnit& _port; 290 const DynInstPtr _inst; 291 uint32_t _taskId; 292 PacketDataPtr _data; 293 std::vector<PacketPtr> _packets; 294 std::vector<RequestPtr> _requests; 295 std::vector<Fault> _fault; 296 uint64_t* _res; 297 const Addr _addr; 298 const uint32_t _size; 299 const Request::Flags _flags; 300 std::vector<bool> _byteEnable; 301 uint32_t _numOutstandingPackets; 302 AtomicOpFunctorPtr _amo_op; 303 protected: 304 LSQUnit* lsqUnit() { return &_port; } 305 LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) : 306 _state(State::NotIssued), _senderState(nullptr), 307 _port(*port), _inst(inst), _data(nullptr), 308 _res(nullptr), _addr(0), _size(0), _flags(0), 309 _numOutstandingPackets(0), _amo_op(nullptr) 310 { 311 flags.set(Flag::IsLoad, isLoad); 312 flags.set(Flag::WbStore, 313 _inst->isStoreConditional() || _inst->isAtomic()); 314 flags.set(Flag::IsAtomic, _inst->isAtomic()); 315 install(); 316 } 317 LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad, 318 const Addr& addr, const uint32_t& size, 319 const Request::Flags& flags_, 320 PacketDataPtr data = nullptr, uint64_t* res = nullptr, 321 AtomicOpFunctorPtr amo_op = nullptr) 322 : _state(State::NotIssued), _senderState(nullptr), 323 numTranslatedFragments(0), 324 numInTranslationFragments(0), 325 _port(*port), _inst(inst), _data(data), 326 _res(res), _addr(addr), _size(size), 327 _flags(flags_), 328 _numOutstandingPackets(0), 329 _amo_op(std::move(amo_op)) 330 { 331 flags.set(Flag::IsLoad, isLoad); 332 flags.set(Flag::WbStore, 333 _inst->isStoreConditional() || _inst->isAtomic()); 334 flags.set(Flag::IsAtomic, _inst->isAtomic()); 335 install(); 336 } 337 338 bool 339 isLoad() const 340 { 341 return flags.isSet(Flag::IsLoad); 342 } 343 344 bool 345 isAtomic() const 346 { 347 return flags.isSet(Flag::IsAtomic); 348 } 349 350 /** Install the request in the LQ/SQ. */ 351 void install() 352 { 353 if (isLoad()) { 354 _port.loadQueue[_inst->lqIdx].setRequest(this); 355 } else { 356 // Store, StoreConditional, and Atomic requests are pushed 357 // to this storeQueue 358 _port.storeQueue[_inst->sqIdx].setRequest(this); 359 } 360 } 361 virtual bool 362 squashed() const override 363 { 364 return _inst->isSquashed(); 365 } 366 367 /** 368 * Test if the LSQRequest has been released, i.e. self-owned. 369 * An LSQRequest manages itself when the resources on the LSQ are freed 370 * but the translation is still going on and the LSQEntry was freed. 371 */ 372 bool 373 isReleased() 374 { 375 return flags.isSet(Flag::LSQEntryFreed) || 376 flags.isSet(Flag::Discarded); 377 } 378 379 /** Release the LSQRequest. 380 * Notify the sender state that the request it points to is not valid 381 * anymore. Understand if the request is orphan (self-managed) and if 382 * so, mark it as freed, else destroy it, as this means 383 * the end of its life cycle. 384 * An LSQRequest is orphan when its resources are released 385 * but there is any in-flight translation request to the TLB or access 386 * request to the memory. 387 */ 388 void release(Flag reason) 389 { 390 assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded); 391 if (!isAnyOutstandingRequest()) { 392 delete this; 393 } else { 394 if (_senderState) { 395 _senderState->deleteRequest(); 396 } 397 flags.set(reason); 398 } 399 } 400 401 /** Helper function used to add a (sub)request, given its address 402 * `addr`, size `size` and byte-enable mask `byteEnable`. 403 * 404 * The request is only added if the mask is empty or if there is at 405 * least an active element in it. 406 */ 407 void 408 addRequest(Addr addr, unsigned size, 409 const std::vector<bool>& byteEnable) 410 { 411 if (byteEnable.empty() || 412 isAnyActiveElement(byteEnable.begin(), byteEnable.end())) { 413 auto request = std::make_shared<Request>(_inst->getASID(), 414 addr, size, _flags, _inst->masterId(), 415 _inst->instAddr(), _inst->contextId(), 416 std::move(_amo_op)); 417 if (!byteEnable.empty()) { 418 request->setByteEnable(byteEnable); 419 } 420 _requests.push_back(request); 421 } 422 } 423 424 /** Destructor. 425 * The LSQRequest owns the request. If the packet has already been 426 * sent, the sender state will be deleted upon receiving the reply. 427 */ 428 virtual ~LSQRequest() 429 { 430 assert(!isAnyOutstandingRequest()); 431 _inst->savedReq = nullptr; 432 if (_senderState) 433 delete _senderState; 434 435 for (auto r: _packets) 436 delete r; 437 }; 438 439 440 public: 441 /** Convenience getters/setters. */ 442 /** @{ */ 443 /** Set up Context numbers. */ 444 void 445 setContext(const ContextID& context_id) 446 { 447 request()->setContext(context_id); 448 } 449 450 const DynInstPtr& 451 instruction() 452 { 453 return _inst; 454 } 455 456 /** Set up virtual request. 457 * For a previously allocated Request objects. 458 */ 459 void 460 setVirt(int asid, Addr vaddr, unsigned size, Request::Flags flags_, 461 MasterID mid, Addr pc) 462 { 463 request()->setVirt(asid, vaddr, size, flags_, mid, pc); 464 } 465 466 void 467 taskId(const uint32_t& v) 468 { 469 _taskId = v; 470 for (auto& r: _requests) 471 r->taskId(v); 472 } 473 474 uint32_t taskId() const { return _taskId; } 475 RequestPtr request(int idx = 0) { return _requests.at(idx); } 476 477 const RequestPtr 478 request(int idx = 0) const 479 { 480 return _requests.at(idx); 481 } 482 483 Addr getVaddr(int idx = 0) const { return request(idx)->getVaddr(); } 484 virtual void initiateTranslation() = 0; 485 486 PacketPtr packet(int idx = 0) { return _packets.at(idx); } 487 488 virtual PacketPtr 489 mainPacket() 490 { 491 assert (_packets.size() == 1); 492 return packet(); 493 } 494 495 virtual RequestPtr 496 mainRequest() 497 { 498 assert (_requests.size() == 1); 499 return request(); 500 } 501 502 void 503 senderState(LSQSenderState* st) 504 { 505 _senderState = st; 506 for (auto& pkt: _packets) { 507 if (pkt) 508 pkt->senderState = st; 509 } 510 } 511 512 const LSQSenderState* 513 senderState() const 514 { 515 return _senderState; 516 } 517 518 /** 519 * Mark senderState as discarded. This will cause to discard response 520 * packets from the cache. 521 */ 522 void 523 discardSenderState() 524 { 525 assert(_senderState); 526 _senderState->deleteRequest(); 527 } 528 529 /** 530 * Test if there is any in-flight translation or mem access request 531 */ 532 bool 533 isAnyOutstandingRequest() 534 { 535 return numInTranslationFragments > 0 || 536 _numOutstandingPackets > 0 || 537 (flags.isSet(Flag::WritebackScheduled) && 538 !flags.isSet(Flag::WritebackDone)); 539 } 540 541 bool 542 isSplit() const 543 { 544 return flags.isSet(Flag::IsSplit); 545 } 546 /** @} */ 547 virtual bool recvTimingResp(PacketPtr pkt) = 0; 548 virtual void sendPacketToCache() = 0; 549 virtual void buildPackets() = 0; 550 551 /** 552 * Memory mapped IPR accesses 553 */ 554 virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt) = 0; 555 virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt) = 0; 556 557 /** 558 * Test if the request accesses a particular cache line. 559 */ 560 virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask) = 0; 561 562 /** Update the status to reflect that a packet was sent. */ 563 void 564 packetSent() 565 { 566 flags.set(Flag::Sent); 567 } 568 /** Update the status to reflect that a packet was not sent. 569 * When a packet fails to be sent, we mark the request as needing a 570 * retry. Note that Retry flag is sticky. 571 */ 572 void 573 packetNotSent() 574 { 575 flags.set(Flag::Retry); 576 flags.clear(Flag::Sent); 577 } 578 579 void sendFragmentToTranslation(int i); 580 bool 581 isComplete() 582 { 583 return flags.isSet(Flag::Complete); 584 } 585 586 bool 587 isInTranslation() 588 { 589 return _state == State::Translation; 590 } 591 592 bool 593 isTranslationComplete() 594 { 595 return flags.isSet(Flag::TranslationStarted) && 596 !isInTranslation(); 597 } 598 599 bool 600 isTranslationBlocked() 601 { 602 return _state == State::Translation && 603 flags.isSet(Flag::TranslationStarted) && 604 !flags.isSet(Flag::TranslationFinished); 605 } 606 607 bool 608 isSent() 609 { 610 return flags.isSet(Flag::Sent); 611 } 612 613 bool 614 isPartialFault() 615 { 616 return _state == State::PartialFault; 617 } 618 619 bool 620 isMemAccessRequired() 621 { 622 return (_state == State::Request || 623 (isPartialFault() && isLoad())); 624 } 625 626 /** 627 * The LSQ entry is cleared 628 */ 629 void 630 freeLSQEntry() 631 { 632 release(Flag::LSQEntryFreed); 633 } 634 635 /** 636 * The request is discarded (e.g. partial store-load forwarding) 637 */ 638 void 639 discard() 640 { 641 release(Flag::Discarded); 642 } 643 644 void 645 packetReplied() 646 { 647 assert(_numOutstandingPackets > 0); 648 _numOutstandingPackets--; 649 if (_numOutstandingPackets == 0 && isReleased()) 650 delete this; 651 } 652 653 void 654 writebackScheduled() 655 { 656 assert(!flags.isSet(Flag::WritebackScheduled)); 657 flags.set(Flag::WritebackScheduled); 658 } 659 660 void 661 writebackDone() 662 { 663 flags.set(Flag::WritebackDone); 664 /* If the lsq resources are already free */ 665 if (isReleased()) { 666 delete this; 667 } 668 } 669 670 void 671 squashTranslation() 672 { 673 assert(numInTranslationFragments == 0); 674 flags.set(Flag::TranslationSquashed); 675 /* If we are on our own, self-destruct. */ 676 if (isReleased()) { 677 delete this; 678 } 679 } 680 681 void 682 complete() 683 { 684 flags.set(Flag::Complete); 685 } 686 }; 687 688 class SingleDataRequest : public LSQRequest 689 { 690 protected: 691 /* Given that we are inside templates, children need explicit 692 * declaration of the names in the parent class. */ 693 using Flag = typename LSQRequest::Flag; 694 using State = typename LSQRequest::State; 695 using LSQRequest::_addr; 696 using LSQRequest::_fault; 697 using LSQRequest::_flags; 698 using LSQRequest::_size; 699 using LSQRequest::_byteEnable; 700 using LSQRequest::_requests; 701 using LSQRequest::_inst; 702 using LSQRequest::_packets; 703 using LSQRequest::_port; 704 using LSQRequest::_res; 705 using LSQRequest::_taskId; 706 using LSQRequest::_senderState; 707 using LSQRequest::_state; 708 using LSQRequest::flags; 709 using LSQRequest::isLoad; 710 using LSQRequest::isTranslationComplete; 711 using LSQRequest::lsqUnit; 712 using LSQRequest::request; 713 using LSQRequest::sendFragmentToTranslation; 714 using LSQRequest::setState; 715 using LSQRequest::numInTranslationFragments; 716 using LSQRequest::numTranslatedFragments; 717 using LSQRequest::_numOutstandingPackets; 718 using LSQRequest::_amo_op; 719 public: 720 SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad, 721 const Addr& addr, const uint32_t& size, 722 const Request::Flags& flags_, 723 PacketDataPtr data = nullptr, 724 uint64_t* res = nullptr, 725 AtomicOpFunctorPtr amo_op = nullptr) : 726 LSQRequest(port, inst, isLoad, addr, size, flags_, data, res, 727 std::move(amo_op)) {} 728 729 inline virtual ~SingleDataRequest() {} 730 virtual void initiateTranslation(); 731 virtual void finish(const Fault &fault, const RequestPtr &req, 732 ThreadContext* tc, BaseTLB::Mode mode); 733 virtual bool recvTimingResp(PacketPtr pkt); 734 virtual void sendPacketToCache(); 735 virtual void buildPackets(); 736 virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt); 737 virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt); 738 virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask); 739 }; 740 741 class SplitDataRequest : public LSQRequest 742 { 743 protected: 744 /* Given that we are inside templates, children need explicit 745 * declaration of the names in the parent class. */ 746 using Flag = typename LSQRequest::Flag; 747 using State = typename LSQRequest::State; 748 using LSQRequest::_addr; 749 using LSQRequest::_data; 750 using LSQRequest::_fault; 751 using LSQRequest::_flags; 752 using LSQRequest::_inst; 753 using LSQRequest::_packets; 754 using LSQRequest::_port; 755 using LSQRequest::_requests; 756 using LSQRequest::_res; 757 using LSQRequest::_byteEnable; 758 using LSQRequest::_senderState; 759 using LSQRequest::_size; 760 using LSQRequest::_state; 761 using LSQRequest::_taskId; 762 using LSQRequest::flags; 763 using LSQRequest::isLoad; 764 using LSQRequest::isTranslationComplete; 765 using LSQRequest::lsqUnit; 766 using LSQRequest::numInTranslationFragments; 767 using LSQRequest::numTranslatedFragments; 768 using LSQRequest::request; 769 using LSQRequest::sendFragmentToTranslation; 770 using LSQRequest::setState; 771 using LSQRequest::_numOutstandingPackets; 772 773 uint32_t numFragments; 774 uint32_t numReceivedPackets; 775 RequestPtr mainReq; 776 PacketPtr _mainPacket; 777 778 public: 779 SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad, 780 const Addr& addr, const uint32_t& size, 781 const Request::Flags & flags_, 782 PacketDataPtr data = nullptr, 783 uint64_t* res = nullptr) : 784 LSQRequest(port, inst, isLoad, addr, size, flags_, data, res, 785 nullptr), 786 numFragments(0), 787 numReceivedPackets(0), 788 mainReq(nullptr), 789 _mainPacket(nullptr) 790 { 791 flags.set(Flag::IsSplit); 792 } 793 virtual ~SplitDataRequest() 794 { 795 if (mainReq) { 796 mainReq = nullptr; 797 } 798 if (_mainPacket) { 799 delete _mainPacket; 800 _mainPacket = nullptr; 801 } 802 } 803 virtual void finish(const Fault &fault, const RequestPtr &req, 804 ThreadContext* tc, BaseTLB::Mode mode); 805 virtual bool recvTimingResp(PacketPtr pkt); 806 virtual void initiateTranslation(); 807 virtual void sendPacketToCache(); 808 virtual void buildPackets(); 809 810 virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt); 811 virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt); 812 virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask); 813 814 virtual RequestPtr mainRequest(); 815 virtual PacketPtr mainPacket(); 816 }; 817 818 /** Constructs an LSQ with the given parameters. */ 819 LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params); 820 ~LSQ() { } 821 822 /** Returns the name of the LSQ. */ 823 std::string name() const; 824 825 /** Registers statistics of each LSQ unit. */ 826 void regStats(); 827 828 /** Sets the pointer to the list of active threads. */ 829 void setActiveThreads(std::list<ThreadID> *at_ptr); 830 831 /** Perform sanity checks after a drain. */ 832 void drainSanityCheck() const; 833 /** Has the LSQ drained? */ 834 bool isDrained() const; 835 /** Takes over execution from another CPU's thread. */ 836 void takeOverFrom(); 837 838 /** Number of entries needed for the given amount of threads.*/ 839 int entryAmount(ThreadID num_threads); 840 841 /** Ticks the LSQ. */ 842 void tick(); 843 844 /** Inserts a load into the LSQ. */ 845 void insertLoad(const DynInstPtr &load_inst); 846 /** Inserts a store into the LSQ. */ 847 void insertStore(const DynInstPtr &store_inst); 848 849 /** Executes a load. */ 850 Fault executeLoad(const DynInstPtr &inst); 851 852 /** Executes a store. */ 853 Fault executeStore(const DynInstPtr &inst); 854 855 /** 856 * Commits loads up until the given sequence number for a specific thread. 857 */ 858 void commitLoads(InstSeqNum &youngest_inst, ThreadID tid) 859 { thread.at(tid).commitLoads(youngest_inst); } 860 861 /** 862 * Commits stores up until the given sequence number for a specific thread. 863 */ 864 void commitStores(InstSeqNum &youngest_inst, ThreadID tid) 865 { thread.at(tid).commitStores(youngest_inst); } 866 867 /** 868 * Attempts to write back stores until all cache ports are used or the 869 * interface becomes blocked. 870 */ 871 void writebackStores(); 872 /** Same as above, but only for one thread. */ 873 void writebackStores(ThreadID tid); 874 875 /** 876 * Squash instructions from a thread until the specified sequence number. 877 */ 878 void 879 squash(const InstSeqNum &squashed_num, ThreadID tid) 880 { 881 thread.at(tid).squash(squashed_num); 882 } 883 884 /** Returns whether or not there was a memory ordering violation. */ 885 bool violation(); 886 /** 887 * Returns whether or not there was a memory ordering violation for a 888 * specific thread. 889 */ 890 bool violation(ThreadID tid) { return thread.at(tid).violation(); } 891 892 /** Gets the instruction that caused the memory ordering violation. */ 893 DynInstPtr 894 getMemDepViolator(ThreadID tid) 895 { 896 return thread.at(tid).getMemDepViolator(); 897 } 898 899 /** Returns the head index of the load queue for a specific thread. */ 900 int getLoadHead(ThreadID tid) { return thread.at(tid).getLoadHead(); } 901 902 /** Returns the sequence number of the head of the load queue. */ 903 InstSeqNum 904 getLoadHeadSeqNum(ThreadID tid) 905 { 906 return thread.at(tid).getLoadHeadSeqNum(); 907 } 908 909 /** Returns the head index of the store queue. */ 910 int getStoreHead(ThreadID tid) { return thread.at(tid).getStoreHead(); } 911 912 /** Returns the sequence number of the head of the store queue. */ 913 InstSeqNum 914 getStoreHeadSeqNum(ThreadID tid) 915 { 916 return thread.at(tid).getStoreHeadSeqNum(); 917 } 918 919 /** Returns the number of instructions in all of the queues. */ 920 int getCount(); 921 /** Returns the number of instructions in the queues of one thread. */ 922 int getCount(ThreadID tid) { return thread.at(tid).getCount(); } 923 924 /** Returns the total number of loads in the load queue. */ 925 int numLoads(); 926 /** Returns the total number of loads for a single thread. */ 927 int numLoads(ThreadID tid) { return thread.at(tid).numLoads(); } 928 929 /** Returns the total number of stores in the store queue. */ 930 int numStores(); 931 /** Returns the total number of stores for a single thread. */ 932 int numStores(ThreadID tid) { return thread.at(tid).numStores(); } 933 934 /** Returns the number of free load entries. */ 935 unsigned numFreeLoadEntries(); 936 937 /** Returns the number of free store entries. */ 938 unsigned numFreeStoreEntries(); 939 940 /** Returns the number of free entries for a specific thread. */ 941 unsigned numFreeEntries(ThreadID tid); 942 943 /** Returns the number of free entries in the LQ for a specific thread. */ 944 unsigned numFreeLoadEntries(ThreadID tid); 945 946 /** Returns the number of free entries in the SQ for a specific thread. */ 947 unsigned numFreeStoreEntries(ThreadID tid); 948 949 /** Returns if the LSQ is full (either LQ or SQ is full). */ 950 bool isFull(); 951 /** 952 * Returns if the LSQ is full for a specific thread (either LQ or SQ is 953 * full). 954 */ 955 bool isFull(ThreadID tid); 956 957 /** Returns if the LSQ is empty (both LQ and SQ are empty). */ 958 bool isEmpty() const; 959 /** Returns if all of the LQs are empty. */ 960 bool lqEmpty() const; 961 /** Returns if all of the SQs are empty. */ 962 bool sqEmpty() const; 963 964 /** Returns if any of the LQs are full. */ 965 bool lqFull(); 966 /** Returns if the LQ of a given thread is full. */ 967 bool lqFull(ThreadID tid); 968 969 /** Returns if any of the SQs are full. */ 970 bool sqFull(); 971 /** Returns if the SQ of a given thread is full. */ 972 bool sqFull(ThreadID tid); 973 974 /** 975 * Returns if the LSQ is stalled due to a memory operation that must be 976 * replayed. 977 */ 978 bool isStalled(); 979 /** 980 * Returns if the LSQ of a specific thread is stalled due to a memory 981 * operation that must be replayed. 982 */ 983 bool isStalled(ThreadID tid); 984 985 /** Returns whether or not there are any stores to write back to memory. */ 986 bool hasStoresToWB(); 987 988 /** Returns whether or not a specific thread has any stores to write back 989 * to memory. 990 */ 991 bool hasStoresToWB(ThreadID tid) { return thread.at(tid).hasStoresToWB(); } 992 993 /** Returns the number of stores a specific thread has to write back. */ 994 int numStoresToWB(ThreadID tid) { return thread.at(tid).numStoresToWB(); } 995 996 /** Returns if the LSQ will write back to memory this cycle. */ 997 bool willWB(); 998 /** Returns if the LSQ of a specific thread will write back to memory this 999 * cycle. 1000 */ 1001 bool willWB(ThreadID tid) { return thread.at(tid).willWB(); } 1002 1003 /** Debugging function to print out all instructions. */ 1004 void dumpInsts() const; 1005 /** Debugging function to print out instructions from a specific thread. */ 1006 void dumpInsts(ThreadID tid) const { thread.at(tid).dumpInsts(); } 1007 1008 /** Executes a read operation, using the load specified at the load 1009 * index. 1010 */ 1011 Fault read(LSQRequest* req, int load_idx); 1012 1013 /** Executes a store operation, using the store specified at the store 1014 * index. 1015 */ 1016 Fault write(LSQRequest* req, uint8_t *data, int store_idx); 1017 1018 /** 1019 * Retry the previous send that failed. 1020 */ 1021 void recvReqRetry(); 1022 1023 void completeDataAccess(PacketPtr pkt); 1024 /** 1025 * Handles writing back and completing the load or store that has 1026 * returned from memory. 1027 * 1028 * @param pkt Response packet from the memory sub-system 1029 */ 1030 bool recvTimingResp(PacketPtr pkt); 1031 1032 void recvTimingSnoopReq(PacketPtr pkt); 1033 1034 Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, 1035 unsigned int size, Addr addr, Request::Flags flags, 1036 uint64_t *res, AtomicOpFunctorPtr amo_op, 1037 const std::vector<bool>& byteEnable); 1038 1039 /** The CPU pointer. */ 1040 O3CPU *cpu; 1041 1042 /** The IEW stage pointer. */ 1043 IEW *iewStage; 1044 1045 /** Is D-cache blocked? */ 1046 bool cacheBlocked() const; 1047 /** Set D-cache blocked status */ 1048 void cacheBlocked(bool v); 1049 /** Is any store port available to use? */ 1050 bool cachePortAvailable(bool is_load) const; 1051 /** Another store port is in use */ 1052 void cachePortBusy(bool is_load); 1053 1054 MasterPort &getDataPort() { return dcachePort; } 1055 1056 protected: 1057 /** D-cache is blocked */ 1058 bool _cacheBlocked; 1059 /** The number of cache ports available each cycle (stores only). */ 1060 int cacheStorePorts; 1061 /** The number of used cache ports in this cycle by stores. */ 1062 int usedStorePorts; 1063 /** The number of cache ports available each cycle (loads only). */ 1064 int cacheLoadPorts; 1065 /** The number of used cache ports in this cycle by loads. */ 1066 int usedLoadPorts; 1067 1068 1069 /** The LSQ policy for SMT mode. */ 1070 SMTQueuePolicy lsqPolicy; 1071 1072 /** Auxiliary function to calculate per-thread max LSQ allocation limit. 1073 * Depending on a policy, number of entries and possibly number of threads 1074 * and threshold, this function calculates how many resources each thread 1075 * can occupy at most. 1076 */ 1077 static uint32_t 1078 maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries, 1079 uint32_t numThreads, uint32_t SMTThreshold) 1080 { 1081 if (pol == SMTQueuePolicy::Dynamic) { 1082 return entries; 1083 } else if (pol == SMTQueuePolicy::Partitioned) { 1084 //@todo:make work if part_amt doesnt divide evenly. 1085 return entries / numThreads; 1086 } else if (pol == SMTQueuePolicy::Threshold) { 1087 //Divide up by threshold amount 1088 //@todo: Should threads check the max and the total 1089 //amount of the LSQ 1090 return SMTThreshold; 1091 } 1092 return 0; 1093 } 1094 1095 /** List of Active Threads in System. */ 1096 std::list<ThreadID> *activeThreads; 1097 1098 /** Total Size of LQ Entries. */ 1099 unsigned LQEntries; 1100 /** Total Size of SQ Entries. */ 1101 unsigned SQEntries; 1102 1103 /** Max LQ Size - Used to Enforce Sharing Policies. */ 1104 unsigned maxLQEntries; 1105 1106 /** Max SQ Size - Used to Enforce Sharing Policies. */ 1107 unsigned maxSQEntries; 1108 1109 /** Data port. */ 1110 DcachePort dcachePort; 1111 1112 /** The LSQ units for individual threads. */ 1113 std::vector<LSQUnit> thread; 1114 1115 /** Number of Threads. */ 1116 ThreadID numThreads; 1117}; 1118 1119template <class Impl> 1120Fault 1121LSQ<Impl>::read(LSQRequest* req, int load_idx) 1122{ 1123 ThreadID tid = cpu->contextToThread(req->request()->contextId()); 1124 1125 return thread.at(tid).read(req, load_idx); 1126} 1127 1128template <class Impl> 1129Fault 1130LSQ<Impl>::write(LSQRequest* req, uint8_t *data, int store_idx) 1131{ 1132 ThreadID tid = cpu->contextToThread(req->request()->contextId()); 1133 1134 return thread.at(tid).write(req, data, store_idx); 1135} 1136 1137#endif // __CPU_O3_LSQ_HH__ 1138