// lsq.hh revision 14194:967b9c450b04
1/* 2 * Copyright (c) 2011-2012, 2014, 2018 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2006 The Regents of The University of Michigan 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions are 20 * met: redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer; 22 * redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution; 25 * neither the name of the copyright holders nor the names of its 26 * contributors may be used to endorse or promote products derived from 27 * this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 * 41 * Authors: Korey Sewell 42 */ 43 44#ifndef __CPU_O3_LSQ_HH__ 45#define __CPU_O3_LSQ_HH__ 46 47#include <map> 48#include <queue> 49 50#include "arch/generic/tlb.hh" 51#include "cpu/inst_seq.hh" 52#include "cpu/o3/lsq_unit.hh" 53#include "cpu/utils.hh" 54#include "enums/SMTQueuePolicy.hh" 55#include "mem/port.hh" 56#include "sim/sim_object.hh" 57 58struct DerivO3CPUParams; 59 60template <class Impl> 61class FullO3CPU; 62 63template <class Impl> 64class LSQ 65 66{ 67 public: 68 typedef typename Impl::O3CPU O3CPU; 69 typedef typename Impl::DynInstPtr DynInstPtr; 70 typedef typename Impl::CPUPol::IEW IEW; 71 typedef typename Impl::CPUPol::LSQUnit LSQUnit; 72 73 class LSQRequest; 74 /** Derived class to hold any sender state the LSQ needs. */ 75 class LSQSenderState : public Packet::SenderState 76 { 77 protected: 78 /** The senderState needs to know the LSQRequest who owns it. */ 79 LSQRequest* _request; 80 81 /** Default constructor. */ 82 LSQSenderState(LSQRequest* request, bool isLoad_) 83 : _request(request), mainPkt(nullptr), pendingPacket(nullptr), 84 outstanding(0), isLoad(isLoad_), needWB(isLoad_), isSplit(false), 85 pktToSend(false), deleted(false) 86 { } 87 public: 88 89 /** Instruction which initiated the access to memory. */ 90 DynInstPtr inst; 91 /** The main packet from a split load, used during writeback. */ 92 PacketPtr mainPkt; 93 /** A second packet from a split store that needs sending. 
*/ 94 PacketPtr pendingPacket; 95 /** Number of outstanding packets to complete. */ 96 uint8_t outstanding; 97 /** Whether or not it is a load. */ 98 bool isLoad; 99 /** Whether or not the instruction will need to writeback. */ 100 bool needWB; 101 /** Whether or not this access is split in two. */ 102 bool isSplit; 103 /** Whether or not there is a packet that needs sending. */ 104 bool pktToSend; 105 /** Has the request been deleted? 106 * LSQ entries can be squashed before the response comes back. in that 107 * case the SenderState knows. 108 */ 109 bool deleted; 110 ContextID contextId() { return inst->contextId(); } 111 112 /** Completes a packet and returns whether the access is finished. */ 113 inline bool isComplete() { return outstanding == 0; } 114 inline void deleteRequest() { deleted = true; } 115 inline bool alive() { return !deleted; } 116 LSQRequest* request() { return _request; } 117 virtual void complete() = 0; 118 void writebackDone() { _request->writebackDone(); } 119 }; 120 121 /** 122 * DcachePort class for the load/store queue. 123 */ 124 class DcachePort : public MasterPort 125 { 126 protected: 127 128 /** Pointer to LSQ. */ 129 LSQ<Impl> *lsq; 130 FullO3CPU<Impl> *cpu; 131 132 public: 133 /** Default constructor. */ 134 DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu) 135 : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq), 136 cpu(_cpu) 137 { } 138 139 protected: 140 141 /** Timing version of receive. Handles writing back and 142 * completing the load or store that has returned from 143 * memory. */ 144 virtual bool recvTimingResp(PacketPtr pkt); 145 virtual void recvTimingSnoopReq(PacketPtr pkt); 146 147 virtual void recvFunctionalSnoop(PacketPtr pkt) 148 { 149 // @todo: Is there a need for potential invalidation here? 150 } 151 152 /** Handles doing a retry of the previous send. 
*/ 153 virtual void recvReqRetry(); 154 155 /** 156 * As this CPU requires snooping to maintain the load store queue 157 * change the behaviour from the base CPU port. 158 * 159 * @return true since we have to snoop 160 */ 161 virtual bool isSnooping() const { return true; } 162 }; 163 164 /** Memory operation metadata. 165 * This class holds the information about a memory operation. It lives 166 * from initiateAcc to resource deallocation at commit or squash. 167 * LSQRequest objects are owned by the LQ/SQ Entry in the LSQUnit that 168 * holds the operation. It is also used by the LSQSenderState. In addition, 169 * the LSQRequest is a TranslationState, therefore, upon squash, there must 170 * be a defined ownership transferal in case the LSQ resources are 171 * deallocated before the TLB is done using the TranslationState. If that 172 * happens, the LSQRequest will be self-owned, and responsible to detect 173 * that its services are no longer required and self-destruct. 174 * 175 * Lifetime of a LSQRequest: 176 * +--------------------+ 177 * |LSQ creates and owns| 178 * +--------------------+ 179 * | 180 * +--------------------+ 181 * | Initate translation| 182 * +--------------------+ 183 * | 184 * ___^___ 185 * ___/ \___ 186 * ______/ Squashed? \ 187 * | \___ ___/ 188 * | \___ ___/ 189 * | v 190 * | | 191 * | +--------------------+ 192 * | | Translation done | 193 * | +--------------------+ 194 * | | 195 * | +--------------------+ 196 * | | Send packet |<------+ 197 * | +--------------------+ | 198 * | | | 199 * | ___^___ | 200 * | ___/ \___ | 201 * | ____/ Squashed? \ | 202 * | | \___ ___/ | 203 * | | \___ ___/ | 204 * | | v | 205 * | | | | 206 * | | ___^___ | 207 * | | ___/ \___ | 208 * | | / Done? 
\__________| 209 * | | \___ ___/ 210 * | | \___ ___/ 211 * | | v 212 * | | | 213 * | | +--------------------+ 214 * | | | Manage stuff | 215 * | | | Free resources | 216 * | | +--------------------+ 217 * | | 218 * | | +--------------------+ 219 * | | | senderState owns | 220 * | +->| onRecvTimingResp | 221 * | | free resources | 222 * | +--------------------+ 223 * | 224 * | +----------------------+ 225 * | | self owned (Trans) | 226 * +-->| on TranslationFinish | 227 * | free resources | 228 * +----------------------+ 229 * 230 * 231 */ 232 class LSQRequest : public BaseTLB::Translation 233 { 234 protected: 235 typedef uint32_t FlagsStorage; 236 typedef ::Flags<FlagsStorage> FlagsType; 237 238 enum Flag : FlagsStorage 239 { 240 IsLoad = 0x00000001, 241 /** True if this is a store/atomic that writes registers (SC). */ 242 WbStore = 0x00000002, 243 Delayed = 0x00000004, 244 IsSplit = 0x00000008, 245 /** True if any translation has been sent to TLB. */ 246 TranslationStarted = 0x00000010, 247 /** True if there are un-replied outbound translations.. */ 248 TranslationFinished = 0x00000020, 249 Sent = 0x00000040, 250 Retry = 0x00000080, 251 Complete = 0x00000100, 252 /** Ownership tracking flags. */ 253 /** Translation squashed. */ 254 TranslationSquashed = 0x00000200, 255 /** Request discarded */ 256 Discarded = 0x00000400, 257 /** LSQ resources freed. */ 258 LSQEntryFreed = 0x00000800, 259 /** Store written back. */ 260 WritebackScheduled = 0x00001000, 261 WritebackDone = 0x00002000, 262 /** True if this is an atomic request */ 263 IsAtomic = 0x00004000 264 }; 265 FlagsType flags; 266 267 enum class State 268 { 269 NotIssued, 270 Translation, 271 Request, 272 Fault, 273 PartialFault, 274 }; 275 State _state; 276 LSQSenderState* _senderState; 277 void setState(const State& newState) { _state = newState; } 278 279 uint32_t numTranslatedFragments; 280 uint32_t numInTranslationFragments; 281 282 /** LQ/SQ entry idx. 
*/ 283 uint32_t _entryIdx; 284 285 void markDelayed() override { flags.set(Flag::Delayed); } 286 bool isDelayed() { return flags.isSet(Flag::Delayed); } 287 288 public: 289 LSQUnit& _port; 290 const DynInstPtr _inst; 291 uint32_t _taskId; 292 PacketDataPtr _data; 293 std::vector<PacketPtr> _packets; 294 std::vector<RequestPtr> _requests; 295 std::vector<Fault> _fault; 296 uint64_t* _res; 297 const Addr _addr; 298 const uint32_t _size; 299 const Request::Flags _flags; 300 std::vector<bool> _byteEnable; 301 uint32_t _numOutstandingPackets; 302 AtomicOpFunctor *_amo_op; 303 protected: 304 LSQUnit* lsqUnit() { return &_port; } 305 LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) : 306 _state(State::NotIssued), _senderState(nullptr), 307 _port(*port), _inst(inst), _data(nullptr), 308 _res(nullptr), _addr(0), _size(0), _flags(0), 309 _numOutstandingPackets(0), _amo_op(nullptr) 310 { 311 flags.set(Flag::IsLoad, isLoad); 312 flags.set(Flag::WbStore, 313 _inst->isStoreConditional() || _inst->isAtomic()); 314 flags.set(Flag::IsAtomic, _inst->isAtomic()); 315 install(); 316 } 317 LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad, 318 const Addr& addr, const uint32_t& size, 319 const Request::Flags& flags_, 320 PacketDataPtr data = nullptr, uint64_t* res = nullptr, 321 AtomicOpFunctor* amo_op = nullptr) 322 : _state(State::NotIssued), _senderState(nullptr), 323 numTranslatedFragments(0), 324 numInTranslationFragments(0), 325 _port(*port), _inst(inst), _data(data), 326 _res(res), _addr(addr), _size(size), 327 _flags(flags_), 328 _numOutstandingPackets(0), 329 _amo_op(amo_op) 330 { 331 flags.set(Flag::IsLoad, isLoad); 332 flags.set(Flag::WbStore, 333 _inst->isStoreConditional() || _inst->isAtomic()); 334 flags.set(Flag::IsAtomic, _inst->isAtomic()); 335 install(); 336 } 337 338 bool 339 isLoad() const 340 { 341 return flags.isSet(Flag::IsLoad); 342 } 343 344 bool 345 isAtomic() const 346 { 347 return flags.isSet(Flag::IsAtomic); 348 } 349 350 /** 
Install the request in the LQ/SQ. */ 351 void install() 352 { 353 if (isLoad()) { 354 _port.loadQueue[_inst->lqIdx].setRequest(this); 355 } else { 356 // Store, StoreConditional, and Atomic requests are pushed 357 // to this storeQueue 358 _port.storeQueue[_inst->sqIdx].setRequest(this); 359 } 360 } 361 virtual bool 362 squashed() const override 363 { 364 return _inst->isSquashed(); 365 } 366 367 /** 368 * Test if the LSQRequest has been released, i.e. self-owned. 369 * An LSQRequest manages itself when the resources on the LSQ are freed 370 * but the translation is still going on and the LSQEntry was freed. 371 */ 372 bool 373 isReleased() 374 { 375 return flags.isSet(Flag::LSQEntryFreed) || 376 flags.isSet(Flag::Discarded); 377 } 378 379 /** Release the LSQRequest. 380 * Notify the sender state that the request it points to is not valid 381 * anymore. Understand if the request is orphan (self-managed) and if 382 * so, mark it as freed, else destroy it, as this means 383 * the end of its life cycle. 384 * An LSQRequest is orphan when its resources are released 385 * but there is any in-flight translation request to the TLB or access 386 * request to the memory. 387 */ 388 void release(Flag reason) 389 { 390 assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded); 391 if (!isAnyOutstandingRequest()) { 392 delete this; 393 } else { 394 if (_senderState) { 395 _senderState->deleteRequest(); 396 } 397 flags.set(reason); 398 } 399 } 400 401 /** Helper function used to add a (sub)request, given its address 402 * `addr`, size `size` and byte-enable mask `byteEnable`. 403 * 404 * The request is only added if the mask is empty or if there is at 405 * least an active element in it. 
406 */ 407 void 408 addRequest(Addr addr, unsigned size, 409 const std::vector<bool>& byteEnable) 410 { 411 if (byteEnable.empty() || 412 isAnyActiveElement(byteEnable.begin(), byteEnable.end())) { 413 auto request = std::make_shared<Request>(_inst->getASID(), 414 addr, size, _flags, _inst->masterId(), 415 _inst->instAddr(), _inst->contextId(), _amo_op); 416 if (!byteEnable.empty()) { 417 request->setByteEnable(byteEnable); 418 } 419 _requests.push_back(request); 420 } 421 } 422 423 /** Destructor. 424 * The LSQRequest owns the request. If the packet has already been 425 * sent, the sender state will be deleted upon receiving the reply. 426 */ 427 virtual ~LSQRequest() 428 { 429 assert(!isAnyOutstandingRequest()); 430 _inst->savedReq = nullptr; 431 if (_senderState) 432 delete _senderState; 433 434 for (auto r: _packets) 435 delete r; 436 }; 437 438 439 public: 440 /** Convenience getters/setters. */ 441 /** @{ */ 442 /** Set up Context numbers. */ 443 void 444 setContext(const ContextID& context_id) 445 { 446 request()->setContext(context_id); 447 } 448 449 const DynInstPtr& 450 instruction() 451 { 452 return _inst; 453 } 454 455 /** Set up virtual request. 456 * For a previously allocated Request objects. 
457 */ 458 void 459 setVirt(int asid, Addr vaddr, unsigned size, Request::Flags flags_, 460 MasterID mid, Addr pc) 461 { 462 request()->setVirt(asid, vaddr, size, flags_, mid, pc); 463 } 464 465 void 466 taskId(const uint32_t& v) 467 { 468 _taskId = v; 469 for (auto& r: _requests) 470 r->taskId(v); 471 } 472 473 uint32_t taskId() const { return _taskId; } 474 RequestPtr request(int idx = 0) { return _requests.at(idx); } 475 476 const RequestPtr 477 request(int idx = 0) const 478 { 479 return _requests.at(idx); 480 } 481 482 Addr getVaddr(int idx = 0) const { return request(idx)->getVaddr(); } 483 virtual void initiateTranslation() = 0; 484 485 PacketPtr packet(int idx = 0) { return _packets.at(idx); } 486 487 virtual PacketPtr 488 mainPacket() 489 { 490 assert (_packets.size() == 1); 491 return packet(); 492 } 493 494 virtual RequestPtr 495 mainRequest() 496 { 497 assert (_requests.size() == 1); 498 return request(); 499 } 500 501 void 502 senderState(LSQSenderState* st) 503 { 504 _senderState = st; 505 for (auto& pkt: _packets) { 506 if (pkt) 507 pkt->senderState = st; 508 } 509 } 510 511 const LSQSenderState* 512 senderState() const 513 { 514 return _senderState; 515 } 516 517 /** 518 * Mark senderState as discarded. This will cause to discard response 519 * packets from the cache. 
520 */ 521 void 522 discardSenderState() 523 { 524 assert(_senderState); 525 _senderState->deleteRequest(); 526 } 527 528 /** 529 * Test if there is any in-flight translation or mem access request 530 */ 531 bool 532 isAnyOutstandingRequest() 533 { 534 return numInTranslationFragments > 0 || 535 _numOutstandingPackets > 0 || 536 (flags.isSet(Flag::WritebackScheduled) && 537 !flags.isSet(Flag::WritebackDone)); 538 } 539 540 bool 541 isSplit() const 542 { 543 return flags.isSet(Flag::IsSplit); 544 } 545 /** @} */ 546 virtual bool recvTimingResp(PacketPtr pkt) = 0; 547 virtual void sendPacketToCache() = 0; 548 virtual void buildPackets() = 0; 549 550 /** 551 * Memory mapped IPR accesses 552 */ 553 virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt) = 0; 554 virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt) = 0; 555 556 /** 557 * Test if the request accesses a particular cache line. 558 */ 559 virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask) = 0; 560 561 /** Update the status to reflect that a packet was sent. */ 562 void 563 packetSent() 564 { 565 flags.set(Flag::Sent); 566 } 567 /** Update the status to reflect that a packet was not sent. 568 * When a packet fails to be sent, we mark the request as needing a 569 * retry. Note that Retry flag is sticky. 
570 */ 571 void 572 packetNotSent() 573 { 574 flags.set(Flag::Retry); 575 flags.clear(Flag::Sent); 576 } 577 578 void sendFragmentToTranslation(int i); 579 bool 580 isComplete() 581 { 582 return flags.isSet(Flag::Complete); 583 } 584 585 bool 586 isInTranslation() 587 { 588 return _state == State::Translation; 589 } 590 591 bool 592 isTranslationComplete() 593 { 594 return flags.isSet(Flag::TranslationStarted) && 595 !isInTranslation(); 596 } 597 598 bool 599 isTranslationBlocked() 600 { 601 return _state == State::Translation && 602 flags.isSet(Flag::TranslationStarted) && 603 !flags.isSet(Flag::TranslationFinished); 604 } 605 606 bool 607 isSent() 608 { 609 return flags.isSet(Flag::Sent); 610 } 611 612 bool 613 isPartialFault() 614 { 615 return _state == State::PartialFault; 616 } 617 618 bool 619 isMemAccessRequired() 620 { 621 return (_state == State::Request || 622 (isPartialFault() && isLoad())); 623 } 624 625 /** 626 * The LSQ entry is cleared 627 */ 628 void 629 freeLSQEntry() 630 { 631 release(Flag::LSQEntryFreed); 632 } 633 634 /** 635 * The request is discarded (e.g. partial store-load forwarding) 636 */ 637 void 638 discard() 639 { 640 release(Flag::Discarded); 641 } 642 643 void 644 packetReplied() 645 { 646 assert(_numOutstandingPackets > 0); 647 _numOutstandingPackets--; 648 if (_numOutstandingPackets == 0 && isReleased()) 649 delete this; 650 } 651 652 void 653 writebackScheduled() 654 { 655 assert(!flags.isSet(Flag::WritebackScheduled)); 656 flags.set(Flag::WritebackScheduled); 657 } 658 659 void 660 writebackDone() 661 { 662 flags.set(Flag::WritebackDone); 663 /* If the lsq resources are already free */ 664 if (isReleased()) { 665 delete this; 666 } 667 } 668 669 void 670 squashTranslation() 671 { 672 assert(numInTranslationFragments == 0); 673 flags.set(Flag::TranslationSquashed); 674 /* If we are on our own, self-destruct. 
*/ 675 if (isReleased()) { 676 delete this; 677 } 678 } 679 680 void 681 complete() 682 { 683 flags.set(Flag::Complete); 684 } 685 }; 686 687 class SingleDataRequest : public LSQRequest 688 { 689 protected: 690 /* Given that we are inside templates, children need explicit 691 * declaration of the names in the parent class. */ 692 using Flag = typename LSQRequest::Flag; 693 using State = typename LSQRequest::State; 694 using LSQRequest::_addr; 695 using LSQRequest::_fault; 696 using LSQRequest::_flags; 697 using LSQRequest::_size; 698 using LSQRequest::_byteEnable; 699 using LSQRequest::_requests; 700 using LSQRequest::_inst; 701 using LSQRequest::_packets; 702 using LSQRequest::_port; 703 using LSQRequest::_res; 704 using LSQRequest::_taskId; 705 using LSQRequest::_senderState; 706 using LSQRequest::_state; 707 using LSQRequest::flags; 708 using LSQRequest::isLoad; 709 using LSQRequest::isTranslationComplete; 710 using LSQRequest::lsqUnit; 711 using LSQRequest::request; 712 using LSQRequest::sendFragmentToTranslation; 713 using LSQRequest::setState; 714 using LSQRequest::numInTranslationFragments; 715 using LSQRequest::numTranslatedFragments; 716 using LSQRequest::_numOutstandingPackets; 717 using LSQRequest::_amo_op; 718 public: 719 SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad, 720 const Addr& addr, const uint32_t& size, 721 const Request::Flags& flags_, 722 PacketDataPtr data = nullptr, 723 uint64_t* res = nullptr, 724 AtomicOpFunctor* amo_op = nullptr) : 725 LSQRequest(port, inst, isLoad, addr, size, flags_, data, res, 726 amo_op) {} 727 728 inline virtual ~SingleDataRequest() {} 729 virtual void initiateTranslation(); 730 virtual void finish(const Fault &fault, const RequestPtr &req, 731 ThreadContext* tc, BaseTLB::Mode mode); 732 virtual bool recvTimingResp(PacketPtr pkt); 733 virtual void sendPacketToCache(); 734 virtual void buildPackets(); 735 virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt); 736 virtual Cycles 
handleIprRead(ThreadContext *thread, PacketPtr pkt); 737 virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask); 738 }; 739 740 class SplitDataRequest : public LSQRequest 741 { 742 protected: 743 /* Given that we are inside templates, children need explicit 744 * declaration of the names in the parent class. */ 745 using Flag = typename LSQRequest::Flag; 746 using State = typename LSQRequest::State; 747 using LSQRequest::_addr; 748 using LSQRequest::_data; 749 using LSQRequest::_fault; 750 using LSQRequest::_flags; 751 using LSQRequest::_inst; 752 using LSQRequest::_packets; 753 using LSQRequest::_port; 754 using LSQRequest::_requests; 755 using LSQRequest::_res; 756 using LSQRequest::_byteEnable; 757 using LSQRequest::_senderState; 758 using LSQRequest::_size; 759 using LSQRequest::_state; 760 using LSQRequest::_taskId; 761 using LSQRequest::flags; 762 using LSQRequest::isLoad; 763 using LSQRequest::isTranslationComplete; 764 using LSQRequest::lsqUnit; 765 using LSQRequest::numInTranslationFragments; 766 using LSQRequest::numTranslatedFragments; 767 using LSQRequest::request; 768 using LSQRequest::sendFragmentToTranslation; 769 using LSQRequest::setState; 770 using LSQRequest::_numOutstandingPackets; 771 772 uint32_t numFragments; 773 uint32_t numReceivedPackets; 774 RequestPtr mainReq; 775 PacketPtr _mainPacket; 776 777 public: 778 SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad, 779 const Addr& addr, const uint32_t& size, 780 const Request::Flags & flags_, 781 PacketDataPtr data = nullptr, 782 uint64_t* res = nullptr) : 783 LSQRequest(port, inst, isLoad, addr, size, flags_, data, res, 784 nullptr), 785 numFragments(0), 786 numReceivedPackets(0), 787 mainReq(nullptr), 788 _mainPacket(nullptr) 789 { 790 flags.set(Flag::IsSplit); 791 } 792 virtual ~SplitDataRequest() 793 { 794 if (mainReq) { 795 mainReq = nullptr; 796 } 797 if (_mainPacket) { 798 delete _mainPacket; 799 _mainPacket = nullptr; 800 } 801 } 802 virtual void finish(const 
Fault &fault, const RequestPtr &req, 803 ThreadContext* tc, BaseTLB::Mode mode); 804 virtual bool recvTimingResp(PacketPtr pkt); 805 virtual void initiateTranslation(); 806 virtual void sendPacketToCache(); 807 virtual void buildPackets(); 808 809 virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt); 810 virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt); 811 virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask); 812 813 virtual RequestPtr mainRequest(); 814 virtual PacketPtr mainPacket(); 815 }; 816 817 /** Constructs an LSQ with the given parameters. */ 818 LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params); 819 ~LSQ() { } 820 821 /** Returns the name of the LSQ. */ 822 std::string name() const; 823 824 /** Registers statistics of each LSQ unit. */ 825 void regStats(); 826 827 /** Sets the pointer to the list of active threads. */ 828 void setActiveThreads(std::list<ThreadID> *at_ptr); 829 830 /** Perform sanity checks after a drain. */ 831 void drainSanityCheck() const; 832 /** Has the LSQ drained? */ 833 bool isDrained() const; 834 /** Takes over execution from another CPU's thread. */ 835 void takeOverFrom(); 836 837 /** Number of entries needed for the given amount of threads.*/ 838 int entryAmount(ThreadID num_threads); 839 840 /** Ticks the LSQ. */ 841 void tick(); 842 843 /** Inserts a load into the LSQ. */ 844 void insertLoad(const DynInstPtr &load_inst); 845 /** Inserts a store into the LSQ. */ 846 void insertStore(const DynInstPtr &store_inst); 847 848 /** Executes a load. */ 849 Fault executeLoad(const DynInstPtr &inst); 850 851 /** Executes a store. */ 852 Fault executeStore(const DynInstPtr &inst); 853 854 /** 855 * Commits loads up until the given sequence number for a specific thread. 856 */ 857 void commitLoads(InstSeqNum &youngest_inst, ThreadID tid) 858 { thread.at(tid).commitLoads(youngest_inst); } 859 860 /** 861 * Commits stores up until the given sequence number for a specific thread. 
862 */ 863 void commitStores(InstSeqNum &youngest_inst, ThreadID tid) 864 { thread.at(tid).commitStores(youngest_inst); } 865 866 /** 867 * Attempts to write back stores until all cache ports are used or the 868 * interface becomes blocked. 869 */ 870 void writebackStores(); 871 /** Same as above, but only for one thread. */ 872 void writebackStores(ThreadID tid); 873 874 /** 875 * Squash instructions from a thread until the specified sequence number. 876 */ 877 void 878 squash(const InstSeqNum &squashed_num, ThreadID tid) 879 { 880 thread.at(tid).squash(squashed_num); 881 } 882 883 /** Returns whether or not there was a memory ordering violation. */ 884 bool violation(); 885 /** 886 * Returns whether or not there was a memory ordering violation for a 887 * specific thread. 888 */ 889 bool violation(ThreadID tid) { return thread.at(tid).violation(); } 890 891 /** Gets the instruction that caused the memory ordering violation. */ 892 DynInstPtr 893 getMemDepViolator(ThreadID tid) 894 { 895 return thread.at(tid).getMemDepViolator(); 896 } 897 898 /** Returns the head index of the load queue for a specific thread. */ 899 int getLoadHead(ThreadID tid) { return thread.at(tid).getLoadHead(); } 900 901 /** Returns the sequence number of the head of the load queue. */ 902 InstSeqNum 903 getLoadHeadSeqNum(ThreadID tid) 904 { 905 return thread.at(tid).getLoadHeadSeqNum(); 906 } 907 908 /** Returns the head index of the store queue. */ 909 int getStoreHead(ThreadID tid) { return thread.at(tid).getStoreHead(); } 910 911 /** Returns the sequence number of the head of the store queue. */ 912 InstSeqNum 913 getStoreHeadSeqNum(ThreadID tid) 914 { 915 return thread.at(tid).getStoreHeadSeqNum(); 916 } 917 918 /** Returns the number of instructions in all of the queues. */ 919 int getCount(); 920 /** Returns the number of instructions in the queues of one thread. 
*/ 921 int getCount(ThreadID tid) { return thread.at(tid).getCount(); } 922 923 /** Returns the total number of loads in the load queue. */ 924 int numLoads(); 925 /** Returns the total number of loads for a single thread. */ 926 int numLoads(ThreadID tid) { return thread.at(tid).numLoads(); } 927 928 /** Returns the total number of stores in the store queue. */ 929 int numStores(); 930 /** Returns the total number of stores for a single thread. */ 931 int numStores(ThreadID tid) { return thread.at(tid).numStores(); } 932 933 /** Returns the number of free load entries. */ 934 unsigned numFreeLoadEntries(); 935 936 /** Returns the number of free store entries. */ 937 unsigned numFreeStoreEntries(); 938 939 /** Returns the number of free entries for a specific thread. */ 940 unsigned numFreeEntries(ThreadID tid); 941 942 /** Returns the number of free entries in the LQ for a specific thread. */ 943 unsigned numFreeLoadEntries(ThreadID tid); 944 945 /** Returns the number of free entries in the SQ for a specific thread. */ 946 unsigned numFreeStoreEntries(ThreadID tid); 947 948 /** Returns if the LSQ is full (either LQ or SQ is full). */ 949 bool isFull(); 950 /** 951 * Returns if the LSQ is full for a specific thread (either LQ or SQ is 952 * full). 953 */ 954 bool isFull(ThreadID tid); 955 956 /** Returns if the LSQ is empty (both LQ and SQ are empty). */ 957 bool isEmpty() const; 958 /** Returns if all of the LQs are empty. */ 959 bool lqEmpty() const; 960 /** Returns if all of the SQs are empty. */ 961 bool sqEmpty() const; 962 963 /** Returns if any of the LQs are full. */ 964 bool lqFull(); 965 /** Returns if the LQ of a given thread is full. */ 966 bool lqFull(ThreadID tid); 967 968 /** Returns if any of the SQs are full. */ 969 bool sqFull(); 970 /** Returns if the SQ of a given thread is full. */ 971 bool sqFull(ThreadID tid); 972 973 /** 974 * Returns if the LSQ is stalled due to a memory operation that must be 975 * replayed. 
976 */ 977 bool isStalled(); 978 /** 979 * Returns if the LSQ of a specific thread is stalled due to a memory 980 * operation that must be replayed. 981 */ 982 bool isStalled(ThreadID tid); 983 984 /** Returns whether or not there are any stores to write back to memory. */ 985 bool hasStoresToWB(); 986 987 /** Returns whether or not a specific thread has any stores to write back 988 * to memory. 989 */ 990 bool hasStoresToWB(ThreadID tid) { return thread.at(tid).hasStoresToWB(); } 991 992 /** Returns the number of stores a specific thread has to write back. */ 993 int numStoresToWB(ThreadID tid) { return thread.at(tid).numStoresToWB(); } 994 995 /** Returns if the LSQ will write back to memory this cycle. */ 996 bool willWB(); 997 /** Returns if the LSQ of a specific thread will write back to memory this 998 * cycle. 999 */ 1000 bool willWB(ThreadID tid) { return thread.at(tid).willWB(); } 1001 1002 /** Debugging function to print out all instructions. */ 1003 void dumpInsts() const; 1004 /** Debugging function to print out instructions from a specific thread. */ 1005 void dumpInsts(ThreadID tid) const { thread.at(tid).dumpInsts(); } 1006 1007 /** Executes a read operation, using the load specified at the load 1008 * index. 1009 */ 1010 Fault read(LSQRequest* req, int load_idx); 1011 1012 /** Executes a store operation, using the store specified at the store 1013 * index. 1014 */ 1015 Fault write(LSQRequest* req, uint8_t *data, int store_idx); 1016 1017 /** 1018 * Retry the previous send that failed. 1019 */ 1020 void recvReqRetry(); 1021 1022 void completeDataAccess(PacketPtr pkt); 1023 /** 1024 * Handles writing back and completing the load or store that has 1025 * returned from memory. 
1026 * 1027 * @param pkt Response packet from the memory sub-system 1028 */ 1029 bool recvTimingResp(PacketPtr pkt); 1030 1031 void recvTimingSnoopReq(PacketPtr pkt); 1032 1033 Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, 1034 unsigned int size, Addr addr, Request::Flags flags, 1035 uint64_t *res, AtomicOpFunctor *amo_op, 1036 const std::vector<bool>& byteEnable); 1037 1038 /** The CPU pointer. */ 1039 O3CPU *cpu; 1040 1041 /** The IEW stage pointer. */ 1042 IEW *iewStage; 1043 1044 /** Is D-cache blocked? */ 1045 bool cacheBlocked() const; 1046 /** Set D-cache blocked status */ 1047 void cacheBlocked(bool v); 1048 /** Is any store port available to use? */ 1049 bool cachePortAvailable(bool is_load) const; 1050 /** Another store port is in use */ 1051 void cachePortBusy(bool is_load); 1052 1053 MasterPort &getDataPort() { return dcachePort; } 1054 1055 protected: 1056 /** D-cache is blocked */ 1057 bool _cacheBlocked; 1058 /** The number of cache ports available each cycle (stores only). */ 1059 int cacheStorePorts; 1060 /** The number of used cache ports in this cycle by stores. */ 1061 int usedStorePorts; 1062 /** The number of cache ports available each cycle (loads only). */ 1063 int cacheLoadPorts; 1064 /** The number of used cache ports in this cycle by loads. */ 1065 int usedLoadPorts; 1066 1067 1068 /** The LSQ policy for SMT mode. */ 1069 SMTQueuePolicy lsqPolicy; 1070 1071 /** Auxiliary function to calculate per-thread max LSQ allocation limit. 1072 * Depending on a policy, number of entries and possibly number of threads 1073 * and threshold, this function calculates how many resources each thread 1074 * can occupy at most. 
1075 */ 1076 static uint32_t 1077 maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries, 1078 uint32_t numThreads, uint32_t SMTThreshold) 1079 { 1080 if (pol == SMTQueuePolicy::Dynamic) { 1081 return entries; 1082 } else if (pol == SMTQueuePolicy::Partitioned) { 1083 //@todo:make work if part_amt doesnt divide evenly. 1084 return entries / numThreads; 1085 } else if (pol == SMTQueuePolicy::Threshold) { 1086 //Divide up by threshold amount 1087 //@todo: Should threads check the max and the total 1088 //amount of the LSQ 1089 return SMTThreshold; 1090 } 1091 return 0; 1092 } 1093 1094 /** List of Active Threads in System. */ 1095 std::list<ThreadID> *activeThreads; 1096 1097 /** Total Size of LQ Entries. */ 1098 unsigned LQEntries; 1099 /** Total Size of SQ Entries. */ 1100 unsigned SQEntries; 1101 1102 /** Max LQ Size - Used to Enforce Sharing Policies. */ 1103 unsigned maxLQEntries; 1104 1105 /** Max SQ Size - Used to Enforce Sharing Policies. */ 1106 unsigned maxSQEntries; 1107 1108 /** Data port. */ 1109 DcachePort dcachePort; 1110 1111 /** The LSQ units for individual threads. */ 1112 std::vector<LSQUnit> thread; 1113 1114 /** Number of Threads. */ 1115 ThreadID numThreads; 1116}; 1117 1118template <class Impl> 1119Fault 1120LSQ<Impl>::read(LSQRequest* req, int load_idx) 1121{ 1122 ThreadID tid = cpu->contextToThread(req->request()->contextId()); 1123 1124 return thread.at(tid).read(req, load_idx); 1125} 1126 1127template <class Impl> 1128Fault 1129LSQ<Impl>::write(LSQRequest* req, uint8_t *data, int store_idx) 1130{ 1131 ThreadID tid = cpu->contextToThread(req->request()->contextId()); 1132 1133 return thread.at(tid).write(req, data, store_idx); 1134} 1135 1136#endif // __CPU_O3_LSQ_HH__ 1137