// lsq_impl.hh revision 14297
1/* 2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2005-2006 The Regents of The University of Michigan 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions are 20 * met: redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer; 22 * redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution; 25 * neither the name of the copyright holders nor the names of its 26 * contributors may be used to endorse or promote products derived from 27 * this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 * 41 * Authors: Korey Sewell 42 */ 43 44#ifndef __CPU_O3_LSQ_IMPL_HH__ 45#define __CPU_O3_LSQ_IMPL_HH__ 46 47#include <algorithm> 48#include <list> 49#include <string> 50 51#include "base/logging.hh" 52#include "cpu/o3/cpu.hh" 53#include "cpu/o3/lsq.hh" 54#include "debug/Drain.hh" 55#include "debug/Fetch.hh" 56#include "debug/LSQ.hh" 57#include "debug/Writeback.hh" 58#include "params/DerivO3CPU.hh" 59 60using namespace std; 61 62template <class Impl> 63LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params) 64 : cpu(cpu_ptr), iewStage(iew_ptr), 65 _cacheBlocked(false), 66 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0), 67 cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0), 68 lsqPolicy(params->smtLSQPolicy), 69 LQEntries(params->LQEntries), 70 SQEntries(params->SQEntries), 71 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads, 72 params->smtLSQThreshold)), 73 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads, 74 params->smtLSQThreshold)), 75 dcachePort(this, cpu_ptr), 76 numThreads(params->numThreads) 77{ 78 assert(numThreads > 0 && numThreads <= Impl::MaxThreads); 79 80 //**********************************************/ 81 //************ Handle SMT Parameters ***********/ 82 //**********************************************/ 83 84 /* Run SMT olicy checks. 
*/ 85 if (lsqPolicy == SMTQueuePolicy::Dynamic) { 86 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n"); 87 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) { 88 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: " 89 "%i entries per LQ | %i entries per SQ\n", 90 maxLQEntries,maxSQEntries); 91 } else if (lsqPolicy == SMTQueuePolicy::Threshold) { 92 93 assert(params->smtLSQThreshold > params->LQEntries); 94 assert(params->smtLSQThreshold > params->SQEntries); 95 96 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: " 97 "%i entries per LQ | %i entries per SQ\n", 98 maxLQEntries,maxSQEntries); 99 } else { 100 panic("Invalid LSQ sharing policy. Options are: Dynamic, " 101 "Partitioned, Threshold"); 102 } 103 104 thread.reserve(numThreads); 105 for (ThreadID tid = 0; tid < numThreads; tid++) { 106 thread.emplace_back(maxLQEntries, maxSQEntries); 107 thread[tid].init(cpu, iew_ptr, params, this, tid); 108 thread[tid].setDcachePort(&dcachePort); 109 } 110} 111 112 113template<class Impl> 114std::string 115LSQ<Impl>::name() const 116{ 117 return iewStage->name() + ".lsq"; 118} 119 120template<class Impl> 121void 122LSQ<Impl>::regStats() 123{ 124 //Initialize LSQs 125 for (ThreadID tid = 0; tid < numThreads; tid++) { 126 thread[tid].regStats(); 127 } 128} 129 130template<class Impl> 131void 132LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr) 133{ 134 activeThreads = at_ptr; 135 assert(activeThreads != 0); 136} 137 138template <class Impl> 139void 140LSQ<Impl>::drainSanityCheck() const 141{ 142 assert(isDrained()); 143 144 for (ThreadID tid = 0; tid < numThreads; tid++) 145 thread[tid].drainSanityCheck(); 146} 147 148template <class Impl> 149bool 150LSQ<Impl>::isDrained() const 151{ 152 bool drained(true); 153 154 if (!lqEmpty()) { 155 DPRINTF(Drain, "Not drained, LQ not empty.\n"); 156 drained = false; 157 } 158 159 if (!sqEmpty()) { 160 DPRINTF(Drain, "Not drained, SQ not empty.\n"); 161 drained = false; 162 } 163 164 return drained; 165} 166 167template <class 
Impl> 168void 169LSQ<Impl>::takeOverFrom() 170{ 171 usedStorePorts = 0; 172 _cacheBlocked = false; 173 174 for (ThreadID tid = 0; tid < numThreads; tid++) { 175 thread[tid].takeOverFrom(); 176 } 177} 178 179template <class Impl> 180void 181LSQ<Impl>::tick() 182{ 183 // Re-issue loads which got blocked on the per-cycle load ports limit. 184 if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked) 185 iewStage->cacheUnblocked(); 186 187 usedLoadPorts = 0; 188 usedStorePorts = 0; 189} 190 191template<class Impl> 192bool 193LSQ<Impl>::cacheBlocked() const 194{ 195 return _cacheBlocked; 196} 197 198template<class Impl> 199void 200LSQ<Impl>::cacheBlocked(bool v) 201{ 202 _cacheBlocked = v; 203} 204 205template<class Impl> 206bool 207LSQ<Impl>::cachePortAvailable(bool is_load) const 208{ 209 bool ret; 210 if (is_load) { 211 ret = usedLoadPorts < cacheLoadPorts; 212 } else { 213 ret = usedStorePorts < cacheStorePorts; 214 } 215 return ret; 216} 217 218template<class Impl> 219void 220LSQ<Impl>::cachePortBusy(bool is_load) 221{ 222 assert(cachePortAvailable(is_load)); 223 if (is_load) { 224 usedLoadPorts++; 225 } else { 226 usedStorePorts++; 227 } 228} 229 230template<class Impl> 231void 232LSQ<Impl>::insertLoad(const DynInstPtr &load_inst) 233{ 234 ThreadID tid = load_inst->threadNumber; 235 236 thread[tid].insertLoad(load_inst); 237} 238 239template<class Impl> 240void 241LSQ<Impl>::insertStore(const DynInstPtr &store_inst) 242{ 243 ThreadID tid = store_inst->threadNumber; 244 245 thread[tid].insertStore(store_inst); 246} 247 248template<class Impl> 249Fault 250LSQ<Impl>::executeLoad(const DynInstPtr &inst) 251{ 252 ThreadID tid = inst->threadNumber; 253 254 return thread[tid].executeLoad(inst); 255} 256 257template<class Impl> 258Fault 259LSQ<Impl>::executeStore(const DynInstPtr &inst) 260{ 261 ThreadID tid = inst->threadNumber; 262 263 return thread[tid].executeStore(inst); 264} 265 266template<class Impl> 267void 268LSQ<Impl>::writebackStores() 269{ 270 
list<ThreadID>::iterator threads = activeThreads->begin(); 271 list<ThreadID>::iterator end = activeThreads->end(); 272 273 while (threads != end) { 274 ThreadID tid = *threads++; 275 276 if (numStoresToWB(tid) > 0) { 277 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores " 278 "available for Writeback.\n", tid, numStoresToWB(tid)); 279 } 280 281 thread[tid].writebackStores(); 282 } 283} 284 285template<class Impl> 286bool 287LSQ<Impl>::violation() 288{ 289 /* Answers: Does Anybody Have a Violation?*/ 290 list<ThreadID>::iterator threads = activeThreads->begin(); 291 list<ThreadID>::iterator end = activeThreads->end(); 292 293 while (threads != end) { 294 ThreadID tid = *threads++; 295 296 if (thread[tid].violation()) 297 return true; 298 } 299 300 return false; 301} 302 303template <class Impl> 304void 305LSQ<Impl>::recvReqRetry() 306{ 307 iewStage->cacheUnblocked(); 308 cacheBlocked(false); 309 310 for (ThreadID tid : *activeThreads) { 311 thread[tid].recvRetry(); 312 } 313} 314 315template <class Impl> 316void 317LSQ<Impl>::completeDataAccess(PacketPtr pkt) 318{ 319 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState); 320 thread[cpu->contextToThread(senderState->contextId())] 321 .completeDataAccess(pkt); 322} 323 324template <class Impl> 325bool 326LSQ<Impl>::recvTimingResp(PacketPtr pkt) 327{ 328 if (pkt->isError()) 329 DPRINTF(LSQ, "Got error packet back for address: %#X\n", 330 pkt->getAddr()); 331 332 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState); 333 panic_if(!senderState, "Got packet back with unknown sender state\n"); 334 335 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt); 336 337 if (pkt->isInvalidate()) { 338 // This response also contains an invalidate; e.g. this can be the case 339 // if cmd is ReadRespWithInvalidate. 340 // 341 // The calling order between completeDataAccess and checkSnoop matters. 
342 // By calling checkSnoop after completeDataAccess, we ensure that the 343 // fault set by checkSnoop is not lost. Calling writeback (more 344 // specifically inst->completeAcc) in completeDataAccess overwrites 345 // fault, and in case this instruction requires squashing (as 346 // determined by checkSnoop), the ReExec fault set by checkSnoop would 347 // be lost otherwise. 348 349 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n", 350 pkt->getAddr()); 351 352 for (ThreadID tid = 0; tid < numThreads; tid++) { 353 thread[tid].checkSnoop(pkt); 354 } 355 } 356 // Update the LSQRequest state (this may delete the request) 357 senderState->request()->packetReplied(); 358 359 return true; 360} 361 362template <class Impl> 363void 364LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt) 365{ 366 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(), 367 pkt->cmdString()); 368 369 // must be a snoop 370 if (pkt->isInvalidate()) { 371 DPRINTF(LSQ, "received invalidation for addr:%#x\n", 372 pkt->getAddr()); 373 for (ThreadID tid = 0; tid < numThreads; tid++) { 374 thread[tid].checkSnoop(pkt); 375 } 376 } 377} 378 379template<class Impl> 380int 381LSQ<Impl>::getCount() 382{ 383 unsigned total = 0; 384 385 list<ThreadID>::iterator threads = activeThreads->begin(); 386 list<ThreadID>::iterator end = activeThreads->end(); 387 388 while (threads != end) { 389 ThreadID tid = *threads++; 390 391 total += getCount(tid); 392 } 393 394 return total; 395} 396 397template<class Impl> 398int 399LSQ<Impl>::numLoads() 400{ 401 unsigned total = 0; 402 403 list<ThreadID>::iterator threads = activeThreads->begin(); 404 list<ThreadID>::iterator end = activeThreads->end(); 405 406 while (threads != end) { 407 ThreadID tid = *threads++; 408 409 total += numLoads(tid); 410 } 411 412 return total; 413} 414 415template<class Impl> 416int 417LSQ<Impl>::numStores() 418{ 419 unsigned total = 0; 420 421 list<ThreadID>::iterator threads = activeThreads->begin(); 422 
list<ThreadID>::iterator end = activeThreads->end(); 423 424 while (threads != end) { 425 ThreadID tid = *threads++; 426 427 total += thread[tid].numStores(); 428 } 429 430 return total; 431} 432 433template<class Impl> 434unsigned 435LSQ<Impl>::numFreeLoadEntries() 436{ 437 unsigned total = 0; 438 439 list<ThreadID>::iterator threads = activeThreads->begin(); 440 list<ThreadID>::iterator end = activeThreads->end(); 441 442 while (threads != end) { 443 ThreadID tid = *threads++; 444 445 total += thread[tid].numFreeLoadEntries(); 446 } 447 448 return total; 449} 450 451template<class Impl> 452unsigned 453LSQ<Impl>::numFreeStoreEntries() 454{ 455 unsigned total = 0; 456 457 list<ThreadID>::iterator threads = activeThreads->begin(); 458 list<ThreadID>::iterator end = activeThreads->end(); 459 460 while (threads != end) { 461 ThreadID tid = *threads++; 462 463 total += thread[tid].numFreeStoreEntries(); 464 } 465 466 return total; 467} 468 469template<class Impl> 470unsigned 471LSQ<Impl>::numFreeLoadEntries(ThreadID tid) 472{ 473 return thread[tid].numFreeLoadEntries(); 474} 475 476template<class Impl> 477unsigned 478LSQ<Impl>::numFreeStoreEntries(ThreadID tid) 479{ 480 return thread[tid].numFreeStoreEntries(); 481} 482 483template<class Impl> 484bool 485LSQ<Impl>::isFull() 486{ 487 list<ThreadID>::iterator threads = activeThreads->begin(); 488 list<ThreadID>::iterator end = activeThreads->end(); 489 490 while (threads != end) { 491 ThreadID tid = *threads++; 492 493 if (!(thread[tid].lqFull() || thread[tid].sqFull())) 494 return false; 495 } 496 497 return true; 498} 499 500template<class Impl> 501bool 502LSQ<Impl>::isFull(ThreadID tid) 503{ 504 //@todo: Change to Calculate All Entries for 505 //Dynamic Policy 506 if (lsqPolicy == SMTQueuePolicy::Dynamic) 507 return isFull(); 508 else 509 return thread[tid].lqFull() || thread[tid].sqFull(); 510} 511 512template<class Impl> 513bool 514LSQ<Impl>::isEmpty() const 515{ 516 return lqEmpty() && sqEmpty(); 517} 518 
519template<class Impl> 520bool 521LSQ<Impl>::lqEmpty() const 522{ 523 list<ThreadID>::const_iterator threads = activeThreads->begin(); 524 list<ThreadID>::const_iterator end = activeThreads->end(); 525 526 while (threads != end) { 527 ThreadID tid = *threads++; 528 529 if (!thread[tid].lqEmpty()) 530 return false; 531 } 532 533 return true; 534} 535 536template<class Impl> 537bool 538LSQ<Impl>::sqEmpty() const 539{ 540 list<ThreadID>::const_iterator threads = activeThreads->begin(); 541 list<ThreadID>::const_iterator end = activeThreads->end(); 542 543 while (threads != end) { 544 ThreadID tid = *threads++; 545 546 if (!thread[tid].sqEmpty()) 547 return false; 548 } 549 550 return true; 551} 552 553template<class Impl> 554bool 555LSQ<Impl>::lqFull() 556{ 557 list<ThreadID>::iterator threads = activeThreads->begin(); 558 list<ThreadID>::iterator end = activeThreads->end(); 559 560 while (threads != end) { 561 ThreadID tid = *threads++; 562 563 if (!thread[tid].lqFull()) 564 return false; 565 } 566 567 return true; 568} 569 570template<class Impl> 571bool 572LSQ<Impl>::lqFull(ThreadID tid) 573{ 574 //@todo: Change to Calculate All Entries for 575 //Dynamic Policy 576 if (lsqPolicy == SMTQueuePolicy::Dynamic) 577 return lqFull(); 578 else 579 return thread[tid].lqFull(); 580} 581 582template<class Impl> 583bool 584LSQ<Impl>::sqFull() 585{ 586 list<ThreadID>::iterator threads = activeThreads->begin(); 587 list<ThreadID>::iterator end = activeThreads->end(); 588 589 while (threads != end) { 590 ThreadID tid = *threads++; 591 592 if (!sqFull(tid)) 593 return false; 594 } 595 596 return true; 597} 598 599template<class Impl> 600bool 601LSQ<Impl>::sqFull(ThreadID tid) 602{ 603 //@todo: Change to Calculate All Entries for 604 //Dynamic Policy 605 if (lsqPolicy == SMTQueuePolicy::Dynamic) 606 return sqFull(); 607 else 608 return thread[tid].sqFull(); 609} 610 611template<class Impl> 612bool 613LSQ<Impl>::isStalled() 614{ 615 list<ThreadID>::iterator threads = 
activeThreads->begin(); 616 list<ThreadID>::iterator end = activeThreads->end(); 617 618 while (threads != end) { 619 ThreadID tid = *threads++; 620 621 if (!thread[tid].isStalled()) 622 return false; 623 } 624 625 return true; 626} 627 628template<class Impl> 629bool 630LSQ<Impl>::isStalled(ThreadID tid) 631{ 632 if (lsqPolicy == SMTQueuePolicy::Dynamic) 633 return isStalled(); 634 else 635 return thread[tid].isStalled(); 636} 637 638template<class Impl> 639bool 640LSQ<Impl>::hasStoresToWB() 641{ 642 list<ThreadID>::iterator threads = activeThreads->begin(); 643 list<ThreadID>::iterator end = activeThreads->end(); 644 645 while (threads != end) { 646 ThreadID tid = *threads++; 647 648 if (hasStoresToWB(tid)) 649 return true; 650 } 651 652 return false; 653} 654 655template<class Impl> 656bool 657LSQ<Impl>::willWB() 658{ 659 list<ThreadID>::iterator threads = activeThreads->begin(); 660 list<ThreadID>::iterator end = activeThreads->end(); 661 662 while (threads != end) { 663 ThreadID tid = *threads++; 664 665 if (willWB(tid)) 666 return true; 667 } 668 669 return false; 670} 671 672template<class Impl> 673void 674LSQ<Impl>::dumpInsts() const 675{ 676 list<ThreadID>::const_iterator threads = activeThreads->begin(); 677 list<ThreadID>::const_iterator end = activeThreads->end(); 678 679 while (threads != end) { 680 ThreadID tid = *threads++; 681 682 thread[tid].dumpInsts(); 683 } 684} 685 686template<class Impl> 687Fault 688LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, 689 unsigned int size, Addr addr, Request::Flags flags, 690 uint64_t *res, AtomicOpFunctorPtr amo_op, 691 const std::vector<bool>& byteEnable) 692{ 693 // This comming request can be either load, store or atomic. 
    // Atomic request has a corresponding pointer to its atomic memory
    // operation
    bool isAtomic M5_VAR_USED = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    // A request needs to be split ("burst") when it crosses a cache line.
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* req = nullptr;

    // Atomic requests that access data across cache line boundary are
    // currently not allowed since the cache does not guarantee corresponding
    // atomic memory operations to be executed atomically across a cache line.
    // For ISAs such as x86 that supports cross-cache-line atomic instructions,
    // the cache needs to be modified to perform atomic update to both cache
    // lines. For now, such cross-line update is not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    if (inst->translationStarted()) {
        // Re-execution attempt: reuse the request saved on the instruction.
        req = inst->savedReq;
        assert(req);
    } else {
        // First attempt: build a single or split request as needed.
        if (needs_burst) {
            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, std::move(amo_op));
        }
        assert(req);
        if (!byteEnable.empty()) {
            req->_byteEnable = byteEnable;
        }
        inst->setRequest();
        req->taskId(cpu->taskId());

        // There might be fault from a previous execution attempt if this is
        // a strictly ordered load
        inst->getFault() = NoFault;

        req->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (req->isTranslationComplete()) {
        if (req->isMemAccessRequired()) {
            inst->effAddr = req->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*req->request());
            }
            Fault fault;
            if (isLoad)
                fault = cpu->read(req, inst->lqIdx);
            else
                fault = cpu->write(req, data, inst->sqIdx);
            // inst->getFault() may have the first-fault of a
            // multi-access split request at this point.
            // Overwrite that only if we got another type of fault
            // (e.g. re-exec).
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            // Predicated-false load: no memory access will happen.
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened.  Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}

/**
 * Translation callback for a single (non-split) request: records the fault
 * and transitions the request to Request or Fault state.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct.
     */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            // Successful translation: publish the physical address and
            // flags on the instruction.
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                assert(_res);
                req->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}

/**
 * Translation callback for one fragment of a split request. Faults are
 * recorded per fragment; only when the last fragment arrives is the overall
 * state (Request / PartialFault / Fault) decided.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    // Find which fragment this callback is for.
    int i;
    for (i = 0; i < _requests.size() && _requests[i] != req; i++);
    assert(i < _requests.size());
    _fault[i] = fault;

    numInTranslationFragments--;
    numTranslatedFragments++;

    // Accumulate the fragment's flags into the main (virtual) request.
    if (fault == NoFault)
        mainReq->setFlags(req->getFlags());

    if (numTranslatedFragments == _requests.size()) {
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            _inst->translationCompleted(true);

            // i becomes the index of the first faulting fragment (or
            // _fault.size() if none faulted).
            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
            if (i > 0) {
                // At least the first fragment translated fine.
                _inst->physEffAddr = request(0)->getPaddr();
                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    // Conditional swaps must have translated completely.
                    assert (i == _fault.size());
                    assert(_res);
                    mainReq->setExtraData(*_res);
                }
                if (i == _fault.size()) {
                    _inst->fault = NoFault;
                    setState(State::Request);
                } else {
                    // Some later fragment faulted: partial fault.
                    _inst->fault = _fault[i];
                    setState(State::PartialFault);
                }
            } else {
                // The very first fragment faulted.
                _inst->fault = _fault[0];
                setState(State::Fault);
            }
        }

    }
}

/**
 * Start translation of a single request; if the byte enables deactivate the
 * whole access, mark the instruction's memory access as predicated false.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    assert(_requests.size() == 0);

    this->addRequest(_addr, _size, _byteEnable);

    if (_requests.size() > 0) {
        _requests.back()->setReqInstSeqNum(_inst->seqNum);
        _requests.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        // Save the request so a re-execution attempt can resume it.
        _inst->savedReq = this;
        sendFragmentToTranslation(0);
    } else {
        // addRequest() produced nothing (all byte enables false): the
        // access is predicated off.
        _inst->setMemAccPredicate(false);
    }
}

/** Accessor for the assembled whole-access packet of a split request. */
template<class Impl>
PacketPtr
LSQ<Impl>::SplitDataRequest::mainPacket()
{
    return _mainPacket;
}

/** Accessor for the whole-access (virtual) request of a split request. */
template<class Impl>
RequestPtr
LSQ<Impl>::SplitDataRequest::mainRequest()
{
    return mainReq;
}

/**
 * Break a cache-line-crossing access into per-line fragments (possibly
 * unaligned head, whole middle lines, tail) and send each to translation.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
            _size, _flags, _inst->masterId(),
            _inst->instAddr(), _inst->contextId());
    if (!_byteEnable.empty()) {
        mainReq->setByteEnable(_byteEnable);
    }

    // Paddr is not used in mainReq. However, we will accumulate the flags
    // from the sub requests into mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
    // avoid a potential assert in setFlags() when we call it from finish().
    mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    if (_byteEnable.empty()) {
        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
    } else {
        // Slice the byte-enable vector to match this fragment.
        auto it_start = _byteEnable.begin();
        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
        this->addRequest(base_addr, next_addr - base_addr,
                std::vector<bool>(it_start, it_end));
    }
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, cacheLineSize, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
            this->addRequest(base_addr, cacheLineSize,
                    std::vector<bool>(it_start, it_end));
        }
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.end();
            this->addRequest(base_addr, _size - size_so_far,
                    std::vector<bool>(it_start, it_end));
        }
    }

    if (_requests.size() > 0) {
        /* Setup the requests and send them to translation. */
        for (auto& r: _requests) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        this->_inst->savedReq = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_requests.size());

        for (uint32_t i = 0; i < _requests.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        // Every fragment was predicated off by the byte enables.
        _inst->setMemAccPredicate(false);
    }
}

/** Send fragment i to the data TLB for timing translation. */
template<class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
{
    numInTranslationFragments++;
    _port.dTLB()->translateTiming(
            this->request(i),
            this->_inst->thread->getTC(), this,
            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
}

/** A single request has exactly one packet; completing it completes the
 * whole access. */
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    assert(_numOutstandingPackets == 1);
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    flags.set(Flag::Complete);
    state->outstanding--;
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    return true;
}

/**
 * Collect fragment responses; once all have arrived, assemble a single
 * response packet for the whole access and hand it to the LSQ unit.
 */
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    // Identify which fragment packet this response belongs to.
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    state->outstanding--;
    if (numReceivedPackets == _packets.size()) {
        flags.set(Flag::Complete);
        /* Assemble packets. */
        PacketPtr resp = isLoad()
            ?
              Packet::createRead(mainReq)
            : Packet::createWrite(mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        // The assembled response is only a local carrier; the fragment
        // packets stay owned by this request.
        delete resp;
    }
    return true;
}

/**
 * Build the single packet for this request. Idempotent: retries reuse the
 * packet created on the first call.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::buildPackets()
{
    assert(_senderState);
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ?  Packet::createRead(request())
                    :  Packet::createWrite(request()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = _senderState;
    }
    assert(_packets.size() == 1);
}

/**
 * Build one packet per translated fragment (plus the assembled main packet
 * for loads). Fragments after the first faulting one get no packet.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    /* Extra data?? */
    Addr base_address = _addr;

    if (_packets.size() == 0) {
        /* New stuff */
        if (isLoad()) {
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        // Stop at the first fragment whose translation faulted.
        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
            RequestPtr r = _requests[i];
            PacketPtr pkt = isLoad() ?
                                       Packet::createRead(r)
                                     : Packet::createWrite(r);
            // Offset of this fragment inside the whole access.
            ptrdiff_t offset = r->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                // Stores copy their slice so each fragment packet owns
                // its own buffer (freed with the packet).
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() > 0);
}

/** Try to send the single packet; mark it outstanding on success. */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::sendPacketToCache()
{
    assert(_numOutstandingPackets == 0);
    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
        _numOutstandingPackets = 1;
}

/** Send as many not-yet-sent fragment packets as the cache will accept. */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::sendPacketToCache()
{
    /* Try to send the packets. */
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}

/** IPR (internal processor register) write for a single request. */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
                                             PacketPtr pkt)
{
    TheISA::handleIprWrite(thread, pkt);
}

/** IPR write for a split request: issue one write per fragment, slicing
 * the main packet's data by fragment offset. */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
                                            PacketPtr mainPkt)
{
    unsigned offset = 0;
    for (auto r: _requests) {
        PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        TheISA::handleIprWrite(thread, pkt);
        offset += r->getSize();
        delete pkt;
    }
}

/** IPR read for a single request; returns the access delay. */
template<class Impl>
Cycles
LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
                                            PacketPtr pkt)
{
    return TheISA::handleIprRead(thread, pkt);
}

/** IPR read for a split request: one read per fragment; the overall delay
 * is the maximum fragment delay (fragments are independent). */
template<class Impl>
Cycles
LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
                                           PacketPtr mainPkt)
{
    Cycles delay(0);
    unsigned offset = 0;

    for (auto r: _requests) {
        PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        Cycles d = TheISA::handleIprRead(thread, pkt);
        if (d > delay)
            delay = d;
        offset += r->getSize();
        delete pkt;
    }
    return delay;
}

/** Does the single request's (translated) address fall in the given block? */
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
}

/** Does any fragment of the split request fall in the given block? */
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    bool is_hit = false;
    for (auto &r: _requests) {
        if ((r->getPaddr() & blockMask) == blockAddr) {
            is_hit = true;
            break;
        }
    }
    return is_hit;
}

/** Port callback: forward timing responses to the owning LSQ. */
template <class Impl>
bool
LSQ<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
{
    return lsq->recvTimingResp(pkt);
}

/** Port callback: wake any monitoring threads, then forward the snoop. */
template <class Impl>
void
LSQ<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }
    lsq->recvTimingSnoopReq(pkt);
}

/** Port callback: the cache is ready for a retry. */
template <class Impl>
void
LSQ<Impl>::DcachePort::recvReqRetry()
{
    lsq->recvReqRetry();
}

#endif//__CPU_O3_LSQ_IMPL_HH__