lsq_impl.hh (13954:2f400a5f2627) lsq_impl.hh (14080:4472576445e7)
1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Korey Sewell
42 */
43
44#ifndef __CPU_O3_LSQ_IMPL_HH__
45#define __CPU_O3_LSQ_IMPL_HH__
46
47#include <algorithm>
48#include <list>
49#include <string>
50
51#include "base/logging.hh"
52#include "cpu/o3/lsq.hh"
53#include "debug/Drain.hh"
54#include "debug/Fetch.hh"
55#include "debug/LSQ.hh"
56#include "debug/Writeback.hh"
57#include "params/DerivO3CPU.hh"
58
59using namespace std;
60
61template <class Impl>
62LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
63 : cpu(cpu_ptr), iewStage(iew_ptr),
64 _cacheBlocked(false),
65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
66 cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
67 lsqPolicy(params->smtLSQPolicy),
68 LQEntries(params->LQEntries),
69 SQEntries(params->SQEntries),
70 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
71 params->smtLSQThreshold)),
72 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
73 params->smtLSQThreshold)),
74 numThreads(params->numThreads)
75{
76 assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
77
78 //**********************************************/
79 //************ Handle SMT Parameters ***********/
80 //**********************************************/
81
82 /* Run SMT olicy checks. */
83 if (lsqPolicy == SMTQueuePolicy::Dynamic) {
84 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
85 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
86 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
87 "%i entries per LQ | %i entries per SQ\n",
88 maxLQEntries,maxSQEntries);
89 } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
90
91 assert(params->smtLSQThreshold > params->LQEntries);
92 assert(params->smtLSQThreshold > params->SQEntries);
93
94 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
95 "%i entries per LQ | %i entries per SQ\n",
96 maxLQEntries,maxSQEntries);
97 } else {
98 panic("Invalid LSQ sharing policy. Options are: Dynamic, "
99 "Partitioned, Threshold");
100 }
101
102 thread.reserve(numThreads);
103 for (ThreadID tid = 0; tid < numThreads; tid++) {
104 thread.emplace_back(maxLQEntries, maxSQEntries);
105 thread[tid].init(cpu, iew_ptr, params, this, tid);
106 thread[tid].setDcachePort(&cpu_ptr->getDataPort());
107 }
108}
109
110
111template<class Impl>
112std::string
113LSQ<Impl>::name() const
114{
115 return iewStage->name() + ".lsq";
116}
117
118template<class Impl>
119void
120LSQ<Impl>::regStats()
121{
122 //Initialize LSQs
123 for (ThreadID tid = 0; tid < numThreads; tid++) {
124 thread[tid].regStats();
125 }
126}
127
128template<class Impl>
129void
130LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
131{
132 activeThreads = at_ptr;
133 assert(activeThreads != 0);
134}
135
136template <class Impl>
137void
138LSQ<Impl>::drainSanityCheck() const
139{
140 assert(isDrained());
141
142 for (ThreadID tid = 0; tid < numThreads; tid++)
143 thread[tid].drainSanityCheck();
144}
145
146template <class Impl>
147bool
148LSQ<Impl>::isDrained() const
149{
150 bool drained(true);
151
152 if (!lqEmpty()) {
153 DPRINTF(Drain, "Not drained, LQ not empty.\n");
154 drained = false;
155 }
156
157 if (!sqEmpty()) {
158 DPRINTF(Drain, "Not drained, SQ not empty.\n");
159 drained = false;
160 }
161
162 return drained;
163}
164
165template <class Impl>
166void
167LSQ<Impl>::takeOverFrom()
168{
169 usedStorePorts = 0;
170 _cacheBlocked = false;
171
172 for (ThreadID tid = 0; tid < numThreads; tid++) {
173 thread[tid].takeOverFrom();
174 }
175}
176
template <class Impl>
void
LSQ<Impl>::tick()
{
    // Re-issue loads which got blocked on the per-cycle load ports limit.
    // If all load ports were consumed last cycle but the cache itself is
    // not blocked, notify IEW so deferred loads can be retried; a truly
    // blocked cache will instead signal via recvReqRetry().
    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
        iewStage->cacheUnblocked();

    // Port usage is accounted per cycle; start the new cycle fresh.
    usedLoadPorts = 0;
    usedStorePorts = 0;
}
188
template<class Impl>
bool
LSQ<Impl>::cacheBlocked() const
{
    // True while the D-cache has refused a request and we are waiting
    // for a retry (cleared in recvReqRetry / takeOverFrom).
    return _cacheBlocked;
}
195
template<class Impl>
void
LSQ<Impl>::cacheBlocked(bool v)
{
    // Setter for the cache-blocked flag; no side effects beyond the flag.
    _cacheBlocked = v;
}
202
203template<class Impl>
204bool
205LSQ<Impl>::cachePortAvailable(bool is_load) const
206{
207 bool ret;
208 if (is_load) {
209 ret = usedLoadPorts < cacheLoadPorts;
210 } else {
211 ret = usedStorePorts < cacheStorePorts;
212 }
213 return ret;
214}
215
216template<class Impl>
217void
218LSQ<Impl>::cachePortBusy(bool is_load)
219{
220 assert(cachePortAvailable(is_load));
221 if (is_load) {
222 usedLoadPorts++;
223 } else {
224 usedStorePorts++;
225 }
226}
227
228template<class Impl>
229void
230LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
231{
232 ThreadID tid = load_inst->threadNumber;
233
234 thread[tid].insertLoad(load_inst);
235}
236
237template<class Impl>
238void
239LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
240{
241 ThreadID tid = store_inst->threadNumber;
242
243 thread[tid].insertStore(store_inst);
244}
245
246template<class Impl>
247Fault
248LSQ<Impl>::executeLoad(const DynInstPtr &inst)
249{
250 ThreadID tid = inst->threadNumber;
251
252 return thread[tid].executeLoad(inst);
253}
254
255template<class Impl>
256Fault
257LSQ<Impl>::executeStore(const DynInstPtr &inst)
258{
259 ThreadID tid = inst->threadNumber;
260
261 return thread[tid].executeStore(inst);
262}
263
264template<class Impl>
265void
266LSQ<Impl>::writebackStores()
267{
268 list<ThreadID>::iterator threads = activeThreads->begin();
269 list<ThreadID>::iterator end = activeThreads->end();
270
271 while (threads != end) {
272 ThreadID tid = *threads++;
273
274 if (numStoresToWB(tid) > 0) {
275 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
276 "available for Writeback.\n", tid, numStoresToWB(tid));
277 }
278
279 thread[tid].writebackStores();
280 }
281}
282
283template<class Impl>
284bool
285LSQ<Impl>::violation()
286{
287 /* Answers: Does Anybody Have a Violation?*/
288 list<ThreadID>::iterator threads = activeThreads->begin();
289 list<ThreadID>::iterator end = activeThreads->end();
290
291 while (threads != end) {
292 ThreadID tid = *threads++;
293
294 if (thread[tid].violation())
295 return true;
296 }
297
298 return false;
299}
300
template <class Impl>
void
LSQ<Impl>::recvReqRetry()
{
    // The D-cache signalled it can accept requests again.  Tell IEW first
    // so blocked instructions can be rescheduled, clear our blocked flag,
    // then let each active thread retry its outstanding access.
    iewStage->cacheUnblocked();
    cacheBlocked(false);

    for (ThreadID tid : *activeThreads) {
        thread[tid].recvRetry();
    }
}
312
313template <class Impl>
314void
315LSQ<Impl>::completeDataAccess(PacketPtr pkt)
316{
317 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
318 thread[cpu->contextToThread(senderState->contextId())]
319 .completeDataAccess(pkt);
320}
321
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    // The sender state identifies which LSQ request (and thread) this
    // response belongs to; a response we cannot attribute is fatal.
    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    panic_if(!senderState, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the case
        // if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop matters.
        // By calling checkSnoop after completeDataAccess, we ensure that the
        // fault set by checkSnoop is not lost. Calling writeback (more
        // specifically inst->completeAcc) in completeDataAccess overwrites
        // fault, and in case this instruction requires squashing (as
        // determined by checkSnoop), the ReExec fault set by checkSnoop would
        // be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        // The invalidation must be observed by every thread's queues,
        // not only the thread that owns this response.
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request) -- do not
    // touch senderState or the request after this call.
    senderState->request()->packetReplied();

    return true;
}
359
360template <class Impl>
361void
362LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
363{
364 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
365 pkt->cmdString());
366
367 // must be a snoop
368 if (pkt->isInvalidate()) {
369 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
370 pkt->getAddr());
371 for (ThreadID tid = 0; tid < numThreads; tid++) {
372 thread[tid].checkSnoop(pkt);
373 }
374 }
375}
376
377template<class Impl>
378int
379LSQ<Impl>::getCount()
380{
381 unsigned total = 0;
382
383 list<ThreadID>::iterator threads = activeThreads->begin();
384 list<ThreadID>::iterator end = activeThreads->end();
385
386 while (threads != end) {
387 ThreadID tid = *threads++;
388
389 total += getCount(tid);
390 }
391
392 return total;
393}
394
395template<class Impl>
396int
397LSQ<Impl>::numLoads()
398{
399 unsigned total = 0;
400
401 list<ThreadID>::iterator threads = activeThreads->begin();
402 list<ThreadID>::iterator end = activeThreads->end();
403
404 while (threads != end) {
405 ThreadID tid = *threads++;
406
407 total += numLoads(tid);
408 }
409
410 return total;
411}
412
413template<class Impl>
414int
415LSQ<Impl>::numStores()
416{
417 unsigned total = 0;
418
419 list<ThreadID>::iterator threads = activeThreads->begin();
420 list<ThreadID>::iterator end = activeThreads->end();
421
422 while (threads != end) {
423 ThreadID tid = *threads++;
424
425 total += thread[tid].numStores();
426 }
427
428 return total;
429}
430
431template<class Impl>
432unsigned
433LSQ<Impl>::numFreeLoadEntries()
434{
435 unsigned total = 0;
436
437 list<ThreadID>::iterator threads = activeThreads->begin();
438 list<ThreadID>::iterator end = activeThreads->end();
439
440 while (threads != end) {
441 ThreadID tid = *threads++;
442
443 total += thread[tid].numFreeLoadEntries();
444 }
445
446 return total;
447}
448
449template<class Impl>
450unsigned
451LSQ<Impl>::numFreeStoreEntries()
452{
453 unsigned total = 0;
454
455 list<ThreadID>::iterator threads = activeThreads->begin();
456 list<ThreadID>::iterator end = activeThreads->end();
457
458 while (threads != end) {
459 ThreadID tid = *threads++;
460
461 total += thread[tid].numFreeStoreEntries();
462 }
463
464 return total;
465}
466
template<class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
{
    // Free load-queue slots for one specific thread.
    return thread[tid].numFreeLoadEntries();
}
473
template<class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
{
    // Free store-queue slots for one specific thread.
    return thread[tid].numFreeStoreEntries();
}
480
481template<class Impl>
482bool
483LSQ<Impl>::isFull()
484{
485 list<ThreadID>::iterator threads = activeThreads->begin();
486 list<ThreadID>::iterator end = activeThreads->end();
487
488 while (threads != end) {
489 ThreadID tid = *threads++;
490
491 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
492 return false;
493 }
494
495 return true;
496}
497
498template<class Impl>
499bool
500LSQ<Impl>::isFull(ThreadID tid)
501{
502 //@todo: Change to Calculate All Entries for
503 //Dynamic Policy
504 if (lsqPolicy == SMTQueuePolicy::Dynamic)
505 return isFull();
506 else
507 return thread[tid].lqFull() || thread[tid].sqFull();
508}
509
template<class Impl>
bool
LSQ<Impl>::isEmpty() const
{
    // Empty only when both the load and the store queues hold nothing.
    return lqEmpty() && sqEmpty();
}
516
517template<class Impl>
518bool
519LSQ<Impl>::lqEmpty() const
520{
521 list<ThreadID>::const_iterator threads = activeThreads->begin();
522 list<ThreadID>::const_iterator end = activeThreads->end();
523
524 while (threads != end) {
525 ThreadID tid = *threads++;
526
527 if (!thread[tid].lqEmpty())
528 return false;
529 }
530
531 return true;
532}
533
534template<class Impl>
535bool
536LSQ<Impl>::sqEmpty() const
537{
538 list<ThreadID>::const_iterator threads = activeThreads->begin();
539 list<ThreadID>::const_iterator end = activeThreads->end();
540
541 while (threads != end) {
542 ThreadID tid = *threads++;
543
544 if (!thread[tid].sqEmpty())
545 return false;
546 }
547
548 return true;
549}
550
551template<class Impl>
552bool
553LSQ<Impl>::lqFull()
554{
555 list<ThreadID>::iterator threads = activeThreads->begin();
556 list<ThreadID>::iterator end = activeThreads->end();
557
558 while (threads != end) {
559 ThreadID tid = *threads++;
560
561 if (!thread[tid].lqFull())
562 return false;
563 }
564
565 return true;
566}
567
568template<class Impl>
569bool
570LSQ<Impl>::lqFull(ThreadID tid)
571{
572 //@todo: Change to Calculate All Entries for
573 //Dynamic Policy
574 if (lsqPolicy == SMTQueuePolicy::Dynamic)
575 return lqFull();
576 else
577 return thread[tid].lqFull();
578}
579
580template<class Impl>
581bool
582LSQ<Impl>::sqFull()
583{
584 list<ThreadID>::iterator threads = activeThreads->begin();
585 list<ThreadID>::iterator end = activeThreads->end();
586
587 while (threads != end) {
588 ThreadID tid = *threads++;
589
590 if (!sqFull(tid))
591 return false;
592 }
593
594 return true;
595}
596
template<class Impl>
bool
LSQ<Impl>::sqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    // Under the dynamic policy entries are shared between threads, so
    // per-thread fullness is defined as global fullness.
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return sqFull();
    else
        return thread[tid].sqFull();
}
608
609template<class Impl>
610bool
611LSQ<Impl>::isStalled()
612{
613 list<ThreadID>::iterator threads = activeThreads->begin();
614 list<ThreadID>::iterator end = activeThreads->end();
615
616 while (threads != end) {
617 ThreadID tid = *threads++;
618
619 if (!thread[tid].isStalled())
620 return false;
621 }
622
623 return true;
624}
625
626template<class Impl>
627bool
628LSQ<Impl>::isStalled(ThreadID tid)
629{
630 if (lsqPolicy == SMTQueuePolicy::Dynamic)
631 return isStalled();
632 else
633 return thread[tid].isStalled();
634}
635
636template<class Impl>
637bool
638LSQ<Impl>::hasStoresToWB()
639{
640 list<ThreadID>::iterator threads = activeThreads->begin();
641 list<ThreadID>::iterator end = activeThreads->end();
642
643 while (threads != end) {
644 ThreadID tid = *threads++;
645
646 if (hasStoresToWB(tid))
647 return true;
648 }
649
650 return false;
651}
652
653template<class Impl>
654bool
655LSQ<Impl>::willWB()
656{
657 list<ThreadID>::iterator threads = activeThreads->begin();
658 list<ThreadID>::iterator end = activeThreads->end();
659
660 while (threads != end) {
661 ThreadID tid = *threads++;
662
663 if (willWB(tid))
664 return true;
665 }
666
667 return false;
668}
669
670template<class Impl>
671void
672LSQ<Impl>::dumpInsts() const
673{
674 list<ThreadID>::const_iterator threads = activeThreads->begin();
675 list<ThreadID>::const_iterator end = activeThreads->end();
676
677 while (threads != end) {
678 ThreadID tid = *threads++;
679
680 thread[tid].dumpInsts();
681 }
682}
683
684template<class Impl>
685Fault
686LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
687 unsigned int size, Addr addr, Request::Flags flags,
688 uint64_t *res, AtomicOpFunctor *amo_op,
689 const std::vector<bool>& byteEnable)
690{
691 // This comming request can be either load, store or atomic.
692 // Atomic request has a corresponding pointer to its atomic memory
693 // operation
694 bool isAtomic M5_VAR_USED = !isLoad && amo_op;
695
696 ThreadID tid = cpu->contextToThread(inst->contextId());
697 auto cacheLineSize = cpu->cacheLineSize();
698 bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
699 LSQRequest* req = nullptr;
700
701 // Atomic requests that access data across cache line boundary are
702 // currently not allowed since the cache does not guarantee corresponding
703 // atomic memory operations to be executed atomically across a cache line.
704 // For ISAs such as x86 that supports cross-cache-line atomic instructions,
705 // the cache needs to be modified to perform atomic update to both cache
706 // lines. For now, such cross-line update is not supported.
707 assert(!isAtomic || (isAtomic && !needs_burst));
708
709 if (inst->translationStarted()) {
710 req = inst->savedReq;
711 assert(req);
712 } else {
713 if (needs_burst) {
714 req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
715 size, flags, data, res);
716 } else {
717 req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
718 size, flags, data, res, amo_op);
719 }
720 assert(req);
721 if (!byteEnable.empty()) {
722 req->_byteEnable = byteEnable;
723 }
724 inst->setRequest();
725 req->taskId(cpu->taskId());
726
1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Korey Sewell
42 */
43
44#ifndef __CPU_O3_LSQ_IMPL_HH__
45#define __CPU_O3_LSQ_IMPL_HH__
46
47#include <algorithm>
48#include <list>
49#include <string>
50
51#include "base/logging.hh"
52#include "cpu/o3/lsq.hh"
53#include "debug/Drain.hh"
54#include "debug/Fetch.hh"
55#include "debug/LSQ.hh"
56#include "debug/Writeback.hh"
57#include "params/DerivO3CPU.hh"
58
59using namespace std;
60
61template <class Impl>
62LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
63 : cpu(cpu_ptr), iewStage(iew_ptr),
64 _cacheBlocked(false),
65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
66 cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
67 lsqPolicy(params->smtLSQPolicy),
68 LQEntries(params->LQEntries),
69 SQEntries(params->SQEntries),
70 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
71 params->smtLSQThreshold)),
72 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
73 params->smtLSQThreshold)),
74 numThreads(params->numThreads)
75{
76 assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
77
78 //**********************************************/
79 //************ Handle SMT Parameters ***********/
80 //**********************************************/
81
82 /* Run SMT olicy checks. */
83 if (lsqPolicy == SMTQueuePolicy::Dynamic) {
84 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
85 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
86 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
87 "%i entries per LQ | %i entries per SQ\n",
88 maxLQEntries,maxSQEntries);
89 } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
90
91 assert(params->smtLSQThreshold > params->LQEntries);
92 assert(params->smtLSQThreshold > params->SQEntries);
93
94 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
95 "%i entries per LQ | %i entries per SQ\n",
96 maxLQEntries,maxSQEntries);
97 } else {
98 panic("Invalid LSQ sharing policy. Options are: Dynamic, "
99 "Partitioned, Threshold");
100 }
101
102 thread.reserve(numThreads);
103 for (ThreadID tid = 0; tid < numThreads; tid++) {
104 thread.emplace_back(maxLQEntries, maxSQEntries);
105 thread[tid].init(cpu, iew_ptr, params, this, tid);
106 thread[tid].setDcachePort(&cpu_ptr->getDataPort());
107 }
108}
109
110
111template<class Impl>
112std::string
113LSQ<Impl>::name() const
114{
115 return iewStage->name() + ".lsq";
116}
117
118template<class Impl>
119void
120LSQ<Impl>::regStats()
121{
122 //Initialize LSQs
123 for (ThreadID tid = 0; tid < numThreads; tid++) {
124 thread[tid].regStats();
125 }
126}
127
128template<class Impl>
129void
130LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
131{
132 activeThreads = at_ptr;
133 assert(activeThreads != 0);
134}
135
136template <class Impl>
137void
138LSQ<Impl>::drainSanityCheck() const
139{
140 assert(isDrained());
141
142 for (ThreadID tid = 0; tid < numThreads; tid++)
143 thread[tid].drainSanityCheck();
144}
145
146template <class Impl>
147bool
148LSQ<Impl>::isDrained() const
149{
150 bool drained(true);
151
152 if (!lqEmpty()) {
153 DPRINTF(Drain, "Not drained, LQ not empty.\n");
154 drained = false;
155 }
156
157 if (!sqEmpty()) {
158 DPRINTF(Drain, "Not drained, SQ not empty.\n");
159 drained = false;
160 }
161
162 return drained;
163}
164
165template <class Impl>
166void
167LSQ<Impl>::takeOverFrom()
168{
169 usedStorePorts = 0;
170 _cacheBlocked = false;
171
172 for (ThreadID tid = 0; tid < numThreads; tid++) {
173 thread[tid].takeOverFrom();
174 }
175}
176
177template <class Impl>
178void
179LSQ<Impl>::tick()
180{
181 // Re-issue loads which got blocked on the per-cycle load ports limit.
182 if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
183 iewStage->cacheUnblocked();
184
185 usedLoadPorts = 0;
186 usedStorePorts = 0;
187}
188
189template<class Impl>
190bool
191LSQ<Impl>::cacheBlocked() const
192{
193 return _cacheBlocked;
194}
195
196template<class Impl>
197void
198LSQ<Impl>::cacheBlocked(bool v)
199{
200 _cacheBlocked = v;
201}
202
203template<class Impl>
204bool
205LSQ<Impl>::cachePortAvailable(bool is_load) const
206{
207 bool ret;
208 if (is_load) {
209 ret = usedLoadPorts < cacheLoadPorts;
210 } else {
211 ret = usedStorePorts < cacheStorePorts;
212 }
213 return ret;
214}
215
216template<class Impl>
217void
218LSQ<Impl>::cachePortBusy(bool is_load)
219{
220 assert(cachePortAvailable(is_load));
221 if (is_load) {
222 usedLoadPorts++;
223 } else {
224 usedStorePorts++;
225 }
226}
227
228template<class Impl>
229void
230LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
231{
232 ThreadID tid = load_inst->threadNumber;
233
234 thread[tid].insertLoad(load_inst);
235}
236
237template<class Impl>
238void
239LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
240{
241 ThreadID tid = store_inst->threadNumber;
242
243 thread[tid].insertStore(store_inst);
244}
245
246template<class Impl>
247Fault
248LSQ<Impl>::executeLoad(const DynInstPtr &inst)
249{
250 ThreadID tid = inst->threadNumber;
251
252 return thread[tid].executeLoad(inst);
253}
254
255template<class Impl>
256Fault
257LSQ<Impl>::executeStore(const DynInstPtr &inst)
258{
259 ThreadID tid = inst->threadNumber;
260
261 return thread[tid].executeStore(inst);
262}
263
264template<class Impl>
265void
266LSQ<Impl>::writebackStores()
267{
268 list<ThreadID>::iterator threads = activeThreads->begin();
269 list<ThreadID>::iterator end = activeThreads->end();
270
271 while (threads != end) {
272 ThreadID tid = *threads++;
273
274 if (numStoresToWB(tid) > 0) {
275 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
276 "available for Writeback.\n", tid, numStoresToWB(tid));
277 }
278
279 thread[tid].writebackStores();
280 }
281}
282
283template<class Impl>
284bool
285LSQ<Impl>::violation()
286{
287 /* Answers: Does Anybody Have a Violation?*/
288 list<ThreadID>::iterator threads = activeThreads->begin();
289 list<ThreadID>::iterator end = activeThreads->end();
290
291 while (threads != end) {
292 ThreadID tid = *threads++;
293
294 if (thread[tid].violation())
295 return true;
296 }
297
298 return false;
299}
300
301template <class Impl>
302void
303LSQ<Impl>::recvReqRetry()
304{
305 iewStage->cacheUnblocked();
306 cacheBlocked(false);
307
308 for (ThreadID tid : *activeThreads) {
309 thread[tid].recvRetry();
310 }
311}
312
313template <class Impl>
314void
315LSQ<Impl>::completeDataAccess(PacketPtr pkt)
316{
317 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
318 thread[cpu->contextToThread(senderState->contextId())]
319 .completeDataAccess(pkt);
320}
321
322template <class Impl>
323bool
324LSQ<Impl>::recvTimingResp(PacketPtr pkt)
325{
326 if (pkt->isError())
327 DPRINTF(LSQ, "Got error packet back for address: %#X\n",
328 pkt->getAddr());
329
330 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
331 panic_if(!senderState, "Got packet back with unknown sender state\n");
332
333 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
334
335 if (pkt->isInvalidate()) {
336 // This response also contains an invalidate; e.g. this can be the case
337 // if cmd is ReadRespWithInvalidate.
338 //
339 // The calling order between completeDataAccess and checkSnoop matters.
340 // By calling checkSnoop after completeDataAccess, we ensure that the
341 // fault set by checkSnoop is not lost. Calling writeback (more
342 // specifically inst->completeAcc) in completeDataAccess overwrites
343 // fault, and in case this instruction requires squashing (as
344 // determined by checkSnoop), the ReExec fault set by checkSnoop would
345 // be lost otherwise.
346
347 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
348 pkt->getAddr());
349
350 for (ThreadID tid = 0; tid < numThreads; tid++) {
351 thread[tid].checkSnoop(pkt);
352 }
353 }
354 // Update the LSQRequest state (this may delete the request)
355 senderState->request()->packetReplied();
356
357 return true;
358}
359
360template <class Impl>
361void
362LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
363{
364 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
365 pkt->cmdString());
366
367 // must be a snoop
368 if (pkt->isInvalidate()) {
369 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
370 pkt->getAddr());
371 for (ThreadID tid = 0; tid < numThreads; tid++) {
372 thread[tid].checkSnoop(pkt);
373 }
374 }
375}
376
377template<class Impl>
378int
379LSQ<Impl>::getCount()
380{
381 unsigned total = 0;
382
383 list<ThreadID>::iterator threads = activeThreads->begin();
384 list<ThreadID>::iterator end = activeThreads->end();
385
386 while (threads != end) {
387 ThreadID tid = *threads++;
388
389 total += getCount(tid);
390 }
391
392 return total;
393}
394
395template<class Impl>
396int
397LSQ<Impl>::numLoads()
398{
399 unsigned total = 0;
400
401 list<ThreadID>::iterator threads = activeThreads->begin();
402 list<ThreadID>::iterator end = activeThreads->end();
403
404 while (threads != end) {
405 ThreadID tid = *threads++;
406
407 total += numLoads(tid);
408 }
409
410 return total;
411}
412
413template<class Impl>
414int
415LSQ<Impl>::numStores()
416{
417 unsigned total = 0;
418
419 list<ThreadID>::iterator threads = activeThreads->begin();
420 list<ThreadID>::iterator end = activeThreads->end();
421
422 while (threads != end) {
423 ThreadID tid = *threads++;
424
425 total += thread[tid].numStores();
426 }
427
428 return total;
429}
430
431template<class Impl>
432unsigned
433LSQ<Impl>::numFreeLoadEntries()
434{
435 unsigned total = 0;
436
437 list<ThreadID>::iterator threads = activeThreads->begin();
438 list<ThreadID>::iterator end = activeThreads->end();
439
440 while (threads != end) {
441 ThreadID tid = *threads++;
442
443 total += thread[tid].numFreeLoadEntries();
444 }
445
446 return total;
447}
448
449template<class Impl>
450unsigned
451LSQ<Impl>::numFreeStoreEntries()
452{
453 unsigned total = 0;
454
455 list<ThreadID>::iterator threads = activeThreads->begin();
456 list<ThreadID>::iterator end = activeThreads->end();
457
458 while (threads != end) {
459 ThreadID tid = *threads++;
460
461 total += thread[tid].numFreeStoreEntries();
462 }
463
464 return total;
465}
466
467template<class Impl>
468unsigned
469LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
470{
471 return thread[tid].numFreeLoadEntries();
472}
473
474template<class Impl>
475unsigned
476LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
477{
478 return thread[tid].numFreeStoreEntries();
479}
480
481template<class Impl>
482bool
483LSQ<Impl>::isFull()
484{
485 list<ThreadID>::iterator threads = activeThreads->begin();
486 list<ThreadID>::iterator end = activeThreads->end();
487
488 while (threads != end) {
489 ThreadID tid = *threads++;
490
491 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
492 return false;
493 }
494
495 return true;
496}
497
498template<class Impl>
499bool
500LSQ<Impl>::isFull(ThreadID tid)
501{
502 //@todo: Change to Calculate All Entries for
503 //Dynamic Policy
504 if (lsqPolicy == SMTQueuePolicy::Dynamic)
505 return isFull();
506 else
507 return thread[tid].lqFull() || thread[tid].sqFull();
508}
509
template<class Impl>
bool
LSQ<Impl>::isEmpty() const
{
    // Empty only when both the load queue and the store queue are empty
    // across all active threads.
    return lqEmpty() && sqEmpty();
}
516
517template<class Impl>
518bool
519LSQ<Impl>::lqEmpty() const
520{
521 list<ThreadID>::const_iterator threads = activeThreads->begin();
522 list<ThreadID>::const_iterator end = activeThreads->end();
523
524 while (threads != end) {
525 ThreadID tid = *threads++;
526
527 if (!thread[tid].lqEmpty())
528 return false;
529 }
530
531 return true;
532}
533
534template<class Impl>
535bool
536LSQ<Impl>::sqEmpty() const
537{
538 list<ThreadID>::const_iterator threads = activeThreads->begin();
539 list<ThreadID>::const_iterator end = activeThreads->end();
540
541 while (threads != end) {
542 ThreadID tid = *threads++;
543
544 if (!thread[tid].sqEmpty())
545 return false;
546 }
547
548 return true;
549}
550
551template<class Impl>
552bool
553LSQ<Impl>::lqFull()
554{
555 list<ThreadID>::iterator threads = activeThreads->begin();
556 list<ThreadID>::iterator end = activeThreads->end();
557
558 while (threads != end) {
559 ThreadID tid = *threads++;
560
561 if (!thread[tid].lqFull())
562 return false;
563 }
564
565 return true;
566}
567
568template<class Impl>
569bool
570LSQ<Impl>::lqFull(ThreadID tid)
571{
572 //@todo: Change to Calculate All Entries for
573 //Dynamic Policy
574 if (lsqPolicy == SMTQueuePolicy::Dynamic)
575 return lqFull();
576 else
577 return thread[tid].lqFull();
578}
579
580template<class Impl>
581bool
582LSQ<Impl>::sqFull()
583{
584 list<ThreadID>::iterator threads = activeThreads->begin();
585 list<ThreadID>::iterator end = activeThreads->end();
586
587 while (threads != end) {
588 ThreadID tid = *threads++;
589
590 if (!sqFull(tid))
591 return false;
592 }
593
594 return true;
595}
596
597template<class Impl>
598bool
599LSQ<Impl>::sqFull(ThreadID tid)
600{
601 //@todo: Change to Calculate All Entries for
602 //Dynamic Policy
603 if (lsqPolicy == SMTQueuePolicy::Dynamic)
604 return sqFull();
605 else
606 return thread[tid].sqFull();
607}
608
609template<class Impl>
610bool
611LSQ<Impl>::isStalled()
612{
613 list<ThreadID>::iterator threads = activeThreads->begin();
614 list<ThreadID>::iterator end = activeThreads->end();
615
616 while (threads != end) {
617 ThreadID tid = *threads++;
618
619 if (!thread[tid].isStalled())
620 return false;
621 }
622
623 return true;
624}
625
626template<class Impl>
627bool
628LSQ<Impl>::isStalled(ThreadID tid)
629{
630 if (lsqPolicy == SMTQueuePolicy::Dynamic)
631 return isStalled();
632 else
633 return thread[tid].isStalled();
634}
635
636template<class Impl>
637bool
638LSQ<Impl>::hasStoresToWB()
639{
640 list<ThreadID>::iterator threads = activeThreads->begin();
641 list<ThreadID>::iterator end = activeThreads->end();
642
643 while (threads != end) {
644 ThreadID tid = *threads++;
645
646 if (hasStoresToWB(tid))
647 return true;
648 }
649
650 return false;
651}
652
653template<class Impl>
654bool
655LSQ<Impl>::willWB()
656{
657 list<ThreadID>::iterator threads = activeThreads->begin();
658 list<ThreadID>::iterator end = activeThreads->end();
659
660 while (threads != end) {
661 ThreadID tid = *threads++;
662
663 if (willWB(tid))
664 return true;
665 }
666
667 return false;
668}
669
670template<class Impl>
671void
672LSQ<Impl>::dumpInsts() const
673{
674 list<ThreadID>::const_iterator threads = activeThreads->begin();
675 list<ThreadID>::const_iterator end = activeThreads->end();
676
677 while (threads != end) {
678 ThreadID tid = *threads++;
679
680 thread[tid].dumpInsts();
681 }
682}
683
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
                       uint64_t *res, AtomicOpFunctor *amo_op,
                       const std::vector<bool>& byteEnable)
{
    // This incoming request can be either load, store or atomic.
    // An atomic request carries a pointer to its atomic memory operation.
    bool isAtomic M5_VAR_USED = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* req = nullptr;

    // Atomic requests that access data across cache line boundary are
    // currently not allowed since the cache does not guarantee corresponding
    // atomic memory operations to be executed atomically across a cache line.
    // For ISAs such as x86 that supports cross-cache-line atomic instructions,
    // the cache needs to be modified to perform atomic update to both cache
    // lines. For now, such cross-line update is not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    if (inst->translationStarted()) {
        // A previous execution attempt (e.g. a replayed strictly-ordered
        // load) already built and saved the request; reuse it.
        req = inst->savedReq;
        assert(req);
    } else {
        // Accesses that cross a cache-line boundary are split into
        // per-line fragments; everything else is a single request.
        if (needs_burst) {
            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, amo_op);
        }
        assert(req);
        if (!byteEnable.empty()) {
            req->_byteEnable = byteEnable;
        }
        inst->setRequest();
        req->taskId(cpu->taskId());

        // There might be fault from a previous execution attempt if this is
        // a strictly ordered load
        inst->getFault() = NoFault;

        req->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (req->isTranslationComplete()) {
        if (inst->getFault() == NoFault) {
            inst->effAddr = req->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            // Mirror the request for the checker CPU, if one is attached.
            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*req->request());
            }
            // Kick off the actual memory access through the CPU.
            if (isLoad)
                inst->getFault() = cpu->read(req, inst->lqIdx);
            else
                inst->getFault() = cpu->write(req, data, inst->sqIdx);
        } else if (isLoad) {
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened.  Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}
757
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    // TLB callback: the single (and only) fragment finished translating.
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            // Translation succeeded: publish the physical address and
            // request flags to the instruction and move to Request state.
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                // Conditional swaps carry their comparison value in _res.
                assert(_res);
                req->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}
790
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    // TLB callback: one fragment of a split request finished translating.
    _fault.push_back(fault);
    // Fragments are expected to complete in issue order unless the
    // translation was delayed.
    assert(req == _requests[numTranslatedFragments] || this->isDelayed());

    numInTranslationFragments--;
    numTranslatedFragments++;

    // Accumulate this fragment's flags into the main (virtual) request.
    mainReq->setFlags(req->getFlags());

    // Only act once every fragment has come back.
    if (numTranslatedFragments == _requests.size()) {
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            auto fault_it = _fault.begin();
            /* Ffwd to the first non-NoFault entry, if any. */
            while (fault_it != _fault.end() && *fault_it == NoFault)
                fault_it++;
            /* If none of the fragments faulted: */
            if (fault_it == _fault.end()) {
                // Use the first fragment's paddr as the instruction's
                // physical effective address.
                _inst->physEffAddr = request(0)->getPaddr();

                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    // Conditional swaps carry their comparison value in _res.
                    assert(_res);
                    mainReq->setExtraData(*_res);
                }
                setState(State::Request);
                _inst->fault = NoFault;
            } else {
                // Report the first faulting fragment's fault.
                setState(State::Fault);
                _inst->fault = *fault_it;
            }
            _inst->translationCompleted(true);
        }
    }
}
833
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    // Build the one underlying Request and hand it to the TLB.
    assert(_requests.size() == 0);

    // addRequest may elide the request entirely (e.g. nothing enabled
    // in the byte-enable mask), leaving _requests empty.
    this->addRequest(_addr, _size, _byteEnable);

    if (_requests.size() > 0) {
        _requests.back()->setReqInstSeqNum(_inst->seqNum);
        _requests.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        // Save the request so a replayed attempt can reuse it.
        _inst->savedReq = this;
        sendFragmentToTranslation(0);
    } else {
        // No memory access needed: mark the access as predicated-off.
        _inst->setMemAccPredicate(false);
    }
}
855
template<class Impl>
PacketPtr
LSQ<Impl>::SplitDataRequest::mainPacket()
{
    // Accessor for the virtual packet spanning the whole split access.
    return _mainPacket;
}
862
template<class Impl>
RequestPtr
LSQ<Impl>::SplitDataRequest::mainRequest()
{
    // Accessor for the virtual request spanning the whole split access.
    return mainReq;
}
869
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    // Break the access into cache-line-sized fragments: a possibly
    // unaligned head, zero or more whole lines, and a possibly short
    // tail.  Each fragment gets its own Request and TLB translation.
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    // mainReq is a virtual request covering the entire access; it is
    // never sent to memory itself.
    mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
                _size, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId());
    if (!_byteEnable.empty()) {
        mainReq->setByteEnable(_byteEnable);
    }

    // Paddr is not used in mainReq. However, we will accumulate the flags
    // from the sub requests into mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
    // avoid a potential assert in setFlags() when we call it from finish().
    mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    if (_byteEnable.empty()) {
        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
    } else {
        // Slice the byte-enable mask to match this fragment's bytes.
        auto it_start = _byteEnable.begin();
        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
        this->addRequest(base_addr, next_addr - base_addr,
                         std::vector<bool>(it_start, it_end));
    }
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, cacheLineSize, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
            this->addRequest(base_addr, cacheLineSize,
                             std::vector<bool>(it_start, it_end));
        }
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.end();
            this->addRequest(base_addr, _size - size_so_far,
                             std::vector<bool>(it_start, it_end));
        }
    }

    if (_requests.size() > 0) {
        /* Setup the requests and send them to translation. */
        for (auto& r: _requests) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        // Save the request so a replayed attempt can reuse it.
        this->_inst->savedReq = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_requests.size());

        for (uint32_t i = 0; i < _requests.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        // Every fragment was elided: mark the access as predicated-off.
        _inst->setMemAccPredicate(false);
    }
}
953
template<class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
{
    // Hand fragment i to the data TLB; finish() is the completion
    // callback (this object is the Translation).
    numInTranslationFragments++;
    _port.dTLB()->translateTiming(
            this->request(i),
            this->_inst->thread->getTC(), this,
            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
}
964
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    // Memory response for the single outstanding packet: mark the
    // request complete and hand the packet back to the LSQ unit.
    assert(_numOutstandingPackets == 1);
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    setState(State::Complete);
    flags.set(Flag::Complete);
    state->outstanding--;
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    return true;
}
978
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    // Memory response for one fragment of a split access.
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    // Sanity check: the packet must be one of ours.
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    state->outstanding--;
    // Only complete once every fragment has responded.
    if (numReceivedPackets == _packets.size()) {
        setState(State::Complete);
        flags.set(Flag::Complete);
        /* Assemble a single response packet spanning the whole access. */
        PacketPtr resp = isLoad()
            ? Packet::createRead(mainReq)
            : Packet::createWrite(mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        // The assembled packet is only needed for the callback above.
        delete resp;
    }
    return true;
}
1007
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::buildPackets()
{
    // Build the one packet for this request, pointing at the
    // instruction's data buffer.
    assert(_senderState);
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ?  Packet::createRead(request())
                    :  Packet::createWrite(request()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = _senderState;
    }
    assert(_packets.size() == 1);
}
1024
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    /* Extra data?? */
    Addr base_address = _addr;

    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        // The main packet spans the whole access; loads point it at the
        // instruction's data buffer so fragments can fill slices of it.
        if (isLoad()) {
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        // Build one packet per fragment, stopping at the first fragment
        // whose translation faulted.
        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
            RequestPtr r = _requests[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                     : Packet::createWrite(r);
            // Each fragment covers a slice of the instruction's buffer,
            // offset by the fragment's position in the access.
            ptrdiff_t offset = r->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                // Stores copy their slice so each packet owns its data.
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() > 0);
}
1058
1059template<class Impl>
1060void
1061LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1062{
1063 assert(_numOutstandingPackets == 0);
1064 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1065 _numOutstandingPackets = 1;
1066}
1067
1068template<class Impl>
1069void
1070LSQ<Impl>::SplitDataRequest::sendPacketToCache()
1071{
1072 /* Try to send the packets. */
1073 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1074 lsqUnit()->trySendPacket(isLoad(),
1075 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1076 _numOutstandingPackets++;
1077 }
1078}
1079
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
                                             PacketPtr pkt)
{
    // A single request maps directly onto one IPR write.
    TheISA::handleIprWrite(thread, pkt);
}
1087
1088template<class Impl>
1089void
1090LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
1091 PacketPtr mainPkt)
1092{
1093 unsigned offset = 0;
1094 for (auto r: _requests) {
1095 PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1096 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1097 TheISA::handleIprWrite(thread, pkt);
1098 offset += r->getSize();
1099 delete pkt;
1100 }
1101}
1102
template<class Impl>
Cycles
LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
                                            PacketPtr pkt)
{
    // A single request maps directly onto one IPR read; its latency is
    // the access latency.
    return TheISA::handleIprRead(thread, pkt);
}
1110
1111template<class Impl>
1112Cycles
1113LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
1114 PacketPtr mainPkt)
1115{
1116 Cycles delay(0);
1117 unsigned offset = 0;
1118
1119 for (auto r: _requests) {
1120 PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1121 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1122 Cycles d = TheISA::handleIprRead(thread, pkt);
1123 if (d > delay)
1124 delay = d;
1125 offset += r->getSize();
1126 delete pkt;
1127 }
1128 return delay;
1129}
1130
1131template<class Impl>
1132bool
1133LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1134{
1135 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1136}
1137
1138template<class Impl>
1139bool
1140LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1141{
1142 bool is_hit = false;
1143 for (auto &r: _requests) {
1144 if ((r->getPaddr() & blockMask) == blockAddr) {
1145 is_hit = true;
1146 break;
1147 }
1148 }
1149 return is_hit;
1150}
1151
1152#endif//__CPU_O3_LSQ_IMPL_HH__
731 req->initiateTranslation();
732 }
733
734 /* This is the place were instructions get the effAddr. */
735 if (req->isTranslationComplete()) {
736 if (inst->getFault() == NoFault) {
737 inst->effAddr = req->getVaddr();
738 inst->effSize = size;
739 inst->effAddrValid(true);
740
741 if (cpu->checker) {
742 inst->reqToVerify = std::make_shared<Request>(*req->request());
743 }
744 if (isLoad)
745 inst->getFault() = cpu->read(req, inst->lqIdx);
746 else
747 inst->getFault() = cpu->write(req, data, inst->sqIdx);
748 } else if (isLoad) {
749 inst->setMemAccPredicate(false);
750 // Commit will have to clean up whatever happened. Set this
751 // instruction as executed.
752 inst->setExecuted();
753 }
754 }
755
756 if (inst->traceData)
757 inst->traceData->setMem(addr, size, flags);
758
759 return inst->getFault();
760}
761
762template<class Impl>
763void
764LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
765 ThreadContext* tc, BaseTLB::Mode mode)
766{
767 _fault.push_back(fault);
768 numInTranslationFragments = 0;
769 numTranslatedFragments = 1;
770 /* If the instruction has been squahsed, let the request know
771 * as it may have to self-destruct. */
772 if (_inst->isSquashed()) {
773 this->squashTranslation();
774 } else {
775 _inst->strictlyOrdered(req->isStrictlyOrdered());
776
777 flags.set(Flag::TranslationFinished);
778 if (fault == NoFault) {
779 _inst->physEffAddr = req->getPaddr();
780 _inst->memReqFlags = req->getFlags();
781 if (req->isCondSwap()) {
782 assert(_res);
783 req->setExtraData(*_res);
784 }
785 setState(State::Request);
786 } else {
787 setState(State::Fault);
788 }
789
790 LSQRequest::_inst->fault = fault;
791 LSQRequest::_inst->translationCompleted(true);
792 }
793}
794
795template<class Impl>
796void
797LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
798 ThreadContext* tc, BaseTLB::Mode mode)
799{
800 _fault.push_back(fault);
801 assert(req == _requests[numTranslatedFragments] || this->isDelayed());
802
803 numInTranslationFragments--;
804 numTranslatedFragments++;
805
806 mainReq->setFlags(req->getFlags());
807
808 if (numTranslatedFragments == _requests.size()) {
809 if (_inst->isSquashed()) {
810 this->squashTranslation();
811 } else {
812 _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
813 flags.set(Flag::TranslationFinished);
814 auto fault_it = _fault.begin();
815 /* Ffwd to the first NoFault. */
816 while (fault_it != _fault.end() && *fault_it == NoFault)
817 fault_it++;
818 /* If none of the fragments faulted: */
819 if (fault_it == _fault.end()) {
820 _inst->physEffAddr = request(0)->getPaddr();
821
822 _inst->memReqFlags = mainReq->getFlags();
823 if (mainReq->isCondSwap()) {
824 assert(_res);
825 mainReq->setExtraData(*_res);
826 }
827 setState(State::Request);
828 _inst->fault = NoFault;
829 } else {
830 setState(State::Fault);
831 _inst->fault = *fault_it;
832 }
833 _inst->translationCompleted(true);
834 }
835 }
836}
837
838template<class Impl>
839void
840LSQ<Impl>::SingleDataRequest::initiateTranslation()
841{
842 assert(_requests.size() == 0);
843
844 this->addRequest(_addr, _size, _byteEnable);
845
846 if (_requests.size() > 0) {
847 _requests.back()->setReqInstSeqNum(_inst->seqNum);
848 _requests.back()->taskId(_taskId);
849 _inst->translationStarted(true);
850 setState(State::Translation);
851 flags.set(Flag::TranslationStarted);
852
853 _inst->savedReq = this;
854 sendFragmentToTranslation(0);
855 } else {
856 _inst->setMemAccPredicate(false);
857 }
858}
859
860template<class Impl>
861PacketPtr
862LSQ<Impl>::SplitDataRequest::mainPacket()
863{
864 return _mainPacket;
865}
866
867template<class Impl>
868RequestPtr
869LSQ<Impl>::SplitDataRequest::mainRequest()
870{
871 return mainReq;
872}
873
874template<class Impl>
875void
876LSQ<Impl>::SplitDataRequest::initiateTranslation()
877{
878 auto cacheLineSize = _port.cacheLineSize();
879 Addr base_addr = _addr;
880 Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
881 Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
882 uint32_t size_so_far = 0;
883
884 mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
885 _size, _flags, _inst->masterId(),
886 _inst->instAddr(), _inst->contextId());
887 if (!_byteEnable.empty()) {
888 mainReq->setByteEnable(_byteEnable);
889 }
890
891 // Paddr is not used in mainReq. However, we will accumulate the flags
892 // from the sub requests into mainReq by calling setFlags() in finish().
893 // setFlags() assumes that paddr is set so flip the paddr valid bit here to
894 // avoid a potential assert in setFlags() when we call it from finish().
895 mainReq->setPaddr(0);
896
897 /* Get the pre-fix, possibly unaligned. */
898 if (_byteEnable.empty()) {
899 this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
900 } else {
901 auto it_start = _byteEnable.begin();
902 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
903 this->addRequest(base_addr, next_addr - base_addr,
904 std::vector<bool>(it_start, it_end));
905 }
906 size_so_far = next_addr - base_addr;
907
908 /* We are block aligned now, reading whole blocks. */
909 base_addr = next_addr;
910 while (base_addr != final_addr) {
911 if (_byteEnable.empty()) {
912 this->addRequest(base_addr, cacheLineSize, _byteEnable);
913 } else {
914 auto it_start = _byteEnable.begin() + size_so_far;
915 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
916 this->addRequest(base_addr, cacheLineSize,
917 std::vector<bool>(it_start, it_end));
918 }
919 size_so_far += cacheLineSize;
920 base_addr += cacheLineSize;
921 }
922
923 /* Deal with the tail. */
924 if (size_so_far < _size) {
925 if (_byteEnable.empty()) {
926 this->addRequest(base_addr, _size - size_so_far, _byteEnable);
927 } else {
928 auto it_start = _byteEnable.begin() + size_so_far;
929 auto it_end = _byteEnable.end();
930 this->addRequest(base_addr, _size - size_so_far,
931 std::vector<bool>(it_start, it_end));
932 }
933 }
934
935 if (_requests.size() > 0) {
936 /* Setup the requests and send them to translation. */
937 for (auto& r: _requests) {
938 r->setReqInstSeqNum(_inst->seqNum);
939 r->taskId(_taskId);
940 }
941
942 _inst->translationStarted(true);
943 setState(State::Translation);
944 flags.set(Flag::TranslationStarted);
945 this->_inst->savedReq = this;
946 numInTranslationFragments = 0;
947 numTranslatedFragments = 0;
948 _fault.resize(_requests.size());
949
950 for (uint32_t i = 0; i < _requests.size(); i++) {
951 sendFragmentToTranslation(i);
952 }
953 } else {
954 _inst->setMemAccPredicate(false);
955 }
956}
957
958template<class Impl>
959void
960LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
961{
962 numInTranslationFragments++;
963 _port.dTLB()->translateTiming(
964 this->request(i),
965 this->_inst->thread->getTC(), this,
966 this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
967}
968
969template<class Impl>
970bool
971LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
972{
973 assert(_numOutstandingPackets == 1);
974 auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
975 setState(State::Complete);
976 flags.set(Flag::Complete);
977 state->outstanding--;
978 assert(pkt == _packets.front());
979 _port.completeDataAccess(pkt);
980 return true;
981}
982
983template<class Impl>
984bool
985LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
986{
987 auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
988 uint32_t pktIdx = 0;
989 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
990 pktIdx++;
991 assert(pktIdx < _packets.size());
992 numReceivedPackets++;
993 state->outstanding--;
994 if (numReceivedPackets == _packets.size()) {
995 setState(State::Complete);
996 flags.set(Flag::Complete);
997 /* Assemble packets. */
998 PacketPtr resp = isLoad()
999 ? Packet::createRead(mainReq)
1000 : Packet::createWrite(mainReq);
1001 if (isLoad())
1002 resp->dataStatic(_inst->memData);
1003 else
1004 resp->dataStatic(_data);
1005 resp->senderState = _senderState;
1006 _port.completeDataAccess(resp);
1007 delete resp;
1008 }
1009 return true;
1010}
1011
1012template<class Impl>
1013void
1014LSQ<Impl>::SingleDataRequest::buildPackets()
1015{
1016 assert(_senderState);
1017 /* Retries do not create new packets. */
1018 if (_packets.size() == 0) {
1019 _packets.push_back(
1020 isLoad()
1021 ? Packet::createRead(request())
1022 : Packet::createWrite(request()));
1023 _packets.back()->dataStatic(_inst->memData);
1024 _packets.back()->senderState = _senderState;
1025 }
1026 assert(_packets.size() == 1);
1027}
1028
1029template<class Impl>
1030void
1031LSQ<Impl>::SplitDataRequest::buildPackets()
1032{
1033 /* Extra data?? */
1034 Addr base_address = _addr;
1035
1036 if (_packets.size() == 0) {
1037 /* New stuff */
1038 if (isLoad()) {
1039 _mainPacket = Packet::createRead(mainReq);
1040 _mainPacket->dataStatic(_inst->memData);
1041 }
1042 for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
1043 RequestPtr r = _requests[i];
1044 PacketPtr pkt = isLoad() ? Packet::createRead(r)
1045 : Packet::createWrite(r);
1046 ptrdiff_t offset = r->getVaddr() - base_address;
1047 if (isLoad()) {
1048 pkt->dataStatic(_inst->memData + offset);
1049 } else {
1050 uint8_t* req_data = new uint8_t[r->getSize()];
1051 std::memcpy(req_data,
1052 _inst->memData + offset,
1053 r->getSize());
1054 pkt->dataDynamic(req_data);
1055 }
1056 pkt->senderState = _senderState;
1057 _packets.push_back(pkt);
1058 }
1059 }
1060 assert(_packets.size() > 0);
1061}
1062
1063template<class Impl>
1064void
1065LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1066{
1067 assert(_numOutstandingPackets == 0);
1068 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1069 _numOutstandingPackets = 1;
1070}
1071
1072template<class Impl>
1073void
1074LSQ<Impl>::SplitDataRequest::sendPacketToCache()
1075{
1076 /* Try to send the packets. */
1077 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1078 lsqUnit()->trySendPacket(isLoad(),
1079 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1080 _numOutstandingPackets++;
1081 }
1082}
1083
1084template<class Impl>
1085void
1086LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
1087 PacketPtr pkt)
1088{
1089 TheISA::handleIprWrite(thread, pkt);
1090}
1091
1092template<class Impl>
1093void
1094LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
1095 PacketPtr mainPkt)
1096{
1097 unsigned offset = 0;
1098 for (auto r: _requests) {
1099 PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1100 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1101 TheISA::handleIprWrite(thread, pkt);
1102 offset += r->getSize();
1103 delete pkt;
1104 }
1105}
1106
1107template<class Impl>
1108Cycles
1109LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
1110 PacketPtr pkt)
1111{
1112 return TheISA::handleIprRead(thread, pkt);
1113}
1114
1115template<class Impl>
1116Cycles
1117LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
1118 PacketPtr mainPkt)
1119{
1120 Cycles delay(0);
1121 unsigned offset = 0;
1122
1123 for (auto r: _requests) {
1124 PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1125 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1126 Cycles d = TheISA::handleIprRead(thread, pkt);
1127 if (d > delay)
1128 delay = d;
1129 offset += r->getSize();
1130 delete pkt;
1131 }
1132 return delay;
1133}
1134
1135template<class Impl>
1136bool
1137LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1138{
1139 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1140}
1141
1142template<class Impl>
1143bool
1144LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1145{
1146 bool is_hit = false;
1147 for (auto &r: _requests) {
1148 if ((r->getPaddr() & blockMask) == blockAddr) {
1149 is_hit = true;
1150 break;
1151 }
1152 }
1153 return is_hit;
1154}
1155
1156#endif//__CPU_O3_LSQ_IMPL_HH__