Cross Reference: /gem5/src/cpu/o3/lsq

Deleted Added

sdiff udiff text old ( 13688:5bb3bf2f2559 ) new ( 13710:5ba1d8066ef0 )

full compact

lsq_impl.hh (13688:5bb3bf2f2559)	lsq_impl.hh (13710:5ba1d8066ef0)
1/* 2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2005-2006 The Regents of The University of Michigan 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions are 20 * met: redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer; 22 * redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution; 25 * neither the name of the copyright holders nor the names of its 26 * contributors may be used to endorse or promote products derived from 27 * this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 * 41 * Authors: Korey Sewell 42 / 43 44#ifndef __CPU_O3_LSQ_IMPL_HH__ 45#define __CPU_O3_LSQ_IMPL_HH__ 46 47#include <algorithm> 48#include <list> 49#include <string> 50 51#include "base/logging.hh" 52#include "cpu/o3/lsq.hh" 53#include "debug/Drain.hh" 54#include "debug/Fetch.hh" 55#include "debug/LSQ.hh" 56#include "debug/Writeback.hh" 57#include "params/DerivO3CPU.hh" 58 59using namespace std; 60 61template <class Impl> 62LSQ<Impl>::LSQ(O3CPU cpu_ptr, IEW iew_ptr, DerivO3CPUParams params) 63 : cpu(cpu_ptr), iewStage(iew_ptr), 64 _cacheBlocked(false), 65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),	1/* 2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2005-2006 The Regents of The University of Michigan 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions are 20 * met: redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer; 22 * redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution; 25 * neither the name of the copyright holders nor the names of its 26 * contributors may be used to endorse or promote products derived from 27 * this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 * 41 * Authors: Korey Sewell 42 / 43 44#ifndef __CPU_O3_LSQ_IMPL_HH__ 45#define __CPU_O3_LSQ_IMPL_HH__ 46 47#include <algorithm> 48#include <list> 49#include <string> 50 51#include "base/logging.hh" 52#include "cpu/o3/lsq.hh" 53#include "debug/Drain.hh" 54#include "debug/Fetch.hh" 55#include "debug/LSQ.hh" 56#include "debug/Writeback.hh" 57#include "params/DerivO3CPU.hh" 58 59using namespace std; 60 61template <class Impl> 62LSQ<Impl>::LSQ(O3CPU cpu_ptr, IEW iew_ptr, DerivO3CPUParams params) 63 : cpu(cpu_ptr), iewStage(iew_ptr), 64 _cacheBlocked(false), 65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
	66 cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
66 lsqPolicy(params->smtLSQPolicy), 67 LQEntries(params->LQEntries), 68 SQEntries(params->SQEntries), 69 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads, 70 params->smtLSQThreshold)), 71 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads, 72 params->smtLSQThreshold)), 73 numThreads(params->numThreads) 74{ 75 assert(numThreads > 0 && numThreads <= Impl::MaxThreads); 76 77 //********************************************/ 78 //******** Handle SMT Parameters *******/ 79 //*******************************************/ 80 81 / Run SMT olicy checks. / 82 if (lsqPolicy == SMTQueuePolicy::Dynamic) { 83 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n"); 84 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) { 85 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: " 86 "%i entries per LQ \| %i entries per SQ\n", 87 maxLQEntries,maxSQEntries); 88 } else if (lsqPolicy == SMTQueuePolicy::Threshold) { 89 90 assert(params->smtLSQThreshold > params->LQEntries); 91 assert(params->smtLSQThreshold > params->SQEntries); 92 93 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: " 94 "%i entries per LQ \| %i entries per SQ\n", 95 maxLQEntries,maxSQEntries); 96 } else { 97 panic("Invalid LSQ sharing policy. Options are: Dynamic, " 98 "Partitioned, Threshold"); 99 } 100* 101 thread.reserve(numThreads); 102 for (ThreadID tid = 0; tid < numThreads; tid++) { 103 thread.emplace_back(maxLQEntries, maxSQEntries); 104 thread[tid].init(cpu, iew_ptr, params, this, tid); 105 thread[tid].setDcachePort(&cpu_ptr->getDataPort()); 106 } 107} 108 109 110template<class Impl> 111std::string 112LSQ<Impl>::name() const 113{ 114 return iewStage->name() + ".lsq"; 115} 116 117template<class Impl> 118void 119LSQ<Impl>::regStats() 120{ 121 //Initialize LSQs 122 for (ThreadID tid = 0; tid < numThreads; tid++) { 123 thread[tid].regStats(); 124 } 125} 126 127template<class Impl> 128void 129LSQ<Impl>::setActiveThreads(list<ThreadID> at_ptr) 130{ 131* activeThreads = at_ptr; 132 assert(activeThreads != 0); 133} 134 135template <class Impl> 136void 137LSQ<Impl>::drainSanityCheck() const 138{ 139 assert(isDrained()); 140 141 for (ThreadID tid = 0; tid < numThreads; tid++) 142 thread[tid].drainSanityCheck(); 143} 144 145template <class Impl> 146bool 147LSQ<Impl>::isDrained() const 148{ 149 bool drained(true); 150 151 if (!lqEmpty()) { 152 DPRINTF(Drain, "Not drained, LQ not empty.\n"); 153 drained = false; 154 } 155 156 if (!sqEmpty()) { 157 DPRINTF(Drain, "Not drained, SQ not empty.\n"); 158 drained = false; 159 } 160 161 return drained; 162} 163 164template <class Impl> 165void 166LSQ<Impl>::takeOverFrom() 167{ 168 usedStorePorts = 0; 169 _cacheBlocked = false; 170 171 for (ThreadID tid = 0; tid < numThreads; tid++) { 172 thread[tid].takeOverFrom(); 173 } 174} 175	67 lsqPolicy(params->smtLSQPolicy), 68 LQEntries(params->LQEntries), 69 SQEntries(params->SQEntries), 70 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads, 71 params->smtLSQThreshold)), 72 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads, 73 params->smtLSQThreshold)), 74 numThreads(params->numThreads) 75{ 76 assert(numThreads > 0 && numThreads <= Impl::MaxThreads); 77 78 //********************************************/ 79 //******** Handle SMT Parameters *******/ 80 //*******************************************/ 81 82 / Run SMT olicy checks. / 83 if (lsqPolicy == SMTQueuePolicy::Dynamic) { 84 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n"); 85 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) { 86 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: " 87 "%i entries per LQ \| %i entries per SQ\n", 88 maxLQEntries,maxSQEntries); 89 } else if (lsqPolicy == SMTQueuePolicy::Threshold) { 90 91 assert(params->smtLSQThreshold > params->LQEntries); 92 assert(params->smtLSQThreshold > params->SQEntries); 93 94 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: " 95 "%i entries per LQ \| %i entries per SQ\n", 96 maxLQEntries,maxSQEntries); 97 } else { 98 panic("Invalid LSQ sharing policy. Options are: Dynamic, " 99 "Partitioned, Threshold"); 100* } 101 102 thread.reserve(numThreads); 103 for (ThreadID tid = 0; tid < numThreads; tid++) { 104 thread.emplace_back(maxLQEntries, maxSQEntries); 105 thread[tid].init(cpu, iew_ptr, params, this, tid); 106 thread[tid].setDcachePort(&cpu_ptr->getDataPort()); 107 } 108} 109 110 111template<class Impl> 112std::string 113LSQ<Impl>::name() const 114{ 115 return iewStage->name() + ".lsq"; 116} 117 118template<class Impl> 119void 120LSQ<Impl>::regStats() 121{ 122 //Initialize LSQs 123 for (ThreadID tid = 0; tid < numThreads; tid++) { 124 thread[tid].regStats(); 125 } 126} 127 128template<class Impl> 129void 130LSQ<Impl>::setActiveThreads(list<ThreadID> at_ptr) 131{ 132* activeThreads = at_ptr; 133 assert(activeThreads != 0); 134} 135 136template <class Impl> 137void 138LSQ<Impl>::drainSanityCheck() const 139{ 140 assert(isDrained()); 141 142 for (ThreadID tid = 0; tid < numThreads; tid++) 143 thread[tid].drainSanityCheck(); 144} 145 146template <class Impl> 147bool 148LSQ<Impl>::isDrained() const 149{ 150 bool drained(true); 151 152 if (!lqEmpty()) { 153 DPRINTF(Drain, "Not drained, LQ not empty.\n"); 154 drained = false; 155 } 156 157 if (!sqEmpty()) { 158 DPRINTF(Drain, "Not drained, SQ not empty.\n"); 159 drained = false; 160 } 161 162 return drained; 163} 164 165template <class Impl> 166void 167LSQ<Impl>::takeOverFrom() 168{ 169 usedStorePorts = 0; 170 _cacheBlocked = false; 171 172 for (ThreadID tid = 0; tid < numThreads; tid++) { 173 thread[tid].takeOverFrom(); 174 } 175} 176
	177template <class Impl> 178void 179LSQ<Impl>::tick() 180{ 181 // Re-issue loads which got blocked on the per-cycle load ports limit. 182 if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked) 183 iewStage->cacheUnblocked(); 184 185 usedLoadPorts = 0; 186 usedStorePorts = 0; 187} 188
176template<class Impl> 177bool 178LSQ<Impl>::cacheBlocked() const 179{ 180 return _cacheBlocked; 181} 182 183template<class Impl> 184void 185LSQ<Impl>::cacheBlocked(bool v) 186{ 187 _cacheBlocked = v; 188} 189 190template<class Impl> 191bool	189template<class Impl> 190bool 191LSQ<Impl>::cacheBlocked() const 192{ 193 return _cacheBlocked; 194} 195 196template<class Impl> 197void 198LSQ<Impl>::cacheBlocked(bool v) 199{ 200 _cacheBlocked = v; 201} 202 203template<class Impl> 204bool
192LSQ<Impl>::storePortAvailable() const	205LSQ<Impl>::cachePortAvailable(bool is_load) const
193{	206{
194 return usedStorePorts < cacheStorePorts;	207 bool ret; 208 if (is_load) { 209 ret = usedLoadPorts < cacheLoadPorts; 210 } else { 211 ret = usedStorePorts < cacheStorePorts; 212 } 213 return ret;
195} 196 197template<class Impl> 198void	214} 215 216template<class Impl> 217void
199LSQ<Impl>::storePortBusy()	218LSQ<Impl>::cachePortBusy(bool is_load)
200{	219{
201 usedStorePorts++; 202 assert(usedStorePorts <= cacheStorePorts);	220 assert(cachePortAvailable(is_load)); 221 if (is_load) { 222 usedLoadPorts++; 223 } else { 224 usedStorePorts++; 225 }
203} 204 205template<class Impl> 206void 207LSQ<Impl>::insertLoad(const DynInstPtr &load_inst) 208{ 209 ThreadID tid = load_inst->threadNumber; 210 211 thread[tid].insertLoad(load_inst); 212} 213 214template<class Impl> 215void 216LSQ<Impl>::insertStore(const DynInstPtr &store_inst) 217{ 218 ThreadID tid = store_inst->threadNumber; 219 220 thread[tid].insertStore(store_inst); 221} 222 223template<class Impl> 224Fault 225LSQ<Impl>::executeLoad(const DynInstPtr &inst) 226{ 227 ThreadID tid = inst->threadNumber; 228 229 return thread[tid].executeLoad(inst); 230} 231 232template<class Impl> 233Fault 234LSQ<Impl>::executeStore(const DynInstPtr &inst) 235{ 236 ThreadID tid = inst->threadNumber; 237 238 return thread[tid].executeStore(inst); 239} 240 241template<class Impl> 242void 243LSQ<Impl>::writebackStores() 244{ 245 list<ThreadID>::iterator threads = activeThreads->begin(); 246 list<ThreadID>::iterator end = activeThreads->end(); 247 248 while (threads != end) { 249 ThreadID tid = threads++; 250* 251 if (numStoresToWB(tid) > 0) { 252 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores " 253 "available for Writeback.\n", tid, numStoresToWB(tid)); 254 } 255 256 thread[tid].writebackStores(); 257 } 258} 259 260template<class Impl> 261bool 262LSQ<Impl>::violation() 263{ 264 /* Answers: Does Anybody Have a Violation?/ 265* list<ThreadID>::iterator threads = activeThreads->begin(); 266 list<ThreadID>::iterator end = activeThreads->end(); 267 268 while (threads != end) { 269 ThreadID tid = threads++; 270* 271 if (thread[tid].violation()) 272 return true; 273 } 274 275 return false; 276} 277 278template <class Impl> 279void 280LSQ<Impl>::recvReqRetry() 281{ 282 iewStage->cacheUnblocked(); 283 cacheBlocked(false); 284 285 for (ThreadID tid : activeThreads) { 286* thread[tid].recvRetry(); 287 } 288} 289 290template <class Impl> 291void 292LSQ<Impl>::completeDataAccess(PacketPtr pkt) 293{ 294 auto senderState = dynamic_cast<LSQSenderState>(pkt->senderState); 295* thread[cpu->contextToThread(senderState->contextId())] 296 .completeDataAccess(pkt); 297} 298 299template <class Impl> 300bool 301LSQ<Impl>::recvTimingResp(PacketPtr pkt) 302{ 303 if (pkt->isError()) 304 DPRINTF(LSQ, "Got error packet back for address: %#X\n", 305 pkt->getAddr()); 306 307 auto senderState = dynamic_cast<LSQSenderState>(pkt->senderState); 308* panic_if(!senderState, "Got packet back with unknown sender state\n"); 309 310 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt); 311 312 if (pkt->isInvalidate()) { 313 // This response also contains an invalidate; e.g. this can be the case 314 // if cmd is ReadRespWithInvalidate. 315 // 316 // The calling order between completeDataAccess and checkSnoop matters. 317 // By calling checkSnoop after completeDataAccess, we ensure that the 318 // fault set by checkSnoop is not lost. Calling writeback (more 319 // specifically inst->completeAcc) in completeDataAccess overwrites 320 // fault, and in case this instruction requires squashing (as 321 // determined by checkSnoop), the ReExec fault set by checkSnoop would 322 // be lost otherwise. 323 324 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n", 325 pkt->getAddr()); 326 327 for (ThreadID tid = 0; tid < numThreads; tid++) { 328 thread[tid].checkSnoop(pkt); 329 } 330 } 331 // Update the LSQRequest state (this may delete the request) 332 senderState->request()->packetReplied(); 333 334 return true; 335} 336 337template <class Impl> 338void 339LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt) 340{ 341 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(), 342 pkt->cmdString()); 343 344 // must be a snoop 345 if (pkt->isInvalidate()) { 346 DPRINTF(LSQ, "received invalidation for addr:%#x\n", 347 pkt->getAddr()); 348 for (ThreadID tid = 0; tid < numThreads; tid++) { 349 thread[tid].checkSnoop(pkt); 350 } 351 } 352} 353 354template<class Impl> 355int 356LSQ<Impl>::getCount() 357{ 358 unsigned total = 0; 359 360 list<ThreadID>::iterator threads = activeThreads->begin(); 361 list<ThreadID>::iterator end = activeThreads->end(); 362 363 while (threads != end) { 364 ThreadID tid = threads++; 365* 366 total += getCount(tid); 367 } 368 369 return total; 370} 371 372template<class Impl> 373int 374LSQ<Impl>::numLoads() 375{ 376 unsigned total = 0; 377 378 list<ThreadID>::iterator threads = activeThreads->begin(); 379 list<ThreadID>::iterator end = activeThreads->end(); 380 381 while (threads != end) { 382 ThreadID tid = threads++; 383* 384 total += numLoads(tid); 385 } 386 387 return total; 388} 389 390template<class Impl> 391int 392LSQ<Impl>::numStores() 393{ 394 unsigned total = 0; 395 396 list<ThreadID>::iterator threads = activeThreads->begin(); 397 list<ThreadID>::iterator end = activeThreads->end(); 398 399 while (threads != end) { 400 ThreadID tid = threads++; 401* 402 total += thread[tid].numStores(); 403 } 404 405 return total; 406} 407 408template<class Impl> 409unsigned 410LSQ<Impl>::numFreeLoadEntries() 411{ 412 unsigned total = 0; 413 414 list<ThreadID>::iterator threads = activeThreads->begin(); 415 list<ThreadID>::iterator end = activeThreads->end(); 416 417 while (threads != end) { 418 ThreadID tid = threads++; 419* 420 total += thread[tid].numFreeLoadEntries(); 421 } 422 423 return total; 424} 425 426template<class Impl> 427unsigned 428LSQ<Impl>::numFreeStoreEntries() 429{ 430 unsigned total = 0; 431 432 list<ThreadID>::iterator threads = activeThreads->begin(); 433 list<ThreadID>::iterator end = activeThreads->end(); 434 435 while (threads != end) { 436 ThreadID tid = threads++; 437* 438 total += thread[tid].numFreeStoreEntries(); 439 } 440 441 return total; 442} 443 444template<class Impl> 445unsigned 446LSQ<Impl>::numFreeLoadEntries(ThreadID tid) 447{ 448 return thread[tid].numFreeLoadEntries(); 449} 450 451template<class Impl> 452unsigned 453LSQ<Impl>::numFreeStoreEntries(ThreadID tid) 454{ 455 return thread[tid].numFreeStoreEntries(); 456} 457 458template<class Impl> 459bool 460LSQ<Impl>::isFull() 461{ 462 list<ThreadID>::iterator threads = activeThreads->begin(); 463 list<ThreadID>::iterator end = activeThreads->end(); 464 465 while (threads != end) { 466 ThreadID tid = threads++; 467* 468 if (!(thread[tid].lqFull() \|\| thread[tid].sqFull())) 469 return false; 470 } 471 472 return true; 473} 474 475template<class Impl> 476bool 477LSQ<Impl>::isFull(ThreadID tid) 478{ 479 //@todo: Change to Calculate All Entries for 480 //Dynamic Policy 481 if (lsqPolicy == SMTQueuePolicy::Dynamic) 482 return isFull(); 483 else 484 return thread[tid].lqFull() \|\| thread[tid].sqFull(); 485} 486 487template<class Impl> 488bool 489LSQ<Impl>::isEmpty() const 490{ 491 return lqEmpty() && sqEmpty(); 492} 493 494template<class Impl> 495bool 496LSQ<Impl>::lqEmpty() const 497{ 498 list<ThreadID>::const_iterator threads = activeThreads->begin(); 499 list<ThreadID>::const_iterator end = activeThreads->end(); 500 501 while (threads != end) { 502 ThreadID tid = threads++; 503* 504 if (!thread[tid].lqEmpty()) 505 return false; 506 } 507 508 return true; 509} 510 511template<class Impl> 512bool 513LSQ<Impl>::sqEmpty() const 514{ 515 list<ThreadID>::const_iterator threads = activeThreads->begin(); 516 list<ThreadID>::const_iterator end = activeThreads->end(); 517 518 while (threads != end) { 519 ThreadID tid = threads++; 520* 521 if (!thread[tid].sqEmpty()) 522 return false; 523 } 524 525 return true; 526} 527 528template<class Impl> 529bool 530LSQ<Impl>::lqFull() 531{ 532 list<ThreadID>::iterator threads = activeThreads->begin(); 533 list<ThreadID>::iterator end = activeThreads->end(); 534 535 while (threads != end) { 536 ThreadID tid = threads++; 537* 538 if (!thread[tid].lqFull()) 539 return false; 540 } 541 542 return true; 543} 544 545template<class Impl> 546bool 547LSQ<Impl>::lqFull(ThreadID tid) 548{ 549 //@todo: Change to Calculate All Entries for 550 //Dynamic Policy 551 if (lsqPolicy == SMTQueuePolicy::Dynamic) 552 return lqFull(); 553 else 554 return thread[tid].lqFull(); 555} 556 557template<class Impl> 558bool 559LSQ<Impl>::sqFull() 560{ 561 list<ThreadID>::iterator threads = activeThreads->begin(); 562 list<ThreadID>::iterator end = activeThreads->end(); 563 564 while (threads != end) { 565 ThreadID tid = threads++; 566* 567 if (!sqFull(tid)) 568 return false; 569 } 570 571 return true; 572} 573 574template<class Impl> 575bool 576LSQ<Impl>::sqFull(ThreadID tid) 577{ 578 //@todo: Change to Calculate All Entries for 579 //Dynamic Policy 580 if (lsqPolicy == SMTQueuePolicy::Dynamic) 581 return sqFull(); 582 else 583 return thread[tid].sqFull(); 584} 585 586template<class Impl> 587bool 588LSQ<Impl>::isStalled() 589{ 590 list<ThreadID>::iterator threads = activeThreads->begin(); 591 list<ThreadID>::iterator end = activeThreads->end(); 592 593 while (threads != end) { 594 ThreadID tid = threads++; 595* 596 if (!thread[tid].isStalled()) 597 return false; 598 } 599 600 return true; 601} 602 603template<class Impl> 604bool 605LSQ<Impl>::isStalled(ThreadID tid) 606{ 607 if (lsqPolicy == SMTQueuePolicy::Dynamic) 608 return isStalled(); 609 else 610 return thread[tid].isStalled(); 611} 612 613template<class Impl> 614bool 615LSQ<Impl>::hasStoresToWB() 616{ 617 list<ThreadID>::iterator threads = activeThreads->begin(); 618 list<ThreadID>::iterator end = activeThreads->end(); 619 620 while (threads != end) { 621 ThreadID tid = threads++; 622* 623 if (hasStoresToWB(tid)) 624 return true; 625 } 626 627 return false; 628} 629 630template<class Impl> 631bool 632LSQ<Impl>::willWB() 633{ 634 list<ThreadID>::iterator threads = activeThreads->begin(); 635 list<ThreadID>::iterator end = activeThreads->end(); 636 637 while (threads != end) { 638 ThreadID tid = threads++; 639* 640 if (willWB(tid)) 641 return true; 642 } 643 644 return false; 645} 646 647template<class Impl> 648void 649LSQ<Impl>::dumpInsts() const 650{ 651 list<ThreadID>::const_iterator threads = activeThreads->begin(); 652 list<ThreadID>::const_iterator end = activeThreads->end(); 653 654 while (threads != end) { 655 ThreadID tid = threads++; 656* 657 thread[tid].dumpInsts(); 658 } 659} 660 661static Addr 662addrBlockOffset(Addr addr, unsigned int block_size) 663{ 664 return addr & (block_size - 1); 665} 666 667static Addr 668addrBlockAlign(Addr addr, uint64_t block_size) 669{ 670 return addr & ~(block_size - 1); 671} 672 673static bool 674transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size) 675{ 676 return (addrBlockOffset(addr, block_size) + size) > block_size; 677} 678 679template<class Impl> 680Fault 681LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t data, 682* unsigned int size, Addr addr, Request::Flags flags, 683 uint64_t res, AtomicOpFunctor amo_op) 684{ 685 // This comming request can be either load, store or atomic. 686 // Atomic request has a corresponding pointer to its atomic memory 687 // operation 688 bool isAtomic M5_VAR_USED = !isLoad && amo_op; 689 690 ThreadID tid = cpu->contextToThread(inst->contextId()); 691 auto cacheLineSize = cpu->cacheLineSize(); 692 bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize); 693 LSQRequest* req = nullptr; 694 695 // Atomic requests that access data across cache line boundary are 696 // currently not allowed since the cache does not guarantee corresponding 697 // atomic memory operations to be executed atomically across a cache line. 698 // For ISAs such as x86 that supports cross-cache-line atomic instructions, 699 // the cache needs to be modified to perform atomic update to both cache 700 // lines. For now, such cross-line update is not supported. 701 assert(!isAtomic \|\| (isAtomic && !needs_burst)); 702 703 if (inst->translationStarted()) { 704 req = inst->savedReq; 705 assert(req); 706 } else { 707 if (needs_burst) { 708 req = new SplitDataRequest(&thread[tid], inst, isLoad, addr, 709 size, flags, data, res); 710 } else { 711 req = new SingleDataRequest(&thread[tid], inst, isLoad, addr, 712 size, flags, data, res, amo_op); 713 } 714 assert(req); 715 inst->setRequest(); 716 req->taskId(cpu->taskId()); 717 718 req->initiateTranslation(); 719 } 720 721 /* This is the place were instructions get the effAddr. / 722* if (req->isTranslationComplete()) { 723 if (inst->getFault() == NoFault) { 724 inst->effAddr = req->getVaddr(); 725 inst->effSize = size; 726 inst->effAddrValid(true); 727 728 if (cpu->checker) { 729 inst->reqToVerify = std::make_shared<Request>(req->request()); 730* } 731 if (isLoad) 732 inst->getFault() = cpu->read(req, inst->lqIdx); 733 else 734 inst->getFault() = cpu->write(req, data, inst->sqIdx); 735 } else if (isLoad) { 736 // Commit will have to clean up whatever happened. Set this 737 // instruction as executed. 738 inst->setExecuted(); 739 } 740 } 741 742 if (inst->traceData) 743 inst->traceData->setMem(addr, size, flags); 744 745 return inst->getFault(); 746} 747 748template<class Impl> 749void 750LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req, 751 ThreadContext* tc, BaseTLB::Mode mode) 752{ 753 _fault.push_back(fault); 754 numInTranslationFragments = 0; 755 numTranslatedFragments = 1; 756 /* If the instruction has been squahsed, let the request know 757 * as it may have to self-destruct. / 758* if (_inst->isSquashed()) { 759 this->squashTranslation(); 760 } else { 761 _inst->strictlyOrdered(req->isStrictlyOrdered()); 762 763 flags.set(Flag::TranslationFinished); 764 if (fault == NoFault) { 765 _inst->physEffAddr = req->getPaddr(); 766 _inst->memReqFlags = req->getFlags(); 767 if (req->isCondSwap()) { 768 assert(_res); 769 req->setExtraData(_res); 770* } 771 setState(State::Request); 772 } else { 773 setState(State::Fault); 774 } 775 776 LSQRequest::_inst->fault = fault; 777 LSQRequest::_inst->translationCompleted(true); 778 } 779} 780 781template<class Impl> 782void 783LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req, 784 ThreadContext* tc, BaseTLB::Mode mode) 785{ 786 _fault.push_back(fault); 787 assert(req == _requests[numTranslatedFragments] \|\| this->isDelayed()); 788 789 numInTranslationFragments--; 790 numTranslatedFragments++; 791 792 mainReq->setFlags(req->getFlags()); 793 794 if (numTranslatedFragments == _requests.size()) { 795 if (_inst->isSquashed()) { 796 this->squashTranslation(); 797 } else { 798 _inst->strictlyOrdered(mainReq->isStrictlyOrdered()); 799 flags.set(Flag::TranslationFinished); 800 auto fault_it = _fault.begin(); 801 /* Ffwd to the first NoFault. / 802* while (fault_it != _fault.end() && fault_it == NoFault) 803* fault_it++; 804 /* If none of the fragments faulted: / 805* if (fault_it == _fault.end()) { 806 _inst->physEffAddr = request(0)->getPaddr(); 807 808 _inst->memReqFlags = mainReq->getFlags(); 809 if (mainReq->isCondSwap()) { 810 assert(_res); 811 mainReq->setExtraData(_res); 812* } 813 setState(State::Request); 814 _inst->fault = NoFault; 815 } else { 816 setState(State::Fault); 817 _inst->fault = fault_it; 818* } 819 _inst->translationCompleted(true); 820 } 821 } 822} 823 824template<class Impl> 825void 826LSQ<Impl>::SingleDataRequest::initiateTranslation() 827{ 828 _inst->translationStarted(true); 829 setState(State::Translation); 830 flags.set(Flag::TranslationStarted); 831 832 _inst->savedReq = this; 833 sendFragmentToTranslation(0); 834 835 if (isTranslationComplete()) { 836 } 837} 838 839template<class Impl> 840PacketPtr 841LSQ<Impl>::SplitDataRequest::mainPacket() 842{ 843 return _mainPacket; 844} 845 846template<class Impl> 847RequestPtr 848LSQ<Impl>::SplitDataRequest::mainRequest() 849{ 850 return mainReq; 851} 852 853template<class Impl> 854void 855LSQ<Impl>::SplitDataRequest::initiateTranslation() 856{ 857 _inst->translationStarted(true); 858 setState(State::Translation); 859 flags.set(Flag::TranslationStarted); 860 861 unsigned int cacheLineSize = _port.cacheLineSize(); 862 Addr base_addr = _addr; 863 Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize); 864 Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize); 865 uint32_t size_so_far = 0; 866 867 mainReq = std::make_shared<Request>(_inst->getASID(), base_addr, 868 _size, _flags, _inst->masterId(), 869 _inst->instAddr(), _inst->contextId()); 870 871 // Paddr is not used in mainReq. However, we will accumulate the flags 872 // from the sub requests into mainReq by calling setFlags() in finish(). 873 // setFlags() assumes that paddr is set so flip the paddr valid bit here to 874 // avoid a potential assert in setFlags() when we call it from finish(). 875 mainReq->setPaddr(0); 876 877 /* Get the pre-fix, possibly unaligned. / 878* _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr, 879 next_addr - base_addr, _flags, _inst->masterId(), 880 _inst->instAddr(), _inst->contextId())); 881 size_so_far = next_addr - base_addr; 882 883 /* We are block aligned now, reading whole blocks. / 884* base_addr = next_addr; 885 while (base_addr != final_addr) { 886 _requests.push_back(std::make_shared<Request>(_inst->getASID(), 887 base_addr, cacheLineSize, _flags, _inst->masterId(), 888 _inst->instAddr(), _inst->contextId())); 889 size_so_far += cacheLineSize; 890 base_addr += cacheLineSize; 891 } 892 893 /* Deal with the tail. / 894* if (size_so_far < _size) { 895 _requests.push_back(std::make_shared<Request>(_inst->getASID(), 896 base_addr, _size - size_so_far, _flags, _inst->masterId(), 897 _inst->instAddr(), _inst->contextId())); 898 } 899 900 /* Setup the requests and send them to translation. / 901* for (auto& r: _requests) { 902 r->setReqInstSeqNum(_inst->seqNum); 903 r->taskId(_taskId); 904 } 905 this->_inst->savedReq = this; 906 numInTranslationFragments = 0; 907 numTranslatedFragments = 0; 908 909 for (uint32_t i = 0; i < _requests.size(); i++) { 910 sendFragmentToTranslation(i); 911 } 912} 913 914template<class Impl> 915void 916LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i) 917{ 918 numInTranslationFragments++; 919 _port.dTLB()->translateTiming( 920 this->request(i), 921 this->_inst->thread->getTC(), this, 922 this->isLoad() ? BaseTLB::Read : BaseTLB::Write); 923} 924 925template<class Impl> 926bool 927LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt) 928{ 929 assert(_numOutstandingPackets == 1); 930 auto state = dynamic_cast<LSQSenderState>(pkt->senderState); 931* setState(State::Complete); 932 flags.set(Flag::Complete); 933 state->outstanding--; 934 assert(pkt == _packets.front()); 935 _port.completeDataAccess(pkt); 936 return true; 937} 938 939template<class Impl> 940bool 941LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt) 942{ 943 auto state = dynamic_cast<LSQSenderState>(pkt->senderState); 944* uint32_t pktIdx = 0; 945 while (pktIdx < _packets.size() && pkt != _packets[pktIdx]) 946 pktIdx++; 947 assert(pktIdx < _packets.size()); 948 assert(pkt->req == _requests[pktIdx]); 949 assert(pkt == _packets[pktIdx]); 950 numReceivedPackets++; 951 state->outstanding--; 952 if (numReceivedPackets == _packets.size()) { 953 setState(State::Complete); 954 flags.set(Flag::Complete); 955 /* Assemble packets. / 956* PacketPtr resp = isLoad() 957 ? Packet::createRead(mainReq) 958 : Packet::createWrite(mainReq); 959 if (isLoad()) 960 resp->dataStatic(_inst->memData); 961 else 962 resp->dataStatic(_data); 963 resp->senderState = _senderState; 964 _port.completeDataAccess(resp); 965 delete resp; 966 } 967 return true; 968} 969 970template<class Impl> 971void 972LSQ<Impl>::SingleDataRequest::buildPackets() 973{ 974 assert(_senderState); 975 /* Retries do not create new packets. / 976* if (_packets.size() == 0) { 977 _packets.push_back( 978 isLoad() 979 ? Packet::createRead(request()) 980 : Packet::createWrite(request())); 981 _packets.back()->dataStatic(_inst->memData); 982 _packets.back()->senderState = _senderState; 983 } 984 assert(_packets.size() == 1); 985} 986 987template<class Impl> 988void 989LSQ<Impl>::SplitDataRequest::buildPackets() 990{ 991 /* Extra data?? / 992* ptrdiff_t offset = 0; 993 if (_packets.size() == 0) { 994 /* New stuff / 995* if (isLoad()) { 996 _mainPacket = Packet::createRead(mainReq); 997 _mainPacket->dataStatic(_inst->memData); 998 } 999 for (auto& r: _requests) { 1000 PacketPtr pkt = isLoad() ? Packet::createRead(r) 1001 : Packet::createWrite(r); 1002 if (isLoad()) { 1003 pkt->dataStatic(_inst->memData + offset); 1004 } else { 1005 uint8_t* req_data = new uint8_t[r->getSize()]; 1006 std::memcpy(req_data, 1007 _inst->memData + offset, 1008 r->getSize()); 1009 pkt->dataDynamic(req_data); 1010 } 1011 offset += r->getSize(); 1012 pkt->senderState = _senderState; 1013 _packets.push_back(pkt); 1014 } 1015 } 1016 assert(_packets.size() == _requests.size()); 1017} 1018 1019template<class Impl> 1020void 1021LSQ<Impl>::SingleDataRequest::sendPacketToCache() 1022{ 1023 assert(_numOutstandingPackets == 0); 1024 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0))) 1025 _numOutstandingPackets = 1; 1026} 1027 1028template<class Impl> 1029void 1030LSQ<Impl>::SplitDataRequest::sendPacketToCache() 1031{ 1032 /* Try to send the packets. / 1033* while (numReceivedPackets + _numOutstandingPackets < _packets.size() && 1034 lsqUnit()->trySendPacket(isLoad(), 1035 _packets.at(numReceivedPackets + _numOutstandingPackets))) { 1036 _numOutstandingPackets++; 1037 } 1038} 1039 1040template<class Impl> 1041void 1042LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext thread, 1043* PacketPtr pkt) 1044{ 1045 TheISA::handleIprWrite(thread, pkt); 1046} 1047 1048template<class Impl> 1049void 1050LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext thread, 1051* PacketPtr mainPkt) 1052{ 1053 unsigned offset = 0; 1054 for (auto r: _requests) { 1055 PacketPtr pkt = new Packet(r, MemCmd::WriteReq); 1056 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset); 1057 TheISA::handleIprWrite(thread, pkt); 1058 offset += r->getSize(); 1059 delete pkt; 1060 } 1061} 1062 1063template<class Impl> 1064Cycles 1065LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext thread, 1066* PacketPtr pkt) 1067{ 1068 return TheISA::handleIprRead(thread, pkt); 1069} 1070 1071template<class Impl> 1072Cycles 1073LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext thread, 1074* PacketPtr mainPkt) 1075{ 1076 Cycles delay(0); 1077 unsigned offset = 0; 1078 1079 for (auto r: _requests) { 1080 PacketPtr pkt = new Packet(r, MemCmd::ReadReq); 1081 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset); 1082 Cycles d = TheISA::handleIprRead(thread, pkt); 1083 if (d > delay) 1084 delay = d; 1085 offset += r->getSize(); 1086 delete pkt; 1087 } 1088 return delay; 1089} 1090 1091template<class Impl> 1092bool 1093LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask) 1094{ 1095 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr); 1096} 1097 1098template<class Impl> 1099bool 1100LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask) 1101{ 1102 bool is_hit = false; 1103 for (auto &r: _requests) { 1104 if ((r->getPaddr() & blockMask) == blockAddr) { 1105 is_hit = true; 1106 break; 1107 } 1108 } 1109 return is_hit; 1110} 1111 1112#endif//__CPU_O3_LSQ_IMPL_HH__	226} 227 228template<class Impl> 229void 230LSQ<Impl>::insertLoad(const DynInstPtr &load_inst) 231{ 232 ThreadID tid = load_inst->threadNumber; 233 234 thread[tid].insertLoad(load_inst); 235} 236 237template<class Impl> 238void 239LSQ<Impl>::insertStore(const DynInstPtr &store_inst) 240{ 241 ThreadID tid = store_inst->threadNumber; 242 243 thread[tid].insertStore(store_inst); 244} 245 246template<class Impl> 247Fault 248LSQ<Impl>::executeLoad(const DynInstPtr &inst) 249{ 250 ThreadID tid = inst->threadNumber; 251 252 return thread[tid].executeLoad(inst); 253} 254 255template<class Impl> 256Fault 257LSQ<Impl>::executeStore(const DynInstPtr &inst) 258{ 259 ThreadID tid = inst->threadNumber; 260 261 return thread[tid].executeStore(inst); 262} 263 264template<class Impl> 265void 266LSQ<Impl>::writebackStores() 267{ 268 list<ThreadID>::iterator threads = activeThreads->begin(); 269 list<ThreadID>::iterator end = activeThreads->end(); 270 271 while (threads != end) { 272 ThreadID tid = threads++; 273* 274 if (numStoresToWB(tid) > 0) { 275 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores " 276 "available for Writeback.\n", tid, numStoresToWB(tid)); 277 } 278 279 thread[tid].writebackStores(); 280 } 281} 282 283template<class Impl> 284bool 285LSQ<Impl>::violation() 286{ 287 /* Answers: Does Anybody Have a Violation?/ 288* list<ThreadID>::iterator threads = activeThreads->begin(); 289 list<ThreadID>::iterator end = activeThreads->end(); 290 291 while (threads != end) { 292 ThreadID tid = threads++; 293* 294 if (thread[tid].violation()) 295 return true; 296 } 297 298 return false; 299} 300 301template <class Impl> 302void 303LSQ<Impl>::recvReqRetry() 304{ 305 iewStage->cacheUnblocked(); 306 cacheBlocked(false); 307 308 for (ThreadID tid : activeThreads) { 309* thread[tid].recvRetry(); 310 } 311} 312 313template <class Impl> 314void 315LSQ<Impl>::completeDataAccess(PacketPtr pkt) 316{ 317 auto senderState = dynamic_cast<LSQSenderState>(pkt->senderState); 318* thread[cpu->contextToThread(senderState->contextId())] 319 .completeDataAccess(pkt); 320} 321 322template <class Impl> 323bool 324LSQ<Impl>::recvTimingResp(PacketPtr pkt) 325{ 326 if (pkt->isError()) 327 DPRINTF(LSQ, "Got error packet back for address: %#X\n", 328 pkt->getAddr()); 329 330 auto senderState = dynamic_cast<LSQSenderState>(pkt->senderState); 331* panic_if(!senderState, "Got packet back with unknown sender state\n"); 332 333 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt); 334 335 if (pkt->isInvalidate()) { 336 // This response also contains an invalidate; e.g. this can be the case 337 // if cmd is ReadRespWithInvalidate. 338 // 339 // The calling order between completeDataAccess and checkSnoop matters. 340 // By calling checkSnoop after completeDataAccess, we ensure that the 341 // fault set by checkSnoop is not lost. Calling writeback (more 342 // specifically inst->completeAcc) in completeDataAccess overwrites 343 // fault, and in case this instruction requires squashing (as 344 // determined by checkSnoop), the ReExec fault set by checkSnoop would 345 // be lost otherwise. 346 347 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n", 348 pkt->getAddr()); 349 350 for (ThreadID tid = 0; tid < numThreads; tid++) { 351 thread[tid].checkSnoop(pkt); 352 } 353 } 354 // Update the LSQRequest state (this may delete the request) 355 senderState->request()->packetReplied(); 356 357 return true; 358} 359 360template <class Impl> 361void 362LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt) 363{ 364 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(), 365 pkt->cmdString()); 366 367 // must be a snoop 368 if (pkt->isInvalidate()) { 369 DPRINTF(LSQ, "received invalidation for addr:%#x\n", 370 pkt->getAddr()); 371 for (ThreadID tid = 0; tid < numThreads; tid++) { 372 thread[tid].checkSnoop(pkt); 373 } 374 } 375} 376 377template<class Impl> 378int 379LSQ<Impl>::getCount() 380{ 381 unsigned total = 0; 382 383 list<ThreadID>::iterator threads = activeThreads->begin(); 384 list<ThreadID>::iterator end = activeThreads->end(); 385 386 while (threads != end) { 387 ThreadID tid = threads++; 388* 389 total += getCount(tid); 390 } 391 392 return total; 393} 394 395template<class Impl> 396int 397LSQ<Impl>::numLoads() 398{ 399 unsigned total = 0; 400 401 list<ThreadID>::iterator threads = activeThreads->begin(); 402 list<ThreadID>::iterator end = activeThreads->end(); 403 404 while (threads != end) { 405 ThreadID tid = threads++; 406* 407 total += numLoads(tid); 408 } 409 410 return total; 411} 412 413template<class Impl> 414int 415LSQ<Impl>::numStores() 416{ 417 unsigned total = 0; 418 419 list<ThreadID>::iterator threads = activeThreads->begin(); 420 list<ThreadID>::iterator end = activeThreads->end(); 421 422 while (threads != end) { 423 ThreadID tid = threads++; 424* 425 total += thread[tid].numStores(); 426 } 427 428 return total; 429} 430 431template<class Impl> 432unsigned 433LSQ<Impl>::numFreeLoadEntries() 434{ 435 unsigned total = 0; 436 437 list<ThreadID>::iterator threads = activeThreads->begin(); 438 list<ThreadID>::iterator end = activeThreads->end(); 439 440 while (threads != end) { 441 ThreadID tid = threads++; 442* 443 total += thread[tid].numFreeLoadEntries(); 444 } 445 446 return total; 447} 448 449template<class Impl> 450unsigned 451LSQ<Impl>::numFreeStoreEntries() 452{ 453 unsigned total = 0; 454 455 list<ThreadID>::iterator threads = activeThreads->begin(); 456 list<ThreadID>::iterator end = activeThreads->end(); 457 458 while (threads != end) { 459 ThreadID tid = threads++; 460* 461 total += thread[tid].numFreeStoreEntries(); 462 } 463 464 return total; 465} 466 467template<class Impl> 468unsigned 469LSQ<Impl>::numFreeLoadEntries(ThreadID tid) 470{ 471 return thread[tid].numFreeLoadEntries(); 472} 473 474template<class Impl> 475unsigned 476LSQ<Impl>::numFreeStoreEntries(ThreadID tid) 477{ 478 return thread[tid].numFreeStoreEntries(); 479} 480 481template<class Impl> 482bool 483LSQ<Impl>::isFull() 484{ 485 list<ThreadID>::iterator threads = activeThreads->begin(); 486 list<ThreadID>::iterator end = activeThreads->end(); 487 488 while (threads != end) { 489 ThreadID tid = threads++; 490* 491 if (!(thread[tid].lqFull() \|\| thread[tid].sqFull())) 492 return false; 493 } 494 495 return true; 496} 497 498template<class Impl> 499bool 500LSQ<Impl>::isFull(ThreadID tid) 501{ 502 //@todo: Change to Calculate All Entries for 503 //Dynamic Policy 504 if (lsqPolicy == SMTQueuePolicy::Dynamic) 505 return isFull(); 506 else 507 return thread[tid].lqFull() \|\| thread[tid].sqFull(); 508} 509 510template<class Impl> 511bool 512LSQ<Impl>::isEmpty() const 513{ 514 return lqEmpty() && sqEmpty(); 515} 516 517template<class Impl> 518bool 519LSQ<Impl>::lqEmpty() const 520{ 521 list<ThreadID>::const_iterator threads = activeThreads->begin(); 522 list<ThreadID>::const_iterator end = activeThreads->end(); 523 524 while (threads != end) { 525 ThreadID tid = threads++; 526* 527 if (!thread[tid].lqEmpty()) 528 return false; 529 } 530 531 return true; 532} 533 534template<class Impl> 535bool 536LSQ<Impl>::sqEmpty() const 537{ 538 list<ThreadID>::const_iterator threads = activeThreads->begin(); 539 list<ThreadID>::const_iterator end = activeThreads->end(); 540 541 while (threads != end) { 542 ThreadID tid = threads++; 543* 544 if (!thread[tid].sqEmpty()) 545 return false; 546 } 547 548 return true; 549} 550 551template<class Impl> 552bool 553LSQ<Impl>::lqFull() 554{ 555 list<ThreadID>::iterator threads = activeThreads->begin(); 556 list<ThreadID>::iterator end = activeThreads->end(); 557 558 while (threads != end) { 559 ThreadID tid = threads++; 560* 561 if (!thread[tid].lqFull()) 562 return false; 563 } 564 565 return true; 566} 567 568template<class Impl> 569bool 570LSQ<Impl>::lqFull(ThreadID tid) 571{ 572 //@todo: Change to Calculate All Entries for 573 //Dynamic Policy 574 if (lsqPolicy == SMTQueuePolicy::Dynamic) 575 return lqFull(); 576 else 577 return thread[tid].lqFull(); 578} 579 580template<class Impl> 581bool 582LSQ<Impl>::sqFull() 583{ 584 list<ThreadID>::iterator threads = activeThreads->begin(); 585 list<ThreadID>::iterator end = activeThreads->end(); 586 587 while (threads != end) { 588 ThreadID tid = threads++; 589* 590 if (!sqFull(tid)) 591 return false; 592 } 593 594 return true; 595} 596 597template<class Impl> 598bool 599LSQ<Impl>::sqFull(ThreadID tid) 600{ 601 //@todo: Change to Calculate All Entries for 602 //Dynamic Policy 603 if (lsqPolicy == SMTQueuePolicy::Dynamic) 604 return sqFull(); 605 else 606 return thread[tid].sqFull(); 607} 608 609template<class Impl> 610bool 611LSQ<Impl>::isStalled() 612{ 613 list<ThreadID>::iterator threads = activeThreads->begin(); 614 list<ThreadID>::iterator end = activeThreads->end(); 615 616 while (threads != end) { 617 ThreadID tid = threads++; 618* 619 if (!thread[tid].isStalled()) 620 return false; 621 } 622 623 return true; 624} 625 626template<class Impl> 627bool 628LSQ<Impl>::isStalled(ThreadID tid) 629{ 630 if (lsqPolicy == SMTQueuePolicy::Dynamic) 631 return isStalled(); 632 else 633 return thread[tid].isStalled(); 634} 635 636template<class Impl> 637bool 638LSQ<Impl>::hasStoresToWB() 639{ 640 list<ThreadID>::iterator threads = activeThreads->begin(); 641 list<ThreadID>::iterator end = activeThreads->end(); 642 643 while (threads != end) { 644 ThreadID tid = threads++; 645* 646 if (hasStoresToWB(tid)) 647 return true; 648 } 649 650 return false; 651} 652 653template<class Impl> 654bool 655LSQ<Impl>::willWB() 656{ 657 list<ThreadID>::iterator threads = activeThreads->begin(); 658 list<ThreadID>::iterator end = activeThreads->end(); 659 660 while (threads != end) { 661 ThreadID tid = threads++; 662* 663 if (willWB(tid)) 664 return true; 665 } 666 667 return false; 668} 669 670template<class Impl> 671void 672LSQ<Impl>::dumpInsts() const 673{ 674 list<ThreadID>::const_iterator threads = activeThreads->begin(); 675 list<ThreadID>::const_iterator end = activeThreads->end(); 676 677 while (threads != end) { 678 ThreadID tid = threads++; 679* 680 thread[tid].dumpInsts(); 681 } 682} 683 684static Addr 685addrBlockOffset(Addr addr, unsigned int block_size) 686{ 687 return addr & (block_size - 1); 688} 689 690static Addr 691addrBlockAlign(Addr addr, uint64_t block_size) 692{ 693 return addr & ~(block_size - 1); 694} 695 696static bool 697transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size) 698{ 699 return (addrBlockOffset(addr, block_size) + size) > block_size; 700} 701 702template<class Impl> 703Fault 704LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t data, 705* unsigned int size, Addr addr, Request::Flags flags, 706 uint64_t res, AtomicOpFunctor amo_op) 707{ 708 // This comming request can be either load, store or atomic. 709 // Atomic request has a corresponding pointer to its atomic memory 710 // operation 711 bool isAtomic M5_VAR_USED = !isLoad && amo_op; 712 713 ThreadID tid = cpu->contextToThread(inst->contextId()); 714 auto cacheLineSize = cpu->cacheLineSize(); 715 bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize); 716 LSQRequest* req = nullptr; 717 718 // Atomic requests that access data across cache line boundary are 719 // currently not allowed since the cache does not guarantee corresponding 720 // atomic memory operations to be executed atomically across a cache line. 721 // For ISAs such as x86 that supports cross-cache-line atomic instructions, 722 // the cache needs to be modified to perform atomic update to both cache 723 // lines. For now, such cross-line update is not supported. 724 assert(!isAtomic \|\| (isAtomic && !needs_burst)); 725 726 if (inst->translationStarted()) { 727 req = inst->savedReq; 728 assert(req); 729 } else { 730 if (needs_burst) { 731 req = new SplitDataRequest(&thread[tid], inst, isLoad, addr, 732 size, flags, data, res); 733 } else { 734 req = new SingleDataRequest(&thread[tid], inst, isLoad, addr, 735 size, flags, data, res, amo_op); 736 } 737 assert(req); 738 inst->setRequest(); 739 req->taskId(cpu->taskId()); 740 741 req->initiateTranslation(); 742 } 743 744 /* This is the place were instructions get the effAddr. / 745* if (req->isTranslationComplete()) { 746 if (inst->getFault() == NoFault) { 747 inst->effAddr = req->getVaddr(); 748 inst->effSize = size; 749 inst->effAddrValid(true); 750 751 if (cpu->checker) { 752 inst->reqToVerify = std::make_shared<Request>(req->request()); 753* } 754 if (isLoad) 755 inst->getFault() = cpu->read(req, inst->lqIdx); 756 else 757 inst->getFault() = cpu->write(req, data, inst->sqIdx); 758 } else if (isLoad) { 759 // Commit will have to clean up whatever happened. Set this 760 // instruction as executed. 761 inst->setExecuted(); 762 } 763 } 764 765 if (inst->traceData) 766 inst->traceData->setMem(addr, size, flags); 767 768 return inst->getFault(); 769} 770 771template<class Impl> 772void 773LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req, 774 ThreadContext* tc, BaseTLB::Mode mode) 775{ 776 _fault.push_back(fault); 777 numInTranslationFragments = 0; 778 numTranslatedFragments = 1; 779 /* If the instruction has been squahsed, let the request know 780 * as it may have to self-destruct. / 781* if (_inst->isSquashed()) { 782 this->squashTranslation(); 783 } else { 784 _inst->strictlyOrdered(req->isStrictlyOrdered()); 785 786 flags.set(Flag::TranslationFinished); 787 if (fault == NoFault) { 788 _inst->physEffAddr = req->getPaddr(); 789 _inst->memReqFlags = req->getFlags(); 790 if (req->isCondSwap()) { 791 assert(_res); 792 req->setExtraData(_res); 793* } 794 setState(State::Request); 795 } else { 796 setState(State::Fault); 797 } 798 799 LSQRequest::_inst->fault = fault; 800 LSQRequest::_inst->translationCompleted(true); 801 } 802} 803 804template<class Impl> 805void 806LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req, 807 ThreadContext* tc, BaseTLB::Mode mode) 808{ 809 _fault.push_back(fault); 810 assert(req == _requests[numTranslatedFragments] \|\| this->isDelayed()); 811 812 numInTranslationFragments--; 813 numTranslatedFragments++; 814 815 mainReq->setFlags(req->getFlags()); 816 817 if (numTranslatedFragments == _requests.size()) { 818 if (_inst->isSquashed()) { 819 this->squashTranslation(); 820 } else { 821 _inst->strictlyOrdered(mainReq->isStrictlyOrdered()); 822 flags.set(Flag::TranslationFinished); 823 auto fault_it = _fault.begin(); 824 /* Ffwd to the first NoFault. / 825* while (fault_it != _fault.end() && fault_it == NoFault) 826* fault_it++; 827 /* If none of the fragments faulted: / 828* if (fault_it == _fault.end()) { 829 _inst->physEffAddr = request(0)->getPaddr(); 830 831 _inst->memReqFlags = mainReq->getFlags(); 832 if (mainReq->isCondSwap()) { 833 assert(_res); 834 mainReq->setExtraData(_res); 835* } 836 setState(State::Request); 837 _inst->fault = NoFault; 838 } else { 839 setState(State::Fault); 840 _inst->fault = fault_it; 841* } 842 _inst->translationCompleted(true); 843 } 844 } 845} 846 847template<class Impl> 848void 849LSQ<Impl>::SingleDataRequest::initiateTranslation() 850{ 851 _inst->translationStarted(true); 852 setState(State::Translation); 853 flags.set(Flag::TranslationStarted); 854 855 _inst->savedReq = this; 856 sendFragmentToTranslation(0); 857 858 if (isTranslationComplete()) { 859 } 860} 861 862template<class Impl> 863PacketPtr 864LSQ<Impl>::SplitDataRequest::mainPacket() 865{ 866 return _mainPacket; 867} 868 869template<class Impl> 870RequestPtr 871LSQ<Impl>::SplitDataRequest::mainRequest() 872{ 873 return mainReq; 874} 875 876template<class Impl> 877void 878LSQ<Impl>::SplitDataRequest::initiateTranslation() 879{ 880 _inst->translationStarted(true); 881 setState(State::Translation); 882 flags.set(Flag::TranslationStarted); 883 884 unsigned int cacheLineSize = _port.cacheLineSize(); 885 Addr base_addr = _addr; 886 Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize); 887 Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize); 888 uint32_t size_so_far = 0; 889 890 mainReq = std::make_shared<Request>(_inst->getASID(), base_addr, 891 _size, _flags, _inst->masterId(), 892 _inst->instAddr(), _inst->contextId()); 893 894 // Paddr is not used in mainReq. However, we will accumulate the flags 895 // from the sub requests into mainReq by calling setFlags() in finish(). 896 // setFlags() assumes that paddr is set so flip the paddr valid bit here to 897 // avoid a potential assert in setFlags() when we call it from finish(). 898 mainReq->setPaddr(0); 899 900 /* Get the pre-fix, possibly unaligned. / 901* _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr, 902 next_addr - base_addr, _flags, _inst->masterId(), 903 _inst->instAddr(), _inst->contextId())); 904 size_so_far = next_addr - base_addr; 905 906 /* We are block aligned now, reading whole blocks. / 907* base_addr = next_addr; 908 while (base_addr != final_addr) { 909 _requests.push_back(std::make_shared<Request>(_inst->getASID(), 910 base_addr, cacheLineSize, _flags, _inst->masterId(), 911 _inst->instAddr(), _inst->contextId())); 912 size_so_far += cacheLineSize; 913 base_addr += cacheLineSize; 914 } 915 916 /* Deal with the tail. / 917* if (size_so_far < _size) { 918 _requests.push_back(std::make_shared<Request>(_inst->getASID(), 919 base_addr, _size - size_so_far, _flags, _inst->masterId(), 920 _inst->instAddr(), _inst->contextId())); 921 } 922 923 /* Setup the requests and send them to translation. / 924* for (auto& r: _requests) { 925 r->setReqInstSeqNum(_inst->seqNum); 926 r->taskId(_taskId); 927 } 928 this->_inst->savedReq = this; 929 numInTranslationFragments = 0; 930 numTranslatedFragments = 0; 931 932 for (uint32_t i = 0; i < _requests.size(); i++) { 933 sendFragmentToTranslation(i); 934 } 935} 936 937template<class Impl> 938void 939LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i) 940{ 941 numInTranslationFragments++; 942 _port.dTLB()->translateTiming( 943 this->request(i), 944 this->_inst->thread->getTC(), this, 945 this->isLoad() ? BaseTLB::Read : BaseTLB::Write); 946} 947 948template<class Impl> 949bool 950LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt) 951{ 952 assert(_numOutstandingPackets == 1); 953 auto state = dynamic_cast<LSQSenderState>(pkt->senderState); 954* setState(State::Complete); 955 flags.set(Flag::Complete); 956 state->outstanding--; 957 assert(pkt == _packets.front()); 958 _port.completeDataAccess(pkt); 959 return true; 960} 961 962template<class Impl> 963bool 964LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt) 965{ 966 auto state = dynamic_cast<LSQSenderState>(pkt->senderState); 967* uint32_t pktIdx = 0; 968 while (pktIdx < _packets.size() && pkt != _packets[pktIdx]) 969 pktIdx++; 970 assert(pktIdx < _packets.size()); 971 assert(pkt->req == _requests[pktIdx]); 972 assert(pkt == _packets[pktIdx]); 973 numReceivedPackets++; 974 state->outstanding--; 975 if (numReceivedPackets == _packets.size()) { 976 setState(State::Complete); 977 flags.set(Flag::Complete); 978 /* Assemble packets. / 979* PacketPtr resp = isLoad() 980 ? Packet::createRead(mainReq) 981 : Packet::createWrite(mainReq); 982 if (isLoad()) 983 resp->dataStatic(_inst->memData); 984 else 985 resp->dataStatic(_data); 986 resp->senderState = _senderState; 987 _port.completeDataAccess(resp); 988 delete resp; 989 } 990 return true; 991} 992 993template<class Impl> 994void 995LSQ<Impl>::SingleDataRequest::buildPackets() 996{ 997 assert(_senderState); 998 /* Retries do not create new packets. / 999* if (_packets.size() == 0) { 1000 _packets.push_back( 1001 isLoad() 1002 ? Packet::createRead(request()) 1003 : Packet::createWrite(request())); 1004 _packets.back()->dataStatic(_inst->memData); 1005 _packets.back()->senderState = _senderState; 1006 } 1007 assert(_packets.size() == 1); 1008} 1009 1010template<class Impl> 1011void 1012LSQ<Impl>::SplitDataRequest::buildPackets() 1013{ 1014 /* Extra data?? / 1015* ptrdiff_t offset = 0; 1016 if (_packets.size() == 0) { 1017 /* New stuff / 1018* if (isLoad()) { 1019 _mainPacket = Packet::createRead(mainReq); 1020 _mainPacket->dataStatic(_inst->memData); 1021 } 1022 for (auto& r: _requests) { 1023 PacketPtr pkt = isLoad() ? Packet::createRead(r) 1024 : Packet::createWrite(r); 1025 if (isLoad()) { 1026 pkt->dataStatic(_inst->memData + offset); 1027 } else { 1028 uint8_t* req_data = new uint8_t[r->getSize()]; 1029 std::memcpy(req_data, 1030 _inst->memData + offset, 1031 r->getSize()); 1032 pkt->dataDynamic(req_data); 1033 } 1034 offset += r->getSize(); 1035 pkt->senderState = _senderState; 1036 _packets.push_back(pkt); 1037 } 1038 } 1039 assert(_packets.size() == _requests.size()); 1040} 1041 1042template<class Impl> 1043void 1044LSQ<Impl>::SingleDataRequest::sendPacketToCache() 1045{ 1046 assert(_numOutstandingPackets == 0); 1047 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0))) 1048 _numOutstandingPackets = 1; 1049} 1050 1051template<class Impl> 1052void 1053LSQ<Impl>::SplitDataRequest::sendPacketToCache() 1054{ 1055 /* Try to send the packets. / 1056* while (numReceivedPackets + _numOutstandingPackets < _packets.size() && 1057 lsqUnit()->trySendPacket(isLoad(), 1058 _packets.at(numReceivedPackets + _numOutstandingPackets))) { 1059 _numOutstandingPackets++; 1060 } 1061} 1062 1063template<class Impl> 1064void 1065LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext thread, 1066* PacketPtr pkt) 1067{ 1068 TheISA::handleIprWrite(thread, pkt); 1069} 1070 1071template<class Impl> 1072void 1073LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext thread, 1074* PacketPtr mainPkt) 1075{ 1076 unsigned offset = 0; 1077 for (auto r: _requests) { 1078 PacketPtr pkt = new Packet(r, MemCmd::WriteReq); 1079 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset); 1080 TheISA::handleIprWrite(thread, pkt); 1081 offset += r->getSize(); 1082 delete pkt; 1083 } 1084} 1085 1086template<class Impl> 1087Cycles 1088LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext thread, 1089* PacketPtr pkt) 1090{ 1091 return TheISA::handleIprRead(thread, pkt); 1092} 1093 1094template<class Impl> 1095Cycles 1096LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext thread, 1097* PacketPtr mainPkt) 1098{ 1099 Cycles delay(0); 1100 unsigned offset = 0; 1101 1102 for (auto r: _requests) { 1103 PacketPtr pkt = new Packet(r, MemCmd::ReadReq); 1104 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset); 1105 Cycles d = TheISA::handleIprRead(thread, pkt); 1106 if (d > delay) 1107 delay = d; 1108 offset += r->getSize(); 1109 delete pkt; 1110 } 1111 return delay; 1112} 1113 1114template<class Impl> 1115bool 1116LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask) 1117{ 1118 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr); 1119} 1120 1121template<class Impl> 1122bool 1123LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask) 1124{ 1125 bool is_hit = false; 1126 for (auto &r: _requests) { 1127 if ((r->getPaddr() & blockMask) == blockAddr) { 1128 is_hit = true; 1129 break; 1130 } 1131 } 1132 return is_hit; 1133} 1134 1135#endif//__CPU_O3_LSQ_IMPL_HH__