// cpu.cc revision 13910:d5deee7b4279
1/* 2 * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2006 The Regents of The University of Michigan 16 * Copyright (c) 2011 Regents of the University of California 17 * All rights reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions are 21 * met: redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer; 23 * redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution; 26 * neither the name of the copyright holders nor the names of its 27 * contributors may be used to endorse or promote products derived from 28 * this software without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 * 42 * Authors: Kevin Lim 43 * Korey Sewell 44 * Rick Strong 45 */ 46 47#include "cpu/o3/cpu.hh" 48 49#include "arch/generic/traits.hh" 50#include "arch/kernel_stats.hh" 51#include "config/the_isa.hh" 52#include "cpu/activity.hh" 53#include "cpu/checker/cpu.hh" 54#include "cpu/checker/thread_context.hh" 55#include "cpu/o3/isa_specific.hh" 56#include "cpu/o3/thread_context.hh" 57#include "cpu/quiesce_event.hh" 58#include "cpu/simple_thread.hh" 59#include "cpu/thread_context.hh" 60#include "debug/Activity.hh" 61#include "debug/Drain.hh" 62#include "debug/O3CPU.hh" 63#include "debug/Quiesce.hh" 64#include "enums/MemoryMode.hh" 65#include "sim/core.hh" 66#include "sim/full_system.hh" 67#include "sim/process.hh" 68#include "sim/stat_control.hh" 69#include "sim/system.hh" 70 71#if THE_ISA == ALPHA_ISA 72#include "arch/alpha/osfpal.hh" 73#include "debug/Activity.hh" 74 75#endif 76 77struct BaseCPUParams; 78 79using namespace TheISA; 80using namespace std; 81 82BaseO3CPU::BaseO3CPU(BaseCPUParams *params) 83 : BaseCPU(params) 84{ 85} 86 87void 88BaseO3CPU::regStats() 89{ 90 BaseCPU::regStats(); 91} 92 93template<class Impl> 94bool 95FullO3CPU<Impl>::IcachePort::recvTimingResp(PacketPtr pkt) 96{ 97 DPRINTF(O3CPU, "Fetch unit received timing\n"); 98 // We shouldn't ever get a cacheable block in Modified state 99 assert(pkt->req->isUncacheable() || 100 !(pkt->cacheResponding() && !pkt->hasSharers())); 101 
fetch->processCacheCompletion(pkt); 102 103 return true; 104} 105 106template<class Impl> 107void 108FullO3CPU<Impl>::IcachePort::recvReqRetry() 109{ 110 fetch->recvReqRetry(); 111} 112 113template <class Impl> 114bool 115FullO3CPU<Impl>::DcachePort::recvTimingResp(PacketPtr pkt) 116{ 117 return lsq->recvTimingResp(pkt); 118} 119 120template <class Impl> 121void 122FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt) 123{ 124 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { 125 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { 126 cpu->wakeup(tid); 127 } 128 } 129 lsq->recvTimingSnoopReq(pkt); 130} 131 132template <class Impl> 133void 134FullO3CPU<Impl>::DcachePort::recvReqRetry() 135{ 136 lsq->recvReqRetry(); 137} 138 139template <class Impl> 140FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) 141 : BaseO3CPU(params), 142 itb(params->itb), 143 dtb(params->dtb), 144 tickEvent([this]{ tick(); }, "FullO3CPU tick", 145 false, Event::CPU_Tick_Pri), 146 threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads", 147 false, Event::CPU_Exit_Pri), 148#ifndef NDEBUG 149 instcount(0), 150#endif 151 removeInstsThisCycle(false), 152 fetch(this, params), 153 decode(this, params), 154 rename(this, params), 155 iew(this, params), 156 commit(this, params), 157 158 /* It is mandatory that all SMT threads use the same renaming mode as 159 * they are sharing registers and rename */ 160 vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])), 161 regFile(params->numPhysIntRegs, 162 params->numPhysFloatRegs, 163 params->numPhysVecRegs, 164 params->numPhysVecPredRegs, 165 params->numPhysCCRegs, 166 vecMode), 167 168 freeList(name() + ".freelist", ®File), 169 170 rob(this, params), 171 172 scoreboard(name() + ".scoreboard", 173 regFile.totalNumPhysRegs()), 174 175 isa(numThreads, NULL), 176 177 icachePort(&fetch, this), 178 dcachePort(&iew.ldstQueue, this), 179 180 timeBuffer(params->backComSize, params->forwardComSize), 181 fetchQueue(params->backComSize, 
params->forwardComSize), 182 decodeQueue(params->backComSize, params->forwardComSize), 183 renameQueue(params->backComSize, params->forwardComSize), 184 iewQueue(params->backComSize, params->forwardComSize), 185 activityRec(name(), NumStages, 186 params->backComSize + params->forwardComSize, 187 params->activity), 188 189 globalSeqNum(1), 190 system(params->system), 191 lastRunningCycle(curCycle()) 192{ 193 if (!params->switched_out) { 194 _status = Running; 195 } else { 196 _status = SwitchedOut; 197 } 198 199 if (params->checker) { 200 BaseCPU *temp_checker = params->checker; 201 checker = dynamic_cast<Checker<Impl> *>(temp_checker); 202 checker->setIcachePort(&icachePort); 203 checker->setSystem(params->system); 204 } else { 205 checker = NULL; 206 } 207 208 if (!FullSystem) { 209 thread.resize(numThreads); 210 tids.resize(numThreads); 211 } 212 213 // The stages also need their CPU pointer setup. However this 214 // must be done at the upper level CPU because they have pointers 215 // to the upper level CPU, and not this FullO3CPU. 216 217 // Set up Pointers to the activeThreads list for each stage 218 fetch.setActiveThreads(&activeThreads); 219 decode.setActiveThreads(&activeThreads); 220 rename.setActiveThreads(&activeThreads); 221 iew.setActiveThreads(&activeThreads); 222 commit.setActiveThreads(&activeThreads); 223 224 // Give each of the stages the time buffer they will use. 225 fetch.setTimeBuffer(&timeBuffer); 226 decode.setTimeBuffer(&timeBuffer); 227 rename.setTimeBuffer(&timeBuffer); 228 iew.setTimeBuffer(&timeBuffer); 229 commit.setTimeBuffer(&timeBuffer); 230 231 // Also setup each of the stages' queues. 
232 fetch.setFetchQueue(&fetchQueue); 233 decode.setFetchQueue(&fetchQueue); 234 commit.setFetchQueue(&fetchQueue); 235 decode.setDecodeQueue(&decodeQueue); 236 rename.setDecodeQueue(&decodeQueue); 237 rename.setRenameQueue(&renameQueue); 238 iew.setRenameQueue(&renameQueue); 239 iew.setIEWQueue(&iewQueue); 240 commit.setIEWQueue(&iewQueue); 241 commit.setRenameQueue(&renameQueue); 242 243 commit.setIEWStage(&iew); 244 rename.setIEWStage(&iew); 245 rename.setCommitStage(&commit); 246 247 ThreadID active_threads; 248 if (FullSystem) { 249 active_threads = 1; 250 } else { 251 active_threads = params->workload.size(); 252 253 if (active_threads > Impl::MaxThreads) { 254 panic("Workload Size too large. Increase the 'MaxThreads' " 255 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) " 256 "or edit your workload size."); 257 } 258 } 259 260 //Make Sure That this a Valid Architeture 261 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); 262 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); 263 assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); 264 assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); 265 assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); 266 267 rename.setScoreboard(&scoreboard); 268 iew.setScoreboard(&scoreboard); 269 270 // Setup the rename map for whichever stages need it. 271 for (ThreadID tid = 0; tid < numThreads; tid++) { 272 isa[tid] = params->isa[tid]; 273 assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0])); 274 275 // Only Alpha has an FP zero register, so for other ISAs we 276 // use an invalid FP register index to avoid special treatment 277 // of any valid FP reg. 278 RegIndex invalidFPReg = TheISA::NumFloatRegs + 1; 279 RegIndex fpZeroReg = 280 (THE_ISA == ALPHA_ISA) ? 
TheISA::ZeroReg : invalidFPReg; 281 282 commitRenameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 283 &freeList, 284 vecMode); 285 286 renameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 287 &freeList, vecMode); 288 } 289 290 // Initialize rename map to assign physical registers to the 291 // architectural registers for active threads only. 292 for (ThreadID tid = 0; tid < active_threads; tid++) { 293 for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) { 294 // Note that we can't use the rename() method because we don't 295 // want special treatment for the zero register at this point 296 PhysRegIdPtr phys_reg = freeList.getIntReg(); 297 renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 298 commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 299 } 300 301 for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) { 302 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 303 renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg); 304 commitRenameMap[tid].setEntry( 305 RegId(FloatRegClass, ridx), phys_reg); 306 } 307 308 /* Here we need two 'interfaces' the 'whole register' and the 309 * 'register element'. At any point only one of them will be 310 * active. 
*/ 311 if (vecMode == Enums::Full) { 312 /* Initialize the full-vector interface */ 313 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 314 RegId rid = RegId(VecRegClass, ridx); 315 PhysRegIdPtr phys_reg = freeList.getVecReg(); 316 renameMap[tid].setEntry(rid, phys_reg); 317 commitRenameMap[tid].setEntry(rid, phys_reg); 318 } 319 } else { 320 /* Initialize the vector-element interface */ 321 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 322 for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg; 323 ++ldx) { 324 RegId lrid = RegId(VecElemClass, ridx, ldx); 325 PhysRegIdPtr phys_elem = freeList.getVecElem(); 326 renameMap[tid].setEntry(lrid, phys_elem); 327 commitRenameMap[tid].setEntry(lrid, phys_elem); 328 } 329 } 330 } 331 332 for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { 333 PhysRegIdPtr phys_reg = freeList.getVecPredReg(); 334 renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); 335 commitRenameMap[tid].setEntry( 336 RegId(VecPredRegClass, ridx), phys_reg); 337 } 338 339 for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) { 340 PhysRegIdPtr phys_reg = freeList.getCCReg(); 341 renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 342 commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 343 } 344 } 345 346 rename.setRenameMap(renameMap); 347 commit.setRenameMap(commitRenameMap); 348 rename.setFreeList(&freeList); 349 350 // Setup the ROB for whichever stages need it. 351 commit.setROB(&rob); 352 353 lastActivatedCycle = 0; 354#if 0 355 // Give renameMap & rename stage access to the freeList; 356 for (ThreadID tid = 0; tid < numThreads; tid++) 357 globalSeqNum[tid] = 1; 358#endif 359 360 DPRINTF(O3CPU, "Creating O3CPU object.\n"); 361 362 // Setup any thread state. 363 this->thread.resize(this->numThreads); 364 365 for (ThreadID tid = 0; tid < this->numThreads; ++tid) { 366 if (FullSystem) { 367 // SMT is not supported in FS mode yet. 
368 assert(this->numThreads == 1); 369 this->thread[tid] = new Thread(this, 0, NULL); 370 } else { 371 if (tid < params->workload.size()) { 372 DPRINTF(O3CPU, "Workload[%i] process is %#x", 373 tid, this->thread[tid]); 374 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 375 (typename Impl::O3CPU *)(this), 376 tid, params->workload[tid]); 377 378 //usedTids[tid] = true; 379 //threadMap[tid] = tid; 380 } else { 381 //Allocate Empty thread so M5 can use later 382 //when scheduling threads to CPU 383 Process* dummy_proc = NULL; 384 385 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 386 (typename Impl::O3CPU *)(this), 387 tid, dummy_proc); 388 //usedTids[tid] = false; 389 } 390 } 391 392 ThreadContext *tc; 393 394 // Setup the TC that will serve as the interface to the threads/CPU. 395 O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>; 396 397 tc = o3_tc; 398 399 // If we're using a checker, then the TC should be the 400 // CheckerThreadContext. 401 if (params->checker) { 402 tc = new CheckerThreadContext<O3ThreadContext<Impl> >( 403 o3_tc, this->checker); 404 } 405 406 o3_tc->cpu = (typename Impl::O3CPU *)(this); 407 assert(o3_tc->cpu); 408 o3_tc->thread = this->thread[tid]; 409 410 // Setup quiesce event. 411 this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc); 412 413 // Give the thread the TC. 414 this->thread[tid]->tc = tc; 415 416 // Add the TC to the CPU's list of TC's. 417 this->threadContexts.push_back(tc); 418 } 419 420 // FullO3CPU always requires an interrupt controller. 
421 if (!params->switched_out && interrupts.empty()) { 422 fatal("FullO3CPU %s has no interrupt controller.\n" 423 "Ensure createInterruptController() is called.\n", name()); 424 } 425 426 for (ThreadID tid = 0; tid < this->numThreads; tid++) 427 this->thread[tid]->setFuncExeInst(0); 428} 429 430template <class Impl> 431FullO3CPU<Impl>::~FullO3CPU() 432{ 433} 434 435template <class Impl> 436void 437FullO3CPU<Impl>::regProbePoints() 438{ 439 BaseCPU::regProbePoints(); 440 441 ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete"); 442 ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete"); 443 444 fetch.regProbePoints(); 445 rename.regProbePoints(); 446 iew.regProbePoints(); 447 commit.regProbePoints(); 448} 449 450template <class Impl> 451void 452FullO3CPU<Impl>::regStats() 453{ 454 BaseO3CPU::regStats(); 455 456 // Register any of the O3CPU's stats here. 457 timesIdled 458 .name(name() + ".timesIdled") 459 .desc("Number of times that the entire CPU went into an idle state and" 460 " unscheduled itself") 461 .prereq(timesIdled); 462 463 idleCycles 464 .name(name() + ".idleCycles") 465 .desc("Total number of cycles that the CPU has spent unscheduled due " 466 "to idling") 467 .prereq(idleCycles); 468 469 quiesceCycles 470 .name(name() + ".quiesceCycles") 471 .desc("Total number of cycles that CPU has spent quiesced or waiting " 472 "for an interrupt") 473 .prereq(quiesceCycles); 474 475 // Number of Instructions simulated 476 // -------------------------------- 477 // Should probably be in Base CPU but need templated 478 // MaxThreads so put in here instead 479 committedInsts 480 .init(numThreads) 481 .name(name() + ".committedInsts") 482 .desc("Number of Instructions Simulated") 483 .flags(Stats::total); 484 485 committedOps 486 .init(numThreads) 487 .name(name() + ".committedOps") 488 .desc("Number of Ops (including micro ops) Simulated") 489 .flags(Stats::total); 
490 491 cpi 492 .name(name() + ".cpi") 493 .desc("CPI: Cycles Per Instruction") 494 .precision(6); 495 cpi = numCycles / committedInsts; 496 497 totalCpi 498 .name(name() + ".cpi_total") 499 .desc("CPI: Total CPI of All Threads") 500 .precision(6); 501 totalCpi = numCycles / sum(committedInsts); 502 503 ipc 504 .name(name() + ".ipc") 505 .desc("IPC: Instructions Per Cycle") 506 .precision(6); 507 ipc = committedInsts / numCycles; 508 509 totalIpc 510 .name(name() + ".ipc_total") 511 .desc("IPC: Total IPC of All Threads") 512 .precision(6); 513 totalIpc = sum(committedInsts) / numCycles; 514 515 this->fetch.regStats(); 516 this->decode.regStats(); 517 this->rename.regStats(); 518 this->iew.regStats(); 519 this->commit.regStats(); 520 this->rob.regStats(); 521 522 intRegfileReads 523 .name(name() + ".int_regfile_reads") 524 .desc("number of integer regfile reads") 525 .prereq(intRegfileReads); 526 527 intRegfileWrites 528 .name(name() + ".int_regfile_writes") 529 .desc("number of integer regfile writes") 530 .prereq(intRegfileWrites); 531 532 fpRegfileReads 533 .name(name() + ".fp_regfile_reads") 534 .desc("number of floating regfile reads") 535 .prereq(fpRegfileReads); 536 537 fpRegfileWrites 538 .name(name() + ".fp_regfile_writes") 539 .desc("number of floating regfile writes") 540 .prereq(fpRegfileWrites); 541 542 vecRegfileReads 543 .name(name() + ".vec_regfile_reads") 544 .desc("number of vector regfile reads") 545 .prereq(vecRegfileReads); 546 547 vecRegfileWrites 548 .name(name() + ".vec_regfile_writes") 549 .desc("number of vector regfile writes") 550 .prereq(vecRegfileWrites); 551 552 vecPredRegfileReads 553 .name(name() + ".pred_regfile_reads") 554 .desc("number of predicate regfile reads") 555 .prereq(vecPredRegfileReads); 556 557 vecPredRegfileWrites 558 .name(name() + ".pred_regfile_writes") 559 .desc("number of predicate regfile writes") 560 .prereq(vecPredRegfileWrites); 561 562 ccRegfileReads 563 .name(name() + ".cc_regfile_reads") 564 .desc("number 
of cc regfile reads") 565 .prereq(ccRegfileReads); 566 567 ccRegfileWrites 568 .name(name() + ".cc_regfile_writes") 569 .desc("number of cc regfile writes") 570 .prereq(ccRegfileWrites); 571 572 miscRegfileReads 573 .name(name() + ".misc_regfile_reads") 574 .desc("number of misc regfile reads") 575 .prereq(miscRegfileReads); 576 577 miscRegfileWrites 578 .name(name() + ".misc_regfile_writes") 579 .desc("number of misc regfile writes") 580 .prereq(miscRegfileWrites); 581} 582 583template <class Impl> 584void 585FullO3CPU<Impl>::tick() 586{ 587 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); 588 assert(!switchedOut()); 589 assert(drainState() != DrainState::Drained); 590 591 ++numCycles; 592 updateCycleCounters(BaseCPU::CPU_STATE_ON); 593 594// activity = false; 595 596 //Tick each of the stages 597 fetch.tick(); 598 599 decode.tick(); 600 601 rename.tick(); 602 603 iew.tick(); 604 605 commit.tick(); 606 607 // Now advance the time buffers 608 timeBuffer.advance(); 609 610 fetchQueue.advance(); 611 decodeQueue.advance(); 612 renameQueue.advance(); 613 iewQueue.advance(); 614 615 activityRec.advance(); 616 617 if (removeInstsThisCycle) { 618 cleanUpRemovedInsts(); 619 } 620 621 if (!tickEvent.scheduled()) { 622 if (_status == SwitchedOut) { 623 DPRINTF(O3CPU, "Switched out!\n"); 624 // increment stat 625 lastRunningCycle = curCycle(); 626 } else if (!activityRec.active() || _status == Idle) { 627 DPRINTF(O3CPU, "Idle!\n"); 628 lastRunningCycle = curCycle(); 629 timesIdled++; 630 } else { 631 schedule(tickEvent, clockEdge(Cycles(1))); 632 DPRINTF(O3CPU, "Scheduling next tick!\n"); 633 } 634 } 635 636 if (!FullSystem) 637 updateThreadPriority(); 638 639 tryDrain(); 640} 641 642template <class Impl> 643void 644FullO3CPU<Impl>::init() 645{ 646 BaseCPU::init(); 647 648 for (ThreadID tid = 0; tid < numThreads; ++tid) { 649 // Set noSquashFromTC so that the CPU doesn't squash when initially 650 // setting up registers. 
651 thread[tid]->noSquashFromTC = true; 652 // Initialise the ThreadContext's memory proxies 653 thread[tid]->initMemProxies(thread[tid]->getTC()); 654 } 655 656 if (FullSystem && !params()->switched_out) { 657 for (ThreadID tid = 0; tid < numThreads; tid++) { 658 ThreadContext *src_tc = threadContexts[tid]; 659 TheISA::initCPU(src_tc, src_tc->contextId()); 660 } 661 } 662 663 // Clear noSquashFromTC. 664 for (int tid = 0; tid < numThreads; ++tid) 665 thread[tid]->noSquashFromTC = false; 666 667 commit.setThreads(thread); 668} 669 670template <class Impl> 671void 672FullO3CPU<Impl>::startup() 673{ 674 BaseCPU::startup(); 675 for (int tid = 0; tid < numThreads; ++tid) 676 isa[tid]->startup(threadContexts[tid]); 677 678 fetch.startupStage(); 679 decode.startupStage(); 680 iew.startupStage(); 681 rename.startupStage(); 682 commit.startupStage(); 683} 684 685template <class Impl> 686void 687FullO3CPU<Impl>::activateThread(ThreadID tid) 688{ 689 list<ThreadID>::iterator isActive = 690 std::find(activeThreads.begin(), activeThreads.end(), tid); 691 692 DPRINTF(O3CPU, "[tid:%i] Calling activate thread.\n", tid); 693 assert(!switchedOut()); 694 695 if (isActive == activeThreads.end()) { 696 DPRINTF(O3CPU, "[tid:%i] Adding to active threads list\n", 697 tid); 698 699 activeThreads.push_back(tid); 700 } 701} 702 703template <class Impl> 704void 705FullO3CPU<Impl>::deactivateThread(ThreadID tid) 706{ 707 //Remove From Active List, if Active 708 list<ThreadID>::iterator thread_it = 709 std::find(activeThreads.begin(), activeThreads.end(), tid); 710 711 DPRINTF(O3CPU, "[tid:%i] Calling deactivate thread.\n", tid); 712 assert(!switchedOut()); 713 714 if (thread_it != activeThreads.end()) { 715 DPRINTF(O3CPU,"[tid:%i] Removing from active threads list\n", 716 tid); 717 activeThreads.erase(thread_it); 718 } 719 720 fetch.deactivateThread(tid); 721 commit.deactivateThread(tid); 722} 723 724template <class Impl> 725Counter 726FullO3CPU<Impl>::totalInsts() const 727{ 728 Counter 
total(0); 729 730 ThreadID size = thread.size(); 731 for (ThreadID i = 0; i < size; i++) 732 total += thread[i]->numInst; 733 734 return total; 735} 736 737template <class Impl> 738Counter 739FullO3CPU<Impl>::totalOps() const 740{ 741 Counter total(0); 742 743 ThreadID size = thread.size(); 744 for (ThreadID i = 0; i < size; i++) 745 total += thread[i]->numOp; 746 747 return total; 748} 749 750template <class Impl> 751void 752FullO3CPU<Impl>::activateContext(ThreadID tid) 753{ 754 assert(!switchedOut()); 755 756 // Needs to set each stage to running as well. 757 activateThread(tid); 758 759 // We don't want to wake the CPU if it is drained. In that case, 760 // we just want to flag the thread as active and schedule the tick 761 // event from drainResume() instead. 762 if (drainState() == DrainState::Drained) 763 return; 764 765 // If we are time 0 or if the last activation time is in the past, 766 // schedule the next tick and wake up the fetch unit 767 if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) { 768 scheduleTickEvent(Cycles(0)); 769 770 // Be sure to signal that there's some activity so the CPU doesn't 771 // deschedule itself. 772 activityRec.activity(); 773 fetch.wakeFromQuiesce(); 774 775 Cycles cycles(curCycle() - lastRunningCycle); 776 // @todo: This is an oddity that is only here to match the stats 777 if (cycles != 0) 778 --cycles; 779 quiesceCycles += cycles; 780 781 lastActivatedCycle = curTick(); 782 783 _status = Running; 784 785 BaseCPU::activateContext(tid); 786 } 787} 788 789template <class Impl> 790void 791FullO3CPU<Impl>::suspendContext(ThreadID tid) 792{ 793 DPRINTF(O3CPU,"[tid:%i] Suspending Thread Context.\n", tid); 794 assert(!switchedOut()); 795 796 deactivateThread(tid); 797 798 // If this was the last thread then unschedule the tick event. 
799 if (activeThreads.size() == 0) { 800 unscheduleTickEvent(); 801 lastRunningCycle = curCycle(); 802 _status = Idle; 803 } 804 805 DPRINTF(Quiesce, "Suspending Context\n"); 806 807 BaseCPU::suspendContext(tid); 808} 809 810template <class Impl> 811void 812FullO3CPU<Impl>::haltContext(ThreadID tid) 813{ 814 //For now, this is the same as deallocate 815 DPRINTF(O3CPU,"[tid:%i] Halt Context called. Deallocating\n", tid); 816 assert(!switchedOut()); 817 818 deactivateThread(tid); 819 removeThread(tid); 820 821 updateCycleCounters(BaseCPU::CPU_STATE_SLEEP); 822} 823 824template <class Impl> 825void 826FullO3CPU<Impl>::insertThread(ThreadID tid) 827{ 828 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); 829 // Will change now that the PC and thread state is internal to the CPU 830 // and not in the ThreadContext. 831 ThreadContext *src_tc; 832 if (FullSystem) 833 src_tc = system->threadContexts[tid]; 834 else 835 src_tc = tcBase(tid); 836 837 //Bind Int Regs to Rename Map 838 839 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs; 840 reg_id.index()++) { 841 PhysRegIdPtr phys_reg = freeList.getIntReg(); 842 renameMap[tid].setEntry(reg_id, phys_reg); 843 scoreboard.setReg(phys_reg); 844 } 845 846 //Bind Float Regs to Rename Map 847 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs; 848 reg_id.index()++) { 849 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 850 renameMap[tid].setEntry(reg_id, phys_reg); 851 scoreboard.setReg(phys_reg); 852 } 853 854 //Bind condition-code Regs to Rename Map 855 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; 856 reg_id.index()++) { 857 PhysRegIdPtr phys_reg = freeList.getCCReg(); 858 renameMap[tid].setEntry(reg_id, phys_reg); 859 scoreboard.setReg(phys_reg); 860 } 861 862 //Copy Thread Data Into RegFile 863 //this->copyFromTC(tid); 864 865 //Set PC/NPC/NNPC 866 pcState(src_tc->pcState(), tid); 867 868 src_tc->setStatus(ThreadContext::Active); 869 870 
activateContext(tid); 871 872 //Reset ROB/IQ/LSQ Entries 873 commit.rob->resetEntries(); 874} 875 876template <class Impl> 877void 878FullO3CPU<Impl>::removeThread(ThreadID tid) 879{ 880 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); 881 882 // Copy Thread Data From RegFile 883 // If thread is suspended, it might be re-allocated 884 // this->copyToTC(tid); 885 886 887 // @todo: 2-27-2008: Fix how we free up rename mappings 888 // here to alleviate the case for double-freeing registers 889 // in SMT workloads. 890 891 // clear all thread-specific states in each stage of the pipeline 892 // since this thread is going to be completely removed from the CPU 893 commit.clearStates(tid); 894 fetch.clearStates(tid); 895 decode.clearStates(tid); 896 rename.clearStates(tid); 897 iew.clearStates(tid); 898 899 // at this step, all instructions in the pipeline should be already 900 // either committed successfully or squashed. All thread-specific 901 // queues in the pipeline must be empty. 902 assert(iew.instQueue.getCount(tid) == 0); 903 assert(iew.ldstQueue.getCount(tid) == 0); 904 assert(commit.rob->isEmpty(tid)); 905 906 // Reset ROB/IQ/LSQ Entries 907 908 // Commented out for now. This should be possible to do by 909 // telling all the pipeline stages to drain first, and then 910 // checking until the drain completes. Once the pipeline is 911 // drained, call resetEntries(). 
- 10-09-06 ktlim 912/* 913 if (activeThreads.size() >= 1) { 914 commit.rob->resetEntries(); 915 iew.resetEntries(); 916 } 917*/ 918} 919 920template <class Impl> 921void 922FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist) 923{ 924 auto pc = this->pcState(tid); 925 926 // new_mode is the new vector renaming mode 927 auto new_mode = RenameMode<TheISA::ISA>::mode(pc); 928 929 // We update vecMode only if there has been a change 930 if (new_mode != vecMode) { 931 vecMode = new_mode; 932 933 renameMap[tid].switchMode(vecMode); 934 commitRenameMap[tid].switchMode(vecMode); 935 renameMap[tid].switchFreeList(freelist); 936 } 937} 938 939template <class Impl> 940Fault 941FullO3CPU<Impl>::getInterrupts() 942{ 943 // Check if there are any outstanding interrupts 944 return this->interrupts[0]->getInterrupt(this->threadContexts[0]); 945} 946 947template <class Impl> 948void 949FullO3CPU<Impl>::processInterrupts(const Fault &interrupt) 950{ 951 // Check for interrupts here. For now can copy the code that 952 // exists within isa_fullsys_traits.hh. Also assume that thread 0 953 // is the one that handles the interrupts. 954 // @todo: Possibly consolidate the interrupt checking code. 955 // @todo: Allow other threads to handle interrupts. 956 957 assert(interrupt != NoFault); 958 this->interrupts[0]->updateIntrInfo(this->threadContexts[0]); 959 960 DPRINTF(O3CPU, "Interrupt %s being handled\n", interrupt->name()); 961 this->trap(interrupt, 0, nullptr); 962} 963 964template <class Impl> 965void 966FullO3CPU<Impl>::trap(const Fault &fault, ThreadID tid, 967 const StaticInstPtr &inst) 968{ 969 // Pass the thread's TC into the invoke method. 
970 fault->invoke(this->threadContexts[tid], inst); 971} 972 973template <class Impl> 974void 975FullO3CPU<Impl>::syscall(int64_t callnum, ThreadID tid, Fault *fault) 976{ 977 DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); 978 979 DPRINTF(Activity,"Activity: syscall() called.\n"); 980 981 // Temporarily increase this by one to account for the syscall 982 // instruction. 983 ++(this->thread[tid]->funcExeInst); 984 985 // Execute the actual syscall. 986 this->thread[tid]->syscall(callnum, fault); 987 988 // Decrease funcExeInst by one as the normal commit will handle 989 // incrementing it. 990 --(this->thread[tid]->funcExeInst); 991} 992 993template <class Impl> 994void 995FullO3CPU<Impl>::serializeThread(CheckpointOut &cp, ThreadID tid) const 996{ 997 thread[tid]->serialize(cp); 998} 999 1000template <class Impl> 1001void 1002FullO3CPU<Impl>::unserializeThread(CheckpointIn &cp, ThreadID tid) 1003{ 1004 thread[tid]->unserialize(cp); 1005} 1006 1007template <class Impl> 1008DrainState 1009FullO3CPU<Impl>::drain() 1010{ 1011 // Deschedule any power gating event (if any) 1012 deschedulePowerGatingEvent(); 1013 1014 // If the CPU isn't doing anything, then return immediately. 1015 if (switchedOut()) 1016 return DrainState::Drained; 1017 1018 DPRINTF(Drain, "Draining...\n"); 1019 1020 // We only need to signal a drain to the commit stage as this 1021 // initiates squashing controls the draining. Once the commit 1022 // stage commits an instruction where it is safe to stop, it'll 1023 // squash the rest of the instructions in the pipeline and force 1024 // the fetch stage to stall. The pipeline will be drained once all 1025 // in-flight instructions have retired. 1026 commit.drain(); 1027 1028 // Wake the CPU and record activity so everything can drain out if 1029 // the CPU was not able to immediately drain. 
1030 if (!isDrained()) { 1031 // If a thread is suspended, wake it up so it can be drained 1032 for (auto t : threadContexts) { 1033 if (t->status() == ThreadContext::Suspended){ 1034 DPRINTF(Drain, "Currently suspended so activate %i \n", 1035 t->threadId()); 1036 t->activate(); 1037 // As the thread is now active, change the power state as well 1038 activateContext(t->threadId()); 1039 } 1040 } 1041 1042 wakeCPU(); 1043 activityRec.activity(); 1044 1045 DPRINTF(Drain, "CPU not drained\n"); 1046 1047 return DrainState::Draining; 1048 } else { 1049 DPRINTF(Drain, "CPU is already drained\n"); 1050 if (tickEvent.scheduled()) 1051 deschedule(tickEvent); 1052 1053 // Flush out any old data from the time buffers. In 1054 // particular, there might be some data in flight from the 1055 // fetch stage that isn't visible in any of the CPU buffers we 1056 // test in isDrained(). 1057 for (int i = 0; i < timeBuffer.getSize(); ++i) { 1058 timeBuffer.advance(); 1059 fetchQueue.advance(); 1060 decodeQueue.advance(); 1061 renameQueue.advance(); 1062 iewQueue.advance(); 1063 } 1064 1065 drainSanityCheck(); 1066 return DrainState::Drained; 1067 } 1068} 1069 1070template <class Impl> 1071bool 1072FullO3CPU<Impl>::tryDrain() 1073{ 1074 if (drainState() != DrainState::Draining || !isDrained()) 1075 return false; 1076 1077 if (tickEvent.scheduled()) 1078 deschedule(tickEvent); 1079 1080 DPRINTF(Drain, "CPU done draining, processing drain event\n"); 1081 signalDrainDone(); 1082 1083 return true; 1084} 1085 1086template <class Impl> 1087void 1088FullO3CPU<Impl>::drainSanityCheck() const 1089{ 1090 assert(isDrained()); 1091 fetch.drainSanityCheck(); 1092 decode.drainSanityCheck(); 1093 rename.drainSanityCheck(); 1094 iew.drainSanityCheck(); 1095 commit.drainSanityCheck(); 1096} 1097 1098template <class Impl> 1099bool 1100FullO3CPU<Impl>::isDrained() const 1101{ 1102 bool drained(true); 1103 1104 if (!instList.empty() || !removeList.empty()) { 1105 DPRINTF(Drain, "Main CPU structures not 
drained.\n"); 1106 drained = false; 1107 } 1108 1109 if (!fetch.isDrained()) { 1110 DPRINTF(Drain, "Fetch not drained.\n"); 1111 drained = false; 1112 } 1113 1114 if (!decode.isDrained()) { 1115 DPRINTF(Drain, "Decode not drained.\n"); 1116 drained = false; 1117 } 1118 1119 if (!rename.isDrained()) { 1120 DPRINTF(Drain, "Rename not drained.\n"); 1121 drained = false; 1122 } 1123 1124 if (!iew.isDrained()) { 1125 DPRINTF(Drain, "IEW not drained.\n"); 1126 drained = false; 1127 } 1128 1129 if (!commit.isDrained()) { 1130 DPRINTF(Drain, "Commit not drained.\n"); 1131 drained = false; 1132 } 1133 1134 return drained; 1135} 1136 1137template <class Impl> 1138void 1139FullO3CPU<Impl>::commitDrained(ThreadID tid) 1140{ 1141 fetch.drainStall(tid); 1142} 1143 1144template <class Impl> 1145void 1146FullO3CPU<Impl>::drainResume() 1147{ 1148 if (switchedOut()) 1149 return; 1150 1151 DPRINTF(Drain, "Resuming...\n"); 1152 verifyMemoryMode(); 1153 1154 fetch.drainResume(); 1155 commit.drainResume(); 1156 1157 _status = Idle; 1158 for (ThreadID i = 0; i < thread.size(); i++) { 1159 if (thread[i]->status() == ThreadContext::Active) { 1160 DPRINTF(Drain, "Activating thread: %i\n", i); 1161 activateThread(i); 1162 _status = Running; 1163 } 1164 } 1165 1166 assert(!tickEvent.scheduled()); 1167 if (_status == Running) 1168 schedule(tickEvent, nextCycle()); 1169 1170 // Reschedule any power gating event (if any) 1171 schedulePowerGatingEvent(); 1172} 1173 1174template <class Impl> 1175void 1176FullO3CPU<Impl>::switchOut() 1177{ 1178 DPRINTF(O3CPU, "Switching out\n"); 1179 BaseCPU::switchOut(); 1180 1181 activityRec.reset(); 1182 1183 _status = SwitchedOut; 1184 1185 if (checker) 1186 checker->switchOut(); 1187} 1188 1189template <class Impl> 1190void 1191FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) 1192{ 1193 BaseCPU::takeOverFrom(oldCPU); 1194 1195 fetch.takeOverFrom(); 1196 decode.takeOverFrom(); 1197 rename.takeOverFrom(); 1198 iew.takeOverFrom(); 1199 commit.takeOverFrom(); 1200 
1201 assert(!tickEvent.scheduled()); 1202 1203 FullO3CPU<Impl> *oldO3CPU = dynamic_cast<FullO3CPU<Impl>*>(oldCPU); 1204 if (oldO3CPU) 1205 globalSeqNum = oldO3CPU->globalSeqNum; 1206 1207 lastRunningCycle = curCycle(); 1208 _status = Idle; 1209} 1210 1211template <class Impl> 1212void 1213FullO3CPU<Impl>::verifyMemoryMode() const 1214{ 1215 if (!system->isTimingMode()) { 1216 fatal("The O3 CPU requires the memory system to be in " 1217 "'timing' mode.\n"); 1218 } 1219} 1220 1221template <class Impl> 1222RegVal 1223FullO3CPU<Impl>::readMiscRegNoEffect(int misc_reg, ThreadID tid) const 1224{ 1225 return this->isa[tid]->readMiscRegNoEffect(misc_reg); 1226} 1227 1228template <class Impl> 1229RegVal 1230FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid) 1231{ 1232 miscRegfileReads++; 1233 return this->isa[tid]->readMiscReg(misc_reg, tcBase(tid)); 1234} 1235 1236template <class Impl> 1237void 1238FullO3CPU<Impl>::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) 1239{ 1240 this->isa[tid]->setMiscRegNoEffect(misc_reg, val); 1241} 1242 1243template <class Impl> 1244void 1245FullO3CPU<Impl>::setMiscReg(int misc_reg, RegVal val, ThreadID tid) 1246{ 1247 miscRegfileWrites++; 1248 this->isa[tid]->setMiscReg(misc_reg, val, tcBase(tid)); 1249} 1250 1251template <class Impl> 1252RegVal 1253FullO3CPU<Impl>::readIntReg(PhysRegIdPtr phys_reg) 1254{ 1255 intRegfileReads++; 1256 return regFile.readIntReg(phys_reg); 1257} 1258 1259template <class Impl> 1260RegVal 1261FullO3CPU<Impl>::readFloatReg(PhysRegIdPtr phys_reg) 1262{ 1263 fpRegfileReads++; 1264 return regFile.readFloatReg(phys_reg); 1265} 1266 1267template <class Impl> 1268auto 1269FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const 1270 -> const VecRegContainer& 1271{ 1272 vecRegfileReads++; 1273 return regFile.readVecReg(phys_reg); 1274} 1275 1276template <class Impl> 1277auto 1278FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg) 1279 -> VecRegContainer& 1280{ 1281 vecRegfileWrites++; 1282 return 
regFile.getWritableVecReg(phys_reg); 1283} 1284 1285template <class Impl> 1286auto 1287FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& 1288{ 1289 vecRegfileReads++; 1290 return regFile.readVecElem(phys_reg); 1291} 1292 1293template <class Impl> 1294auto 1295FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const 1296 -> const VecPredRegContainer& 1297{ 1298 vecPredRegfileReads++; 1299 return regFile.readVecPredReg(phys_reg); 1300} 1301 1302template <class Impl> 1303auto 1304FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg) 1305 -> VecPredRegContainer& 1306{ 1307 vecPredRegfileWrites++; 1308 return regFile.getWritableVecPredReg(phys_reg); 1309} 1310 1311template <class Impl> 1312RegVal 1313FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) 1314{ 1315 ccRegfileReads++; 1316 return regFile.readCCReg(phys_reg); 1317} 1318 1319template <class Impl> 1320void 1321FullO3CPU<Impl>::setIntReg(PhysRegIdPtr phys_reg, RegVal val) 1322{ 1323 intRegfileWrites++; 1324 regFile.setIntReg(phys_reg, val); 1325} 1326 1327template <class Impl> 1328void 1329FullO3CPU<Impl>::setFloatReg(PhysRegIdPtr phys_reg, RegVal val) 1330{ 1331 fpRegfileWrites++; 1332 regFile.setFloatReg(phys_reg, val); 1333} 1334 1335template <class Impl> 1336void 1337FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) 1338{ 1339 vecRegfileWrites++; 1340 regFile.setVecReg(phys_reg, val); 1341} 1342 1343template <class Impl> 1344void 1345FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) 1346{ 1347 vecRegfileWrites++; 1348 regFile.setVecElem(phys_reg, val); 1349} 1350 1351template <class Impl> 1352void 1353FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg, 1354 const VecPredRegContainer& val) 1355{ 1356 vecPredRegfileWrites++; 1357 regFile.setVecPredReg(phys_reg, val); 1358} 1359 1360template <class Impl> 1361void 1362FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, RegVal val) 1363{ 1364 ccRegfileWrites++; 1365 
regFile.setCCReg(phys_reg, val); 1366} 1367 1368template <class Impl> 1369RegVal 1370FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) 1371{ 1372 intRegfileReads++; 1373 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1374 RegId(IntRegClass, reg_idx)); 1375 1376 return regFile.readIntReg(phys_reg); 1377} 1378 1379template <class Impl> 1380RegVal 1381FullO3CPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid) 1382{ 1383 fpRegfileReads++; 1384 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1385 RegId(FloatRegClass, reg_idx)); 1386 1387 return regFile.readFloatReg(phys_reg); 1388} 1389 1390template <class Impl> 1391auto 1392FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const 1393 -> const VecRegContainer& 1394{ 1395 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1396 RegId(VecRegClass, reg_idx)); 1397 return readVecReg(phys_reg); 1398} 1399 1400template <class Impl> 1401auto 1402FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid) 1403 -> VecRegContainer& 1404{ 1405 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1406 RegId(VecRegClass, reg_idx)); 1407 return getWritableVecReg(phys_reg); 1408} 1409 1410template <class Impl> 1411auto 1412FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1413 ThreadID tid) const -> const VecElem& 1414{ 1415 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1416 RegId(VecElemClass, reg_idx, ldx)); 1417 return readVecElem(phys_reg); 1418} 1419 1420template <class Impl> 1421auto 1422FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const 1423 -> const VecPredRegContainer& 1424{ 1425 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1426 RegId(VecPredRegClass, reg_idx)); 1427 return readVecPredReg(phys_reg); 1428} 1429 1430template <class Impl> 1431auto 1432FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid) 1433 -> VecPredRegContainer& 1434{ 1435 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1436 
RegId(VecPredRegClass, reg_idx)); 1437 return getWritableVecPredReg(phys_reg); 1438} 1439 1440template <class Impl> 1441RegVal 1442FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) 1443{ 1444 ccRegfileReads++; 1445 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1446 RegId(CCRegClass, reg_idx)); 1447 1448 return regFile.readCCReg(phys_reg); 1449} 1450 1451template <class Impl> 1452void 1453FullO3CPU<Impl>::setArchIntReg(int reg_idx, RegVal val, ThreadID tid) 1454{ 1455 intRegfileWrites++; 1456 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1457 RegId(IntRegClass, reg_idx)); 1458 1459 regFile.setIntReg(phys_reg, val); 1460} 1461 1462template <class Impl> 1463void 1464FullO3CPU<Impl>::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid) 1465{ 1466 fpRegfileWrites++; 1467 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1468 RegId(FloatRegClass, reg_idx)); 1469 1470 regFile.setFloatReg(phys_reg, val); 1471} 1472 1473template <class Impl> 1474void 1475FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val, 1476 ThreadID tid) 1477{ 1478 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1479 RegId(VecRegClass, reg_idx)); 1480 setVecReg(phys_reg, val); 1481} 1482 1483template <class Impl> 1484void 1485FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1486 const VecElem& val, ThreadID tid) 1487{ 1488 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1489 RegId(VecElemClass, reg_idx, ldx)); 1490 setVecElem(phys_reg, val); 1491} 1492 1493template <class Impl> 1494void 1495FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, 1496 ThreadID tid) 1497{ 1498 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1499 RegId(VecPredRegClass, reg_idx)); 1500 setVecPredReg(phys_reg, val); 1501} 1502 1503template <class Impl> 1504void 1505FullO3CPU<Impl>::setArchCCReg(int reg_idx, RegVal val, ThreadID tid) 1506{ 1507 ccRegfileWrites++; 1508 PhysRegIdPtr phys_reg = 
commitRenameMap[tid].lookup( 1509 RegId(CCRegClass, reg_idx)); 1510 1511 regFile.setCCReg(phys_reg, val); 1512} 1513 1514template <class Impl> 1515TheISA::PCState 1516FullO3CPU<Impl>::pcState(ThreadID tid) 1517{ 1518 return commit.pcState(tid); 1519} 1520 1521template <class Impl> 1522void 1523FullO3CPU<Impl>::pcState(const TheISA::PCState &val, ThreadID tid) 1524{ 1525 commit.pcState(val, tid); 1526} 1527 1528template <class Impl> 1529Addr 1530FullO3CPU<Impl>::instAddr(ThreadID tid) 1531{ 1532 return commit.instAddr(tid); 1533} 1534 1535template <class Impl> 1536Addr 1537FullO3CPU<Impl>::nextInstAddr(ThreadID tid) 1538{ 1539 return commit.nextInstAddr(tid); 1540} 1541 1542template <class Impl> 1543MicroPC 1544FullO3CPU<Impl>::microPC(ThreadID tid) 1545{ 1546 return commit.microPC(tid); 1547} 1548 1549template <class Impl> 1550void 1551FullO3CPU<Impl>::squashFromTC(ThreadID tid) 1552{ 1553 this->thread[tid]->noSquashFromTC = true; 1554 this->commit.generateTCEvent(tid); 1555} 1556 1557template <class Impl> 1558typename FullO3CPU<Impl>::ListIt 1559FullO3CPU<Impl>::addInst(const DynInstPtr &inst) 1560{ 1561 instList.push_back(inst); 1562 1563 return --(instList.end()); 1564} 1565 1566template <class Impl> 1567void 1568FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst) 1569{ 1570 // Keep an instruction count. 1571 if (!inst->isMicroop() || inst->isLastMicroop()) { 1572 thread[tid]->numInst++; 1573 thread[tid]->numInsts++; 1574 committedInsts[tid]++; 1575 system->totalNumInsts++; 1576 1577 // Check for instruction-count-based events. 
1578 comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); 1579 system->instEventQueue.serviceEvents(system->totalNumInsts); 1580 } 1581 thread[tid]->numOp++; 1582 thread[tid]->numOps++; 1583 committedOps[tid]++; 1584 1585 probeInstCommit(inst->staticInst, inst->instAddr()); 1586} 1587 1588template <class Impl> 1589void 1590FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst) 1591{ 1592 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s " 1593 "[sn:%lli]\n", 1594 inst->threadNumber, inst->pcState(), inst->seqNum); 1595 1596 removeInstsThisCycle = true; 1597 1598 // Remove the front instruction. 1599 removeList.push(inst->getInstListIt()); 1600} 1601 1602template <class Impl> 1603void 1604FullO3CPU<Impl>::removeInstsNotInROB(ThreadID tid) 1605{ 1606 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" 1607 " list.\n", tid); 1608 1609 ListIt end_it; 1610 1611 bool rob_empty = false; 1612 1613 if (instList.empty()) { 1614 return; 1615 } else if (rob.isEmpty(tid)) { 1616 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); 1617 end_it = instList.begin(); 1618 rob_empty = true; 1619 } else { 1620 end_it = (rob.readTailInst(tid))->getInstListIt(); 1621 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); 1622 } 1623 1624 removeInstsThisCycle = true; 1625 1626 ListIt inst_it = instList.end(); 1627 1628 inst_it--; 1629 1630 // Walk through the instruction list, removing any instructions 1631 // that were inserted after the given instruction iterator, end_it. 1632 while (inst_it != end_it) { 1633 assert(!instList.empty()); 1634 1635 squashInstIt(inst_it, tid); 1636 1637 inst_it--; 1638 } 1639 1640 // If the ROB was empty, then we actually need to remove the first 1641 // instruction as well. 
1642 if (rob_empty) { 1643 squashInstIt(inst_it, tid); 1644 } 1645} 1646 1647template <class Impl> 1648void 1649FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) 1650{ 1651 assert(!instList.empty()); 1652 1653 removeInstsThisCycle = true; 1654 1655 ListIt inst_iter = instList.end(); 1656 1657 inst_iter--; 1658 1659 DPRINTF(O3CPU, "Deleting instructions from instruction " 1660 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", 1661 tid, seq_num, (*inst_iter)->seqNum); 1662 1663 while ((*inst_iter)->seqNum > seq_num) { 1664 1665 bool break_loop = (inst_iter == instList.begin()); 1666 1667 squashInstIt(inst_iter, tid); 1668 1669 inst_iter--; 1670 1671 if (break_loop) 1672 break; 1673 } 1674} 1675 1676template <class Impl> 1677inline void 1678FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, ThreadID tid) 1679{ 1680 if ((*instIt)->threadNumber == tid) { 1681 DPRINTF(O3CPU, "Squashing instruction, " 1682 "[tid:%i] [sn:%lli] PC %s\n", 1683 (*instIt)->threadNumber, 1684 (*instIt)->seqNum, 1685 (*instIt)->pcState()); 1686 1687 // Mark it as squashed. 1688 (*instIt)->setSquashed(); 1689 1690 // @todo: Formulate a consistent method for deleting 1691 // instructions from the instruction list 1692 // Remove the instruction from the list. 
1693 removeList.push(instIt); 1694 } 1695} 1696 1697template <class Impl> 1698void 1699FullO3CPU<Impl>::cleanUpRemovedInsts() 1700{ 1701 while (!removeList.empty()) { 1702 DPRINTF(O3CPU, "Removing instruction, " 1703 "[tid:%i] [sn:%lli] PC %s\n", 1704 (*removeList.front())->threadNumber, 1705 (*removeList.front())->seqNum, 1706 (*removeList.front())->pcState()); 1707 1708 instList.erase(removeList.front()); 1709 1710 removeList.pop(); 1711 } 1712 1713 removeInstsThisCycle = false; 1714} 1715/* 1716template <class Impl> 1717void 1718FullO3CPU<Impl>::removeAllInsts() 1719{ 1720 instList.clear(); 1721} 1722*/ 1723template <class Impl> 1724void 1725FullO3CPU<Impl>::dumpInsts() 1726{ 1727 int num = 0; 1728 1729 ListIt inst_list_it = instList.begin(); 1730 1731 cprintf("Dumping Instruction List\n"); 1732 1733 while (inst_list_it != instList.end()) { 1734 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" 1735 "Squashed:%i\n\n", 1736 num, (*inst_list_it)->instAddr(), (*inst_list_it)->threadNumber, 1737 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), 1738 (*inst_list_it)->isSquashed()); 1739 inst_list_it++; 1740 ++num; 1741 } 1742} 1743/* 1744template <class Impl> 1745void 1746FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst) 1747{ 1748 iew.wakeDependents(inst); 1749} 1750*/ 1751template <class Impl> 1752void 1753FullO3CPU<Impl>::wakeCPU() 1754{ 1755 if (activityRec.active() || tickEvent.scheduled()) { 1756 DPRINTF(Activity, "CPU already running.\n"); 1757 return; 1758 } 1759 1760 DPRINTF(Activity, "Waking up CPU\n"); 1761 1762 Cycles cycles(curCycle() - lastRunningCycle); 1763 // @todo: This is an oddity that is only here to match the stats 1764 if (cycles > 1) { 1765 --cycles; 1766 idleCycles += cycles; 1767 numCycles += cycles; 1768 } 1769 1770 schedule(tickEvent, clockEdge()); 1771} 1772 1773template <class Impl> 1774void 1775FullO3CPU<Impl>::wakeup(ThreadID tid) 1776{ 1777 if (this->thread[tid]->status() != ThreadContext::Suspended) 1778 
return; 1779 1780 this->wakeCPU(); 1781 1782 DPRINTF(Quiesce, "Suspended Processor woken\n"); 1783 this->threadContexts[tid]->activate(); 1784} 1785 1786template <class Impl> 1787ThreadID 1788FullO3CPU<Impl>::getFreeTid() 1789{ 1790 for (ThreadID tid = 0; tid < numThreads; tid++) { 1791 if (!tids[tid]) { 1792 tids[tid] = true; 1793 return tid; 1794 } 1795 } 1796 1797 return InvalidThreadID; 1798} 1799 1800template <class Impl> 1801void 1802FullO3CPU<Impl>::updateThreadPriority() 1803{ 1804 if (activeThreads.size() > 1) { 1805 //DEFAULT TO ROUND ROBIN SCHEME 1806 //e.g. Move highest priority to end of thread list 1807 list<ThreadID>::iterator list_begin = activeThreads.begin(); 1808 1809 unsigned high_thread = *list_begin; 1810 1811 activeThreads.erase(list_begin); 1812 1813 activeThreads.push_back(high_thread); 1814 } 1815} 1816 1817template <class Impl> 1818void 1819FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid) 1820{ 1821 DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid); 1822 1823 // the thread trying to exit can't be already halted 1824 assert(tcBase(tid)->status() != ThreadContext::Halted); 1825 1826 // make sure the thread has not been added to the list yet 1827 assert(exitingThreads.count(tid) == 0); 1828 1829 // add the thread to exitingThreads list to mark that this thread is 1830 // trying to exit. The boolean value in the pair denotes if a thread is 1831 // ready to exit. The thread is not ready to exit until the corresponding 1832 // exit trap event is processed in the future. Until then, it'll be still 1833 // an active thread that is trying to exit. 
1834 exitingThreads.emplace(std::make_pair(tid, false)); 1835} 1836 1837template <class Impl> 1838bool 1839FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const 1840{ 1841 return exitingThreads.count(tid) == 1; 1842} 1843 1844template <class Impl> 1845void 1846FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid) 1847{ 1848 assert(exitingThreads.count(tid) == 1); 1849 1850 // exit trap event has been processed. Now, the thread is ready to exit 1851 // and be removed from the CPU. 1852 exitingThreads[tid] = true; 1853 1854 // we schedule a threadExitEvent in the next cycle to properly clean 1855 // up the thread's states in the pipeline. threadExitEvent has lower 1856 // priority than tickEvent, so the cleanup will happen at the very end 1857 // of the next cycle after all pipeline stages complete their operations. 1858 // We want all stages to complete squashing instructions before doing 1859 // the cleanup. 1860 if (!threadExitEvent.scheduled()) { 1861 schedule(threadExitEvent, nextCycle()); 1862 } 1863} 1864 1865template <class Impl> 1866void 1867FullO3CPU<Impl>::exitThreads() 1868{ 1869 // there must be at least one thread trying to exit 1870 assert(exitingThreads.size() > 0); 1871 1872 // terminate all threads that are ready to exit 1873 auto it = exitingThreads.begin(); 1874 while (it != exitingThreads.end()) { 1875 ThreadID thread_id = it->first; 1876 bool readyToExit = it->second; 1877 1878 if (readyToExit) { 1879 DPRINTF(O3CPU, "Exiting thread %d\n", thread_id); 1880 haltContext(thread_id); 1881 tcBase(thread_id)->setStatus(ThreadContext::Halted); 1882 it = exitingThreads.erase(it); 1883 } else { 1884 it++; 1885 } 1886 } 1887} 1888 1889// Forward declaration of FullO3CPU. 1890template class FullO3CPU<O3CPUImpl>; 1891