cpu.cc revision 13905:5cf30883255c
1/* 2 * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2006 The Regents of The University of Michigan 16 * Copyright (c) 2011 Regents of the University of California 17 * All rights reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions are 21 * met: redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer; 23 * redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution; 26 * neither the name of the copyright holders nor the names of its 27 * contributors may be used to endorse or promote products derived from 28 * this software without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 * 42 * Authors: Kevin Lim 43 * Korey Sewell 44 * Rick Strong 45 */ 46 47#include "cpu/o3/cpu.hh" 48 49#include "arch/generic/traits.hh" 50#include "arch/kernel_stats.hh" 51#include "config/the_isa.hh" 52#include "cpu/activity.hh" 53#include "cpu/checker/cpu.hh" 54#include "cpu/checker/thread_context.hh" 55#include "cpu/o3/isa_specific.hh" 56#include "cpu/o3/thread_context.hh" 57#include "cpu/quiesce_event.hh" 58#include "cpu/simple_thread.hh" 59#include "cpu/thread_context.hh" 60#include "debug/Activity.hh" 61#include "debug/Drain.hh" 62#include "debug/O3CPU.hh" 63#include "debug/Quiesce.hh" 64#include "enums/MemoryMode.hh" 65#include "sim/core.hh" 66#include "sim/full_system.hh" 67#include "sim/process.hh" 68#include "sim/stat_control.hh" 69#include "sim/system.hh" 70 71#if THE_ISA == ALPHA_ISA 72#include "arch/alpha/osfpal.hh" 73#include "debug/Activity.hh" 74 75#endif 76 77struct BaseCPUParams; 78 79using namespace TheISA; 80using namespace std; 81 82BaseO3CPU::BaseO3CPU(BaseCPUParams *params) 83 : BaseCPU(params) 84{ 85} 86 87void 88BaseO3CPU::regStats() 89{ 90 BaseCPU::regStats(); 91} 92 93template<class Impl> 94bool 95FullO3CPU<Impl>::IcachePort::recvTimingResp(PacketPtr pkt) 96{ 97 DPRINTF(O3CPU, "Fetch unit received timing\n"); 98 // We shouldn't ever get a cacheable block in Modified state 99 assert(pkt->req->isUncacheable() || 100 !(pkt->cacheResponding() && !pkt->hasSharers())); 101 fetch->processCacheCompletion(pkt); 102 103 return true; 104} 105 106template<class Impl> 107void 108FullO3CPU<Impl>::IcachePort::recvReqRetry() 109{ 110 fetch->recvReqRetry(); 111} 112 113template <class Impl> 114bool 115FullO3CPU<Impl>::DcachePort::recvTimingResp(PacketPtr pkt) 116{ 117 return lsq->recvTimingResp(pkt); 118} 119 120template <class Impl> 121void 122FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt) 123{ 124 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { 125 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { 126 cpu->wakeup(tid); 127 } 128 } 129 lsq->recvTimingSnoopReq(pkt); 130} 131 132template <class Impl> 133void 134FullO3CPU<Impl>::DcachePort::recvReqRetry() 135{ 136 lsq->recvReqRetry(); 137} 138 139template <class Impl> 140FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) 141 : BaseO3CPU(params), 142 itb(params->itb), 143 dtb(params->dtb), 144 tickEvent([this]{ tick(); }, "FullO3CPU tick", 145 false, Event::CPU_Tick_Pri), 146 threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads", 147 false, Event::CPU_Exit_Pri), 148#ifndef NDEBUG 149 instcount(0), 150#endif 151 removeInstsThisCycle(false), 152 fetch(this, params), 153 decode(this, params), 154 rename(this, params), 155 iew(this, params), 156 commit(this, params), 157 158 /* It is mandatory that all SMT threads use the same renaming mode as 159 * they are sharing registers and rename */ 160 vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])), 161 regFile(params->numPhysIntRegs, 162 params->numPhysFloatRegs, 163 params->numPhysVecRegs, 164 params->numPhysVecPredRegs, 165 params->numPhysCCRegs, 166 vecMode), 167 168 freeList(name() + ".freelist", ®File), 169 170 rob(this, params), 171 172 scoreboard(name() + ".scoreboard", 173 regFile.totalNumPhysRegs()), 174 175 isa(numThreads, NULL), 176 177 icachePort(&fetch, this), 178 dcachePort(&iew.ldstQueue, this), 179 180 timeBuffer(params->backComSize, params->forwardComSize), 181 fetchQueue(params->backComSize, params->forwardComSize), 182 decodeQueue(params->backComSize, params->forwardComSize), 183 renameQueue(params->backComSize, params->forwardComSize), 184 iewQueue(params->backComSize, params->forwardComSize), 185 activityRec(name(), NumStages, 186 params->backComSize + params->forwardComSize, 187 params->activity), 188 189 globalSeqNum(1), 190 system(params->system), 191 lastRunningCycle(curCycle()) 192{ 193 if (!params->switched_out) { 194 _status = Running; 195 } else { 196 _status = SwitchedOut; 197 } 198 199 if (params->checker) { 200 BaseCPU *temp_checker = params->checker; 201 checker = dynamic_cast<Checker<Impl> *>(temp_checker); 202 checker->setIcachePort(&icachePort); 203 checker->setSystem(params->system); 204 } else { 205 checker = NULL; 206 } 207 208 if (!FullSystem) { 209 thread.resize(numThreads); 210 tids.resize(numThreads); 211 } 212 213 // The stages also need their CPU pointer setup. However this 214 // must be done at the upper level CPU because they have pointers 215 // to the upper level CPU, and not this FullO3CPU. 216 217 // Set up Pointers to the activeThreads list for each stage 218 fetch.setActiveThreads(&activeThreads); 219 decode.setActiveThreads(&activeThreads); 220 rename.setActiveThreads(&activeThreads); 221 iew.setActiveThreads(&activeThreads); 222 commit.setActiveThreads(&activeThreads); 223 224 // Give each of the stages the time buffer they will use. 225 fetch.setTimeBuffer(&timeBuffer); 226 decode.setTimeBuffer(&timeBuffer); 227 rename.setTimeBuffer(&timeBuffer); 228 iew.setTimeBuffer(&timeBuffer); 229 commit.setTimeBuffer(&timeBuffer); 230 231 // Also setup each of the stages' queues. 232 fetch.setFetchQueue(&fetchQueue); 233 decode.setFetchQueue(&fetchQueue); 234 commit.setFetchQueue(&fetchQueue); 235 decode.setDecodeQueue(&decodeQueue); 236 rename.setDecodeQueue(&decodeQueue); 237 rename.setRenameQueue(&renameQueue); 238 iew.setRenameQueue(&renameQueue); 239 iew.setIEWQueue(&iewQueue); 240 commit.setIEWQueue(&iewQueue); 241 commit.setRenameQueue(&renameQueue); 242 243 commit.setIEWStage(&iew); 244 rename.setIEWStage(&iew); 245 rename.setCommitStage(&commit); 246 247 ThreadID active_threads; 248 if (FullSystem) { 249 active_threads = 1; 250 } else { 251 active_threads = params->workload.size(); 252 253 if (active_threads > Impl::MaxThreads) { 254 panic("Workload Size too large. Increase the 'MaxThreads' " 255 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) " 256 "or edit your workload size."); 257 } 258 } 259 260 //Make Sure That this a Valid Architeture 261 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); 262 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); 263 assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); 264 assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); 265 assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); 266 267 rename.setScoreboard(&scoreboard); 268 iew.setScoreboard(&scoreboard); 269 270 // Setup the rename map for whichever stages need it. 271 for (ThreadID tid = 0; tid < numThreads; tid++) { 272 isa[tid] = params->isa[tid]; 273 assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0])); 274 275 // Only Alpha has an FP zero register, so for other ISAs we 276 // use an invalid FP register index to avoid special treatment 277 // of any valid FP reg. 278 RegIndex invalidFPReg = TheISA::NumFloatRegs + 1; 279 RegIndex fpZeroReg = 280 (THE_ISA == ALPHA_ISA) ? TheISA::ZeroReg : invalidFPReg; 281 282 commitRenameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 283 &freeList, 284 vecMode); 285 286 renameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 287 &freeList, vecMode); 288 } 289 290 // Initialize rename map to assign physical registers to the 291 // architectural registers for active threads only. 292 for (ThreadID tid = 0; tid < active_threads; tid++) { 293 for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) { 294 // Note that we can't use the rename() method because we don't 295 // want special treatment for the zero register at this point 296 PhysRegIdPtr phys_reg = freeList.getIntReg(); 297 renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 298 commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 299 } 300 301 for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) { 302 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 303 renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg); 304 commitRenameMap[tid].setEntry( 305 RegId(FloatRegClass, ridx), phys_reg); 306 } 307 308 /* Here we need two 'interfaces' the 'whole register' and the 309 * 'register element'. At any point only one of them will be 310 * active. */ 311 if (vecMode == Enums::Full) { 312 /* Initialize the full-vector interface */ 313 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 314 RegId rid = RegId(VecRegClass, ridx); 315 PhysRegIdPtr phys_reg = freeList.getVecReg(); 316 renameMap[tid].setEntry(rid, phys_reg); 317 commitRenameMap[tid].setEntry(rid, phys_reg); 318 } 319 } else { 320 /* Initialize the vector-element interface */ 321 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 322 for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg; 323 ++ldx) { 324 RegId lrid = RegId(VecElemClass, ridx, ldx); 325 PhysRegIdPtr phys_elem = freeList.getVecElem(); 326 renameMap[tid].setEntry(lrid, phys_elem); 327 commitRenameMap[tid].setEntry(lrid, phys_elem); 328 } 329 } 330 } 331 332 for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { 333 PhysRegIdPtr phys_reg = freeList.getVecPredReg(); 334 renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); 335 commitRenameMap[tid].setEntry( 336 RegId(VecPredRegClass, ridx), phys_reg); 337 } 338 339 for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) { 340 PhysRegIdPtr phys_reg = freeList.getCCReg(); 341 renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 342 commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 343 } 344 } 345 346 rename.setRenameMap(renameMap); 347 commit.setRenameMap(commitRenameMap); 348 rename.setFreeList(&freeList); 349 350 // Setup the ROB for whichever stages need it. 351 commit.setROB(&rob); 352 353 lastActivatedCycle = 0; 354#if 0 355 // Give renameMap & rename stage access to the freeList; 356 for (ThreadID tid = 0; tid < numThreads; tid++) 357 globalSeqNum[tid] = 1; 358#endif 359 360 DPRINTF(O3CPU, "Creating O3CPU object.\n"); 361 362 // Setup any thread state. 363 this->thread.resize(this->numThreads); 364 365 for (ThreadID tid = 0; tid < this->numThreads; ++tid) { 366 if (FullSystem) { 367 // SMT is not supported in FS mode yet. 368 assert(this->numThreads == 1); 369 this->thread[tid] = new Thread(this, 0, NULL); 370 } else { 371 if (tid < params->workload.size()) { 372 DPRINTF(O3CPU, "Workload[%i] process is %#x", 373 tid, this->thread[tid]); 374 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 375 (typename Impl::O3CPU *)(this), 376 tid, params->workload[tid]); 377 378 //usedTids[tid] = true; 379 //threadMap[tid] = tid; 380 } else { 381 //Allocate Empty thread so M5 can use later 382 //when scheduling threads to CPU 383 Process* dummy_proc = NULL; 384 385 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 386 (typename Impl::O3CPU *)(this), 387 tid, dummy_proc); 388 //usedTids[tid] = false; 389 } 390 } 391 392 ThreadContext *tc; 393 394 // Setup the TC that will serve as the interface to the threads/CPU. 395 O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>; 396 397 tc = o3_tc; 398 399 // If we're using a checker, then the TC should be the 400 // CheckerThreadContext. 401 if (params->checker) { 402 tc = new CheckerThreadContext<O3ThreadContext<Impl> >( 403 o3_tc, this->checker); 404 } 405 406 o3_tc->cpu = (typename Impl::O3CPU *)(this); 407 assert(o3_tc->cpu); 408 o3_tc->thread = this->thread[tid]; 409 410 // Setup quiesce event. 411 this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc); 412 413 // Give the thread the TC. 414 this->thread[tid]->tc = tc; 415 416 // Add the TC to the CPU's list of TC's. 417 this->threadContexts.push_back(tc); 418 } 419 420 // FullO3CPU always requires an interrupt controller. 421 if (!params->switched_out && interrupts.empty()) { 422 fatal("FullO3CPU %s has no interrupt controller.\n" 423 "Ensure createInterruptController() is called.\n", name()); 424 } 425 426 for (ThreadID tid = 0; tid < this->numThreads; tid++) 427 this->thread[tid]->setFuncExeInst(0); 428} 429 430template <class Impl> 431FullO3CPU<Impl>::~FullO3CPU() 432{ 433} 434 435template <class Impl> 436void 437FullO3CPU<Impl>::regProbePoints() 438{ 439 BaseCPU::regProbePoints(); 440 441 ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete"); 442 ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete"); 443 444 fetch.regProbePoints(); 445 rename.regProbePoints(); 446 iew.regProbePoints(); 447 commit.regProbePoints(); 448} 449 450template <class Impl> 451void 452FullO3CPU<Impl>::regStats() 453{ 454 BaseO3CPU::regStats(); 455 456 // Register any of the O3CPU's stats here. 457 timesIdled 458 .name(name() + ".timesIdled") 459 .desc("Number of times that the entire CPU went into an idle state and" 460 " unscheduled itself") 461 .prereq(timesIdled); 462 463 idleCycles 464 .name(name() + ".idleCycles") 465 .desc("Total number of cycles that the CPU has spent unscheduled due " 466 "to idling") 467 .prereq(idleCycles); 468 469 quiesceCycles 470 .name(name() + ".quiesceCycles") 471 .desc("Total number of cycles that CPU has spent quiesced or waiting " 472 "for an interrupt") 473 .prereq(quiesceCycles); 474 475 // Number of Instructions simulated 476 // -------------------------------- 477 // Should probably be in Base CPU but need templated 478 // MaxThreads so put in here instead 479 committedInsts 480 .init(numThreads) 481 .name(name() + ".committedInsts") 482 .desc("Number of Instructions Simulated") 483 .flags(Stats::total); 484 485 committedOps 486 .init(numThreads) 487 .name(name() + ".committedOps") 488 .desc("Number of Ops (including micro ops) Simulated") 489 .flags(Stats::total); 490 491 cpi 492 .name(name() + ".cpi") 493 .desc("CPI: Cycles Per Instruction") 494 .precision(6); 495 cpi = numCycles / committedInsts; 496 497 totalCpi 498 .name(name() + ".cpi_total") 499 .desc("CPI: Total CPI of All Threads") 500 .precision(6); 501 totalCpi = numCycles / sum(committedInsts); 502 503 ipc 504 .name(name() + ".ipc") 505 .desc("IPC: Instructions Per Cycle") 506 .precision(6); 507 ipc = committedInsts / numCycles; 508 509 totalIpc 510 .name(name() + ".ipc_total") 511 .desc("IPC: Total IPC of All Threads") 512 .precision(6); 513 totalIpc = sum(committedInsts) / numCycles; 514 515 this->fetch.regStats(); 516 this->decode.regStats(); 517 this->rename.regStats(); 518 this->iew.regStats(); 519 this->commit.regStats(); 520 this->rob.regStats(); 521 522 intRegfileReads 523 .name(name() + ".int_regfile_reads") 524 .desc("number of integer regfile reads") 525 .prereq(intRegfileReads); 526 527 intRegfileWrites 528 .name(name() + ".int_regfile_writes") 529 .desc("number of integer regfile writes") 530 .prereq(intRegfileWrites); 531 532 fpRegfileReads 533 .name(name() + ".fp_regfile_reads") 534 .desc("number of floating regfile reads") 535 .prereq(fpRegfileReads); 536 537 fpRegfileWrites 538 .name(name() + ".fp_regfile_writes") 539 .desc("number of floating regfile writes") 540 .prereq(fpRegfileWrites); 541 542 vecRegfileReads 543 .name(name() + ".vec_regfile_reads") 544 .desc("number of vector regfile reads") 545 .prereq(vecRegfileReads); 546 547 vecRegfileWrites 548 .name(name() + ".vec_regfile_writes") 549 .desc("number of vector regfile writes") 550 .prereq(vecRegfileWrites); 551 552 vecPredRegfileReads 553 .name(name() + ".pred_regfile_reads") 554 .desc("number of predicate regfile reads") 555 .prereq(vecPredRegfileReads); 556 557 vecPredRegfileWrites 558 .name(name() + ".pred_regfile_writes") 559 .desc("number of predicate regfile writes") 560 .prereq(vecPredRegfileWrites); 561 562 ccRegfileReads 563 .name(name() + ".cc_regfile_reads") 564 .desc("number of cc regfile reads") 565 .prereq(ccRegfileReads); 566 567 ccRegfileWrites 568 .name(name() + ".cc_regfile_writes") 569 .desc("number of cc regfile writes") 570 .prereq(ccRegfileWrites); 571 572 miscRegfileReads 573 .name(name() + ".misc_regfile_reads") 574 .desc("number of misc regfile reads") 575 .prereq(miscRegfileReads); 576 577 miscRegfileWrites 578 .name(name() + ".misc_regfile_writes") 579 .desc("number of misc regfile writes") 580 .prereq(miscRegfileWrites); 581} 582 583template <class Impl> 584void 585FullO3CPU<Impl>::tick() 586{ 587 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); 588 assert(!switchedOut()); 589 assert(drainState() != DrainState::Drained); 590 591 ++numCycles; 592 updateCycleCounters(BaseCPU::CPU_STATE_ON); 593 594// activity = false; 595 596 //Tick each of the stages 597 fetch.tick(); 598 599 decode.tick(); 600 601 rename.tick(); 602 603 iew.tick(); 604 605 commit.tick(); 606 607 // Now advance the time buffers 608 timeBuffer.advance(); 609 610 fetchQueue.advance(); 611 decodeQueue.advance(); 612 renameQueue.advance(); 613 iewQueue.advance(); 614 615 activityRec.advance(); 616 617 if (removeInstsThisCycle) { 618 cleanUpRemovedInsts(); 619 } 620 621 if (!tickEvent.scheduled()) { 622 if (_status == SwitchedOut) { 623 DPRINTF(O3CPU, "Switched out!\n"); 624 // increment stat 625 lastRunningCycle = curCycle(); 626 } else if (!activityRec.active() || _status == Idle) { 627 DPRINTF(O3CPU, "Idle!\n"); 628 lastRunningCycle = curCycle(); 629 timesIdled++; 630 } else { 631 schedule(tickEvent, clockEdge(Cycles(1))); 632 DPRINTF(O3CPU, "Scheduling next tick!\n"); 633 } 634 } 635 636 if (!FullSystem) 637 updateThreadPriority(); 638 639 tryDrain(); 640} 641 642template <class Impl> 643void 644FullO3CPU<Impl>::init() 645{ 646 BaseCPU::init(); 647 648 for (ThreadID tid = 0; tid < numThreads; ++tid) { 649 // Set noSquashFromTC so that the CPU doesn't squash when initially 650 // setting up registers. 651 thread[tid]->noSquashFromTC = true; 652 // Initialise the ThreadContext's memory proxies 653 thread[tid]->initMemProxies(thread[tid]->getTC()); 654 } 655 656 if (FullSystem && !params()->switched_out) { 657 for (ThreadID tid = 0; tid < numThreads; tid++) { 658 ThreadContext *src_tc = threadContexts[tid]; 659 TheISA::initCPU(src_tc, src_tc->contextId()); 660 } 661 } 662 663 // Clear noSquashFromTC. 664 for (int tid = 0; tid < numThreads; ++tid) 665 thread[tid]->noSquashFromTC = false; 666 667 commit.setThreads(thread); 668} 669 670template <class Impl> 671void 672FullO3CPU<Impl>::startup() 673{ 674 BaseCPU::startup(); 675 for (int tid = 0; tid < numThreads; ++tid) 676 isa[tid]->startup(threadContexts[tid]); 677 678 fetch.startupStage(); 679 decode.startupStage(); 680 iew.startupStage(); 681 rename.startupStage(); 682 commit.startupStage(); 683} 684 685template <class Impl> 686void 687FullO3CPU<Impl>::activateThread(ThreadID tid) 688{ 689 list<ThreadID>::iterator isActive = 690 std::find(activeThreads.begin(), activeThreads.end(), tid); 691 692 DPRINTF(O3CPU, "[tid:%i] Calling activate thread.\n", tid); 693 assert(!switchedOut()); 694 695 if (isActive == activeThreads.end()) { 696 DPRINTF(O3CPU, "[tid:%i] Adding to active threads list\n", 697 tid); 698 699 activeThreads.push_back(tid); 700 } 701} 702 703template <class Impl> 704void 705FullO3CPU<Impl>::deactivateThread(ThreadID tid) 706{ 707 //Remove From Active List, if Active 708 list<ThreadID>::iterator thread_it = 709 std::find(activeThreads.begin(), activeThreads.end(), tid); 710 711 DPRINTF(O3CPU, "[tid:%i] Calling deactivate thread.\n", tid); 712 assert(!switchedOut()); 713 714 if (thread_it != activeThreads.end()) { 715 DPRINTF(O3CPU,"[tid:%i] Removing from active threads list\n", 716 tid); 717 activeThreads.erase(thread_it); 718 } 719 720 fetch.deactivateThread(tid); 721 commit.deactivateThread(tid); 722} 723 724template <class Impl> 725Counter 726FullO3CPU<Impl>::totalInsts() const 727{ 728 Counter total(0); 729 730 ThreadID size = thread.size(); 731 for (ThreadID i = 0; i < size; i++) 732 total += thread[i]->numInst; 733 734 return total; 735} 736 737template <class Impl> 738Counter 739FullO3CPU<Impl>::totalOps() const 740{ 741 Counter total(0); 742 743 ThreadID size = thread.size(); 744 for (ThreadID i = 0; i < size; i++) 745 total += thread[i]->numOp; 746 747 return total; 748} 749 750template <class Impl> 751void 752FullO3CPU<Impl>::activateContext(ThreadID tid) 753{ 754 assert(!switchedOut()); 755 756 // Needs to set each stage to running as well. 757 activateThread(tid); 758 759 // We don't want to wake the CPU if it is drained. In that case, 760 // we just want to flag the thread as active and schedule the tick 761 // event from drainResume() instead. 762 if (drainState() == DrainState::Drained) 763 return; 764 765 // If we are time 0 or if the last activation time is in the past, 766 // schedule the next tick and wake up the fetch unit 767 if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) { 768 scheduleTickEvent(Cycles(0)); 769 770 // Be sure to signal that there's some activity so the CPU doesn't 771 // deschedule itself. 772 activityRec.activity(); 773 fetch.wakeFromQuiesce(); 774 775 Cycles cycles(curCycle() - lastRunningCycle); 776 // @todo: This is an oddity that is only here to match the stats 777 if (cycles != 0) 778 --cycles; 779 quiesceCycles += cycles; 780 781 lastActivatedCycle = curTick(); 782 783 _status = Running; 784 785 BaseCPU::activateContext(tid); 786 } 787} 788 789template <class Impl> 790void 791FullO3CPU<Impl>::suspendContext(ThreadID tid) 792{ 793 DPRINTF(O3CPU,"[tid:%i] Suspending Thread Context.\n", tid); 794 assert(!switchedOut()); 795 796 deactivateThread(tid); 797 798 // If this was the last thread then unschedule the tick event. 799 if (activeThreads.size() == 0) { 800 unscheduleTickEvent(); 801 lastRunningCycle = curCycle(); 802 _status = Idle; 803 } 804 805 DPRINTF(Quiesce, "Suspending Context\n"); 806 807 BaseCPU::suspendContext(tid); 808} 809 810template <class Impl> 811void 812FullO3CPU<Impl>::haltContext(ThreadID tid) 813{ 814 //For now, this is the same as deallocate 815 DPRINTF(O3CPU,"[tid:%i] Halt Context called. Deallocating\n", tid); 816 assert(!switchedOut()); 817 818 deactivateThread(tid); 819 removeThread(tid); 820 821 updateCycleCounters(BaseCPU::CPU_STATE_SLEEP); 822} 823 824template <class Impl> 825void 826FullO3CPU<Impl>::insertThread(ThreadID tid) 827{ 828 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); 829 // Will change now that the PC and thread state is internal to the CPU 830 // and not in the ThreadContext. 831 ThreadContext *src_tc; 832 if (FullSystem) 833 src_tc = system->threadContexts[tid]; 834 else 835 src_tc = tcBase(tid); 836 837 //Bind Int Regs to Rename Map 838 839 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs; 840 reg_id.index()++) { 841 PhysRegIdPtr phys_reg = freeList.getIntReg(); 842 renameMap[tid].setEntry(reg_id, phys_reg); 843 scoreboard.setReg(phys_reg); 844 } 845 846 //Bind Float Regs to Rename Map 847 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs; 848 reg_id.index()++) { 849 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 850 renameMap[tid].setEntry(reg_id, phys_reg); 851 scoreboard.setReg(phys_reg); 852 } 853 854 //Bind condition-code Regs to Rename Map 855 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; 856 reg_id.index()++) { 857 PhysRegIdPtr phys_reg = freeList.getCCReg(); 858 renameMap[tid].setEntry(reg_id, phys_reg); 859 scoreboard.setReg(phys_reg); 860 } 861 862 //Copy Thread Data Into RegFile 863 //this->copyFromTC(tid); 864 865 //Set PC/NPC/NNPC 866 pcState(src_tc->pcState(), tid); 867 868 src_tc->setStatus(ThreadContext::Active); 869 870 activateContext(tid); 871 872 //Reset ROB/IQ/LSQ Entries 873 commit.rob->resetEntries(); 874} 875 876template <class Impl> 877void 878FullO3CPU<Impl>::removeThread(ThreadID tid) 879{ 880 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); 881 882 // Copy Thread Data From RegFile 883 // If thread is suspended, it might be re-allocated 884 // this->copyToTC(tid); 885 886 887 // @todo: 2-27-2008: Fix how we free up rename mappings 888 // here to alleviate the case for double-freeing registers 889 // in SMT workloads. 890 891 // clear all thread-specific states in each stage of the pipeline 892 // since this thread is going to be completely removed from the CPU 893 commit.clearStates(tid); 894 fetch.clearStates(tid); 895 decode.clearStates(tid); 896 rename.clearStates(tid); 897 iew.clearStates(tid); 898 899 // at this step, all instructions in the pipeline should be already 900 // either committed successfully or squashed. All thread-specific 901 // queues in the pipeline must be empty. 902 assert(iew.instQueue.getCount(tid) == 0); 903 assert(iew.ldstQueue.getCount(tid) == 0); 904 assert(commit.rob->isEmpty(tid)); 905 906 // Reset ROB/IQ/LSQ Entries 907 908 // Commented out for now. This should be possible to do by 909 // telling all the pipeline stages to drain first, and then 910 // checking until the drain completes. Once the pipeline is 911 // drained, call resetEntries(). - 10-09-06 ktlim 912/* 913 if (activeThreads.size() >= 1) { 914 commit.rob->resetEntries(); 915 iew.resetEntries(); 916 } 917*/ 918} 919 920template <class Impl> 921Fault 922FullO3CPU<Impl>::hwrei(ThreadID tid) 923{ 924#if THE_ISA == ALPHA_ISA 925 // Need to clear the lock flag upon returning from an interrupt. 926 this->setMiscRegNoEffect(AlphaISA::MISCREG_LOCKFLAG, false, tid); 927 928 auto *stats = dynamic_cast<AlphaISA::Kernel::Statistics *>( 929 this->thread[tid]->kernelStats); 930 assert(stats); 931 stats->hwrei(); 932 933 // FIXME: XXX check for interrupts? XXX 934#endif 935 return NoFault; 936} 937 938template <class Impl> 939bool 940FullO3CPU<Impl>::simPalCheck(int palFunc, ThreadID tid) 941{ 942#if THE_ISA == ALPHA_ISA 943 auto *stats = dynamic_cast<AlphaISA::Kernel::Statistics *>( 944 this->thread[tid]->kernelStats); 945 if (stats) 946 stats->callpal(palFunc, this->threadContexts[tid]); 947 948 switch (palFunc) { 949 case PAL::halt: 950 halt(); 951 if (--System::numSystemsRunning == 0) 952 exitSimLoop("all cpus halted"); 953 break; 954 955 case PAL::bpt: 956 case PAL::bugchk: 957 if (this->system->breakpoint()) 958 return false; 959 break; 960 } 961#endif 962 return true; 963} 964 965template <class Impl> 966void 967FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist) 968{ 969 auto pc = this->pcState(tid); 970 971 // new_mode is the new vector renaming mode 972 auto new_mode = RenameMode<TheISA::ISA>::mode(pc); 973 974 // We update vecMode only if there has been a change 975 if (new_mode != vecMode) { 976 vecMode = new_mode; 977 978 renameMap[tid].switchMode(vecMode); 979 commitRenameMap[tid].switchMode(vecMode); 980 renameMap[tid].switchFreeList(freelist); 981 } 982} 983 984template <class Impl> 985Fault 986FullO3CPU<Impl>::getInterrupts() 987{ 988 // Check if there are any outstanding interrupts 989 return this->interrupts[0]->getInterrupt(this->threadContexts[0]); 990} 991 992template <class Impl> 993void 994FullO3CPU<Impl>::processInterrupts(const Fault &interrupt) 995{ 996 // Check for interrupts here. For now can copy the code that 997 // exists within isa_fullsys_traits.hh. Also assume that thread 0 998 // is the one that handles the interrupts. 999 // @todo: Possibly consolidate the interrupt checking code. 1000 // @todo: Allow other threads to handle interrupts. 1001 1002 assert(interrupt != NoFault); 1003 this->interrupts[0]->updateIntrInfo(this->threadContexts[0]); 1004 1005 DPRINTF(O3CPU, "Interrupt %s being handled\n", interrupt->name()); 1006 this->trap(interrupt, 0, nullptr); 1007} 1008 1009template <class Impl> 1010void 1011FullO3CPU<Impl>::trap(const Fault &fault, ThreadID tid, 1012 const StaticInstPtr &inst) 1013{ 1014 // Pass the thread's TC into the invoke method. 1015 fault->invoke(this->threadContexts[tid], inst); 1016} 1017 1018template <class Impl> 1019void 1020FullO3CPU<Impl>::syscall(int64_t callnum, ThreadID tid, Fault *fault) 1021{ 1022 DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); 1023 1024 DPRINTF(Activity,"Activity: syscall() called.\n"); 1025 1026 // Temporarily increase this by one to account for the syscall 1027 // instruction. 1028 ++(this->thread[tid]->funcExeInst); 1029 1030 // Execute the actual syscall. 1031 this->thread[tid]->syscall(callnum, fault); 1032 1033 // Decrease funcExeInst by one as the normal commit will handle 1034 // incrementing it. 1035 --(this->thread[tid]->funcExeInst); 1036} 1037 1038template <class Impl> 1039void 1040FullO3CPU<Impl>::serializeThread(CheckpointOut &cp, ThreadID tid) const 1041{ 1042 thread[tid]->serialize(cp); 1043} 1044 1045template <class Impl> 1046void 1047FullO3CPU<Impl>::unserializeThread(CheckpointIn &cp, ThreadID tid) 1048{ 1049 thread[tid]->unserialize(cp); 1050} 1051 1052template <class Impl> 1053DrainState 1054FullO3CPU<Impl>::drain() 1055{ 1056 // Deschedule any power gating event (if any) 1057 deschedulePowerGatingEvent(); 1058 1059 // If the CPU isn't doing anything, then return immediately. 1060 if (switchedOut()) 1061 return DrainState::Drained; 1062 1063 DPRINTF(Drain, "Draining...\n"); 1064 1065 // We only need to signal a drain to the commit stage as this 1066 // initiates squashing controls the draining. Once the commit 1067 // stage commits an instruction where it is safe to stop, it'll 1068 // squash the rest of the instructions in the pipeline and force 1069 // the fetch stage to stall. The pipeline will be drained once all 1070 // in-flight instructions have retired. 1071 commit.drain(); 1072 1073 // Wake the CPU and record activity so everything can drain out if 1074 // the CPU was not able to immediately drain. 1075 if (!isDrained()) { 1076 // If a thread is suspended, wake it up so it can be drained 1077 for (auto t : threadContexts) { 1078 if (t->status() == ThreadContext::Suspended){ 1079 DPRINTF(Drain, "Currently suspended so activate %i \n", 1080 t->threadId()); 1081 t->activate(); 1082 // As the thread is now active, change the power state as well 1083 activateContext(t->threadId()); 1084 } 1085 } 1086 1087 wakeCPU(); 1088 activityRec.activity(); 1089 1090 DPRINTF(Drain, "CPU not drained\n"); 1091 1092 return DrainState::Draining; 1093 } else { 1094 DPRINTF(Drain, "CPU is already drained\n"); 1095 if (tickEvent.scheduled()) 1096 deschedule(tickEvent); 1097 1098 // Flush out any old data from the time buffers. In 1099 // particular, there might be some data in flight from the 1100 // fetch stage that isn't visible in any of the CPU buffers we 1101 // test in isDrained(). 1102 for (int i = 0; i < timeBuffer.getSize(); ++i) { 1103 timeBuffer.advance(); 1104 fetchQueue.advance(); 1105 decodeQueue.advance(); 1106 renameQueue.advance(); 1107 iewQueue.advance(); 1108 } 1109 1110 drainSanityCheck(); 1111 return DrainState::Drained; 1112 } 1113} 1114 1115template <class Impl> 1116bool 1117FullO3CPU<Impl>::tryDrain() 1118{ 1119 if (drainState() != DrainState::Draining || !isDrained()) 1120 return false; 1121 1122 if (tickEvent.scheduled()) 1123 deschedule(tickEvent); 1124 1125 DPRINTF(Drain, "CPU done draining, processing drain event\n"); 1126 signalDrainDone(); 1127 1128 return true; 1129} 1130 1131template <class Impl> 1132void 1133FullO3CPU<Impl>::drainSanityCheck() const 1134{ 1135 assert(isDrained()); 1136 fetch.drainSanityCheck(); 1137 decode.drainSanityCheck(); 1138 rename.drainSanityCheck(); 1139 iew.drainSanityCheck(); 1140 commit.drainSanityCheck(); 1141} 1142 1143template <class Impl> 1144bool 1145FullO3CPU<Impl>::isDrained() const 1146{ 1147 bool drained(true); 1148 1149 if (!instList.empty() || !removeList.empty()) { 1150 DPRINTF(Drain, "Main CPU structures not drained.\n"); 1151 drained = false; 1152 } 1153 1154 if (!fetch.isDrained()) { 1155 DPRINTF(Drain, "Fetch not drained.\n"); 1156 drained = false; 1157 } 1158 1159 if (!decode.isDrained()) { 1160 DPRINTF(Drain, "Decode not drained.\n"); 1161 drained = false; 1162 } 1163 1164 if (!rename.isDrained()) { 1165 DPRINTF(Drain, "Rename not drained.\n"); 1166 drained = false; 1167 } 1168 1169 if (!iew.isDrained()) { 1170 DPRINTF(Drain, "IEW not drained.\n"); 1171 drained = false; 1172 } 1173 1174 if (!commit.isDrained()) { 1175 DPRINTF(Drain, "Commit not drained.\n"); 1176 drained = false; 1177 } 1178 1179 return drained; 1180} 1181 1182template <class Impl> 1183void 1184FullO3CPU<Impl>::commitDrained(ThreadID tid) 1185{ 1186 fetch.drainStall(tid); 1187} 1188 1189template <class Impl> 1190void 1191FullO3CPU<Impl>::drainResume() 1192{ 1193 if (switchedOut()) 1194 return; 1195 1196 DPRINTF(Drain, "Resuming...\n"); 1197 verifyMemoryMode(); 1198 1199 fetch.drainResume(); 1200 commit.drainResume(); 1201 1202 _status = Idle; 1203 for (ThreadID i = 0; i < thread.size(); i++) { 1204 if (thread[i]->status() == ThreadContext::Active) { 1205 DPRINTF(Drain, "Activating thread: %i\n", i); 1206 activateThread(i); 1207 _status = Running; 1208 } 1209 } 1210 1211 assert(!tickEvent.scheduled()); 1212 if (_status == Running) 1213 schedule(tickEvent, nextCycle()); 1214 1215 // Reschedule any power gating event (if any) 1216 schedulePowerGatingEvent(); 1217} 1218 1219template <class Impl> 1220void 1221FullO3CPU<Impl>::switchOut() 1222{ 1223 DPRINTF(O3CPU, "Switching out\n"); 1224 BaseCPU::switchOut(); 1225 1226 activityRec.reset(); 1227 1228 _status = SwitchedOut; 1229 1230 if (checker) 1231 checker->switchOut(); 1232} 1233 1234template <class Impl> 1235void 1236FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) 1237{ 1238 BaseCPU::takeOverFrom(oldCPU); 1239 1240 fetch.takeOverFrom(); 1241 decode.takeOverFrom(); 1242 rename.takeOverFrom(); 1243 iew.takeOverFrom(); 1244 commit.takeOverFrom(); 1245 1246 assert(!tickEvent.scheduled()); 1247 1248 FullO3CPU<Impl> *oldO3CPU = dynamic_cast<FullO3CPU<Impl>*>(oldCPU); 1249 if (oldO3CPU) 1250 globalSeqNum = oldO3CPU->globalSeqNum; 1251 1252 lastRunningCycle = curCycle(); 1253 _status = Idle; 1254} 1255 1256template <class Impl> 1257void 1258FullO3CPU<Impl>::verifyMemoryMode() const 1259{ 1260 if (!system->isTimingMode()) { 1261 fatal("The O3 CPU requires the memory system to be in " 1262 "'timing' mode.\n"); 1263 } 1264} 1265 1266template <class Impl> 1267RegVal 1268FullO3CPU<Impl>::readMiscRegNoEffect(int misc_reg, ThreadID tid) const 1269{ 1270 return this->isa[tid]->readMiscRegNoEffect(misc_reg); 1271} 1272 1273template <class Impl> 1274RegVal 1275FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid) 1276{ 1277 miscRegfileReads++; 1278 return this->isa[tid]->readMiscReg(misc_reg, tcBase(tid)); 1279} 1280 1281template <class Impl> 1282void 1283FullO3CPU<Impl>::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) 1284{ 1285 this->isa[tid]->setMiscRegNoEffect(misc_reg, val); 1286} 1287 1288template <class Impl> 1289void 1290FullO3CPU<Impl>::setMiscReg(int misc_reg, RegVal val, ThreadID tid) 1291{ 1292 miscRegfileWrites++; 1293 this->isa[tid]->setMiscReg(misc_reg, val, tcBase(tid)); 1294} 1295 1296template <class Impl> 1297RegVal 1298FullO3CPU<Impl>::readIntReg(PhysRegIdPtr phys_reg) 1299{ 1300 intRegfileReads++; 1301 return regFile.readIntReg(phys_reg); 1302} 1303 1304template <class Impl> 1305RegVal 1306FullO3CPU<Impl>::readFloatReg(PhysRegIdPtr phys_reg) 1307{ 1308 fpRegfileReads++; 1309 return regFile.readFloatReg(phys_reg); 1310} 1311 1312template <class Impl> 1313auto 1314FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const 1315 -> const VecRegContainer& 1316{ 1317 vecRegfileReads++; 1318 return regFile.readVecReg(phys_reg); 1319} 1320 1321template <class Impl> 1322auto 1323FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg) 1324 -> VecRegContainer& 1325{ 1326 vecRegfileWrites++; 1327 return regFile.getWritableVecReg(phys_reg); 1328} 1329 1330template <class Impl> 1331auto 1332FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& 1333{ 1334 vecRegfileReads++; 1335 return regFile.readVecElem(phys_reg); 1336} 1337 1338template <class Impl> 1339auto 1340FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const 1341 -> const VecPredRegContainer& 1342{ 1343 vecPredRegfileReads++; 1344 return regFile.readVecPredReg(phys_reg); 1345} 1346 1347template <class Impl> 1348auto 1349FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg) 1350 -> VecPredRegContainer& 1351{ 1352 vecPredRegfileWrites++; 1353 return regFile.getWritableVecPredReg(phys_reg); 1354} 1355 1356template <class Impl> 1357RegVal 1358FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) 1359{ 1360 ccRegfileReads++; 1361 return regFile.readCCReg(phys_reg); 1362} 1363 1364template <class Impl> 1365void 1366FullO3CPU<Impl>::setIntReg(PhysRegIdPtr phys_reg, RegVal val) 1367{ 1368 intRegfileWrites++; 1369 regFile.setIntReg(phys_reg, val); 1370} 1371 1372template <class Impl> 1373void 1374FullO3CPU<Impl>::setFloatReg(PhysRegIdPtr phys_reg, RegVal val) 1375{ 1376 fpRegfileWrites++; 1377 regFile.setFloatReg(phys_reg, val); 1378} 1379 1380template <class Impl> 1381void 1382FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) 1383{ 1384 vecRegfileWrites++; 1385 regFile.setVecReg(phys_reg, val); 1386} 1387 1388template <class Impl> 1389void 1390FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) 1391{ 1392 vecRegfileWrites++; 1393 regFile.setVecElem(phys_reg, val); 1394} 1395 1396template <class Impl> 1397void 1398FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg, 1399 const VecPredRegContainer& val) 1400{ 1401 vecPredRegfileWrites++; 1402 regFile.setVecPredReg(phys_reg, val); 1403} 1404 1405template <class Impl> 1406void 1407FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, RegVal val) 1408{ 1409 ccRegfileWrites++; 1410 regFile.setCCReg(phys_reg, val); 1411} 1412 1413template <class Impl> 1414RegVal 1415FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) 1416{ 1417 intRegfileReads++; 1418 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1419 RegId(IntRegClass, reg_idx)); 1420 1421 return regFile.readIntReg(phys_reg); 1422} 1423 1424template <class Impl> 1425RegVal 1426FullO3CPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid) 1427{ 1428 fpRegfileReads++; 1429 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1430 RegId(FloatRegClass, reg_idx)); 1431 1432 return regFile.readFloatReg(phys_reg); 1433} 1434 1435template <class Impl> 1436auto 1437FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const 1438 -> const VecRegContainer& 1439{ 1440 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1441 RegId(VecRegClass, reg_idx)); 1442 return readVecReg(phys_reg); 1443} 1444 1445template <class Impl> 1446auto 1447FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid) 1448 -> VecRegContainer& 1449{ 1450 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1451 RegId(VecRegClass, reg_idx)); 1452 return getWritableVecReg(phys_reg); 1453} 1454 1455template <class Impl> 1456auto 1457FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1458 ThreadID tid) const -> const VecElem& 1459{ 1460 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1461 RegId(VecElemClass, reg_idx, ldx)); 1462 return readVecElem(phys_reg); 1463} 1464 1465template <class Impl> 1466auto 1467FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const 1468 -> const VecPredRegContainer& 1469{ 1470 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1471 RegId(VecPredRegClass, reg_idx)); 1472 return readVecPredReg(phys_reg); 1473} 1474 1475template <class Impl> 1476auto 1477FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid) 1478 -> VecPredRegContainer& 1479{ 1480 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1481 RegId(VecPredRegClass, reg_idx)); 1482 return getWritableVecPredReg(phys_reg); 1483} 1484 1485template <class Impl> 1486RegVal 1487FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) 1488{ 1489 ccRegfileReads++; 1490 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1491 RegId(CCRegClass, reg_idx)); 1492 1493 return regFile.readCCReg(phys_reg); 1494} 1495 1496template <class Impl> 1497void 1498FullO3CPU<Impl>::setArchIntReg(int reg_idx, RegVal val, ThreadID tid) 1499{ 1500 intRegfileWrites++; 1501 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1502 RegId(IntRegClass, reg_idx)); 1503 1504 regFile.setIntReg(phys_reg, val); 1505} 1506 1507template <class Impl> 1508void 1509FullO3CPU<Impl>::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid) 1510{ 1511 fpRegfileWrites++; 1512 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1513 RegId(FloatRegClass, reg_idx)); 1514 1515 regFile.setFloatReg(phys_reg, val); 1516} 1517 1518template <class Impl> 1519void 1520FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val, 1521 ThreadID tid) 1522{ 1523 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1524 RegId(VecRegClass, reg_idx)); 1525 setVecReg(phys_reg, val); 1526} 1527 1528template <class Impl> 1529void 1530FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1531 const VecElem& val, ThreadID tid) 1532{ 1533 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1534 RegId(VecElemClass, reg_idx, ldx)); 1535 setVecElem(phys_reg, val); 1536} 1537 1538template <class Impl> 1539void 1540FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, 1541 ThreadID tid) 1542{ 1543 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1544 RegId(VecPredRegClass, reg_idx)); 1545 setVecPredReg(phys_reg, val); 1546} 1547 1548template <class Impl> 1549void 1550FullO3CPU<Impl>::setArchCCReg(int reg_idx, RegVal val, ThreadID tid) 1551{ 1552 ccRegfileWrites++; 1553 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1554 RegId(CCRegClass, reg_idx)); 1555 1556 regFile.setCCReg(phys_reg, val); 1557} 1558 1559template <class Impl> 1560TheISA::PCState 1561FullO3CPU<Impl>::pcState(ThreadID tid) 1562{ 1563 return commit.pcState(tid); 1564} 1565 1566template <class Impl> 1567void 1568FullO3CPU<Impl>::pcState(const TheISA::PCState &val, ThreadID tid) 1569{ 1570 commit.pcState(val, tid); 1571} 1572 1573template <class Impl> 1574Addr 1575FullO3CPU<Impl>::instAddr(ThreadID tid) 1576{ 1577 return commit.instAddr(tid); 1578} 1579 1580template <class Impl> 1581Addr 1582FullO3CPU<Impl>::nextInstAddr(ThreadID tid) 1583{ 1584 return commit.nextInstAddr(tid); 1585} 1586 1587template <class Impl> 1588MicroPC 1589FullO3CPU<Impl>::microPC(ThreadID tid) 1590{ 1591 return commit.microPC(tid); 1592} 1593 1594template <class Impl> 1595void 1596FullO3CPU<Impl>::squashFromTC(ThreadID tid) 1597{ 1598 this->thread[tid]->noSquashFromTC = true; 1599 this->commit.generateTCEvent(tid); 1600} 1601 1602template <class Impl> 1603typename FullO3CPU<Impl>::ListIt 1604FullO3CPU<Impl>::addInst(const DynInstPtr &inst) 1605{ 1606 instList.push_back(inst); 1607 1608 return --(instList.end()); 1609} 1610 1611template <class Impl> 1612void 1613FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst) 1614{ 1615 // Keep an instruction count. 1616 if (!inst->isMicroop() || inst->isLastMicroop()) { 1617 thread[tid]->numInst++; 1618 thread[tid]->numInsts++; 1619 committedInsts[tid]++; 1620 system->totalNumInsts++; 1621 1622 // Check for instruction-count-based events. 1623 comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); 1624 system->instEventQueue.serviceEvents(system->totalNumInsts); 1625 } 1626 thread[tid]->numOp++; 1627 thread[tid]->numOps++; 1628 committedOps[tid]++; 1629 1630 probeInstCommit(inst->staticInst, inst->instAddr()); 1631} 1632 1633template <class Impl> 1634void 1635FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst) 1636{ 1637 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s " 1638 "[sn:%lli]\n", 1639 inst->threadNumber, inst->pcState(), inst->seqNum); 1640 1641 removeInstsThisCycle = true; 1642 1643 // Remove the front instruction. 1644 removeList.push(inst->getInstListIt()); 1645} 1646 1647template <class Impl> 1648void 1649FullO3CPU<Impl>::removeInstsNotInROB(ThreadID tid) 1650{ 1651 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" 1652 " list.\n", tid); 1653 1654 ListIt end_it; 1655 1656 bool rob_empty = false; 1657 1658 if (instList.empty()) { 1659 return; 1660 } else if (rob.isEmpty(tid)) { 1661 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); 1662 end_it = instList.begin(); 1663 rob_empty = true; 1664 } else { 1665 end_it = (rob.readTailInst(tid))->getInstListIt(); 1666 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); 1667 } 1668 1669 removeInstsThisCycle = true; 1670 1671 ListIt inst_it = instList.end(); 1672 1673 inst_it--; 1674 1675 // Walk through the instruction list, removing any instructions 1676 // that were inserted after the given instruction iterator, end_it. 1677 while (inst_it != end_it) { 1678 assert(!instList.empty()); 1679 1680 squashInstIt(inst_it, tid); 1681 1682 inst_it--; 1683 } 1684 1685 // If the ROB was empty, then we actually need to remove the first 1686 // instruction as well. 1687 if (rob_empty) { 1688 squashInstIt(inst_it, tid); 1689 } 1690} 1691 1692template <class Impl> 1693void 1694FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) 1695{ 1696 assert(!instList.empty()); 1697 1698 removeInstsThisCycle = true; 1699 1700 ListIt inst_iter = instList.end(); 1701 1702 inst_iter--; 1703 1704 DPRINTF(O3CPU, "Deleting instructions from instruction " 1705 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", 1706 tid, seq_num, (*inst_iter)->seqNum); 1707 1708 while ((*inst_iter)->seqNum > seq_num) { 1709 1710 bool break_loop = (inst_iter == instList.begin()); 1711 1712 squashInstIt(inst_iter, tid); 1713 1714 inst_iter--; 1715 1716 if (break_loop) 1717 break; 1718 } 1719} 1720 1721template <class Impl> 1722inline void 1723FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, ThreadID tid) 1724{ 1725 if ((*instIt)->threadNumber == tid) { 1726 DPRINTF(O3CPU, "Squashing instruction, " 1727 "[tid:%i] [sn:%lli] PC %s\n", 1728 (*instIt)->threadNumber, 1729 (*instIt)->seqNum, 1730 (*instIt)->pcState()); 1731 1732 // Mark it as squashed. 1733 (*instIt)->setSquashed(); 1734 1735 // @todo: Formulate a consistent method for deleting 1736 // instructions from the instruction list 1737 // Remove the instruction from the list. 1738 removeList.push(instIt); 1739 } 1740} 1741 1742template <class Impl> 1743void 1744FullO3CPU<Impl>::cleanUpRemovedInsts() 1745{ 1746 while (!removeList.empty()) { 1747 DPRINTF(O3CPU, "Removing instruction, " 1748 "[tid:%i] [sn:%lli] PC %s\n", 1749 (*removeList.front())->threadNumber, 1750 (*removeList.front())->seqNum, 1751 (*removeList.front())->pcState()); 1752 1753 instList.erase(removeList.front()); 1754 1755 removeList.pop(); 1756 } 1757 1758 removeInstsThisCycle = false; 1759} 1760/* 1761template <class Impl> 1762void 1763FullO3CPU<Impl>::removeAllInsts() 1764{ 1765 instList.clear(); 1766} 1767*/ 1768template <class Impl> 1769void 1770FullO3CPU<Impl>::dumpInsts() 1771{ 1772 int num = 0; 1773 1774 ListIt inst_list_it = instList.begin(); 1775 1776 cprintf("Dumping Instruction List\n"); 1777 1778 while (inst_list_it != instList.end()) { 1779 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" 1780 "Squashed:%i\n\n", 1781 num, (*inst_list_it)->instAddr(), (*inst_list_it)->threadNumber, 1782 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), 1783 (*inst_list_it)->isSquashed()); 1784 inst_list_it++; 1785 ++num; 1786 } 1787} 1788/* 1789template <class Impl> 1790void 1791FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst) 1792{ 1793 iew.wakeDependents(inst); 1794} 1795*/ 1796template <class Impl> 1797void 1798FullO3CPU<Impl>::wakeCPU() 1799{ 1800 if (activityRec.active() || tickEvent.scheduled()) { 1801 DPRINTF(Activity, "CPU already running.\n"); 1802 return; 1803 } 1804 1805 DPRINTF(Activity, "Waking up CPU\n"); 1806 1807 Cycles cycles(curCycle() - lastRunningCycle); 1808 // @todo: This is an oddity that is only here to match the stats 1809 if (cycles > 1) { 1810 --cycles; 1811 idleCycles += cycles; 1812 numCycles += cycles; 1813 } 1814 1815 schedule(tickEvent, clockEdge()); 1816} 1817 1818template <class Impl> 1819void 1820FullO3CPU<Impl>::wakeup(ThreadID tid) 1821{ 1822 if (this->thread[tid]->status() != ThreadContext::Suspended) 1823 return; 1824 1825 this->wakeCPU(); 1826 1827 DPRINTF(Quiesce, "Suspended Processor woken\n"); 1828 this->threadContexts[tid]->activate(); 1829} 1830 1831template <class Impl> 1832ThreadID 1833FullO3CPU<Impl>::getFreeTid() 1834{ 1835 for (ThreadID tid = 0; tid < numThreads; tid++) { 1836 if (!tids[tid]) { 1837 tids[tid] = true; 1838 return tid; 1839 } 1840 } 1841 1842 return InvalidThreadID; 1843} 1844 1845template <class Impl> 1846void 1847FullO3CPU<Impl>::updateThreadPriority() 1848{ 1849 if (activeThreads.size() > 1) { 1850 //DEFAULT TO ROUND ROBIN SCHEME 1851 //e.g. Move highest priority to end of thread list 1852 list<ThreadID>::iterator list_begin = activeThreads.begin(); 1853 1854 unsigned high_thread = *list_begin; 1855 1856 activeThreads.erase(list_begin); 1857 1858 activeThreads.push_back(high_thread); 1859 } 1860} 1861 1862template <class Impl> 1863void 1864FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid) 1865{ 1866 DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid); 1867 1868 // the thread trying to exit can't be already halted 1869 assert(tcBase(tid)->status() != ThreadContext::Halted); 1870 1871 // make sure the thread has not been added to the list yet 1872 assert(exitingThreads.count(tid) == 0); 1873 1874 // add the thread to exitingThreads list to mark that this thread is 1875 // trying to exit. The boolean value in the pair denotes if a thread is 1876 // ready to exit. The thread is not ready to exit until the corresponding 1877 // exit trap event is processed in the future. Until then, it'll be still 1878 // an active thread that is trying to exit. 1879 exitingThreads.emplace(std::make_pair(tid, false)); 1880} 1881 1882template <class Impl> 1883bool 1884FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const 1885{ 1886 return exitingThreads.count(tid) == 1; 1887} 1888 1889template <class Impl> 1890void 1891FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid) 1892{ 1893 assert(exitingThreads.count(tid) == 1); 1894 1895 // exit trap event has been processed. Now, the thread is ready to exit 1896 // and be removed from the CPU. 1897 exitingThreads[tid] = true; 1898 1899 // we schedule a threadExitEvent in the next cycle to properly clean 1900 // up the thread's states in the pipeline. threadExitEvent has lower 1901 // priority than tickEvent, so the cleanup will happen at the very end 1902 // of the next cycle after all pipeline stages complete their operations. 1903 // We want all stages to complete squashing instructions before doing 1904 // the cleanup. 1905 if (!threadExitEvent.scheduled()) { 1906 schedule(threadExitEvent, nextCycle()); 1907 } 1908} 1909 1910template <class Impl> 1911void 1912FullO3CPU<Impl>::exitThreads() 1913{ 1914 // there must be at least one thread trying to exit 1915 assert(exitingThreads.size() > 0); 1916 1917 // terminate all threads that are ready to exit 1918 auto it = exitingThreads.begin(); 1919 while (it != exitingThreads.end()) { 1920 ThreadID thread_id = it->first; 1921 bool readyToExit = it->second; 1922 1923 if (readyToExit) { 1924 DPRINTF(O3CPU, "Exiting thread %d\n", thread_id); 1925 haltContext(thread_id); 1926 tcBase(thread_id)->setStatus(ThreadContext::Halted); 1927 it = exitingThreads.erase(it); 1928 } else { 1929 it++; 1930 } 1931 } 1932} 1933 1934// Forward declaration of FullO3CPU. 1935template class FullO3CPU<O3CPUImpl>; 1936