cpu.cc revision 13908:6ab98c626b06
1/* 2 * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2006 The Regents of The University of Michigan 16 * Copyright (c) 2011 Regents of the University of California 17 * All rights reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions are 21 * met: redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer; 23 * redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution; 26 * neither the name of the copyright holders nor the names of its 27 * contributors may be used to endorse or promote products derived from 28 * this software without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 * 42 * Authors: Kevin Lim 43 * Korey Sewell 44 * Rick Strong 45 */ 46 47#include "cpu/o3/cpu.hh" 48 49#include "arch/generic/traits.hh" 50#include "arch/kernel_stats.hh" 51#include "config/the_isa.hh" 52#include "cpu/activity.hh" 53#include "cpu/checker/cpu.hh" 54#include "cpu/checker/thread_context.hh" 55#include "cpu/o3/isa_specific.hh" 56#include "cpu/o3/thread_context.hh" 57#include "cpu/quiesce_event.hh" 58#include "cpu/simple_thread.hh" 59#include "cpu/thread_context.hh" 60#include "debug/Activity.hh" 61#include "debug/Drain.hh" 62#include "debug/O3CPU.hh" 63#include "debug/Quiesce.hh" 64#include "enums/MemoryMode.hh" 65#include "sim/core.hh" 66#include "sim/full_system.hh" 67#include "sim/process.hh" 68#include "sim/stat_control.hh" 69#include "sim/system.hh" 70 71#if THE_ISA == ALPHA_ISA 72#include "arch/alpha/osfpal.hh" 73#include "debug/Activity.hh" 74 75#endif 76 77struct BaseCPUParams; 78 79using namespace TheISA; 80using namespace std; 81 82BaseO3CPU::BaseO3CPU(BaseCPUParams *params) 83 : BaseCPU(params) 84{ 85} 86 87void 88BaseO3CPU::regStats() 89{ 90 BaseCPU::regStats(); 91} 92 93template<class Impl> 94bool 95FullO3CPU<Impl>::IcachePort::recvTimingResp(PacketPtr pkt) 96{ 97 DPRINTF(O3CPU, "Fetch unit received timing\n"); 98 // We shouldn't ever get a cacheable block in Modified state 99 assert(pkt->req->isUncacheable() || 100 !(pkt->cacheResponding() && !pkt->hasSharers())); 101 fetch->processCacheCompletion(pkt); 102 103 return true; 104} 105 106template<class Impl> 107void 108FullO3CPU<Impl>::IcachePort::recvReqRetry() 109{ 110 fetch->recvReqRetry(); 111} 112 113template <class Impl> 114bool 115FullO3CPU<Impl>::DcachePort::recvTimingResp(PacketPtr pkt) 116{ 117 return lsq->recvTimingResp(pkt); 118} 119 120template <class Impl> 121void 122FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt) 123{ 124 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { 125 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { 126 cpu->wakeup(tid); 127 } 128 } 129 lsq->recvTimingSnoopReq(pkt); 130} 131 132template <class Impl> 133void 134FullO3CPU<Impl>::DcachePort::recvReqRetry() 135{ 136 lsq->recvReqRetry(); 137} 138 139template <class Impl> 140FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) 141 : BaseO3CPU(params), 142 itb(params->itb), 143 dtb(params->dtb), 144 tickEvent([this]{ tick(); }, "FullO3CPU tick", 145 false, Event::CPU_Tick_Pri), 146 threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads", 147 false, Event::CPU_Exit_Pri), 148#ifndef NDEBUG 149 instcount(0), 150#endif 151 removeInstsThisCycle(false), 152 fetch(this, params), 153 decode(this, params), 154 rename(this, params), 155 iew(this, params), 156 commit(this, params), 157 158 /* It is mandatory that all SMT threads use the same renaming mode as 159 * they are sharing registers and rename */ 160 vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])), 161 regFile(params->numPhysIntRegs, 162 params->numPhysFloatRegs, 163 params->numPhysVecRegs, 164 params->numPhysVecPredRegs, 165 params->numPhysCCRegs, 166 vecMode), 167 168 freeList(name() + ".freelist", ®File), 169 170 rob(this, params), 171 172 scoreboard(name() + ".scoreboard", 173 regFile.totalNumPhysRegs()), 174 175 isa(numThreads, NULL), 176 177 icachePort(&fetch, this), 178 dcachePort(&iew.ldstQueue, this), 179 180 timeBuffer(params->backComSize, params->forwardComSize), 181 fetchQueue(params->backComSize, params->forwardComSize), 182 decodeQueue(params->backComSize, params->forwardComSize), 183 renameQueue(params->backComSize, params->forwardComSize), 184 iewQueue(params->backComSize, params->forwardComSize), 185 activityRec(name(), NumStages, 186 params->backComSize + params->forwardComSize, 187 params->activity), 188 189 globalSeqNum(1), 190 system(params->system), 191 lastRunningCycle(curCycle()) 192{ 193 if (!params->switched_out) { 194 _status = Running; 195 } else { 196 _status = SwitchedOut; 197 } 198 199 if (params->checker) { 200 BaseCPU *temp_checker = params->checker; 201 checker = dynamic_cast<Checker<Impl> *>(temp_checker); 202 checker->setIcachePort(&icachePort); 203 checker->setSystem(params->system); 204 } else { 205 checker = NULL; 206 } 207 208 if (!FullSystem) { 209 thread.resize(numThreads); 210 tids.resize(numThreads); 211 } 212 213 // The stages also need their CPU pointer setup. However this 214 // must be done at the upper level CPU because they have pointers 215 // to the upper level CPU, and not this FullO3CPU. 216 217 // Set up Pointers to the activeThreads list for each stage 218 fetch.setActiveThreads(&activeThreads); 219 decode.setActiveThreads(&activeThreads); 220 rename.setActiveThreads(&activeThreads); 221 iew.setActiveThreads(&activeThreads); 222 commit.setActiveThreads(&activeThreads); 223 224 // Give each of the stages the time buffer they will use. 225 fetch.setTimeBuffer(&timeBuffer); 226 decode.setTimeBuffer(&timeBuffer); 227 rename.setTimeBuffer(&timeBuffer); 228 iew.setTimeBuffer(&timeBuffer); 229 commit.setTimeBuffer(&timeBuffer); 230 231 // Also setup each of the stages' queues. 232 fetch.setFetchQueue(&fetchQueue); 233 decode.setFetchQueue(&fetchQueue); 234 commit.setFetchQueue(&fetchQueue); 235 decode.setDecodeQueue(&decodeQueue); 236 rename.setDecodeQueue(&decodeQueue); 237 rename.setRenameQueue(&renameQueue); 238 iew.setRenameQueue(&renameQueue); 239 iew.setIEWQueue(&iewQueue); 240 commit.setIEWQueue(&iewQueue); 241 commit.setRenameQueue(&renameQueue); 242 243 commit.setIEWStage(&iew); 244 rename.setIEWStage(&iew); 245 rename.setCommitStage(&commit); 246 247 ThreadID active_threads; 248 if (FullSystem) { 249 active_threads = 1; 250 } else { 251 active_threads = params->workload.size(); 252 253 if (active_threads > Impl::MaxThreads) { 254 panic("Workload Size too large. Increase the 'MaxThreads' " 255 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) " 256 "or edit your workload size."); 257 } 258 } 259 260 //Make Sure That this a Valid Architeture 261 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); 262 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); 263 assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); 264 assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); 265 assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); 266 267 rename.setScoreboard(&scoreboard); 268 iew.setScoreboard(&scoreboard); 269 270 // Setup the rename map for whichever stages need it. 271 for (ThreadID tid = 0; tid < numThreads; tid++) { 272 isa[tid] = params->isa[tid]; 273 assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0])); 274 275 // Only Alpha has an FP zero register, so for other ISAs we 276 // use an invalid FP register index to avoid special treatment 277 // of any valid FP reg. 278 RegIndex invalidFPReg = TheISA::NumFloatRegs + 1; 279 RegIndex fpZeroReg = 280 (THE_ISA == ALPHA_ISA) ? TheISA::ZeroReg : invalidFPReg; 281 282 commitRenameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 283 &freeList, 284 vecMode); 285 286 renameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 287 &freeList, vecMode); 288 } 289 290 // Initialize rename map to assign physical registers to the 291 // architectural registers for active threads only. 292 for (ThreadID tid = 0; tid < active_threads; tid++) { 293 for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) { 294 // Note that we can't use the rename() method because we don't 295 // want special treatment for the zero register at this point 296 PhysRegIdPtr phys_reg = freeList.getIntReg(); 297 renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 298 commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 299 } 300 301 for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) { 302 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 303 renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg); 304 commitRenameMap[tid].setEntry( 305 RegId(FloatRegClass, ridx), phys_reg); 306 } 307 308 /* Here we need two 'interfaces' the 'whole register' and the 309 * 'register element'. At any point only one of them will be 310 * active. */ 311 if (vecMode == Enums::Full) { 312 /* Initialize the full-vector interface */ 313 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 314 RegId rid = RegId(VecRegClass, ridx); 315 PhysRegIdPtr phys_reg = freeList.getVecReg(); 316 renameMap[tid].setEntry(rid, phys_reg); 317 commitRenameMap[tid].setEntry(rid, phys_reg); 318 } 319 } else { 320 /* Initialize the vector-element interface */ 321 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 322 for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg; 323 ++ldx) { 324 RegId lrid = RegId(VecElemClass, ridx, ldx); 325 PhysRegIdPtr phys_elem = freeList.getVecElem(); 326 renameMap[tid].setEntry(lrid, phys_elem); 327 commitRenameMap[tid].setEntry(lrid, phys_elem); 328 } 329 } 330 } 331 332 for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { 333 PhysRegIdPtr phys_reg = freeList.getVecPredReg(); 334 renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); 335 commitRenameMap[tid].setEntry( 336 RegId(VecPredRegClass, ridx), phys_reg); 337 } 338 339 for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) { 340 PhysRegIdPtr phys_reg = freeList.getCCReg(); 341 renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 342 commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 343 } 344 } 345 346 rename.setRenameMap(renameMap); 347 commit.setRenameMap(commitRenameMap); 348 rename.setFreeList(&freeList); 349 350 // Setup the ROB for whichever stages need it. 351 commit.setROB(&rob); 352 353 lastActivatedCycle = 0; 354#if 0 355 // Give renameMap & rename stage access to the freeList; 356 for (ThreadID tid = 0; tid < numThreads; tid++) 357 globalSeqNum[tid] = 1; 358#endif 359 360 DPRINTF(O3CPU, "Creating O3CPU object.\n"); 361 362 // Setup any thread state. 363 this->thread.resize(this->numThreads); 364 365 for (ThreadID tid = 0; tid < this->numThreads; ++tid) { 366 if (FullSystem) { 367 // SMT is not supported in FS mode yet. 368 assert(this->numThreads == 1); 369 this->thread[tid] = new Thread(this, 0, NULL); 370 } else { 371 if (tid < params->workload.size()) { 372 DPRINTF(O3CPU, "Workload[%i] process is %#x", 373 tid, this->thread[tid]); 374 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 375 (typename Impl::O3CPU *)(this), 376 tid, params->workload[tid]); 377 378 //usedTids[tid] = true; 379 //threadMap[tid] = tid; 380 } else { 381 //Allocate Empty thread so M5 can use later 382 //when scheduling threads to CPU 383 Process* dummy_proc = NULL; 384 385 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 386 (typename Impl::O3CPU *)(this), 387 tid, dummy_proc); 388 //usedTids[tid] = false; 389 } 390 } 391 392 ThreadContext *tc; 393 394 // Setup the TC that will serve as the interface to the threads/CPU. 395 O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>; 396 397 tc = o3_tc; 398 399 // If we're using a checker, then the TC should be the 400 // CheckerThreadContext. 401 if (params->checker) { 402 tc = new CheckerThreadContext<O3ThreadContext<Impl> >( 403 o3_tc, this->checker); 404 } 405 406 o3_tc->cpu = (typename Impl::O3CPU *)(this); 407 assert(o3_tc->cpu); 408 o3_tc->thread = this->thread[tid]; 409 410 // Setup quiesce event. 411 this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc); 412 413 // Give the thread the TC. 414 this->thread[tid]->tc = tc; 415 416 // Add the TC to the CPU's list of TC's. 417 this->threadContexts.push_back(tc); 418 } 419 420 // FullO3CPU always requires an interrupt controller. 421 if (!params->switched_out && interrupts.empty()) { 422 fatal("FullO3CPU %s has no interrupt controller.\n" 423 "Ensure createInterruptController() is called.\n", name()); 424 } 425 426 for (ThreadID tid = 0; tid < this->numThreads; tid++) 427 this->thread[tid]->setFuncExeInst(0); 428} 429 430template <class Impl> 431FullO3CPU<Impl>::~FullO3CPU() 432{ 433} 434 435template <class Impl> 436void 437FullO3CPU<Impl>::regProbePoints() 438{ 439 BaseCPU::regProbePoints(); 440 441 ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete"); 442 ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete"); 443 444 fetch.regProbePoints(); 445 rename.regProbePoints(); 446 iew.regProbePoints(); 447 commit.regProbePoints(); 448} 449 450template <class Impl> 451void 452FullO3CPU<Impl>::regStats() 453{ 454 BaseO3CPU::regStats(); 455 456 // Register any of the O3CPU's stats here. 457 timesIdled 458 .name(name() + ".timesIdled") 459 .desc("Number of times that the entire CPU went into an idle state and" 460 " unscheduled itself") 461 .prereq(timesIdled); 462 463 idleCycles 464 .name(name() + ".idleCycles") 465 .desc("Total number of cycles that the CPU has spent unscheduled due " 466 "to idling") 467 .prereq(idleCycles); 468 469 quiesceCycles 470 .name(name() + ".quiesceCycles") 471 .desc("Total number of cycles that CPU has spent quiesced or waiting " 472 "for an interrupt") 473 .prereq(quiesceCycles); 474 475 // Number of Instructions simulated 476 // -------------------------------- 477 // Should probably be in Base CPU but need templated 478 // MaxThreads so put in here instead 479 committedInsts 480 .init(numThreads) 481 .name(name() + ".committedInsts") 482 .desc("Number of Instructions Simulated") 483 .flags(Stats::total); 484 485 committedOps 486 .init(numThreads) 487 .name(name() + ".committedOps") 488 .desc("Number of Ops (including micro ops) Simulated") 489 .flags(Stats::total); 490 491 cpi 492 .name(name() + ".cpi") 493 .desc("CPI: Cycles Per Instruction") 494 .precision(6); 495 cpi = numCycles / committedInsts; 496 497 totalCpi 498 .name(name() + ".cpi_total") 499 .desc("CPI: Total CPI of All Threads") 500 .precision(6); 501 totalCpi = numCycles / sum(committedInsts); 502 503 ipc 504 .name(name() + ".ipc") 505 .desc("IPC: Instructions Per Cycle") 506 .precision(6); 507 ipc = committedInsts / numCycles; 508 509 totalIpc 510 .name(name() + ".ipc_total") 511 .desc("IPC: Total IPC of All Threads") 512 .precision(6); 513 totalIpc = sum(committedInsts) / numCycles; 514 515 this->fetch.regStats(); 516 this->decode.regStats(); 517 this->rename.regStats(); 518 this->iew.regStats(); 519 this->commit.regStats(); 520 this->rob.regStats(); 521 522 intRegfileReads 523 .name(name() + ".int_regfile_reads") 524 .desc("number of integer regfile reads") 525 .prereq(intRegfileReads); 526 527 intRegfileWrites 528 .name(name() + ".int_regfile_writes") 529 .desc("number of integer regfile writes") 530 .prereq(intRegfileWrites); 531 532 fpRegfileReads 533 .name(name() + ".fp_regfile_reads") 534 .desc("number of floating regfile reads") 535 .prereq(fpRegfileReads); 536 537 fpRegfileWrites 538 .name(name() + ".fp_regfile_writes") 539 .desc("number of floating regfile writes") 540 .prereq(fpRegfileWrites); 541 542 vecRegfileReads 543 .name(name() + ".vec_regfile_reads") 544 .desc("number of vector regfile reads") 545 .prereq(vecRegfileReads); 546 547 vecRegfileWrites 548 .name(name() + ".vec_regfile_writes") 549 .desc("number of vector regfile writes") 550 .prereq(vecRegfileWrites); 551 552 vecPredRegfileReads 553 .name(name() + ".pred_regfile_reads") 554 .desc("number of predicate regfile reads") 555 .prereq(vecPredRegfileReads); 556 557 vecPredRegfileWrites 558 .name(name() + ".pred_regfile_writes") 559 .desc("number of predicate regfile writes") 560 .prereq(vecPredRegfileWrites); 561 562 ccRegfileReads 563 .name(name() + ".cc_regfile_reads") 564 .desc("number of cc regfile reads") 565 .prereq(ccRegfileReads); 566 567 ccRegfileWrites 568 .name(name() + ".cc_regfile_writes") 569 .desc("number of cc regfile writes") 570 .prereq(ccRegfileWrites); 571 572 miscRegfileReads 573 .name(name() + ".misc_regfile_reads") 574 .desc("number of misc regfile reads") 575 .prereq(miscRegfileReads); 576 577 miscRegfileWrites 578 .name(name() + ".misc_regfile_writes") 579 .desc("number of misc regfile writes") 580 .prereq(miscRegfileWrites); 581} 582 583template <class Impl> 584void 585FullO3CPU<Impl>::tick() 586{ 587 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); 588 assert(!switchedOut()); 589 assert(drainState() != DrainState::Drained); 590 591 ++numCycles; 592 updateCycleCounters(BaseCPU::CPU_STATE_ON); 593 594// activity = false; 595 596 //Tick each of the stages 597 fetch.tick(); 598 599 decode.tick(); 600 601 rename.tick(); 602 603 iew.tick(); 604 605 commit.tick(); 606 607 // Now advance the time buffers 608 timeBuffer.advance(); 609 610 fetchQueue.advance(); 611 decodeQueue.advance(); 612 renameQueue.advance(); 613 iewQueue.advance(); 614 615 activityRec.advance(); 616 617 if (removeInstsThisCycle) { 618 cleanUpRemovedInsts(); 619 } 620 621 if (!tickEvent.scheduled()) { 622 if (_status == SwitchedOut) { 623 DPRINTF(O3CPU, "Switched out!\n"); 624 // increment stat 625 lastRunningCycle = curCycle(); 626 } else if (!activityRec.active() || _status == Idle) { 627 DPRINTF(O3CPU, "Idle!\n"); 628 lastRunningCycle = curCycle(); 629 timesIdled++; 630 } else { 631 schedule(tickEvent, clockEdge(Cycles(1))); 632 DPRINTF(O3CPU, "Scheduling next tick!\n"); 633 } 634 } 635 636 if (!FullSystem) 637 updateThreadPriority(); 638 639 tryDrain(); 640} 641 642template <class Impl> 643void 644FullO3CPU<Impl>::init() 645{ 646 BaseCPU::init(); 647 648 for (ThreadID tid = 0; tid < numThreads; ++tid) { 649 // Set noSquashFromTC so that the CPU doesn't squash when initially 650 // setting up registers. 651 thread[tid]->noSquashFromTC = true; 652 // Initialise the ThreadContext's memory proxies 653 thread[tid]->initMemProxies(thread[tid]->getTC()); 654 } 655 656 if (FullSystem && !params()->switched_out) { 657 for (ThreadID tid = 0; tid < numThreads; tid++) { 658 ThreadContext *src_tc = threadContexts[tid]; 659 TheISA::initCPU(src_tc, src_tc->contextId()); 660 } 661 } 662 663 // Clear noSquashFromTC. 664 for (int tid = 0; tid < numThreads; ++tid) 665 thread[tid]->noSquashFromTC = false; 666 667 commit.setThreads(thread); 668} 669 670template <class Impl> 671void 672FullO3CPU<Impl>::startup() 673{ 674 BaseCPU::startup(); 675 for (int tid = 0; tid < numThreads; ++tid) 676 isa[tid]->startup(threadContexts[tid]); 677 678 fetch.startupStage(); 679 decode.startupStage(); 680 iew.startupStage(); 681 rename.startupStage(); 682 commit.startupStage(); 683} 684 685template <class Impl> 686void 687FullO3CPU<Impl>::activateThread(ThreadID tid) 688{ 689 list<ThreadID>::iterator isActive = 690 std::find(activeThreads.begin(), activeThreads.end(), tid); 691 692 DPRINTF(O3CPU, "[tid:%i] Calling activate thread.\n", tid); 693 assert(!switchedOut()); 694 695 if (isActive == activeThreads.end()) { 696 DPRINTF(O3CPU, "[tid:%i] Adding to active threads list\n", 697 tid); 698 699 activeThreads.push_back(tid); 700 } 701} 702 703template <class Impl> 704void 705FullO3CPU<Impl>::deactivateThread(ThreadID tid) 706{ 707 //Remove From Active List, if Active 708 list<ThreadID>::iterator thread_it = 709 std::find(activeThreads.begin(), activeThreads.end(), tid); 710 711 DPRINTF(O3CPU, "[tid:%i] Calling deactivate thread.\n", tid); 712 assert(!switchedOut()); 713 714 if (thread_it != activeThreads.end()) { 715 DPRINTF(O3CPU,"[tid:%i] Removing from active threads list\n", 716 tid); 717 activeThreads.erase(thread_it); 718 } 719 720 fetch.deactivateThread(tid); 721 commit.deactivateThread(tid); 722} 723 724template <class Impl> 725Counter 726FullO3CPU<Impl>::totalInsts() const 727{ 728 Counter total(0); 729 730 ThreadID size = thread.size(); 731 for (ThreadID i = 0; i < size; i++) 732 total += thread[i]->numInst; 733 734 return total; 735} 736 737template <class Impl> 738Counter 739FullO3CPU<Impl>::totalOps() const 740{ 741 Counter total(0); 742 743 ThreadID size = thread.size(); 744 for (ThreadID i = 0; i < size; i++) 745 total += thread[i]->numOp; 746 747 return total; 748} 749 750template <class Impl> 751void 752FullO3CPU<Impl>::activateContext(ThreadID tid) 753{ 754 assert(!switchedOut()); 755 756 // Needs to set each stage to running as well. 757 activateThread(tid); 758 759 // We don't want to wake the CPU if it is drained. In that case, 760 // we just want to flag the thread as active and schedule the tick 761 // event from drainResume() instead. 762 if (drainState() == DrainState::Drained) 763 return; 764 765 // If we are time 0 or if the last activation time is in the past, 766 // schedule the next tick and wake up the fetch unit 767 if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) { 768 scheduleTickEvent(Cycles(0)); 769 770 // Be sure to signal that there's some activity so the CPU doesn't 771 // deschedule itself. 772 activityRec.activity(); 773 fetch.wakeFromQuiesce(); 774 775 Cycles cycles(curCycle() - lastRunningCycle); 776 // @todo: This is an oddity that is only here to match the stats 777 if (cycles != 0) 778 --cycles; 779 quiesceCycles += cycles; 780 781 lastActivatedCycle = curTick(); 782 783 _status = Running; 784 785 BaseCPU::activateContext(tid); 786 } 787} 788 789template <class Impl> 790void 791FullO3CPU<Impl>::suspendContext(ThreadID tid) 792{ 793 DPRINTF(O3CPU,"[tid:%i] Suspending Thread Context.\n", tid); 794 assert(!switchedOut()); 795 796 deactivateThread(tid); 797 798 // If this was the last thread then unschedule the tick event. 799 if (activeThreads.size() == 0) { 800 unscheduleTickEvent(); 801 lastRunningCycle = curCycle(); 802 _status = Idle; 803 } 804 805 DPRINTF(Quiesce, "Suspending Context\n"); 806 807 BaseCPU::suspendContext(tid); 808} 809 810template <class Impl> 811void 812FullO3CPU<Impl>::haltContext(ThreadID tid) 813{ 814 //For now, this is the same as deallocate 815 DPRINTF(O3CPU,"[tid:%i] Halt Context called. Deallocating\n", tid); 816 assert(!switchedOut()); 817 818 deactivateThread(tid); 819 removeThread(tid); 820 821 updateCycleCounters(BaseCPU::CPU_STATE_SLEEP); 822} 823 824template <class Impl> 825void 826FullO3CPU<Impl>::insertThread(ThreadID tid) 827{ 828 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); 829 // Will change now that the PC and thread state is internal to the CPU 830 // and not in the ThreadContext. 831 ThreadContext *src_tc; 832 if (FullSystem) 833 src_tc = system->threadContexts[tid]; 834 else 835 src_tc = tcBase(tid); 836 837 //Bind Int Regs to Rename Map 838 839 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs; 840 reg_id.index()++) { 841 PhysRegIdPtr phys_reg = freeList.getIntReg(); 842 renameMap[tid].setEntry(reg_id, phys_reg); 843 scoreboard.setReg(phys_reg); 844 } 845 846 //Bind Float Regs to Rename Map 847 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs; 848 reg_id.index()++) { 849 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 850 renameMap[tid].setEntry(reg_id, phys_reg); 851 scoreboard.setReg(phys_reg); 852 } 853 854 //Bind condition-code Regs to Rename Map 855 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; 856 reg_id.index()++) { 857 PhysRegIdPtr phys_reg = freeList.getCCReg(); 858 renameMap[tid].setEntry(reg_id, phys_reg); 859 scoreboard.setReg(phys_reg); 860 } 861 862 //Copy Thread Data Into RegFile 863 //this->copyFromTC(tid); 864 865 //Set PC/NPC/NNPC 866 pcState(src_tc->pcState(), tid); 867 868 src_tc->setStatus(ThreadContext::Active); 869 870 activateContext(tid); 871 872 //Reset ROB/IQ/LSQ Entries 873 commit.rob->resetEntries(); 874} 875 876template <class Impl> 877void 878FullO3CPU<Impl>::removeThread(ThreadID tid) 879{ 880 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); 881 882 // Copy Thread Data From RegFile 883 // If thread is suspended, it might be re-allocated 884 // this->copyToTC(tid); 885 886 887 // @todo: 2-27-2008: Fix how we free up rename mappings 888 // here to alleviate the case for double-freeing registers 889 // in SMT workloads. 890 891 // clear all thread-specific states in each stage of the pipeline 892 // since this thread is going to be completely removed from the CPU 893 commit.clearStates(tid); 894 fetch.clearStates(tid); 895 decode.clearStates(tid); 896 rename.clearStates(tid); 897 iew.clearStates(tid); 898 899 // at this step, all instructions in the pipeline should be already 900 // either committed successfully or squashed. All thread-specific 901 // queues in the pipeline must be empty. 902 assert(iew.instQueue.getCount(tid) == 0); 903 assert(iew.ldstQueue.getCount(tid) == 0); 904 assert(commit.rob->isEmpty(tid)); 905 906 // Reset ROB/IQ/LSQ Entries 907 908 // Commented out for now. This should be possible to do by 909 // telling all the pipeline stages to drain first, and then 910 // checking until the drain completes. Once the pipeline is 911 // drained, call resetEntries(). - 10-09-06 ktlim 912/* 913 if (activeThreads.size() >= 1) { 914 commit.rob->resetEntries(); 915 iew.resetEntries(); 916 } 917*/ 918} 919 920template <class Impl> 921bool 922FullO3CPU<Impl>::simPalCheck(int palFunc, ThreadID tid) 923{ 924#if THE_ISA == ALPHA_ISA 925 auto *stats = dynamic_cast<AlphaISA::Kernel::Statistics *>( 926 this->thread[tid]->kernelStats); 927 if (stats) 928 stats->callpal(palFunc, this->threadContexts[tid]); 929 930 switch (palFunc) { 931 case PAL::halt: 932 halt(); 933 if (--System::numSystemsRunning == 0) 934 exitSimLoop("all cpus halted"); 935 break; 936 937 case PAL::bpt: 938 case PAL::bugchk: 939 if (this->system->breakpoint()) 940 return false; 941 break; 942 } 943#endif 944 return true; 945} 946 947template <class Impl> 948void 949FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist) 950{ 951 auto pc = this->pcState(tid); 952 953 // new_mode is the new vector renaming mode 954 auto new_mode = RenameMode<TheISA::ISA>::mode(pc); 955 956 // We update vecMode only if there has been a change 957 if (new_mode != vecMode) { 958 vecMode = new_mode; 959 960 renameMap[tid].switchMode(vecMode); 961 commitRenameMap[tid].switchMode(vecMode); 962 renameMap[tid].switchFreeList(freelist); 963 } 964} 965 966template <class Impl> 967Fault 968FullO3CPU<Impl>::getInterrupts() 969{ 970 // Check if there are any outstanding interrupts 971 return this->interrupts[0]->getInterrupt(this->threadContexts[0]); 972} 973 974template <class Impl> 975void 976FullO3CPU<Impl>::processInterrupts(const Fault &interrupt) 977{ 978 // Check for interrupts here. For now can copy the code that 979 // exists within isa_fullsys_traits.hh. Also assume that thread 0 980 // is the one that handles the interrupts. 981 // @todo: Possibly consolidate the interrupt checking code. 982 // @todo: Allow other threads to handle interrupts. 983 984 assert(interrupt != NoFault); 985 this->interrupts[0]->updateIntrInfo(this->threadContexts[0]); 986 987 DPRINTF(O3CPU, "Interrupt %s being handled\n", interrupt->name()); 988 this->trap(interrupt, 0, nullptr); 989} 990 991template <class Impl> 992void 993FullO3CPU<Impl>::trap(const Fault &fault, ThreadID tid, 994 const StaticInstPtr &inst) 995{ 996 // Pass the thread's TC into the invoke method. 997 fault->invoke(this->threadContexts[tid], inst); 998} 999 1000template <class Impl> 1001void 1002FullO3CPU<Impl>::syscall(int64_t callnum, ThreadID tid, Fault *fault) 1003{ 1004 DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); 1005 1006 DPRINTF(Activity,"Activity: syscall() called.\n"); 1007 1008 // Temporarily increase this by one to account for the syscall 1009 // instruction. 1010 ++(this->thread[tid]->funcExeInst); 1011 1012 // Execute the actual syscall. 1013 this->thread[tid]->syscall(callnum, fault); 1014 1015 // Decrease funcExeInst by one as the normal commit will handle 1016 // incrementing it. 1017 --(this->thread[tid]->funcExeInst); 1018} 1019 1020template <class Impl> 1021void 1022FullO3CPU<Impl>::serializeThread(CheckpointOut &cp, ThreadID tid) const 1023{ 1024 thread[tid]->serialize(cp); 1025} 1026 1027template <class Impl> 1028void 1029FullO3CPU<Impl>::unserializeThread(CheckpointIn &cp, ThreadID tid) 1030{ 1031 thread[tid]->unserialize(cp); 1032} 1033 1034template <class Impl> 1035DrainState 1036FullO3CPU<Impl>::drain() 1037{ 1038 // Deschedule any power gating event (if any) 1039 deschedulePowerGatingEvent(); 1040 1041 // If the CPU isn't doing anything, then return immediately. 1042 if (switchedOut()) 1043 return DrainState::Drained; 1044 1045 DPRINTF(Drain, "Draining...\n"); 1046 1047 // We only need to signal a drain to the commit stage as this 1048 // initiates squashing controls the draining. Once the commit 1049 // stage commits an instruction where it is safe to stop, it'll 1050 // squash the rest of the instructions in the pipeline and force 1051 // the fetch stage to stall. The pipeline will be drained once all 1052 // in-flight instructions have retired. 1053 commit.drain(); 1054 1055 // Wake the CPU and record activity so everything can drain out if 1056 // the CPU was not able to immediately drain. 1057 if (!isDrained()) { 1058 // If a thread is suspended, wake it up so it can be drained 1059 for (auto t : threadContexts) { 1060 if (t->status() == ThreadContext::Suspended){ 1061 DPRINTF(Drain, "Currently suspended so activate %i \n", 1062 t->threadId()); 1063 t->activate(); 1064 // As the thread is now active, change the power state as well 1065 activateContext(t->threadId()); 1066 } 1067 } 1068 1069 wakeCPU(); 1070 activityRec.activity(); 1071 1072 DPRINTF(Drain, "CPU not drained\n"); 1073 1074 return DrainState::Draining; 1075 } else { 1076 DPRINTF(Drain, "CPU is already drained\n"); 1077 if (tickEvent.scheduled()) 1078 deschedule(tickEvent); 1079 1080 // Flush out any old data from the time buffers. In 1081 // particular, there might be some data in flight from the 1082 // fetch stage that isn't visible in any of the CPU buffers we 1083 // test in isDrained(). 1084 for (int i = 0; i < timeBuffer.getSize(); ++i) { 1085 timeBuffer.advance(); 1086 fetchQueue.advance(); 1087 decodeQueue.advance(); 1088 renameQueue.advance(); 1089 iewQueue.advance(); 1090 } 1091 1092 drainSanityCheck(); 1093 return DrainState::Drained; 1094 } 1095} 1096 1097template <class Impl> 1098bool 1099FullO3CPU<Impl>::tryDrain() 1100{ 1101 if (drainState() != DrainState::Draining || !isDrained()) 1102 return false; 1103 1104 if (tickEvent.scheduled()) 1105 deschedule(tickEvent); 1106 1107 DPRINTF(Drain, "CPU done draining, processing drain event\n"); 1108 signalDrainDone(); 1109 1110 return true; 1111} 1112 1113template <class Impl> 1114void 1115FullO3CPU<Impl>::drainSanityCheck() const 1116{ 1117 assert(isDrained()); 1118 fetch.drainSanityCheck(); 1119 decode.drainSanityCheck(); 1120 rename.drainSanityCheck(); 1121 iew.drainSanityCheck(); 1122 commit.drainSanityCheck(); 1123} 1124 1125template <class Impl> 1126bool 1127FullO3CPU<Impl>::isDrained() const 1128{ 1129 bool drained(true); 1130 1131 if (!instList.empty() || !removeList.empty()) { 1132 DPRINTF(Drain, "Main CPU structures not drained.\n"); 1133 drained = false; 1134 } 1135 1136 if (!fetch.isDrained()) { 1137 DPRINTF(Drain, "Fetch not drained.\n"); 1138 drained = false; 1139 } 1140 1141 if (!decode.isDrained()) { 1142 DPRINTF(Drain, "Decode not drained.\n"); 1143 drained = false; 1144 } 1145 1146 if (!rename.isDrained()) { 1147 DPRINTF(Drain, "Rename not drained.\n"); 1148 drained = false; 1149 } 1150 1151 if (!iew.isDrained()) { 1152 DPRINTF(Drain, "IEW not drained.\n"); 1153 drained = false; 1154 } 1155 1156 if (!commit.isDrained()) { 1157 DPRINTF(Drain, "Commit not drained.\n"); 1158 drained = false; 1159 } 1160 1161 return drained; 1162} 1163 1164template <class Impl> 1165void 1166FullO3CPU<Impl>::commitDrained(ThreadID tid) 1167{ 1168 fetch.drainStall(tid); 1169} 1170 1171template <class Impl> 1172void 1173FullO3CPU<Impl>::drainResume() 1174{ 1175 if (switchedOut()) 1176 return; 1177 1178 DPRINTF(Drain, "Resuming...\n"); 1179 verifyMemoryMode(); 1180 1181 fetch.drainResume(); 1182 commit.drainResume(); 1183 1184 _status = Idle; 1185 for (ThreadID i = 0; i < thread.size(); i++) { 1186 if (thread[i]->status() == ThreadContext::Active) { 1187 DPRINTF(Drain, "Activating thread: %i\n", i); 1188 activateThread(i); 1189 _status = Running; 1190 } 1191 } 1192 1193 assert(!tickEvent.scheduled()); 1194 if (_status == Running) 1195 schedule(tickEvent, nextCycle()); 1196 1197 // Reschedule any power gating event (if any) 1198 schedulePowerGatingEvent(); 1199} 1200 1201template <class Impl> 1202void 1203FullO3CPU<Impl>::switchOut() 1204{ 1205 DPRINTF(O3CPU, "Switching out\n"); 1206 BaseCPU::switchOut(); 1207 1208 activityRec.reset(); 1209 1210 _status = SwitchedOut; 1211 1212 if (checker) 1213 checker->switchOut(); 1214} 1215 1216template <class Impl> 1217void 1218FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) 1219{ 1220 BaseCPU::takeOverFrom(oldCPU); 1221 1222 fetch.takeOverFrom(); 1223 decode.takeOverFrom(); 1224 rename.takeOverFrom(); 1225 iew.takeOverFrom(); 1226 commit.takeOverFrom(); 1227 1228 assert(!tickEvent.scheduled()); 1229 1230 FullO3CPU<Impl> *oldO3CPU = dynamic_cast<FullO3CPU<Impl>*>(oldCPU); 1231 if (oldO3CPU) 1232 globalSeqNum = oldO3CPU->globalSeqNum; 1233 1234 lastRunningCycle = curCycle(); 1235 _status = Idle; 1236} 1237 1238template <class Impl> 1239void 1240FullO3CPU<Impl>::verifyMemoryMode() const 1241{ 1242 if (!system->isTimingMode()) { 1243 fatal("The O3 CPU requires the memory system to be in " 1244 "'timing' mode.\n"); 1245 } 1246} 1247 1248template <class Impl> 1249RegVal 1250FullO3CPU<Impl>::readMiscRegNoEffect(int misc_reg, ThreadID tid) const 1251{ 1252 return this->isa[tid]->readMiscRegNoEffect(misc_reg); 1253} 1254 1255template <class Impl> 1256RegVal 1257FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid) 1258{ 1259 miscRegfileReads++; 1260 return this->isa[tid]->readMiscReg(misc_reg, tcBase(tid)); 1261} 1262 1263template <class Impl> 1264void 1265FullO3CPU<Impl>::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) 1266{ 1267 this->isa[tid]->setMiscRegNoEffect(misc_reg, val); 1268} 1269 1270template <class Impl> 1271void 1272FullO3CPU<Impl>::setMiscReg(int misc_reg, RegVal val, ThreadID tid) 1273{ 1274 miscRegfileWrites++; 1275 this->isa[tid]->setMiscReg(misc_reg, val, tcBase(tid)); 1276} 1277 1278template <class Impl> 1279RegVal 1280FullO3CPU<Impl>::readIntReg(PhysRegIdPtr phys_reg) 1281{ 1282 intRegfileReads++; 1283 return regFile.readIntReg(phys_reg); 1284} 1285 1286template <class Impl> 1287RegVal 1288FullO3CPU<Impl>::readFloatReg(PhysRegIdPtr phys_reg) 1289{ 1290 fpRegfileReads++; 1291 return regFile.readFloatReg(phys_reg); 1292} 1293 1294template <class Impl> 1295auto 1296FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const 1297 -> const VecRegContainer& 1298{ 1299 vecRegfileReads++; 1300 return regFile.readVecReg(phys_reg); 1301} 1302 1303template <class Impl> 1304auto 1305FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg) 1306 -> VecRegContainer& 1307{ 1308 vecRegfileWrites++; 1309 return regFile.getWritableVecReg(phys_reg); 1310} 1311 1312template <class Impl> 1313auto 1314FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& 1315{ 1316 vecRegfileReads++; 1317 return regFile.readVecElem(phys_reg); 1318} 1319 1320template <class Impl> 1321auto 1322FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const 1323 -> const VecPredRegContainer& 1324{ 1325 vecPredRegfileReads++; 1326 return regFile.readVecPredReg(phys_reg); 1327} 1328 1329template <class Impl> 1330auto 1331FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg) 1332 -> VecPredRegContainer& 1333{ 1334 vecPredRegfileWrites++; 1335 return regFile.getWritableVecPredReg(phys_reg); 1336} 1337 1338template <class Impl> 1339RegVal 1340FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) 1341{ 1342 ccRegfileReads++; 1343 return regFile.readCCReg(phys_reg); 1344} 1345 1346template <class Impl> 1347void 1348FullO3CPU<Impl>::setIntReg(PhysRegIdPtr phys_reg, RegVal val) 1349{ 1350 intRegfileWrites++; 1351 regFile.setIntReg(phys_reg, val); 1352} 1353 1354template <class Impl> 1355void 1356FullO3CPU<Impl>::setFloatReg(PhysRegIdPtr phys_reg, RegVal val) 1357{ 1358 fpRegfileWrites++; 1359 regFile.setFloatReg(phys_reg, val); 1360} 1361 1362template <class Impl> 1363void 1364FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) 1365{ 1366 vecRegfileWrites++; 1367 regFile.setVecReg(phys_reg, val); 1368} 1369 1370template <class Impl> 1371void 1372FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) 1373{ 1374 vecRegfileWrites++; 1375 regFile.setVecElem(phys_reg, val); 1376} 1377 1378template <class Impl> 1379void 1380FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg, 1381 const VecPredRegContainer& val) 1382{ 1383 vecPredRegfileWrites++; 1384 regFile.setVecPredReg(phys_reg, val); 1385} 1386 1387template <class Impl> 1388void 1389FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, RegVal val) 1390{ 1391 ccRegfileWrites++; 1392 regFile.setCCReg(phys_reg, val); 1393} 1394 1395template <class Impl> 1396RegVal 1397FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) 1398{ 1399 intRegfileReads++; 1400 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1401 RegId(IntRegClass, reg_idx)); 1402 1403 return regFile.readIntReg(phys_reg); 1404} 1405 1406template <class Impl> 1407RegVal 1408FullO3CPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid) 1409{ 1410 fpRegfileReads++; 1411 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1412 RegId(FloatRegClass, reg_idx)); 1413 1414 return regFile.readFloatReg(phys_reg); 1415} 1416 1417template <class Impl> 1418auto 1419FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const 1420 -> const VecRegContainer& 1421{ 1422 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1423 RegId(VecRegClass, reg_idx)); 1424 return readVecReg(phys_reg); 1425} 1426 1427template <class Impl> 1428auto 1429FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid) 1430 -> VecRegContainer& 1431{ 1432 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1433 RegId(VecRegClass, reg_idx)); 1434 return getWritableVecReg(phys_reg); 1435} 1436 1437template <class Impl> 1438auto 1439FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1440 ThreadID tid) const -> const VecElem& 1441{ 1442 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1443 RegId(VecElemClass, reg_idx, ldx)); 1444 return readVecElem(phys_reg); 1445} 1446 1447template <class Impl> 1448auto 1449FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const 1450 -> const VecPredRegContainer& 1451{ 1452 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1453 RegId(VecPredRegClass, reg_idx)); 1454 return readVecPredReg(phys_reg); 1455} 1456 1457template <class Impl> 1458auto 1459FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid) 1460 -> VecPredRegContainer& 1461{ 1462 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1463 RegId(VecPredRegClass, reg_idx)); 1464 return getWritableVecPredReg(phys_reg); 1465} 1466 1467template <class Impl> 1468RegVal 1469FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) 1470{ 1471 ccRegfileReads++; 1472 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1473 RegId(CCRegClass, reg_idx)); 1474 1475 return regFile.readCCReg(phys_reg); 1476} 1477 1478template <class Impl> 1479void 1480FullO3CPU<Impl>::setArchIntReg(int reg_idx, RegVal val, ThreadID tid) 1481{ 1482 intRegfileWrites++; 1483 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1484 RegId(IntRegClass, reg_idx)); 1485 1486 regFile.setIntReg(phys_reg, val); 1487} 1488 1489template <class Impl> 1490void 1491FullO3CPU<Impl>::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid) 1492{ 1493 fpRegfileWrites++; 1494 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1495 RegId(FloatRegClass, reg_idx)); 1496 1497 regFile.setFloatReg(phys_reg, val); 1498} 1499 1500template <class Impl> 1501void 1502FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val, 1503 ThreadID tid) 1504{ 1505 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1506 RegId(VecRegClass, reg_idx)); 1507 setVecReg(phys_reg, val); 1508} 1509 1510template <class Impl> 1511void 1512FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1513 const VecElem& val, ThreadID tid) 1514{ 1515 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1516 RegId(VecElemClass, reg_idx, ldx)); 1517 setVecElem(phys_reg, val); 1518} 1519 1520template <class Impl> 1521void 1522FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, 1523 ThreadID tid) 1524{ 1525 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1526 RegId(VecPredRegClass, reg_idx)); 1527 setVecPredReg(phys_reg, val); 1528} 1529 1530template <class Impl> 1531void 1532FullO3CPU<Impl>::setArchCCReg(int reg_idx, RegVal val, ThreadID tid) 1533{ 1534 ccRegfileWrites++; 1535 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1536 RegId(CCRegClass, reg_idx)); 1537 1538 regFile.setCCReg(phys_reg, val); 1539} 1540 1541template <class Impl> 1542TheISA::PCState 1543FullO3CPU<Impl>::pcState(ThreadID tid) 1544{ 1545 return commit.pcState(tid); 1546} 1547 1548template <class Impl> 1549void 1550FullO3CPU<Impl>::pcState(const TheISA::PCState &val, ThreadID tid) 1551{ 1552 commit.pcState(val, tid); 1553} 1554 1555template <class Impl> 1556Addr 1557FullO3CPU<Impl>::instAddr(ThreadID tid) 1558{ 1559 return commit.instAddr(tid); 1560} 1561 1562template <class Impl> 1563Addr 1564FullO3CPU<Impl>::nextInstAddr(ThreadID tid) 1565{ 1566 return commit.nextInstAddr(tid); 1567} 1568 1569template <class Impl> 1570MicroPC 1571FullO3CPU<Impl>::microPC(ThreadID tid) 1572{ 1573 return commit.microPC(tid); 1574} 1575 1576template <class Impl> 1577void 1578FullO3CPU<Impl>::squashFromTC(ThreadID tid) 1579{ 1580 this->thread[tid]->noSquashFromTC = true; 1581 this->commit.generateTCEvent(tid); 1582} 1583 1584template <class Impl> 1585typename FullO3CPU<Impl>::ListIt 1586FullO3CPU<Impl>::addInst(const DynInstPtr &inst) 1587{ 1588 instList.push_back(inst); 1589 1590 return --(instList.end()); 1591} 1592 1593template <class Impl> 1594void 1595FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst) 1596{ 1597 // Keep an instruction count. 1598 if (!inst->isMicroop() || inst->isLastMicroop()) { 1599 thread[tid]->numInst++; 1600 thread[tid]->numInsts++; 1601 committedInsts[tid]++; 1602 system->totalNumInsts++; 1603 1604 // Check for instruction-count-based events. 1605 comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); 1606 system->instEventQueue.serviceEvents(system->totalNumInsts); 1607 } 1608 thread[tid]->numOp++; 1609 thread[tid]->numOps++; 1610 committedOps[tid]++; 1611 1612 probeInstCommit(inst->staticInst, inst->instAddr()); 1613} 1614 1615template <class Impl> 1616void 1617FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst) 1618{ 1619 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s " 1620 "[sn:%lli]\n", 1621 inst->threadNumber, inst->pcState(), inst->seqNum); 1622 1623 removeInstsThisCycle = true; 1624 1625 // Remove the front instruction. 1626 removeList.push(inst->getInstListIt()); 1627} 1628 1629template <class Impl> 1630void 1631FullO3CPU<Impl>::removeInstsNotInROB(ThreadID tid) 1632{ 1633 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" 1634 " list.\n", tid); 1635 1636 ListIt end_it; 1637 1638 bool rob_empty = false; 1639 1640 if (instList.empty()) { 1641 return; 1642 } else if (rob.isEmpty(tid)) { 1643 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); 1644 end_it = instList.begin(); 1645 rob_empty = true; 1646 } else { 1647 end_it = (rob.readTailInst(tid))->getInstListIt(); 1648 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); 1649 } 1650 1651 removeInstsThisCycle = true; 1652 1653 ListIt inst_it = instList.end(); 1654 1655 inst_it--; 1656 1657 // Walk through the instruction list, removing any instructions 1658 // that were inserted after the given instruction iterator, end_it. 1659 while (inst_it != end_it) { 1660 assert(!instList.empty()); 1661 1662 squashInstIt(inst_it, tid); 1663 1664 inst_it--; 1665 } 1666 1667 // If the ROB was empty, then we actually need to remove the first 1668 // instruction as well. 1669 if (rob_empty) { 1670 squashInstIt(inst_it, tid); 1671 } 1672} 1673 1674template <class Impl> 1675void 1676FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) 1677{ 1678 assert(!instList.empty()); 1679 1680 removeInstsThisCycle = true; 1681 1682 ListIt inst_iter = instList.end(); 1683 1684 inst_iter--; 1685 1686 DPRINTF(O3CPU, "Deleting instructions from instruction " 1687 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", 1688 tid, seq_num, (*inst_iter)->seqNum); 1689 1690 while ((*inst_iter)->seqNum > seq_num) { 1691 1692 bool break_loop = (inst_iter == instList.begin()); 1693 1694 squashInstIt(inst_iter, tid); 1695 1696 inst_iter--; 1697 1698 if (break_loop) 1699 break; 1700 } 1701} 1702 1703template <class Impl> 1704inline void 1705FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, ThreadID tid) 1706{ 1707 if ((*instIt)->threadNumber == tid) { 1708 DPRINTF(O3CPU, "Squashing instruction, " 1709 "[tid:%i] [sn:%lli] PC %s\n", 1710 (*instIt)->threadNumber, 1711 (*instIt)->seqNum, 1712 (*instIt)->pcState()); 1713 1714 // Mark it as squashed. 1715 (*instIt)->setSquashed(); 1716 1717 // @todo: Formulate a consistent method for deleting 1718 // instructions from the instruction list 1719 // Remove the instruction from the list. 1720 removeList.push(instIt); 1721 } 1722} 1723 1724template <class Impl> 1725void 1726FullO3CPU<Impl>::cleanUpRemovedInsts() 1727{ 1728 while (!removeList.empty()) { 1729 DPRINTF(O3CPU, "Removing instruction, " 1730 "[tid:%i] [sn:%lli] PC %s\n", 1731 (*removeList.front())->threadNumber, 1732 (*removeList.front())->seqNum, 1733 (*removeList.front())->pcState()); 1734 1735 instList.erase(removeList.front()); 1736 1737 removeList.pop(); 1738 } 1739 1740 removeInstsThisCycle = false; 1741} 1742/* 1743template <class Impl> 1744void 1745FullO3CPU<Impl>::removeAllInsts() 1746{ 1747 instList.clear(); 1748} 1749*/ 1750template <class Impl> 1751void 1752FullO3CPU<Impl>::dumpInsts() 1753{ 1754 int num = 0; 1755 1756 ListIt inst_list_it = instList.begin(); 1757 1758 cprintf("Dumping Instruction List\n"); 1759 1760 while (inst_list_it != instList.end()) { 1761 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" 1762 "Squashed:%i\n\n", 1763 num, (*inst_list_it)->instAddr(), (*inst_list_it)->threadNumber, 1764 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), 1765 (*inst_list_it)->isSquashed()); 1766 inst_list_it++; 1767 ++num; 1768 } 1769} 1770/* 1771template <class Impl> 1772void 1773FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst) 1774{ 1775 iew.wakeDependents(inst); 1776} 1777*/ 1778template <class Impl> 1779void 1780FullO3CPU<Impl>::wakeCPU() 1781{ 1782 if (activityRec.active() || tickEvent.scheduled()) { 1783 DPRINTF(Activity, "CPU already running.\n"); 1784 return; 1785 } 1786 1787 DPRINTF(Activity, "Waking up CPU\n"); 1788 1789 Cycles cycles(curCycle() - lastRunningCycle); 1790 // @todo: This is an oddity that is only here to match the stats 1791 if (cycles > 1) { 1792 --cycles; 1793 idleCycles += cycles; 1794 numCycles += cycles; 1795 } 1796 1797 schedule(tickEvent, clockEdge()); 1798} 1799 1800template <class Impl> 1801void 1802FullO3CPU<Impl>::wakeup(ThreadID tid) 1803{ 1804 if (this->thread[tid]->status() != ThreadContext::Suspended) 1805 return; 1806 1807 this->wakeCPU(); 1808 1809 DPRINTF(Quiesce, "Suspended Processor woken\n"); 1810 this->threadContexts[tid]->activate(); 1811} 1812 1813template <class Impl> 1814ThreadID 1815FullO3CPU<Impl>::getFreeTid() 1816{ 1817 for (ThreadID tid = 0; tid < numThreads; tid++) { 1818 if (!tids[tid]) { 1819 tids[tid] = true; 1820 return tid; 1821 } 1822 } 1823 1824 return InvalidThreadID; 1825} 1826 1827template <class Impl> 1828void 1829FullO3CPU<Impl>::updateThreadPriority() 1830{ 1831 if (activeThreads.size() > 1) { 1832 //DEFAULT TO ROUND ROBIN SCHEME 1833 //e.g. Move highest priority to end of thread list 1834 list<ThreadID>::iterator list_begin = activeThreads.begin(); 1835 1836 unsigned high_thread = *list_begin; 1837 1838 activeThreads.erase(list_begin); 1839 1840 activeThreads.push_back(high_thread); 1841 } 1842} 1843 1844template <class Impl> 1845void 1846FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid) 1847{ 1848 DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid); 1849 1850 // the thread trying to exit can't be already halted 1851 assert(tcBase(tid)->status() != ThreadContext::Halted); 1852 1853 // make sure the thread has not been added to the list yet 1854 assert(exitingThreads.count(tid) == 0); 1855 1856 // add the thread to exitingThreads list to mark that this thread is 1857 // trying to exit. The boolean value in the pair denotes if a thread is 1858 // ready to exit. The thread is not ready to exit until the corresponding 1859 // exit trap event is processed in the future. Until then, it'll be still 1860 // an active thread that is trying to exit. 1861 exitingThreads.emplace(std::make_pair(tid, false)); 1862} 1863 1864template <class Impl> 1865bool 1866FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const 1867{ 1868 return exitingThreads.count(tid) == 1; 1869} 1870 1871template <class Impl> 1872void 1873FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid) 1874{ 1875 assert(exitingThreads.count(tid) == 1); 1876 1877 // exit trap event has been processed. Now, the thread is ready to exit 1878 // and be removed from the CPU. 1879 exitingThreads[tid] = true; 1880 1881 // we schedule a threadExitEvent in the next cycle to properly clean 1882 // up the thread's states in the pipeline. threadExitEvent has lower 1883 // priority than tickEvent, so the cleanup will happen at the very end 1884 // of the next cycle after all pipeline stages complete their operations. 1885 // We want all stages to complete squashing instructions before doing 1886 // the cleanup. 1887 if (!threadExitEvent.scheduled()) { 1888 schedule(threadExitEvent, nextCycle()); 1889 } 1890} 1891 1892template <class Impl> 1893void 1894FullO3CPU<Impl>::exitThreads() 1895{ 1896 // there must be at least one thread trying to exit 1897 assert(exitingThreads.size() > 0); 1898 1899 // terminate all threads that are ready to exit 1900 auto it = exitingThreads.begin(); 1901 while (it != exitingThreads.end()) { 1902 ThreadID thread_id = it->first; 1903 bool readyToExit = it->second; 1904 1905 if (readyToExit) { 1906 DPRINTF(O3CPU, "Exiting thread %d\n", thread_id); 1907 haltContext(thread_id); 1908 tcBase(thread_id)->setStatus(ThreadContext::Halted); 1909 it = exitingThreads.erase(it); 1910 } else { 1911 it++; 1912 } 1913 } 1914} 1915 1916// Forward declaration of FullO3CPU. 1917template class FullO3CPU<O3CPUImpl>; 1918