// cpu.cc revision 14085:0075b0d29d55
1/* 2 * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2006 The Regents of The University of Michigan 16 * Copyright (c) 2011 Regents of the University of California 17 * All rights reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions are 21 * met: redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer; 23 * redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution; 26 * neither the name of the copyright holders nor the names of its 27 * contributors may be used to endorse or promote products derived from 28 * this software without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 * 42 * Authors: Kevin Lim 43 * Korey Sewell 44 * Rick Strong 45 */ 46 47#include "cpu/o3/cpu.hh" 48 49#include "arch/generic/traits.hh" 50#include "arch/kernel_stats.hh" 51#include "config/the_isa.hh" 52#include "cpu/activity.hh" 53#include "cpu/checker/cpu.hh" 54#include "cpu/checker/thread_context.hh" 55#include "cpu/o3/isa_specific.hh" 56#include "cpu/o3/thread_context.hh" 57#include "cpu/quiesce_event.hh" 58#include "cpu/simple_thread.hh" 59#include "cpu/thread_context.hh" 60#include "debug/Activity.hh" 61#include "debug/Drain.hh" 62#include "debug/O3CPU.hh" 63#include "debug/Quiesce.hh" 64#include "enums/MemoryMode.hh" 65#include "sim/core.hh" 66#include "sim/full_system.hh" 67#include "sim/process.hh" 68#include "sim/stat_control.hh" 69#include "sim/system.hh" 70 71#if THE_ISA == ALPHA_ISA 72#include "arch/alpha/osfpal.hh" 73#include "debug/Activity.hh" 74 75#endif 76 77struct BaseCPUParams; 78 79using namespace TheISA; 80using namespace std; 81 82BaseO3CPU::BaseO3CPU(BaseCPUParams *params) 83 : BaseCPU(params) 84{ 85} 86 87void 88BaseO3CPU::regStats() 89{ 90 BaseCPU::regStats(); 91} 92 93template<class Impl> 94bool 95FullO3CPU<Impl>::IcachePort::recvTimingResp(PacketPtr pkt) 96{ 97 DPRINTF(O3CPU, "Fetch unit received timing\n"); 98 // We shouldn't ever get a cacheable block in Modified state 99 assert(pkt->req->isUncacheable() || 100 !(pkt->cacheResponding() && !pkt->hasSharers())); 101 
fetch->processCacheCompletion(pkt); 102 103 return true; 104} 105 106template<class Impl> 107void 108FullO3CPU<Impl>::IcachePort::recvReqRetry() 109{ 110 fetch->recvReqRetry(); 111} 112 113template <class Impl> 114bool 115FullO3CPU<Impl>::DcachePort::recvTimingResp(PacketPtr pkt) 116{ 117 return lsq->recvTimingResp(pkt); 118} 119 120template <class Impl> 121void 122FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt) 123{ 124 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { 125 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { 126 cpu->wakeup(tid); 127 } 128 } 129 lsq->recvTimingSnoopReq(pkt); 130} 131 132template <class Impl> 133void 134FullO3CPU<Impl>::DcachePort::recvReqRetry() 135{ 136 lsq->recvReqRetry(); 137} 138 139template <class Impl> 140FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) 141 : BaseO3CPU(params), 142 itb(params->itb), 143 dtb(params->dtb), 144 tickEvent([this]{ tick(); }, "FullO3CPU tick", 145 false, Event::CPU_Tick_Pri), 146 threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads", 147 false, Event::CPU_Exit_Pri), 148#ifndef NDEBUG 149 instcount(0), 150#endif 151 removeInstsThisCycle(false), 152 fetch(this, params), 153 decode(this, params), 154 rename(this, params), 155 iew(this, params), 156 commit(this, params), 157 158 /* It is mandatory that all SMT threads use the same renaming mode as 159 * they are sharing registers and rename */ 160 vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])), 161 regFile(params->numPhysIntRegs, 162 params->numPhysFloatRegs, 163 params->numPhysVecRegs, 164 params->numPhysVecPredRegs, 165 params->numPhysCCRegs, 166 vecMode), 167 168 freeList(name() + ".freelist", ®File), 169 170 rob(this, params), 171 172 scoreboard(name() + ".scoreboard", 173 regFile.totalNumPhysRegs()), 174 175 isa(numThreads, NULL), 176 177 icachePort(&fetch, this), 178 dcachePort(&iew.ldstQueue, this), 179 180 timeBuffer(params->backComSize, params->forwardComSize), 181 fetchQueue(params->backComSize, 
params->forwardComSize), 182 decodeQueue(params->backComSize, params->forwardComSize), 183 renameQueue(params->backComSize, params->forwardComSize), 184 iewQueue(params->backComSize, params->forwardComSize), 185 activityRec(name(), NumStages, 186 params->backComSize + params->forwardComSize, 187 params->activity), 188 189 globalSeqNum(1), 190 system(params->system), 191 lastRunningCycle(curCycle()) 192{ 193 if (!params->switched_out) { 194 _status = Running; 195 } else { 196 _status = SwitchedOut; 197 } 198 199 if (params->checker) { 200 BaseCPU *temp_checker = params->checker; 201 checker = dynamic_cast<Checker<Impl> *>(temp_checker); 202 checker->setIcachePort(&icachePort); 203 checker->setSystem(params->system); 204 } else { 205 checker = NULL; 206 } 207 208 if (!FullSystem) { 209 thread.resize(numThreads); 210 tids.resize(numThreads); 211 } 212 213 // The stages also need their CPU pointer setup. However this 214 // must be done at the upper level CPU because they have pointers 215 // to the upper level CPU, and not this FullO3CPU. 216 217 // Set up Pointers to the activeThreads list for each stage 218 fetch.setActiveThreads(&activeThreads); 219 decode.setActiveThreads(&activeThreads); 220 rename.setActiveThreads(&activeThreads); 221 iew.setActiveThreads(&activeThreads); 222 commit.setActiveThreads(&activeThreads); 223 224 // Give each of the stages the time buffer they will use. 225 fetch.setTimeBuffer(&timeBuffer); 226 decode.setTimeBuffer(&timeBuffer); 227 rename.setTimeBuffer(&timeBuffer); 228 iew.setTimeBuffer(&timeBuffer); 229 commit.setTimeBuffer(&timeBuffer); 230 231 // Also setup each of the stages' queues. 
232 fetch.setFetchQueue(&fetchQueue); 233 decode.setFetchQueue(&fetchQueue); 234 commit.setFetchQueue(&fetchQueue); 235 decode.setDecodeQueue(&decodeQueue); 236 rename.setDecodeQueue(&decodeQueue); 237 rename.setRenameQueue(&renameQueue); 238 iew.setRenameQueue(&renameQueue); 239 iew.setIEWQueue(&iewQueue); 240 commit.setIEWQueue(&iewQueue); 241 commit.setRenameQueue(&renameQueue); 242 243 commit.setIEWStage(&iew); 244 rename.setIEWStage(&iew); 245 rename.setCommitStage(&commit); 246 247 ThreadID active_threads; 248 if (FullSystem) { 249 active_threads = 1; 250 } else { 251 active_threads = params->workload.size(); 252 253 if (active_threads > Impl::MaxThreads) { 254 panic("Workload Size too large. Increase the 'MaxThreads' " 255 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) " 256 "or edit your workload size."); 257 } 258 } 259 260 //Make Sure That this a Valid Architeture 261 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); 262 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); 263 assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); 264 assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); 265 assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); 266 267 rename.setScoreboard(&scoreboard); 268 iew.setScoreboard(&scoreboard); 269 270 // Setup the rename map for whichever stages need it. 271 for (ThreadID tid = 0; tid < numThreads; tid++) { 272 isa[tid] = params->isa[tid]; 273 assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0])); 274 275 // Only Alpha has an FP zero register, so for other ISAs we 276 // use an invalid FP register index to avoid special treatment 277 // of any valid FP reg. 278 RegIndex invalidFPReg = TheISA::NumFloatRegs + 1; 279 RegIndex fpZeroReg = 280 (THE_ISA == ALPHA_ISA) ? 
TheISA::ZeroReg : invalidFPReg; 281 282 commitRenameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 283 &freeList, 284 vecMode); 285 286 renameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 287 &freeList, vecMode); 288 } 289 290 // Initialize rename map to assign physical registers to the 291 // architectural registers for active threads only. 292 for (ThreadID tid = 0; tid < active_threads; tid++) { 293 for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) { 294 // Note that we can't use the rename() method because we don't 295 // want special treatment for the zero register at this point 296 PhysRegIdPtr phys_reg = freeList.getIntReg(); 297 renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 298 commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 299 } 300 301 for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) { 302 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 303 renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg); 304 commitRenameMap[tid].setEntry( 305 RegId(FloatRegClass, ridx), phys_reg); 306 } 307 308 /* Here we need two 'interfaces' the 'whole register' and the 309 * 'register element'. At any point only one of them will be 310 * active. 
*/ 311 if (vecMode == Enums::Full) { 312 /* Initialize the full-vector interface */ 313 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 314 RegId rid = RegId(VecRegClass, ridx); 315 PhysRegIdPtr phys_reg = freeList.getVecReg(); 316 renameMap[tid].setEntry(rid, phys_reg); 317 commitRenameMap[tid].setEntry(rid, phys_reg); 318 } 319 } else { 320 /* Initialize the vector-element interface */ 321 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 322 for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg; 323 ++ldx) { 324 RegId lrid = RegId(VecElemClass, ridx, ldx); 325 PhysRegIdPtr phys_elem = freeList.getVecElem(); 326 renameMap[tid].setEntry(lrid, phys_elem); 327 commitRenameMap[tid].setEntry(lrid, phys_elem); 328 } 329 } 330 } 331 332 for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { 333 PhysRegIdPtr phys_reg = freeList.getVecPredReg(); 334 renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); 335 commitRenameMap[tid].setEntry( 336 RegId(VecPredRegClass, ridx), phys_reg); 337 } 338 339 for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) { 340 PhysRegIdPtr phys_reg = freeList.getCCReg(); 341 renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 342 commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 343 } 344 } 345 346 rename.setRenameMap(renameMap); 347 commit.setRenameMap(commitRenameMap); 348 rename.setFreeList(&freeList); 349 350 // Setup the ROB for whichever stages need it. 351 commit.setROB(&rob); 352 353 lastActivatedCycle = 0; 354 355 DPRINTF(O3CPU, "Creating O3CPU object.\n"); 356 357 // Setup any thread state. 358 this->thread.resize(this->numThreads); 359 360 for (ThreadID tid = 0; tid < this->numThreads; ++tid) { 361 if (FullSystem) { 362 // SMT is not supported in FS mode yet. 
363 assert(this->numThreads == 1); 364 this->thread[tid] = new Thread(this, 0, NULL); 365 } else { 366 if (tid < params->workload.size()) { 367 DPRINTF(O3CPU, "Workload[%i] process is %#x", 368 tid, this->thread[tid]); 369 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 370 (typename Impl::O3CPU *)(this), 371 tid, params->workload[tid]); 372 373 //usedTids[tid] = true; 374 //threadMap[tid] = tid; 375 } else { 376 //Allocate Empty thread so M5 can use later 377 //when scheduling threads to CPU 378 Process* dummy_proc = NULL; 379 380 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 381 (typename Impl::O3CPU *)(this), 382 tid, dummy_proc); 383 //usedTids[tid] = false; 384 } 385 } 386 387 ThreadContext *tc; 388 389 // Setup the TC that will serve as the interface to the threads/CPU. 390 O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>; 391 392 tc = o3_tc; 393 394 // If we're using a checker, then the TC should be the 395 // CheckerThreadContext. 396 if (params->checker) { 397 tc = new CheckerThreadContext<O3ThreadContext<Impl> >( 398 o3_tc, this->checker); 399 } 400 401 o3_tc->cpu = (typename Impl::O3CPU *)(this); 402 assert(o3_tc->cpu); 403 o3_tc->thread = this->thread[tid]; 404 405 // Setup quiesce event. 406 this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc); 407 408 // Give the thread the TC. 409 this->thread[tid]->tc = tc; 410 411 // Add the TC to the CPU's list of TC's. 412 this->threadContexts.push_back(tc); 413 } 414 415 // FullO3CPU always requires an interrupt controller. 
416 if (!params->switched_out && interrupts.empty()) { 417 fatal("FullO3CPU %s has no interrupt controller.\n" 418 "Ensure createInterruptController() is called.\n", name()); 419 } 420 421 for (ThreadID tid = 0; tid < this->numThreads; tid++) 422 this->thread[tid]->setFuncExeInst(0); 423} 424 425template <class Impl> 426FullO3CPU<Impl>::~FullO3CPU() 427{ 428} 429 430template <class Impl> 431void 432FullO3CPU<Impl>::regProbePoints() 433{ 434 BaseCPU::regProbePoints(); 435 436 ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete"); 437 ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete"); 438 439 fetch.regProbePoints(); 440 rename.regProbePoints(); 441 iew.regProbePoints(); 442 commit.regProbePoints(); 443} 444 445template <class Impl> 446void 447FullO3CPU<Impl>::regStats() 448{ 449 BaseO3CPU::regStats(); 450 451 // Register any of the O3CPU's stats here. 452 timesIdled 453 .name(name() + ".timesIdled") 454 .desc("Number of times that the entire CPU went into an idle state and" 455 " unscheduled itself") 456 .prereq(timesIdled); 457 458 idleCycles 459 .name(name() + ".idleCycles") 460 .desc("Total number of cycles that the CPU has spent unscheduled due " 461 "to idling") 462 .prereq(idleCycles); 463 464 quiesceCycles 465 .name(name() + ".quiesceCycles") 466 .desc("Total number of cycles that CPU has spent quiesced or waiting " 467 "for an interrupt") 468 .prereq(quiesceCycles); 469 470 // Number of Instructions simulated 471 // -------------------------------- 472 // Should probably be in Base CPU but need templated 473 // MaxThreads so put in here instead 474 committedInsts 475 .init(numThreads) 476 .name(name() + ".committedInsts") 477 .desc("Number of Instructions Simulated") 478 .flags(Stats::total); 479 480 committedOps 481 .init(numThreads) 482 .name(name() + ".committedOps") 483 .desc("Number of Ops (including micro ops) Simulated") 484 .flags(Stats::total); 
485 486 cpi 487 .name(name() + ".cpi") 488 .desc("CPI: Cycles Per Instruction") 489 .precision(6); 490 cpi = numCycles / committedInsts; 491 492 totalCpi 493 .name(name() + ".cpi_total") 494 .desc("CPI: Total CPI of All Threads") 495 .precision(6); 496 totalCpi = numCycles / sum(committedInsts); 497 498 ipc 499 .name(name() + ".ipc") 500 .desc("IPC: Instructions Per Cycle") 501 .precision(6); 502 ipc = committedInsts / numCycles; 503 504 totalIpc 505 .name(name() + ".ipc_total") 506 .desc("IPC: Total IPC of All Threads") 507 .precision(6); 508 totalIpc = sum(committedInsts) / numCycles; 509 510 this->fetch.regStats(); 511 this->decode.regStats(); 512 this->rename.regStats(); 513 this->iew.regStats(); 514 this->commit.regStats(); 515 this->rob.regStats(); 516 517 intRegfileReads 518 .name(name() + ".int_regfile_reads") 519 .desc("number of integer regfile reads") 520 .prereq(intRegfileReads); 521 522 intRegfileWrites 523 .name(name() + ".int_regfile_writes") 524 .desc("number of integer regfile writes") 525 .prereq(intRegfileWrites); 526 527 fpRegfileReads 528 .name(name() + ".fp_regfile_reads") 529 .desc("number of floating regfile reads") 530 .prereq(fpRegfileReads); 531 532 fpRegfileWrites 533 .name(name() + ".fp_regfile_writes") 534 .desc("number of floating regfile writes") 535 .prereq(fpRegfileWrites); 536 537 vecRegfileReads 538 .name(name() + ".vec_regfile_reads") 539 .desc("number of vector regfile reads") 540 .prereq(vecRegfileReads); 541 542 vecRegfileWrites 543 .name(name() + ".vec_regfile_writes") 544 .desc("number of vector regfile writes") 545 .prereq(vecRegfileWrites); 546 547 vecPredRegfileReads 548 .name(name() + ".pred_regfile_reads") 549 .desc("number of predicate regfile reads") 550 .prereq(vecPredRegfileReads); 551 552 vecPredRegfileWrites 553 .name(name() + ".pred_regfile_writes") 554 .desc("number of predicate regfile writes") 555 .prereq(vecPredRegfileWrites); 556 557 ccRegfileReads 558 .name(name() + ".cc_regfile_reads") 559 .desc("number 
of cc regfile reads") 560 .prereq(ccRegfileReads); 561 562 ccRegfileWrites 563 .name(name() + ".cc_regfile_writes") 564 .desc("number of cc regfile writes") 565 .prereq(ccRegfileWrites); 566 567 miscRegfileReads 568 .name(name() + ".misc_regfile_reads") 569 .desc("number of misc regfile reads") 570 .prereq(miscRegfileReads); 571 572 miscRegfileWrites 573 .name(name() + ".misc_regfile_writes") 574 .desc("number of misc regfile writes") 575 .prereq(miscRegfileWrites); 576} 577 578template <class Impl> 579void 580FullO3CPU<Impl>::tick() 581{ 582 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); 583 assert(!switchedOut()); 584 assert(drainState() != DrainState::Drained); 585 586 ++numCycles; 587 updateCycleCounters(BaseCPU::CPU_STATE_ON); 588 589// activity = false; 590 591 //Tick each of the stages 592 fetch.tick(); 593 594 decode.tick(); 595 596 rename.tick(); 597 598 iew.tick(); 599 600 commit.tick(); 601 602 // Now advance the time buffers 603 timeBuffer.advance(); 604 605 fetchQueue.advance(); 606 decodeQueue.advance(); 607 renameQueue.advance(); 608 iewQueue.advance(); 609 610 activityRec.advance(); 611 612 if (removeInstsThisCycle) { 613 cleanUpRemovedInsts(); 614 } 615 616 if (!tickEvent.scheduled()) { 617 if (_status == SwitchedOut) { 618 DPRINTF(O3CPU, "Switched out!\n"); 619 // increment stat 620 lastRunningCycle = curCycle(); 621 } else if (!activityRec.active() || _status == Idle) { 622 DPRINTF(O3CPU, "Idle!\n"); 623 lastRunningCycle = curCycle(); 624 timesIdled++; 625 } else { 626 schedule(tickEvent, clockEdge(Cycles(1))); 627 DPRINTF(O3CPU, "Scheduling next tick!\n"); 628 } 629 } 630 631 if (!FullSystem) 632 updateThreadPriority(); 633 634 tryDrain(); 635} 636 637template <class Impl> 638void 639FullO3CPU<Impl>::init() 640{ 641 BaseCPU::init(); 642 643 for (ThreadID tid = 0; tid < numThreads; ++tid) { 644 // Set noSquashFromTC so that the CPU doesn't squash when initially 645 // setting up registers. 
646 thread[tid]->noSquashFromTC = true; 647 // Initialise the ThreadContext's memory proxies 648 thread[tid]->initMemProxies(thread[tid]->getTC()); 649 } 650 651 if (FullSystem && !params()->switched_out) { 652 for (ThreadID tid = 0; tid < numThreads; tid++) { 653 ThreadContext *src_tc = threadContexts[tid]; 654 TheISA::initCPU(src_tc, src_tc->contextId()); 655 } 656 } 657 658 // Clear noSquashFromTC. 659 for (int tid = 0; tid < numThreads; ++tid) 660 thread[tid]->noSquashFromTC = false; 661 662 commit.setThreads(thread); 663} 664 665template <class Impl> 666void 667FullO3CPU<Impl>::startup() 668{ 669 BaseCPU::startup(); 670 for (int tid = 0; tid < numThreads; ++tid) 671 isa[tid]->startup(threadContexts[tid]); 672 673 fetch.startupStage(); 674 decode.startupStage(); 675 iew.startupStage(); 676 rename.startupStage(); 677 commit.startupStage(); 678} 679 680template <class Impl> 681void 682FullO3CPU<Impl>::activateThread(ThreadID tid) 683{ 684 list<ThreadID>::iterator isActive = 685 std::find(activeThreads.begin(), activeThreads.end(), tid); 686 687 DPRINTF(O3CPU, "[tid:%i] Calling activate thread.\n", tid); 688 assert(!switchedOut()); 689 690 if (isActive == activeThreads.end()) { 691 DPRINTF(O3CPU, "[tid:%i] Adding to active threads list\n", 692 tid); 693 694 activeThreads.push_back(tid); 695 } 696} 697 698template <class Impl> 699void 700FullO3CPU<Impl>::deactivateThread(ThreadID tid) 701{ 702 //Remove From Active List, if Active 703 list<ThreadID>::iterator thread_it = 704 std::find(activeThreads.begin(), activeThreads.end(), tid); 705 706 DPRINTF(O3CPU, "[tid:%i] Calling deactivate thread.\n", tid); 707 assert(!switchedOut()); 708 709 if (thread_it != activeThreads.end()) { 710 DPRINTF(O3CPU,"[tid:%i] Removing from active threads list\n", 711 tid); 712 activeThreads.erase(thread_it); 713 } 714 715 fetch.deactivateThread(tid); 716 commit.deactivateThread(tid); 717} 718 719template <class Impl> 720Counter 721FullO3CPU<Impl>::totalInsts() const 722{ 723 Counter 
total(0); 724 725 ThreadID size = thread.size(); 726 for (ThreadID i = 0; i < size; i++) 727 total += thread[i]->numInst; 728 729 return total; 730} 731 732template <class Impl> 733Counter 734FullO3CPU<Impl>::totalOps() const 735{ 736 Counter total(0); 737 738 ThreadID size = thread.size(); 739 for (ThreadID i = 0; i < size; i++) 740 total += thread[i]->numOp; 741 742 return total; 743} 744 745template <class Impl> 746void 747FullO3CPU<Impl>::activateContext(ThreadID tid) 748{ 749 assert(!switchedOut()); 750 751 // Needs to set each stage to running as well. 752 activateThread(tid); 753 754 // We don't want to wake the CPU if it is drained. In that case, 755 // we just want to flag the thread as active and schedule the tick 756 // event from drainResume() instead. 757 if (drainState() == DrainState::Drained) 758 return; 759 760 // If we are time 0 or if the last activation time is in the past, 761 // schedule the next tick and wake up the fetch unit 762 if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) { 763 scheduleTickEvent(Cycles(0)); 764 765 // Be sure to signal that there's some activity so the CPU doesn't 766 // deschedule itself. 767 activityRec.activity(); 768 fetch.wakeFromQuiesce(); 769 770 Cycles cycles(curCycle() - lastRunningCycle); 771 // @todo: This is an oddity that is only here to match the stats 772 if (cycles != 0) 773 --cycles; 774 quiesceCycles += cycles; 775 776 lastActivatedCycle = curTick(); 777 778 _status = Running; 779 780 BaseCPU::activateContext(tid); 781 } 782} 783 784template <class Impl> 785void 786FullO3CPU<Impl>::suspendContext(ThreadID tid) 787{ 788 DPRINTF(O3CPU,"[tid:%i] Suspending Thread Context.\n", tid); 789 assert(!switchedOut()); 790 791 deactivateThread(tid); 792 793 // If this was the last thread then unschedule the tick event. 
794 if (activeThreads.size() == 0) { 795 unscheduleTickEvent(); 796 lastRunningCycle = curCycle(); 797 _status = Idle; 798 } 799 800 DPRINTF(Quiesce, "Suspending Context\n"); 801 802 BaseCPU::suspendContext(tid); 803} 804 805template <class Impl> 806void 807FullO3CPU<Impl>::haltContext(ThreadID tid) 808{ 809 //For now, this is the same as deallocate 810 DPRINTF(O3CPU,"[tid:%i] Halt Context called. Deallocating\n", tid); 811 assert(!switchedOut()); 812 813 deactivateThread(tid); 814 removeThread(tid); 815 816 updateCycleCounters(BaseCPU::CPU_STATE_SLEEP); 817} 818 819template <class Impl> 820void 821FullO3CPU<Impl>::insertThread(ThreadID tid) 822{ 823 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); 824 // Will change now that the PC and thread state is internal to the CPU 825 // and not in the ThreadContext. 826 ThreadContext *src_tc; 827 if (FullSystem) 828 src_tc = system->threadContexts[tid]; 829 else 830 src_tc = tcBase(tid); 831 832 //Bind Int Regs to Rename Map 833 834 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs; 835 reg_id.index()++) { 836 PhysRegIdPtr phys_reg = freeList.getIntReg(); 837 renameMap[tid].setEntry(reg_id, phys_reg); 838 scoreboard.setReg(phys_reg); 839 } 840 841 //Bind Float Regs to Rename Map 842 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs; 843 reg_id.index()++) { 844 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 845 renameMap[tid].setEntry(reg_id, phys_reg); 846 scoreboard.setReg(phys_reg); 847 } 848 849 //Bind condition-code Regs to Rename Map 850 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; 851 reg_id.index()++) { 852 PhysRegIdPtr phys_reg = freeList.getCCReg(); 853 renameMap[tid].setEntry(reg_id, phys_reg); 854 scoreboard.setReg(phys_reg); 855 } 856 857 //Copy Thread Data Into RegFile 858 //this->copyFromTC(tid); 859 860 //Set PC/NPC/NNPC 861 pcState(src_tc->pcState(), tid); 862 863 src_tc->setStatus(ThreadContext::Active); 864 865 
activateContext(tid); 866 867 //Reset ROB/IQ/LSQ Entries 868 commit.rob->resetEntries(); 869} 870 871template <class Impl> 872void 873FullO3CPU<Impl>::removeThread(ThreadID tid) 874{ 875 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); 876 877 // Copy Thread Data From RegFile 878 // If thread is suspended, it might be re-allocated 879 // this->copyToTC(tid); 880 881 882 // @todo: 2-27-2008: Fix how we free up rename mappings 883 // here to alleviate the case for double-freeing registers 884 // in SMT workloads. 885 886 // clear all thread-specific states in each stage of the pipeline 887 // since this thread is going to be completely removed from the CPU 888 commit.clearStates(tid); 889 fetch.clearStates(tid); 890 decode.clearStates(tid); 891 rename.clearStates(tid); 892 iew.clearStates(tid); 893 894 // at this step, all instructions in the pipeline should be already 895 // either committed successfully or squashed. All thread-specific 896 // queues in the pipeline must be empty. 897 assert(iew.instQueue.getCount(tid) == 0); 898 assert(iew.ldstQueue.getCount(tid) == 0); 899 assert(commit.rob->isEmpty(tid)); 900 901 // Reset ROB/IQ/LSQ Entries 902 903 // Commented out for now. This should be possible to do by 904 // telling all the pipeline stages to drain first, and then 905 // checking until the drain completes. Once the pipeline is 906 // drained, call resetEntries(). 
- 10-09-06 ktlim 907/* 908 if (activeThreads.size() >= 1) { 909 commit.rob->resetEntries(); 910 iew.resetEntries(); 911 } 912*/ 913} 914 915template <class Impl> 916void 917FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist) 918{ 919 auto pc = this->pcState(tid); 920 921 // new_mode is the new vector renaming mode 922 auto new_mode = RenameMode<TheISA::ISA>::mode(pc); 923 924 // We update vecMode only if there has been a change 925 if (new_mode != vecMode) { 926 vecMode = new_mode; 927 928 renameMap[tid].switchMode(vecMode); 929 commitRenameMap[tid].switchMode(vecMode); 930 renameMap[tid].switchFreeList(freelist); 931 } 932} 933 934template <class Impl> 935Fault 936FullO3CPU<Impl>::getInterrupts() 937{ 938 // Check if there are any outstanding interrupts 939 return this->interrupts[0]->getInterrupt(this->threadContexts[0]); 940} 941 942template <class Impl> 943void 944FullO3CPU<Impl>::processInterrupts(const Fault &interrupt) 945{ 946 // Check for interrupts here. For now can copy the code that 947 // exists within isa_fullsys_traits.hh. Also assume that thread 0 948 // is the one that handles the interrupts. 949 // @todo: Possibly consolidate the interrupt checking code. 950 // @todo: Allow other threads to handle interrupts. 951 952 assert(interrupt != NoFault); 953 this->interrupts[0]->updateIntrInfo(this->threadContexts[0]); 954 955 DPRINTF(O3CPU, "Interrupt %s being handled\n", interrupt->name()); 956 this->trap(interrupt, 0, nullptr); 957} 958 959template <class Impl> 960void 961FullO3CPU<Impl>::trap(const Fault &fault, ThreadID tid, 962 const StaticInstPtr &inst) 963{ 964 // Pass the thread's TC into the invoke method. 
965 fault->invoke(this->threadContexts[tid], inst); 966} 967 968template <class Impl> 969void 970FullO3CPU<Impl>::syscall(int64_t callnum, ThreadID tid, Fault *fault) 971{ 972 DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); 973 974 DPRINTF(Activity,"Activity: syscall() called.\n"); 975 976 // Temporarily increase this by one to account for the syscall 977 // instruction. 978 ++(this->thread[tid]->funcExeInst); 979 980 // Execute the actual syscall. 981 this->thread[tid]->syscall(callnum, fault); 982 983 // Decrease funcExeInst by one as the normal commit will handle 984 // incrementing it. 985 --(this->thread[tid]->funcExeInst); 986} 987 988template <class Impl> 989void 990FullO3CPU<Impl>::serializeThread(CheckpointOut &cp, ThreadID tid) const 991{ 992 thread[tid]->serialize(cp); 993} 994 995template <class Impl> 996void 997FullO3CPU<Impl>::unserializeThread(CheckpointIn &cp, ThreadID tid) 998{ 999 thread[tid]->unserialize(cp); 1000} 1001 1002template <class Impl> 1003DrainState 1004FullO3CPU<Impl>::drain() 1005{ 1006 // Deschedule any power gating event (if any) 1007 deschedulePowerGatingEvent(); 1008 1009 // If the CPU isn't doing anything, then return immediately. 1010 if (switchedOut()) 1011 return DrainState::Drained; 1012 1013 DPRINTF(Drain, "Draining...\n"); 1014 1015 // We only need to signal a drain to the commit stage as this 1016 // initiates squashing controls the draining. Once the commit 1017 // stage commits an instruction where it is safe to stop, it'll 1018 // squash the rest of the instructions in the pipeline and force 1019 // the fetch stage to stall. The pipeline will be drained once all 1020 // in-flight instructions have retired. 1021 commit.drain(); 1022 1023 // Wake the CPU and record activity so everything can drain out if 1024 // the CPU was not able to immediately drain. 
1025 if (!isCpuDrained()) { 1026 // If a thread is suspended, wake it up so it can be drained 1027 for (auto t : threadContexts) { 1028 if (t->status() == ThreadContext::Suspended){ 1029 DPRINTF(Drain, "Currently suspended so activate %i \n", 1030 t->threadId()); 1031 t->activate(); 1032 // As the thread is now active, change the power state as well 1033 activateContext(t->threadId()); 1034 } 1035 } 1036 1037 wakeCPU(); 1038 activityRec.activity(); 1039 1040 DPRINTF(Drain, "CPU not drained\n"); 1041 1042 return DrainState::Draining; 1043 } else { 1044 DPRINTF(Drain, "CPU is already drained\n"); 1045 if (tickEvent.scheduled()) 1046 deschedule(tickEvent); 1047 1048 // Flush out any old data from the time buffers. In 1049 // particular, there might be some data in flight from the 1050 // fetch stage that isn't visible in any of the CPU buffers we 1051 // test in isCpuDrained(). 1052 for (int i = 0; i < timeBuffer.getSize(); ++i) { 1053 timeBuffer.advance(); 1054 fetchQueue.advance(); 1055 decodeQueue.advance(); 1056 renameQueue.advance(); 1057 iewQueue.advance(); 1058 } 1059 1060 drainSanityCheck(); 1061 return DrainState::Drained; 1062 } 1063} 1064 1065template <class Impl> 1066bool 1067FullO3CPU<Impl>::tryDrain() 1068{ 1069 if (drainState() != DrainState::Draining || !isCpuDrained()) 1070 return false; 1071 1072 if (tickEvent.scheduled()) 1073 deschedule(tickEvent); 1074 1075 DPRINTF(Drain, "CPU done draining, processing drain event\n"); 1076 signalDrainDone(); 1077 1078 return true; 1079} 1080 1081template <class Impl> 1082void 1083FullO3CPU<Impl>::drainSanityCheck() const 1084{ 1085 assert(isCpuDrained()); 1086 fetch.drainSanityCheck(); 1087 decode.drainSanityCheck(); 1088 rename.drainSanityCheck(); 1089 iew.drainSanityCheck(); 1090 commit.drainSanityCheck(); 1091} 1092 1093template <class Impl> 1094bool 1095FullO3CPU<Impl>::isCpuDrained() const 1096{ 1097 bool drained(true); 1098 1099 if (!instList.empty() || !removeList.empty()) { 1100 DPRINTF(Drain, "Main CPU 
structures not drained.\n"); 1101 drained = false; 1102 } 1103 1104 if (!fetch.isDrained()) { 1105 DPRINTF(Drain, "Fetch not drained.\n"); 1106 drained = false; 1107 } 1108 1109 if (!decode.isDrained()) { 1110 DPRINTF(Drain, "Decode not drained.\n"); 1111 drained = false; 1112 } 1113 1114 if (!rename.isDrained()) { 1115 DPRINTF(Drain, "Rename not drained.\n"); 1116 drained = false; 1117 } 1118 1119 if (!iew.isDrained()) { 1120 DPRINTF(Drain, "IEW not drained.\n"); 1121 drained = false; 1122 } 1123 1124 if (!commit.isDrained()) { 1125 DPRINTF(Drain, "Commit not drained.\n"); 1126 drained = false; 1127 } 1128 1129 return drained; 1130} 1131 1132template <class Impl> 1133void 1134FullO3CPU<Impl>::commitDrained(ThreadID tid) 1135{ 1136 fetch.drainStall(tid); 1137} 1138 1139template <class Impl> 1140void 1141FullO3CPU<Impl>::drainResume() 1142{ 1143 if (switchedOut()) 1144 return; 1145 1146 DPRINTF(Drain, "Resuming...\n"); 1147 verifyMemoryMode(); 1148 1149 fetch.drainResume(); 1150 commit.drainResume(); 1151 1152 _status = Idle; 1153 for (ThreadID i = 0; i < thread.size(); i++) { 1154 if (thread[i]->status() == ThreadContext::Active) { 1155 DPRINTF(Drain, "Activating thread: %i\n", i); 1156 activateThread(i); 1157 _status = Running; 1158 } 1159 } 1160 1161 assert(!tickEvent.scheduled()); 1162 if (_status == Running) 1163 schedule(tickEvent, nextCycle()); 1164 1165 // Reschedule any power gating event (if any) 1166 schedulePowerGatingEvent(); 1167} 1168 1169template <class Impl> 1170void 1171FullO3CPU<Impl>::switchOut() 1172{ 1173 DPRINTF(O3CPU, "Switching out\n"); 1174 BaseCPU::switchOut(); 1175 1176 activityRec.reset(); 1177 1178 _status = SwitchedOut; 1179 1180 if (checker) 1181 checker->switchOut(); 1182} 1183 1184template <class Impl> 1185void 1186FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) 1187{ 1188 BaseCPU::takeOverFrom(oldCPU); 1189 1190 fetch.takeOverFrom(); 1191 decode.takeOverFrom(); 1192 rename.takeOverFrom(); 1193 iew.takeOverFrom(); 1194 
commit.takeOverFrom(); 1195 1196 assert(!tickEvent.scheduled()); 1197 1198 FullO3CPU<Impl> *oldO3CPU = dynamic_cast<FullO3CPU<Impl>*>(oldCPU); 1199 if (oldO3CPU) 1200 globalSeqNum = oldO3CPU->globalSeqNum; 1201 1202 lastRunningCycle = curCycle(); 1203 _status = Idle; 1204} 1205 1206template <class Impl> 1207void 1208FullO3CPU<Impl>::verifyMemoryMode() const 1209{ 1210 if (!system->isTimingMode()) { 1211 fatal("The O3 CPU requires the memory system to be in " 1212 "'timing' mode.\n"); 1213 } 1214} 1215 1216template <class Impl> 1217RegVal 1218FullO3CPU<Impl>::readMiscRegNoEffect(int misc_reg, ThreadID tid) const 1219{ 1220 return this->isa[tid]->readMiscRegNoEffect(misc_reg); 1221} 1222 1223template <class Impl> 1224RegVal 1225FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid) 1226{ 1227 miscRegfileReads++; 1228 return this->isa[tid]->readMiscReg(misc_reg, tcBase(tid)); 1229} 1230 1231template <class Impl> 1232void 1233FullO3CPU<Impl>::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) 1234{ 1235 this->isa[tid]->setMiscRegNoEffect(misc_reg, val); 1236} 1237 1238template <class Impl> 1239void 1240FullO3CPU<Impl>::setMiscReg(int misc_reg, RegVal val, ThreadID tid) 1241{ 1242 miscRegfileWrites++; 1243 this->isa[tid]->setMiscReg(misc_reg, val, tcBase(tid)); 1244} 1245 1246template <class Impl> 1247RegVal 1248FullO3CPU<Impl>::readIntReg(PhysRegIdPtr phys_reg) 1249{ 1250 intRegfileReads++; 1251 return regFile.readIntReg(phys_reg); 1252} 1253 1254template <class Impl> 1255RegVal 1256FullO3CPU<Impl>::readFloatReg(PhysRegIdPtr phys_reg) 1257{ 1258 fpRegfileReads++; 1259 return regFile.readFloatReg(phys_reg); 1260} 1261 1262template <class Impl> 1263auto 1264FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const 1265 -> const VecRegContainer& 1266{ 1267 vecRegfileReads++; 1268 return regFile.readVecReg(phys_reg); 1269} 1270 1271template <class Impl> 1272auto 1273FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg) 1274 -> VecRegContainer& 1275{ 1276 
vecRegfileWrites++; 1277 return regFile.getWritableVecReg(phys_reg); 1278} 1279 1280template <class Impl> 1281auto 1282FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& 1283{ 1284 vecRegfileReads++; 1285 return regFile.readVecElem(phys_reg); 1286} 1287 1288template <class Impl> 1289auto 1290FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const 1291 -> const VecPredRegContainer& 1292{ 1293 vecPredRegfileReads++; 1294 return regFile.readVecPredReg(phys_reg); 1295} 1296 1297template <class Impl> 1298auto 1299FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg) 1300 -> VecPredRegContainer& 1301{ 1302 vecPredRegfileWrites++; 1303 return regFile.getWritableVecPredReg(phys_reg); 1304} 1305 1306template <class Impl> 1307RegVal 1308FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) 1309{ 1310 ccRegfileReads++; 1311 return regFile.readCCReg(phys_reg); 1312} 1313 1314template <class Impl> 1315void 1316FullO3CPU<Impl>::setIntReg(PhysRegIdPtr phys_reg, RegVal val) 1317{ 1318 intRegfileWrites++; 1319 regFile.setIntReg(phys_reg, val); 1320} 1321 1322template <class Impl> 1323void 1324FullO3CPU<Impl>::setFloatReg(PhysRegIdPtr phys_reg, RegVal val) 1325{ 1326 fpRegfileWrites++; 1327 regFile.setFloatReg(phys_reg, val); 1328} 1329 1330template <class Impl> 1331void 1332FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) 1333{ 1334 vecRegfileWrites++; 1335 regFile.setVecReg(phys_reg, val); 1336} 1337 1338template <class Impl> 1339void 1340FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) 1341{ 1342 vecRegfileWrites++; 1343 regFile.setVecElem(phys_reg, val); 1344} 1345 1346template <class Impl> 1347void 1348FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg, 1349 const VecPredRegContainer& val) 1350{ 1351 vecPredRegfileWrites++; 1352 regFile.setVecPredReg(phys_reg, val); 1353} 1354 1355template <class Impl> 1356void 1357FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, RegVal val) 1358{ 1359 
ccRegfileWrites++; 1360 regFile.setCCReg(phys_reg, val); 1361} 1362 1363template <class Impl> 1364RegVal 1365FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) 1366{ 1367 intRegfileReads++; 1368 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1369 RegId(IntRegClass, reg_idx)); 1370 1371 return regFile.readIntReg(phys_reg); 1372} 1373 1374template <class Impl> 1375RegVal 1376FullO3CPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid) 1377{ 1378 fpRegfileReads++; 1379 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1380 RegId(FloatRegClass, reg_idx)); 1381 1382 return regFile.readFloatReg(phys_reg); 1383} 1384 1385template <class Impl> 1386auto 1387FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const 1388 -> const VecRegContainer& 1389{ 1390 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1391 RegId(VecRegClass, reg_idx)); 1392 return readVecReg(phys_reg); 1393} 1394 1395template <class Impl> 1396auto 1397FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid) 1398 -> VecRegContainer& 1399{ 1400 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1401 RegId(VecRegClass, reg_idx)); 1402 return getWritableVecReg(phys_reg); 1403} 1404 1405template <class Impl> 1406auto 1407FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1408 ThreadID tid) const -> const VecElem& 1409{ 1410 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1411 RegId(VecElemClass, reg_idx, ldx)); 1412 return readVecElem(phys_reg); 1413} 1414 1415template <class Impl> 1416auto 1417FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const 1418 -> const VecPredRegContainer& 1419{ 1420 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1421 RegId(VecPredRegClass, reg_idx)); 1422 return readVecPredReg(phys_reg); 1423} 1424 1425template <class Impl> 1426auto 1427FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid) 1428 -> VecPredRegContainer& 1429{ 1430 PhysRegIdPtr phys_reg = 
commitRenameMap[tid].lookup( 1431 RegId(VecPredRegClass, reg_idx)); 1432 return getWritableVecPredReg(phys_reg); 1433} 1434 1435template <class Impl> 1436RegVal 1437FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) 1438{ 1439 ccRegfileReads++; 1440 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1441 RegId(CCRegClass, reg_idx)); 1442 1443 return regFile.readCCReg(phys_reg); 1444} 1445 1446template <class Impl> 1447void 1448FullO3CPU<Impl>::setArchIntReg(int reg_idx, RegVal val, ThreadID tid) 1449{ 1450 intRegfileWrites++; 1451 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1452 RegId(IntRegClass, reg_idx)); 1453 1454 regFile.setIntReg(phys_reg, val); 1455} 1456 1457template <class Impl> 1458void 1459FullO3CPU<Impl>::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid) 1460{ 1461 fpRegfileWrites++; 1462 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1463 RegId(FloatRegClass, reg_idx)); 1464 1465 regFile.setFloatReg(phys_reg, val); 1466} 1467 1468template <class Impl> 1469void 1470FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val, 1471 ThreadID tid) 1472{ 1473 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1474 RegId(VecRegClass, reg_idx)); 1475 setVecReg(phys_reg, val); 1476} 1477 1478template <class Impl> 1479void 1480FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1481 const VecElem& val, ThreadID tid) 1482{ 1483 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1484 RegId(VecElemClass, reg_idx, ldx)); 1485 setVecElem(phys_reg, val); 1486} 1487 1488template <class Impl> 1489void 1490FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, 1491 ThreadID tid) 1492{ 1493 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1494 RegId(VecPredRegClass, reg_idx)); 1495 setVecPredReg(phys_reg, val); 1496} 1497 1498template <class Impl> 1499void 1500FullO3CPU<Impl>::setArchCCReg(int reg_idx, RegVal val, ThreadID tid) 1501{ 1502 ccRegfileWrites++; 1503 
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1504 RegId(CCRegClass, reg_idx)); 1505 1506 regFile.setCCReg(phys_reg, val); 1507} 1508 1509template <class Impl> 1510TheISA::PCState 1511FullO3CPU<Impl>::pcState(ThreadID tid) 1512{ 1513 return commit.pcState(tid); 1514} 1515 1516template <class Impl> 1517void 1518FullO3CPU<Impl>::pcState(const TheISA::PCState &val, ThreadID tid) 1519{ 1520 commit.pcState(val, tid); 1521} 1522 1523template <class Impl> 1524Addr 1525FullO3CPU<Impl>::instAddr(ThreadID tid) 1526{ 1527 return commit.instAddr(tid); 1528} 1529 1530template <class Impl> 1531Addr 1532FullO3CPU<Impl>::nextInstAddr(ThreadID tid) 1533{ 1534 return commit.nextInstAddr(tid); 1535} 1536 1537template <class Impl> 1538MicroPC 1539FullO3CPU<Impl>::microPC(ThreadID tid) 1540{ 1541 return commit.microPC(tid); 1542} 1543 1544template <class Impl> 1545void 1546FullO3CPU<Impl>::squashFromTC(ThreadID tid) 1547{ 1548 this->thread[tid]->noSquashFromTC = true; 1549 this->commit.generateTCEvent(tid); 1550} 1551 1552template <class Impl> 1553typename FullO3CPU<Impl>::ListIt 1554FullO3CPU<Impl>::addInst(const DynInstPtr &inst) 1555{ 1556 instList.push_back(inst); 1557 1558 return --(instList.end()); 1559} 1560 1561template <class Impl> 1562void 1563FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst) 1564{ 1565 // Keep an instruction count. 1566 if (!inst->isMicroop() || inst->isLastMicroop()) { 1567 thread[tid]->numInst++; 1568 thread[tid]->numInsts++; 1569 committedInsts[tid]++; 1570 system->totalNumInsts++; 1571 1572 // Check for instruction-count-based events. 
1573 comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); 1574 system->instEventQueue.serviceEvents(system->totalNumInsts); 1575 } 1576 thread[tid]->numOp++; 1577 thread[tid]->numOps++; 1578 committedOps[tid]++; 1579 1580 probeInstCommit(inst->staticInst, inst->instAddr()); 1581} 1582 1583template <class Impl> 1584void 1585FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst) 1586{ 1587 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s " 1588 "[sn:%lli]\n", 1589 inst->threadNumber, inst->pcState(), inst->seqNum); 1590 1591 removeInstsThisCycle = true; 1592 1593 // Remove the front instruction. 1594 removeList.push(inst->getInstListIt()); 1595} 1596 1597template <class Impl> 1598void 1599FullO3CPU<Impl>::removeInstsNotInROB(ThreadID tid) 1600{ 1601 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" 1602 " list.\n", tid); 1603 1604 ListIt end_it; 1605 1606 bool rob_empty = false; 1607 1608 if (instList.empty()) { 1609 return; 1610 } else if (rob.isEmpty(tid)) { 1611 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); 1612 end_it = instList.begin(); 1613 rob_empty = true; 1614 } else { 1615 end_it = (rob.readTailInst(tid))->getInstListIt(); 1616 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); 1617 } 1618 1619 removeInstsThisCycle = true; 1620 1621 ListIt inst_it = instList.end(); 1622 1623 inst_it--; 1624 1625 // Walk through the instruction list, removing any instructions 1626 // that were inserted after the given instruction iterator, end_it. 1627 while (inst_it != end_it) { 1628 assert(!instList.empty()); 1629 1630 squashInstIt(inst_it, tid); 1631 1632 inst_it--; 1633 } 1634 1635 // If the ROB was empty, then we actually need to remove the first 1636 // instruction as well. 
1637 if (rob_empty) { 1638 squashInstIt(inst_it, tid); 1639 } 1640} 1641 1642template <class Impl> 1643void 1644FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) 1645{ 1646 assert(!instList.empty()); 1647 1648 removeInstsThisCycle = true; 1649 1650 ListIt inst_iter = instList.end(); 1651 1652 inst_iter--; 1653 1654 DPRINTF(O3CPU, "Deleting instructions from instruction " 1655 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", 1656 tid, seq_num, (*inst_iter)->seqNum); 1657 1658 while ((*inst_iter)->seqNum > seq_num) { 1659 1660 bool break_loop = (inst_iter == instList.begin()); 1661 1662 squashInstIt(inst_iter, tid); 1663 1664 inst_iter--; 1665 1666 if (break_loop) 1667 break; 1668 } 1669} 1670 1671template <class Impl> 1672inline void 1673FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, ThreadID tid) 1674{ 1675 if ((*instIt)->threadNumber == tid) { 1676 DPRINTF(O3CPU, "Squashing instruction, " 1677 "[tid:%i] [sn:%lli] PC %s\n", 1678 (*instIt)->threadNumber, 1679 (*instIt)->seqNum, 1680 (*instIt)->pcState()); 1681 1682 // Mark it as squashed. 1683 (*instIt)->setSquashed(); 1684 1685 // @todo: Formulate a consistent method for deleting 1686 // instructions from the instruction list 1687 // Remove the instruction from the list. 
1688 removeList.push(instIt); 1689 } 1690} 1691 1692template <class Impl> 1693void 1694FullO3CPU<Impl>::cleanUpRemovedInsts() 1695{ 1696 while (!removeList.empty()) { 1697 DPRINTF(O3CPU, "Removing instruction, " 1698 "[tid:%i] [sn:%lli] PC %s\n", 1699 (*removeList.front())->threadNumber, 1700 (*removeList.front())->seqNum, 1701 (*removeList.front())->pcState()); 1702 1703 instList.erase(removeList.front()); 1704 1705 removeList.pop(); 1706 } 1707 1708 removeInstsThisCycle = false; 1709} 1710/* 1711template <class Impl> 1712void 1713FullO3CPU<Impl>::removeAllInsts() 1714{ 1715 instList.clear(); 1716} 1717*/ 1718template <class Impl> 1719void 1720FullO3CPU<Impl>::dumpInsts() 1721{ 1722 int num = 0; 1723 1724 ListIt inst_list_it = instList.begin(); 1725 1726 cprintf("Dumping Instruction List\n"); 1727 1728 while (inst_list_it != instList.end()) { 1729 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" 1730 "Squashed:%i\n\n", 1731 num, (*inst_list_it)->instAddr(), (*inst_list_it)->threadNumber, 1732 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), 1733 (*inst_list_it)->isSquashed()); 1734 inst_list_it++; 1735 ++num; 1736 } 1737} 1738/* 1739template <class Impl> 1740void 1741FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst) 1742{ 1743 iew.wakeDependents(inst); 1744} 1745*/ 1746template <class Impl> 1747void 1748FullO3CPU<Impl>::wakeCPU() 1749{ 1750 if (activityRec.active() || tickEvent.scheduled()) { 1751 DPRINTF(Activity, "CPU already running.\n"); 1752 return; 1753 } 1754 1755 DPRINTF(Activity, "Waking up CPU\n"); 1756 1757 Cycles cycles(curCycle() - lastRunningCycle); 1758 // @todo: This is an oddity that is only here to match the stats 1759 if (cycles > 1) { 1760 --cycles; 1761 idleCycles += cycles; 1762 numCycles += cycles; 1763 } 1764 1765 schedule(tickEvent, clockEdge()); 1766} 1767 1768template <class Impl> 1769void 1770FullO3CPU<Impl>::wakeup(ThreadID tid) 1771{ 1772 if (this->thread[tid]->status() != ThreadContext::Suspended) 1773 
return; 1774 1775 this->wakeCPU(); 1776 1777 DPRINTF(Quiesce, "Suspended Processor woken\n"); 1778 this->threadContexts[tid]->activate(); 1779} 1780 1781template <class Impl> 1782ThreadID 1783FullO3CPU<Impl>::getFreeTid() 1784{ 1785 for (ThreadID tid = 0; tid < numThreads; tid++) { 1786 if (!tids[tid]) { 1787 tids[tid] = true; 1788 return tid; 1789 } 1790 } 1791 1792 return InvalidThreadID; 1793} 1794 1795template <class Impl> 1796void 1797FullO3CPU<Impl>::updateThreadPriority() 1798{ 1799 if (activeThreads.size() > 1) { 1800 //DEFAULT TO ROUND ROBIN SCHEME 1801 //e.g. Move highest priority to end of thread list 1802 list<ThreadID>::iterator list_begin = activeThreads.begin(); 1803 1804 unsigned high_thread = *list_begin; 1805 1806 activeThreads.erase(list_begin); 1807 1808 activeThreads.push_back(high_thread); 1809 } 1810} 1811 1812template <class Impl> 1813void 1814FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid) 1815{ 1816 DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid); 1817 1818 // the thread trying to exit can't be already halted 1819 assert(tcBase(tid)->status() != ThreadContext::Halted); 1820 1821 // make sure the thread has not been added to the list yet 1822 assert(exitingThreads.count(tid) == 0); 1823 1824 // add the thread to exitingThreads list to mark that this thread is 1825 // trying to exit. The boolean value in the pair denotes if a thread is 1826 // ready to exit. The thread is not ready to exit until the corresponding 1827 // exit trap event is processed in the future. Until then, it'll be still 1828 // an active thread that is trying to exit. 
1829 exitingThreads.emplace(std::make_pair(tid, false)); 1830} 1831 1832template <class Impl> 1833bool 1834FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const 1835{ 1836 return exitingThreads.count(tid) == 1; 1837} 1838 1839template <class Impl> 1840void 1841FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid) 1842{ 1843 assert(exitingThreads.count(tid) == 1); 1844 1845 // exit trap event has been processed. Now, the thread is ready to exit 1846 // and be removed from the CPU. 1847 exitingThreads[tid] = true; 1848 1849 // we schedule a threadExitEvent in the next cycle to properly clean 1850 // up the thread's states in the pipeline. threadExitEvent has lower 1851 // priority than tickEvent, so the cleanup will happen at the very end 1852 // of the next cycle after all pipeline stages complete their operations. 1853 // We want all stages to complete squashing instructions before doing 1854 // the cleanup. 1855 if (!threadExitEvent.scheduled()) { 1856 schedule(threadExitEvent, nextCycle()); 1857 } 1858} 1859 1860template <class Impl> 1861void 1862FullO3CPU<Impl>::exitThreads() 1863{ 1864 // there must be at least one thread trying to exit 1865 assert(exitingThreads.size() > 0); 1866 1867 // terminate all threads that are ready to exit 1868 auto it = exitingThreads.begin(); 1869 while (it != exitingThreads.end()) { 1870 ThreadID thread_id = it->first; 1871 bool readyToExit = it->second; 1872 1873 if (readyToExit) { 1874 DPRINTF(O3CPU, "Exiting thread %d\n", thread_id); 1875 haltContext(thread_id); 1876 tcBase(thread_id)->setStatus(ThreadContext::Halted); 1877 it = exitingThreads.erase(it); 1878 } else { 1879 it++; 1880 } 1881 } 1882} 1883 1884// Forward declaration of FullO3CPU. 1885template class FullO3CPU<O3CPUImpl>; 1886