cpu.cc revision 14195:c5efdb3319aa
1/* 2 * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2006 The Regents of The University of Michigan 16 * Copyright (c) 2011 Regents of the University of California 17 * All rights reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions are 21 * met: redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer; 23 * redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution; 26 * neither the name of the copyright holders nor the names of its 27 * contributors may be used to endorse or promote products derived from 28 * this software without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 * 42 * Authors: Kevin Lim 43 * Korey Sewell 44 * Rick Strong 45 */ 46 47#include "cpu/o3/cpu.hh" 48 49#include "arch/generic/traits.hh" 50#include "arch/kernel_stats.hh" 51#include "config/the_isa.hh" 52#include "cpu/activity.hh" 53#include "cpu/checker/cpu.hh" 54#include "cpu/checker/thread_context.hh" 55#include "cpu/o3/isa_specific.hh" 56#include "cpu/o3/thread_context.hh" 57#include "cpu/quiesce_event.hh" 58#include "cpu/simple_thread.hh" 59#include "cpu/thread_context.hh" 60#include "debug/Activity.hh" 61#include "debug/Drain.hh" 62#include "debug/O3CPU.hh" 63#include "debug/Quiesce.hh" 64#include "enums/MemoryMode.hh" 65#include "sim/core.hh" 66#include "sim/full_system.hh" 67#include "sim/process.hh" 68#include "sim/stat_control.hh" 69#include "sim/system.hh" 70 71#if THE_ISA == ALPHA_ISA 72#include "arch/alpha/osfpal.hh" 73#include "debug/Activity.hh" 74 75#endif 76 77struct BaseCPUParams; 78 79using namespace TheISA; 80using namespace std; 81 82BaseO3CPU::BaseO3CPU(BaseCPUParams *params) 83 : BaseCPU(params) 84{ 85} 86 87void 88BaseO3CPU::regStats() 89{ 90 BaseCPU::regStats(); 91} 92 93template <class Impl> 94FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) 95 : BaseO3CPU(params), 96 itb(params->itb), 97 dtb(params->dtb), 98 tickEvent([this]{ tick(); }, "FullO3CPU tick", 99 false, Event::CPU_Tick_Pri), 100 threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads", 101 false, Event::CPU_Exit_Pri), 102#ifndef NDEBUG 103 instcount(0), 104#endif 105 removeInstsThisCycle(false), 106 fetch(this, params), 107 decode(this, params), 108 rename(this, params), 109 iew(this, params), 110 commit(this, params), 111 112 /* It is mandatory that all SMT threads use the same renaming mode as 113 * they are sharing registers and rename */ 114 vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])), 115 regFile(params->numPhysIntRegs, 116 params->numPhysFloatRegs, 117 params->numPhysVecRegs, 118 params->numPhysVecPredRegs, 119 params->numPhysCCRegs, 120 vecMode), 121 122 freeList(name() + ".freelist", ®File), 123 124 rob(this, params), 125 126 scoreboard(name() + ".scoreboard", 127 regFile.totalNumPhysRegs()), 128 129 isa(numThreads, NULL), 130 131 timeBuffer(params->backComSize, params->forwardComSize), 132 fetchQueue(params->backComSize, params->forwardComSize), 133 decodeQueue(params->backComSize, params->forwardComSize), 134 renameQueue(params->backComSize, params->forwardComSize), 135 iewQueue(params->backComSize, params->forwardComSize), 136 activityRec(name(), NumStages, 137 params->backComSize + params->forwardComSize, 138 params->activity), 139 140 globalSeqNum(1), 141 system(params->system), 142 lastRunningCycle(curCycle()) 143{ 144 if (!params->switched_out) { 145 _status = Running; 146 } else { 147 _status = SwitchedOut; 148 } 149 150 if (params->checker) { 151 BaseCPU *temp_checker = params->checker; 152 checker = dynamic_cast<Checker<Impl> *>(temp_checker); 153 checker->setIcachePort(&this->fetch.getInstPort()); 154 checker->setSystem(params->system); 155 } else { 156 checker = NULL; 157 } 158 159 if (!FullSystem) { 160 thread.resize(numThreads); 161 tids.resize(numThreads); 162 } 163 164 // The stages also need their CPU pointer setup. However this 165 // must be done at the upper level CPU because they have pointers 166 // to the upper level CPU, and not this FullO3CPU. 167 168 // Set up Pointers to the activeThreads list for each stage 169 fetch.setActiveThreads(&activeThreads); 170 decode.setActiveThreads(&activeThreads); 171 rename.setActiveThreads(&activeThreads); 172 iew.setActiveThreads(&activeThreads); 173 commit.setActiveThreads(&activeThreads); 174 175 // Give each of the stages the time buffer they will use. 176 fetch.setTimeBuffer(&timeBuffer); 177 decode.setTimeBuffer(&timeBuffer); 178 rename.setTimeBuffer(&timeBuffer); 179 iew.setTimeBuffer(&timeBuffer); 180 commit.setTimeBuffer(&timeBuffer); 181 182 // Also setup each of the stages' queues. 183 fetch.setFetchQueue(&fetchQueue); 184 decode.setFetchQueue(&fetchQueue); 185 commit.setFetchQueue(&fetchQueue); 186 decode.setDecodeQueue(&decodeQueue); 187 rename.setDecodeQueue(&decodeQueue); 188 rename.setRenameQueue(&renameQueue); 189 iew.setRenameQueue(&renameQueue); 190 iew.setIEWQueue(&iewQueue); 191 commit.setIEWQueue(&iewQueue); 192 commit.setRenameQueue(&renameQueue); 193 194 commit.setIEWStage(&iew); 195 rename.setIEWStage(&iew); 196 rename.setCommitStage(&commit); 197 198 ThreadID active_threads; 199 if (FullSystem) { 200 active_threads = 1; 201 } else { 202 active_threads = params->workload.size(); 203 204 if (active_threads > Impl::MaxThreads) { 205 panic("Workload Size too large. Increase the 'MaxThreads' " 206 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) " 207 "or edit your workload size."); 208 } 209 } 210 211 //Make Sure That this a Valid Architeture 212 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); 213 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); 214 assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); 215 assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); 216 assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); 217 218 rename.setScoreboard(&scoreboard); 219 iew.setScoreboard(&scoreboard); 220 221 // Setup the rename map for whichever stages need it. 222 for (ThreadID tid = 0; tid < numThreads; tid++) { 223 isa[tid] = params->isa[tid]; 224 assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0])); 225 226 // Only Alpha has an FP zero register, so for other ISAs we 227 // use an invalid FP register index to avoid special treatment 228 // of any valid FP reg. 229 RegIndex invalidFPReg = TheISA::NumFloatRegs + 1; 230 RegIndex fpZeroReg = 231 (THE_ISA == ALPHA_ISA) ? TheISA::ZeroReg : invalidFPReg; 232 233 commitRenameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 234 &freeList, 235 vecMode); 236 237 renameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 238 &freeList, vecMode); 239 } 240 241 // Initialize rename map to assign physical registers to the 242 // architectural registers for active threads only. 243 for (ThreadID tid = 0; tid < active_threads; tid++) { 244 for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) { 245 // Note that we can't use the rename() method because we don't 246 // want special treatment for the zero register at this point 247 PhysRegIdPtr phys_reg = freeList.getIntReg(); 248 renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 249 commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 250 } 251 252 for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) { 253 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 254 renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg); 255 commitRenameMap[tid].setEntry( 256 RegId(FloatRegClass, ridx), phys_reg); 257 } 258 259 /* Here we need two 'interfaces' the 'whole register' and the 260 * 'register element'. At any point only one of them will be 261 * active. */ 262 if (vecMode == Enums::Full) { 263 /* Initialize the full-vector interface */ 264 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 265 RegId rid = RegId(VecRegClass, ridx); 266 PhysRegIdPtr phys_reg = freeList.getVecReg(); 267 renameMap[tid].setEntry(rid, phys_reg); 268 commitRenameMap[tid].setEntry(rid, phys_reg); 269 } 270 } else { 271 /* Initialize the vector-element interface */ 272 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 273 for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg; 274 ++ldx) { 275 RegId lrid = RegId(VecElemClass, ridx, ldx); 276 PhysRegIdPtr phys_elem = freeList.getVecElem(); 277 renameMap[tid].setEntry(lrid, phys_elem); 278 commitRenameMap[tid].setEntry(lrid, phys_elem); 279 } 280 } 281 } 282 283 for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { 284 PhysRegIdPtr phys_reg = freeList.getVecPredReg(); 285 renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); 286 commitRenameMap[tid].setEntry( 287 RegId(VecPredRegClass, ridx), phys_reg); 288 } 289 290 for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) { 291 PhysRegIdPtr phys_reg = freeList.getCCReg(); 292 renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 293 commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 294 } 295 } 296 297 rename.setRenameMap(renameMap); 298 commit.setRenameMap(commitRenameMap); 299 rename.setFreeList(&freeList); 300 301 // Setup the ROB for whichever stages need it. 302 commit.setROB(&rob); 303 304 lastActivatedCycle = 0; 305 306 DPRINTF(O3CPU, "Creating O3CPU object.\n"); 307 308 // Setup any thread state. 309 this->thread.resize(this->numThreads); 310 311 for (ThreadID tid = 0; tid < this->numThreads; ++tid) { 312 if (FullSystem) { 313 // SMT is not supported in FS mode yet. 314 assert(this->numThreads == 1); 315 this->thread[tid] = new Thread(this, 0, NULL); 316 } else { 317 if (tid < params->workload.size()) { 318 DPRINTF(O3CPU, "Workload[%i] process is %#x", 319 tid, this->thread[tid]); 320 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 321 (typename Impl::O3CPU *)(this), 322 tid, params->workload[tid]); 323 324 //usedTids[tid] = true; 325 //threadMap[tid] = tid; 326 } else { 327 //Allocate Empty thread so M5 can use later 328 //when scheduling threads to CPU 329 Process* dummy_proc = NULL; 330 331 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 332 (typename Impl::O3CPU *)(this), 333 tid, dummy_proc); 334 //usedTids[tid] = false; 335 } 336 } 337 338 ThreadContext *tc; 339 340 // Setup the TC that will serve as the interface to the threads/CPU. 341 O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>; 342 343 tc = o3_tc; 344 345 // If we're using a checker, then the TC should be the 346 // CheckerThreadContext. 347 if (params->checker) { 348 tc = new CheckerThreadContext<O3ThreadContext<Impl> >( 349 o3_tc, this->checker); 350 } 351 352 o3_tc->cpu = (typename Impl::O3CPU *)(this); 353 assert(o3_tc->cpu); 354 o3_tc->thread = this->thread[tid]; 355 356 // Setup quiesce event. 357 this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc); 358 359 // Give the thread the TC. 360 this->thread[tid]->tc = tc; 361 362 // Add the TC to the CPU's list of TC's. 363 this->threadContexts.push_back(tc); 364 } 365 366 // FullO3CPU always requires an interrupt controller. 367 if (!params->switched_out && interrupts.empty()) { 368 fatal("FullO3CPU %s has no interrupt controller.\n" 369 "Ensure createInterruptController() is called.\n", name()); 370 } 371 372 for (ThreadID tid = 0; tid < this->numThreads; tid++) 373 this->thread[tid]->setFuncExeInst(0); 374} 375 376template <class Impl> 377FullO3CPU<Impl>::~FullO3CPU() 378{ 379} 380 381template <class Impl> 382void 383FullO3CPU<Impl>::regProbePoints() 384{ 385 BaseCPU::regProbePoints(); 386 387 ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete"); 388 ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete"); 389 390 fetch.regProbePoints(); 391 rename.regProbePoints(); 392 iew.regProbePoints(); 393 commit.regProbePoints(); 394} 395 396template <class Impl> 397void 398FullO3CPU<Impl>::regStats() 399{ 400 BaseO3CPU::regStats(); 401 402 // Register any of the O3CPU's stats here. 403 timesIdled 404 .name(name() + ".timesIdled") 405 .desc("Number of times that the entire CPU went into an idle state and" 406 " unscheduled itself") 407 .prereq(timesIdled); 408 409 idleCycles 410 .name(name() + ".idleCycles") 411 .desc("Total number of cycles that the CPU has spent unscheduled due " 412 "to idling") 413 .prereq(idleCycles); 414 415 quiesceCycles 416 .name(name() + ".quiesceCycles") 417 .desc("Total number of cycles that CPU has spent quiesced or waiting " 418 "for an interrupt") 419 .prereq(quiesceCycles); 420 421 // Number of Instructions simulated 422 // -------------------------------- 423 // Should probably be in Base CPU but need templated 424 // MaxThreads so put in here instead 425 committedInsts 426 .init(numThreads) 427 .name(name() + ".committedInsts") 428 .desc("Number of Instructions Simulated") 429 .flags(Stats::total); 430 431 committedOps 432 .init(numThreads) 433 .name(name() + ".committedOps") 434 .desc("Number of Ops (including micro ops) Simulated") 435 .flags(Stats::total); 436 437 cpi 438 .name(name() + ".cpi") 439 .desc("CPI: Cycles Per Instruction") 440 .precision(6); 441 cpi = numCycles / committedInsts; 442 443 totalCpi 444 .name(name() + ".cpi_total") 445 .desc("CPI: Total CPI of All Threads") 446 .precision(6); 447 totalCpi = numCycles / sum(committedInsts); 448 449 ipc 450 .name(name() + ".ipc") 451 .desc("IPC: Instructions Per Cycle") 452 .precision(6); 453 ipc = committedInsts / numCycles; 454 455 totalIpc 456 .name(name() + ".ipc_total") 457 .desc("IPC: Total IPC of All Threads") 458 .precision(6); 459 totalIpc = sum(committedInsts) / numCycles; 460 461 this->fetch.regStats(); 462 this->decode.regStats(); 463 this->rename.regStats(); 464 this->iew.regStats(); 465 this->commit.regStats(); 466 this->rob.regStats(); 467 468 intRegfileReads 469 .name(name() + ".int_regfile_reads") 470 .desc("number of integer regfile reads") 471 .prereq(intRegfileReads); 472 473 intRegfileWrites 474 .name(name() + ".int_regfile_writes") 475 .desc("number of integer regfile writes") 476 .prereq(intRegfileWrites); 477 478 fpRegfileReads 479 .name(name() + ".fp_regfile_reads") 480 .desc("number of floating regfile reads") 481 .prereq(fpRegfileReads); 482 483 fpRegfileWrites 484 .name(name() + ".fp_regfile_writes") 485 .desc("number of floating regfile writes") 486 .prereq(fpRegfileWrites); 487 488 vecRegfileReads 489 .name(name() + ".vec_regfile_reads") 490 .desc("number of vector regfile reads") 491 .prereq(vecRegfileReads); 492 493 vecRegfileWrites 494 .name(name() + ".vec_regfile_writes") 495 .desc("number of vector regfile writes") 496 .prereq(vecRegfileWrites); 497 498 vecPredRegfileReads 499 .name(name() + ".pred_regfile_reads") 500 .desc("number of predicate regfile reads") 501 .prereq(vecPredRegfileReads); 502 503 vecPredRegfileWrites 504 .name(name() + ".pred_regfile_writes") 505 .desc("number of predicate regfile writes") 506 .prereq(vecPredRegfileWrites); 507 508 ccRegfileReads 509 .name(name() + ".cc_regfile_reads") 510 .desc("number of cc regfile reads") 511 .prereq(ccRegfileReads); 512 513 ccRegfileWrites 514 .name(name() + ".cc_regfile_writes") 515 .desc("number of cc regfile writes") 516 .prereq(ccRegfileWrites); 517 518 miscRegfileReads 519 .name(name() + ".misc_regfile_reads") 520 .desc("number of misc regfile reads") 521 .prereq(miscRegfileReads); 522 523 miscRegfileWrites 524 .name(name() + ".misc_regfile_writes") 525 .desc("number of misc regfile writes") 526 .prereq(miscRegfileWrites); 527} 528 529template <class Impl> 530void 531FullO3CPU<Impl>::tick() 532{ 533 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); 534 assert(!switchedOut()); 535 assert(drainState() != DrainState::Drained); 536 537 ++numCycles; 538 updateCycleCounters(BaseCPU::CPU_STATE_ON); 539 540// activity = false; 541 542 //Tick each of the stages 543 fetch.tick(); 544 545 decode.tick(); 546 547 rename.tick(); 548 549 iew.tick(); 550 551 commit.tick(); 552 553 // Now advance the time buffers 554 timeBuffer.advance(); 555 556 fetchQueue.advance(); 557 decodeQueue.advance(); 558 renameQueue.advance(); 559 iewQueue.advance(); 560 561 activityRec.advance(); 562 563 if (removeInstsThisCycle) { 564 cleanUpRemovedInsts(); 565 } 566 567 if (!tickEvent.scheduled()) { 568 if (_status == SwitchedOut) { 569 DPRINTF(O3CPU, "Switched out!\n"); 570 // increment stat 571 lastRunningCycle = curCycle(); 572 } else if (!activityRec.active() || _status == Idle) { 573 DPRINTF(O3CPU, "Idle!\n"); 574 lastRunningCycle = curCycle(); 575 timesIdled++; 576 } else { 577 schedule(tickEvent, clockEdge(Cycles(1))); 578 DPRINTF(O3CPU, "Scheduling next tick!\n"); 579 } 580 } 581 582 if (!FullSystem) 583 updateThreadPriority(); 584 585 tryDrain(); 586} 587 588template <class Impl> 589void 590FullO3CPU<Impl>::init() 591{ 592 BaseCPU::init(); 593 594 for (ThreadID tid = 0; tid < numThreads; ++tid) { 595 // Set noSquashFromTC so that the CPU doesn't squash when initially 596 // setting up registers. 597 thread[tid]->noSquashFromTC = true; 598 // Initialise the ThreadContext's memory proxies 599 thread[tid]->initMemProxies(thread[tid]->getTC()); 600 } 601 602 if (FullSystem && !params()->switched_out) { 603 for (ThreadID tid = 0; tid < numThreads; tid++) { 604 ThreadContext *src_tc = threadContexts[tid]; 605 TheISA::initCPU(src_tc, src_tc->contextId()); 606 } 607 } 608 609 // Clear noSquashFromTC. 610 for (int tid = 0; tid < numThreads; ++tid) 611 thread[tid]->noSquashFromTC = false; 612 613 commit.setThreads(thread); 614} 615 616template <class Impl> 617void 618FullO3CPU<Impl>::startup() 619{ 620 BaseCPU::startup(); 621 for (int tid = 0; tid < numThreads; ++tid) 622 isa[tid]->startup(threadContexts[tid]); 623 624 fetch.startupStage(); 625 decode.startupStage(); 626 iew.startupStage(); 627 rename.startupStage(); 628 commit.startupStage(); 629} 630 631template <class Impl> 632void 633FullO3CPU<Impl>::activateThread(ThreadID tid) 634{ 635 list<ThreadID>::iterator isActive = 636 std::find(activeThreads.begin(), activeThreads.end(), tid); 637 638 DPRINTF(O3CPU, "[tid:%i] Calling activate thread.\n", tid); 639 assert(!switchedOut()); 640 641 if (isActive == activeThreads.end()) { 642 DPRINTF(O3CPU, "[tid:%i] Adding to active threads list\n", 643 tid); 644 645 activeThreads.push_back(tid); 646 } 647} 648 649template <class Impl> 650void 651FullO3CPU<Impl>::deactivateThread(ThreadID tid) 652{ 653 //Remove From Active List, if Active 654 list<ThreadID>::iterator thread_it = 655 std::find(activeThreads.begin(), activeThreads.end(), tid); 656 657 DPRINTF(O3CPU, "[tid:%i] Calling deactivate thread.\n", tid); 658 assert(!switchedOut()); 659 660 if (thread_it != activeThreads.end()) { 661 DPRINTF(O3CPU,"[tid:%i] Removing from active threads list\n", 662 tid); 663 activeThreads.erase(thread_it); 664 } 665 666 fetch.deactivateThread(tid); 667 commit.deactivateThread(tid); 668} 669 670template <class Impl> 671Counter 672FullO3CPU<Impl>::totalInsts() const 673{ 674 Counter total(0); 675 676 ThreadID size = thread.size(); 677 for (ThreadID i = 0; i < size; i++) 678 total += thread[i]->numInst; 679 680 return total; 681} 682 683template <class Impl> 684Counter 685FullO3CPU<Impl>::totalOps() const 686{ 687 Counter total(0); 688 689 ThreadID size = thread.size(); 690 for (ThreadID i = 0; i < size; i++) 691 total += thread[i]->numOp; 692 693 return total; 694} 695 696template <class Impl> 697void 698FullO3CPU<Impl>::activateContext(ThreadID tid) 699{ 700 assert(!switchedOut()); 701 702 // Needs to set each stage to running as well. 703 activateThread(tid); 704 705 // We don't want to wake the CPU if it is drained. In that case, 706 // we just want to flag the thread as active and schedule the tick 707 // event from drainResume() instead. 708 if (drainState() == DrainState::Drained) 709 return; 710 711 // If we are time 0 or if the last activation time is in the past, 712 // schedule the next tick and wake up the fetch unit 713 if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) { 714 scheduleTickEvent(Cycles(0)); 715 716 // Be sure to signal that there's some activity so the CPU doesn't 717 // deschedule itself. 718 activityRec.activity(); 719 fetch.wakeFromQuiesce(); 720 721 Cycles cycles(curCycle() - lastRunningCycle); 722 // @todo: This is an oddity that is only here to match the stats 723 if (cycles != 0) 724 --cycles; 725 quiesceCycles += cycles; 726 727 lastActivatedCycle = curTick(); 728 729 _status = Running; 730 731 BaseCPU::activateContext(tid); 732 } 733} 734 735template <class Impl> 736void 737FullO3CPU<Impl>::suspendContext(ThreadID tid) 738{ 739 DPRINTF(O3CPU,"[tid:%i] Suspending Thread Context.\n", tid); 740 assert(!switchedOut()); 741 742 deactivateThread(tid); 743 744 // If this was the last thread then unschedule the tick event. 745 if (activeThreads.size() == 0) { 746 unscheduleTickEvent(); 747 lastRunningCycle = curCycle(); 748 _status = Idle; 749 } 750 751 DPRINTF(Quiesce, "Suspending Context\n"); 752 753 BaseCPU::suspendContext(tid); 754} 755 756template <class Impl> 757void 758FullO3CPU<Impl>::haltContext(ThreadID tid) 759{ 760 //For now, this is the same as deallocate 761 DPRINTF(O3CPU,"[tid:%i] Halt Context called. Deallocating\n", tid); 762 assert(!switchedOut()); 763 764 deactivateThread(tid); 765 removeThread(tid); 766 767 updateCycleCounters(BaseCPU::CPU_STATE_SLEEP); 768} 769 770template <class Impl> 771void 772FullO3CPU<Impl>::insertThread(ThreadID tid) 773{ 774 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); 775 // Will change now that the PC and thread state is internal to the CPU 776 // and not in the ThreadContext. 777 ThreadContext *src_tc; 778 if (FullSystem) 779 src_tc = system->threadContexts[tid]; 780 else 781 src_tc = tcBase(tid); 782 783 //Bind Int Regs to Rename Map 784 785 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs; 786 reg_id.index()++) { 787 PhysRegIdPtr phys_reg = freeList.getIntReg(); 788 renameMap[tid].setEntry(reg_id, phys_reg); 789 scoreboard.setReg(phys_reg); 790 } 791 792 //Bind Float Regs to Rename Map 793 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs; 794 reg_id.index()++) { 795 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 796 renameMap[tid].setEntry(reg_id, phys_reg); 797 scoreboard.setReg(phys_reg); 798 } 799 800 //Bind condition-code Regs to Rename Map 801 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; 802 reg_id.index()++) { 803 PhysRegIdPtr phys_reg = freeList.getCCReg(); 804 renameMap[tid].setEntry(reg_id, phys_reg); 805 scoreboard.setReg(phys_reg); 806 } 807 808 //Copy Thread Data Into RegFile 809 //this->copyFromTC(tid); 810 811 //Set PC/NPC/NNPC 812 pcState(src_tc->pcState(), tid); 813 814 src_tc->setStatus(ThreadContext::Active); 815 816 activateContext(tid); 817 818 //Reset ROB/IQ/LSQ Entries 819 commit.rob->resetEntries(); 820} 821 822template <class Impl> 823void 824FullO3CPU<Impl>::removeThread(ThreadID tid) 825{ 826 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); 827 828 // Copy Thread Data From RegFile 829 // If thread is suspended, it might be re-allocated 830 // this->copyToTC(tid); 831 832 833 // @todo: 2-27-2008: Fix how we free up rename mappings 834 // here to alleviate the case for double-freeing registers 835 // in SMT workloads. 836 837 // clear all thread-specific states in each stage of the pipeline 838 // since this thread is going to be completely removed from the CPU 839 commit.clearStates(tid); 840 fetch.clearStates(tid); 841 decode.clearStates(tid); 842 rename.clearStates(tid); 843 iew.clearStates(tid); 844 845 // at this step, all instructions in the pipeline should be already 846 // either committed successfully or squashed. All thread-specific 847 // queues in the pipeline must be empty. 848 assert(iew.instQueue.getCount(tid) == 0); 849 assert(iew.ldstQueue.getCount(tid) == 0); 850 assert(commit.rob->isEmpty(tid)); 851 852 // Reset ROB/IQ/LSQ Entries 853 854 // Commented out for now. This should be possible to do by 855 // telling all the pipeline stages to drain first, and then 856 // checking until the drain completes. Once the pipeline is 857 // drained, call resetEntries(). - 10-09-06 ktlim 858/* 859 if (activeThreads.size() >= 1) { 860 commit.rob->resetEntries(); 861 iew.resetEntries(); 862 } 863*/ 864} 865 866template <class Impl> 867void 868FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist) 869{ 870 auto pc = this->pcState(tid); 871 872 // new_mode is the new vector renaming mode 873 auto new_mode = RenameMode<TheISA::ISA>::mode(pc); 874 875 // We update vecMode only if there has been a change 876 if (new_mode != vecMode) { 877 vecMode = new_mode; 878 879 renameMap[tid].switchMode(vecMode); 880 commitRenameMap[tid].switchMode(vecMode); 881 renameMap[tid].switchFreeList(freelist); 882 } 883} 884 885template <class Impl> 886Fault 887FullO3CPU<Impl>::getInterrupts() 888{ 889 // Check if there are any outstanding interrupts 890 return this->interrupts[0]->getInterrupt(this->threadContexts[0]); 891} 892 893template <class Impl> 894void 895FullO3CPU<Impl>::processInterrupts(const Fault &interrupt) 896{ 897 // Check for interrupts here. For now can copy the code that 898 // exists within isa_fullsys_traits.hh. Also assume that thread 0 899 // is the one that handles the interrupts. 900 // @todo: Possibly consolidate the interrupt checking code. 901 // @todo: Allow other threads to handle interrupts. 902 903 assert(interrupt != NoFault); 904 this->interrupts[0]->updateIntrInfo(this->threadContexts[0]); 905 906 DPRINTF(O3CPU, "Interrupt %s being handled\n", interrupt->name()); 907 this->trap(interrupt, 0, nullptr); 908} 909 910template <class Impl> 911void 912FullO3CPU<Impl>::trap(const Fault &fault, ThreadID tid, 913 const StaticInstPtr &inst) 914{ 915 // Pass the thread's TC into the invoke method. 916 fault->invoke(this->threadContexts[tid], inst); 917} 918 919template <class Impl> 920void 921FullO3CPU<Impl>::syscall(int64_t callnum, ThreadID tid, Fault *fault) 922{ 923 DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); 924 925 DPRINTF(Activity,"Activity: syscall() called.\n"); 926 927 // Temporarily increase this by one to account for the syscall 928 // instruction. 929 ++(this->thread[tid]->funcExeInst); 930 931 // Execute the actual syscall. 932 this->thread[tid]->syscall(callnum, fault); 933 934 // Decrease funcExeInst by one as the normal commit will handle 935 // incrementing it. 936 --(this->thread[tid]->funcExeInst); 937} 938 939template <class Impl> 940void 941FullO3CPU<Impl>::serializeThread(CheckpointOut &cp, ThreadID tid) const 942{ 943 thread[tid]->serialize(cp); 944} 945 946template <class Impl> 947void 948FullO3CPU<Impl>::unserializeThread(CheckpointIn &cp, ThreadID tid) 949{ 950 thread[tid]->unserialize(cp); 951} 952 953template <class Impl> 954DrainState 955FullO3CPU<Impl>::drain() 956{ 957 // Deschedule any power gating event (if any) 958 deschedulePowerGatingEvent(); 959 960 // If the CPU isn't doing anything, then return immediately. 961 if (switchedOut()) 962 return DrainState::Drained; 963 964 DPRINTF(Drain, "Draining...\n"); 965 966 // We only need to signal a drain to the commit stage as this 967 // initiates squashing controls the draining. Once the commit 968 // stage commits an instruction where it is safe to stop, it'll 969 // squash the rest of the instructions in the pipeline and force 970 // the fetch stage to stall. The pipeline will be drained once all 971 // in-flight instructions have retired. 972 commit.drain(); 973 974 // Wake the CPU and record activity so everything can drain out if 975 // the CPU was not able to immediately drain. 976 if (!isCpuDrained()) { 977 // If a thread is suspended, wake it up so it can be drained 978 for (auto t : threadContexts) { 979 if (t->status() == ThreadContext::Suspended){ 980 DPRINTF(Drain, "Currently suspended so activate %i \n", 981 t->threadId()); 982 t->activate(); 983 // As the thread is now active, change the power state as well 984 activateContext(t->threadId()); 985 } 986 } 987 988 wakeCPU(); 989 activityRec.activity(); 990 991 DPRINTF(Drain, "CPU not drained\n"); 992 993 return DrainState::Draining; 994 } else { 995 DPRINTF(Drain, "CPU is already drained\n"); 996 if (tickEvent.scheduled()) 997 deschedule(tickEvent); 998 999 // Flush out any old data from the time buffers. In 1000 // particular, there might be some data in flight from the 1001 // fetch stage that isn't visible in any of the CPU buffers we 1002 // test in isCpuDrained(). 1003 for (int i = 0; i < timeBuffer.getSize(); ++i) { 1004 timeBuffer.advance(); 1005 fetchQueue.advance(); 1006 decodeQueue.advance(); 1007 renameQueue.advance(); 1008 iewQueue.advance(); 1009 } 1010 1011 drainSanityCheck(); 1012 return DrainState::Drained; 1013 } 1014} 1015 1016template <class Impl> 1017bool 1018FullO3CPU<Impl>::tryDrain() 1019{ 1020 if (drainState() != DrainState::Draining || !isCpuDrained()) 1021 return false; 1022 1023 if (tickEvent.scheduled()) 1024 deschedule(tickEvent); 1025 1026 DPRINTF(Drain, "CPU done draining, processing drain event\n"); 1027 signalDrainDone(); 1028 1029 return true; 1030} 1031 1032template <class Impl> 1033void 1034FullO3CPU<Impl>::drainSanityCheck() const 1035{ 1036 assert(isCpuDrained()); 1037 fetch.drainSanityCheck(); 1038 decode.drainSanityCheck(); 1039 rename.drainSanityCheck(); 1040 iew.drainSanityCheck(); 1041 commit.drainSanityCheck(); 1042} 1043 1044template <class Impl> 1045bool 1046FullO3CPU<Impl>::isCpuDrained() const 1047{ 1048 bool drained(true); 1049 1050 if (!instList.empty() || !removeList.empty()) { 1051 DPRINTF(Drain, "Main CPU structures not drained.\n"); 1052 drained = false; 1053 } 1054 1055 if (!fetch.isDrained()) { 1056 DPRINTF(Drain, "Fetch not drained.\n"); 1057 drained = false; 1058 } 1059 1060 if (!decode.isDrained()) { 1061 DPRINTF(Drain, "Decode not drained.\n"); 1062 drained = false; 1063 } 1064 1065 if (!rename.isDrained()) { 1066 DPRINTF(Drain, "Rename not drained.\n"); 1067 drained = false; 1068 } 1069 1070 if (!iew.isDrained()) { 1071 DPRINTF(Drain, "IEW not drained.\n"); 1072 drained = false; 1073 } 1074 1075 if (!commit.isDrained()) { 1076 DPRINTF(Drain, "Commit not drained.\n"); 1077 drained = false; 1078 } 1079 1080 return drained; 1081} 1082 1083template <class Impl> 1084void 1085FullO3CPU<Impl>::commitDrained(ThreadID tid) 1086{ 1087 fetch.drainStall(tid); 1088} 1089 1090template <class Impl> 1091void 1092FullO3CPU<Impl>::drainResume() 1093{ 1094 if (switchedOut()) 1095 return; 1096 1097 DPRINTF(Drain, "Resuming...\n"); 1098 verifyMemoryMode(); 1099 1100 fetch.drainResume(); 1101 commit.drainResume(); 1102 1103 _status = Idle; 1104 for (ThreadID i = 0; i < thread.size(); i++) { 1105 if (thread[i]->status() == ThreadContext::Active) { 1106 DPRINTF(Drain, "Activating thread: %i\n", i); 1107 activateThread(i); 1108 _status = Running; 1109 } 1110 } 1111 1112 assert(!tickEvent.scheduled()); 1113 if (_status == Running) 1114 schedule(tickEvent, nextCycle()); 1115 1116 // Reschedule any power gating event (if any) 1117 schedulePowerGatingEvent(); 1118} 1119 1120template <class Impl> 1121void 1122FullO3CPU<Impl>::switchOut() 1123{ 1124 DPRINTF(O3CPU, "Switching out\n"); 1125 BaseCPU::switchOut(); 1126 1127 activityRec.reset(); 1128 1129 _status = SwitchedOut; 1130 1131 if (checker) 1132 checker->switchOut(); 1133} 1134 1135template <class Impl> 1136void 1137FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) 1138{ 1139 BaseCPU::takeOverFrom(oldCPU); 1140 1141 fetch.takeOverFrom(); 1142 decode.takeOverFrom(); 1143 rename.takeOverFrom(); 1144 iew.takeOverFrom(); 1145 commit.takeOverFrom(); 1146 1147 assert(!tickEvent.scheduled()); 1148 1149 FullO3CPU<Impl> *oldO3CPU = dynamic_cast<FullO3CPU<Impl>*>(oldCPU); 1150 if (oldO3CPU) 1151 globalSeqNum = oldO3CPU->globalSeqNum; 1152 1153 lastRunningCycle = curCycle(); 1154 _status = Idle; 1155} 1156 1157template <class Impl> 1158void 1159FullO3CPU<Impl>::verifyMemoryMode() const 1160{ 1161 if (!system->isTimingMode()) { 1162 fatal("The O3 CPU requires the memory system to be in " 1163 "'timing' mode.\n"); 1164 } 1165} 1166 1167template <class Impl> 1168RegVal 1169FullO3CPU<Impl>::readMiscRegNoEffect(int misc_reg, ThreadID tid) const 1170{ 1171 return this->isa[tid]->readMiscRegNoEffect(misc_reg); 1172} 1173 1174template <class Impl> 1175RegVal 1176FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid) 1177{ 1178 miscRegfileReads++; 1179 return this->isa[tid]->readMiscReg(misc_reg, tcBase(tid)); 1180} 1181 1182template <class Impl> 1183void 1184FullO3CPU<Impl>::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) 1185{ 1186 this->isa[tid]->setMiscRegNoEffect(misc_reg, val); 1187} 1188 1189template <class Impl> 1190void 1191FullO3CPU<Impl>::setMiscReg(int misc_reg, RegVal val, ThreadID tid) 1192{ 1193 miscRegfileWrites++; 1194 this->isa[tid]->setMiscReg(misc_reg, val, tcBase(tid)); 1195} 1196 1197template <class Impl> 1198RegVal 1199FullO3CPU<Impl>::readIntReg(PhysRegIdPtr phys_reg) 1200{ 1201 intRegfileReads++; 1202 return regFile.readIntReg(phys_reg); 1203} 1204 1205template <class Impl> 1206RegVal 1207FullO3CPU<Impl>::readFloatReg(PhysRegIdPtr phys_reg) 1208{ 1209 fpRegfileReads++; 1210 return regFile.readFloatReg(phys_reg); 1211} 1212 1213template <class Impl> 1214auto 1215FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const 1216 -> const VecRegContainer& 1217{ 1218 vecRegfileReads++; 1219 return regFile.readVecReg(phys_reg); 1220} 1221 1222template <class Impl> 1223auto 1224FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg) 1225 -> VecRegContainer& 1226{ 1227 vecRegfileWrites++; 1228 return regFile.getWritableVecReg(phys_reg); 1229} 1230 1231template <class Impl> 1232auto 1233FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& 1234{ 1235 vecRegfileReads++; 1236 return regFile.readVecElem(phys_reg); 1237} 1238 1239template <class Impl> 1240auto 1241FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const 1242 -> const VecPredRegContainer& 1243{ 1244 vecPredRegfileReads++; 1245 return regFile.readVecPredReg(phys_reg); 1246} 1247 1248template <class Impl> 1249auto 1250FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg) 1251 -> VecPredRegContainer& 1252{ 1253 vecPredRegfileWrites++; 1254 return regFile.getWritableVecPredReg(phys_reg); 1255} 1256 1257template <class Impl> 1258RegVal 1259FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) 1260{ 1261 ccRegfileReads++; 1262 return regFile.readCCReg(phys_reg); 1263} 1264 1265template <class Impl> 1266void 1267FullO3CPU<Impl>::setIntReg(PhysRegIdPtr phys_reg, RegVal val) 1268{ 1269 intRegfileWrites++; 1270 regFile.setIntReg(phys_reg, val); 1271} 1272 1273template <class Impl> 1274void 1275FullO3CPU<Impl>::setFloatReg(PhysRegIdPtr phys_reg, RegVal val) 1276{ 1277 fpRegfileWrites++; 1278 regFile.setFloatReg(phys_reg, val); 1279} 1280 1281template <class Impl> 1282void 1283FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) 1284{ 1285 vecRegfileWrites++; 1286 regFile.setVecReg(phys_reg, val); 1287} 1288 1289template <class Impl> 1290void 1291FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) 1292{ 1293 vecRegfileWrites++; 1294 regFile.setVecElem(phys_reg, val); 1295} 1296 1297template <class Impl> 1298void 1299FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg, 1300 const VecPredRegContainer& val) 1301{ 1302 vecPredRegfileWrites++; 1303 regFile.setVecPredReg(phys_reg, val); 1304} 1305 1306template <class Impl> 1307void 1308FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, RegVal val) 1309{ 1310 ccRegfileWrites++; 1311 regFile.setCCReg(phys_reg, val); 1312} 1313 1314template <class Impl> 1315RegVal 1316FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) 1317{ 1318 intRegfileReads++; 1319 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1320 RegId(IntRegClass, reg_idx)); 1321 1322 return regFile.readIntReg(phys_reg); 1323} 1324 1325template <class Impl> 1326RegVal 1327FullO3CPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid) 1328{ 1329 fpRegfileReads++; 1330 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1331 RegId(FloatRegClass, reg_idx)); 1332 1333 return regFile.readFloatReg(phys_reg); 1334} 1335 1336template <class Impl> 1337auto 1338FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const 1339 -> const VecRegContainer& 1340{ 1341 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1342 RegId(VecRegClass, reg_idx)); 1343 return readVecReg(phys_reg); 1344} 1345 1346template <class Impl> 1347auto 1348FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid) 1349 -> VecRegContainer& 1350{ 1351 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1352 RegId(VecRegClass, reg_idx)); 1353 return getWritableVecReg(phys_reg); 1354} 1355 1356template <class Impl> 1357auto 1358FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1359 ThreadID tid) const -> const VecElem& 1360{ 1361 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1362 RegId(VecElemClass, reg_idx, ldx)); 1363 return readVecElem(phys_reg); 1364} 1365 1366template <class Impl> 1367auto 1368FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const 1369 -> const VecPredRegContainer& 1370{ 1371 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1372 RegId(VecPredRegClass, reg_idx)); 1373 return readVecPredReg(phys_reg); 1374} 1375 1376template <class Impl> 1377auto 1378FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid) 1379 -> VecPredRegContainer& 1380{ 1381 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1382 RegId(VecPredRegClass, reg_idx)); 1383 return getWritableVecPredReg(phys_reg); 1384} 1385 1386template <class Impl> 1387RegVal 1388FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) 1389{ 1390 ccRegfileReads++; 1391 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1392 RegId(CCRegClass, reg_idx)); 1393 1394 return regFile.readCCReg(phys_reg); 1395} 1396 1397template <class Impl> 1398void 1399FullO3CPU<Impl>::setArchIntReg(int reg_idx, RegVal val, ThreadID tid) 1400{ 1401 intRegfileWrites++; 1402 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1403 RegId(IntRegClass, reg_idx)); 1404 1405 regFile.setIntReg(phys_reg, val); 1406} 1407 1408template <class Impl> 1409void 1410FullO3CPU<Impl>::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid) 1411{ 1412 fpRegfileWrites++; 1413 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1414 RegId(FloatRegClass, reg_idx)); 1415 1416 regFile.setFloatReg(phys_reg, val); 1417} 1418 1419template <class Impl> 1420void 1421FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val, 1422 ThreadID tid) 1423{ 1424 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1425 RegId(VecRegClass, reg_idx)); 1426 setVecReg(phys_reg, val); 1427} 1428 1429template <class Impl> 1430void 1431FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1432 const VecElem& val, ThreadID tid) 1433{ 1434 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1435 RegId(VecElemClass, reg_idx, ldx)); 1436 setVecElem(phys_reg, val); 1437} 1438 1439template <class Impl> 1440void 1441FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, 1442 ThreadID tid) 1443{ 1444 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1445 RegId(VecPredRegClass, reg_idx)); 1446 setVecPredReg(phys_reg, val); 1447} 1448 1449template <class Impl> 1450void 1451FullO3CPU<Impl>::setArchCCReg(int reg_idx, RegVal val, ThreadID tid) 1452{ 1453 ccRegfileWrites++; 1454 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1455 RegId(CCRegClass, reg_idx)); 1456 1457 regFile.setCCReg(phys_reg, val); 1458} 1459 1460template <class Impl> 1461TheISA::PCState 1462FullO3CPU<Impl>::pcState(ThreadID tid) 1463{ 1464 return commit.pcState(tid); 1465} 1466 1467template <class Impl> 1468void 1469FullO3CPU<Impl>::pcState(const TheISA::PCState &val, ThreadID tid) 1470{ 1471 commit.pcState(val, tid); 1472} 1473 1474template <class Impl> 1475Addr 1476FullO3CPU<Impl>::instAddr(ThreadID tid) 1477{ 1478 return commit.instAddr(tid); 1479} 1480 1481template <class Impl> 1482Addr 1483FullO3CPU<Impl>::nextInstAddr(ThreadID tid) 1484{ 1485 return commit.nextInstAddr(tid); 1486} 1487 1488template <class Impl> 1489MicroPC 1490FullO3CPU<Impl>::microPC(ThreadID tid) 1491{ 1492 return commit.microPC(tid); 1493} 1494 1495template <class Impl> 1496void 1497FullO3CPU<Impl>::squashFromTC(ThreadID tid) 1498{ 1499 this->thread[tid]->noSquashFromTC = true; 1500 this->commit.generateTCEvent(tid); 1501} 1502 1503template <class Impl> 1504typename FullO3CPU<Impl>::ListIt 1505FullO3CPU<Impl>::addInst(const DynInstPtr &inst) 1506{ 1507 instList.push_back(inst); 1508 1509 return --(instList.end()); 1510} 1511 1512template <class Impl> 1513void 1514FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst) 1515{ 1516 // Keep an instruction count. 1517 if (!inst->isMicroop() || inst->isLastMicroop()) { 1518 thread[tid]->numInst++; 1519 thread[tid]->numInsts++; 1520 committedInsts[tid]++; 1521 system->totalNumInsts++; 1522 1523 // Check for instruction-count-based events. 1524 comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); 1525 system->instEventQueue.serviceEvents(system->totalNumInsts); 1526 } 1527 thread[tid]->numOp++; 1528 thread[tid]->numOps++; 1529 committedOps[tid]++; 1530 1531 probeInstCommit(inst->staticInst, inst->instAddr()); 1532} 1533 1534template <class Impl> 1535void 1536FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst) 1537{ 1538 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s " 1539 "[sn:%lli]\n", 1540 inst->threadNumber, inst->pcState(), inst->seqNum); 1541 1542 removeInstsThisCycle = true; 1543 1544 // Remove the front instruction. 1545 removeList.push(inst->getInstListIt()); 1546} 1547 1548template <class Impl> 1549void 1550FullO3CPU<Impl>::removeInstsNotInROB(ThreadID tid) 1551{ 1552 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" 1553 " list.\n", tid); 1554 1555 ListIt end_it; 1556 1557 bool rob_empty = false; 1558 1559 if (instList.empty()) { 1560 return; 1561 } else if (rob.isEmpty(tid)) { 1562 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); 1563 end_it = instList.begin(); 1564 rob_empty = true; 1565 } else { 1566 end_it = (rob.readTailInst(tid))->getInstListIt(); 1567 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); 1568 } 1569 1570 removeInstsThisCycle = true; 1571 1572 ListIt inst_it = instList.end(); 1573 1574 inst_it--; 1575 1576 // Walk through the instruction list, removing any instructions 1577 // that were inserted after the given instruction iterator, end_it. 1578 while (inst_it != end_it) { 1579 assert(!instList.empty()); 1580 1581 squashInstIt(inst_it, tid); 1582 1583 inst_it--; 1584 } 1585 1586 // If the ROB was empty, then we actually need to remove the first 1587 // instruction as well. 1588 if (rob_empty) { 1589 squashInstIt(inst_it, tid); 1590 } 1591} 1592 1593template <class Impl> 1594void 1595FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) 1596{ 1597 assert(!instList.empty()); 1598 1599 removeInstsThisCycle = true; 1600 1601 ListIt inst_iter = instList.end(); 1602 1603 inst_iter--; 1604 1605 DPRINTF(O3CPU, "Deleting instructions from instruction " 1606 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", 1607 tid, seq_num, (*inst_iter)->seqNum); 1608 1609 while ((*inst_iter)->seqNum > seq_num) { 1610 1611 bool break_loop = (inst_iter == instList.begin()); 1612 1613 squashInstIt(inst_iter, tid); 1614 1615 inst_iter--; 1616 1617 if (break_loop) 1618 break; 1619 } 1620} 1621 1622template <class Impl> 1623inline void 1624FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, ThreadID tid) 1625{ 1626 if ((*instIt)->threadNumber == tid) { 1627 DPRINTF(O3CPU, "Squashing instruction, " 1628 "[tid:%i] [sn:%lli] PC %s\n", 1629 (*instIt)->threadNumber, 1630 (*instIt)->seqNum, 1631 (*instIt)->pcState()); 1632 1633 // Mark it as squashed. 1634 (*instIt)->setSquashed(); 1635 1636 // @todo: Formulate a consistent method for deleting 1637 // instructions from the instruction list 1638 // Remove the instruction from the list. 1639 removeList.push(instIt); 1640 } 1641} 1642 1643template <class Impl> 1644void 1645FullO3CPU<Impl>::cleanUpRemovedInsts() 1646{ 1647 while (!removeList.empty()) { 1648 DPRINTF(O3CPU, "Removing instruction, " 1649 "[tid:%i] [sn:%lli] PC %s\n", 1650 (*removeList.front())->threadNumber, 1651 (*removeList.front())->seqNum, 1652 (*removeList.front())->pcState()); 1653 1654 instList.erase(removeList.front()); 1655 1656 removeList.pop(); 1657 } 1658 1659 removeInstsThisCycle = false; 1660} 1661/* 1662template <class Impl> 1663void 1664FullO3CPU<Impl>::removeAllInsts() 1665{ 1666 instList.clear(); 1667} 1668*/ 1669template <class Impl> 1670void 1671FullO3CPU<Impl>::dumpInsts() 1672{ 1673 int num = 0; 1674 1675 ListIt inst_list_it = instList.begin(); 1676 1677 cprintf("Dumping Instruction List\n"); 1678 1679 while (inst_list_it != instList.end()) { 1680 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" 1681 "Squashed:%i\n\n", 1682 num, (*inst_list_it)->instAddr(), (*inst_list_it)->threadNumber, 1683 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), 1684 (*inst_list_it)->isSquashed()); 1685 inst_list_it++; 1686 ++num; 1687 } 1688} 1689/* 1690template <class Impl> 1691void 1692FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst) 1693{ 1694 iew.wakeDependents(inst); 1695} 1696*/ 1697template <class Impl> 1698void 1699FullO3CPU<Impl>::wakeCPU() 1700{ 1701 if (activityRec.active() || tickEvent.scheduled()) { 1702 DPRINTF(Activity, "CPU already running.\n"); 1703 return; 1704 } 1705 1706 DPRINTF(Activity, "Waking up CPU\n"); 1707 1708 Cycles cycles(curCycle() - lastRunningCycle); 1709 // @todo: This is an oddity that is only here to match the stats 1710 if (cycles > 1) { 1711 --cycles; 1712 idleCycles += cycles; 1713 numCycles += cycles; 1714 } 1715 1716 schedule(tickEvent, clockEdge()); 1717} 1718 1719template <class Impl> 1720void 1721FullO3CPU<Impl>::wakeup(ThreadID tid) 1722{ 1723 if (this->thread[tid]->status() != ThreadContext::Suspended) 1724 return; 1725 1726 this->wakeCPU(); 1727 1728 DPRINTF(Quiesce, "Suspended Processor woken\n"); 1729 this->threadContexts[tid]->activate(); 1730} 1731 1732template <class Impl> 1733ThreadID 1734FullO3CPU<Impl>::getFreeTid() 1735{ 1736 for (ThreadID tid = 0; tid < numThreads; tid++) { 1737 if (!tids[tid]) { 1738 tids[tid] = true; 1739 return tid; 1740 } 1741 } 1742 1743 return InvalidThreadID; 1744} 1745 1746template <class Impl> 1747void 1748FullO3CPU<Impl>::updateThreadPriority() 1749{ 1750 if (activeThreads.size() > 1) { 1751 //DEFAULT TO ROUND ROBIN SCHEME 1752 //e.g. Move highest priority to end of thread list 1753 list<ThreadID>::iterator list_begin = activeThreads.begin(); 1754 1755 unsigned high_thread = *list_begin; 1756 1757 activeThreads.erase(list_begin); 1758 1759 activeThreads.push_back(high_thread); 1760 } 1761} 1762 1763template <class Impl> 1764void 1765FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid) 1766{ 1767 DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid); 1768 1769 // the thread trying to exit can't be already halted 1770 assert(tcBase(tid)->status() != ThreadContext::Halted); 1771 1772 // make sure the thread has not been added to the list yet 1773 assert(exitingThreads.count(tid) == 0); 1774 1775 // add the thread to exitingThreads list to mark that this thread is 1776 // trying to exit. The boolean value in the pair denotes if a thread is 1777 // ready to exit. The thread is not ready to exit until the corresponding 1778 // exit trap event is processed in the future. Until then, it'll be still 1779 // an active thread that is trying to exit. 1780 exitingThreads.emplace(std::make_pair(tid, false)); 1781} 1782 1783template <class Impl> 1784bool 1785FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const 1786{ 1787 return exitingThreads.count(tid) == 1; 1788} 1789 1790template <class Impl> 1791void 1792FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid) 1793{ 1794 assert(exitingThreads.count(tid) == 1); 1795 1796 // exit trap event has been processed. Now, the thread is ready to exit 1797 // and be removed from the CPU. 1798 exitingThreads[tid] = true; 1799 1800 // we schedule a threadExitEvent in the next cycle to properly clean 1801 // up the thread's states in the pipeline. threadExitEvent has lower 1802 // priority than tickEvent, so the cleanup will happen at the very end 1803 // of the next cycle after all pipeline stages complete their operations. 1804 // We want all stages to complete squashing instructions before doing 1805 // the cleanup. 1806 if (!threadExitEvent.scheduled()) { 1807 schedule(threadExitEvent, nextCycle()); 1808 } 1809} 1810 1811template <class Impl> 1812void 1813FullO3CPU<Impl>::exitThreads() 1814{ 1815 // there must be at least one thread trying to exit 1816 assert(exitingThreads.size() > 0); 1817 1818 // terminate all threads that are ready to exit 1819 auto it = exitingThreads.begin(); 1820 while (it != exitingThreads.end()) { 1821 ThreadID thread_id = it->first; 1822 bool readyToExit = it->second; 1823 1824 if (readyToExit) { 1825 DPRINTF(O3CPU, "Exiting thread %d\n", thread_id); 1826 haltContext(thread_id); 1827 tcBase(thread_id)->setStatus(ThreadContext::Halted); 1828 it = exitingThreads.erase(it); 1829 } else { 1830 it++; 1831 } 1832 } 1833} 1834 1835// Forward declaration of FullO3CPU. 1836template class FullO3CPU<O3CPUImpl>; 1837