cpu.cc revision 13644
1/* 2 * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited 3 * Copyright (c) 2013 Advanced Micro Devices, Inc. 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2006 The Regents of The University of Michigan 16 * Copyright (c) 2011 Regents of the University of California 17 * All rights reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions are 21 * met: redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer; 23 * redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution; 26 * neither the name of the copyright holders nor the names of its 27 * contributors may be used to endorse or promote products derived from 28 * this software without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 * 42 * Authors: Kevin Lim 43 * Korey Sewell 44 * Rick Strong 45 */ 46 47#include "cpu/o3/cpu.hh" 48 49#include "arch/generic/traits.hh" 50#include "arch/kernel_stats.hh" 51#include "config/the_isa.hh" 52#include "cpu/activity.hh" 53#include "cpu/checker/cpu.hh" 54#include "cpu/checker/thread_context.hh" 55#include "cpu/o3/isa_specific.hh" 56#include "cpu/o3/thread_context.hh" 57#include "cpu/quiesce_event.hh" 58#include "cpu/simple_thread.hh" 59#include "cpu/thread_context.hh" 60#include "debug/Activity.hh" 61#include "debug/Drain.hh" 62#include "debug/O3CPU.hh" 63#include "debug/Quiesce.hh" 64#include "enums/MemoryMode.hh" 65#include "sim/core.hh" 66#include "sim/full_system.hh" 67#include "sim/process.hh" 68#include "sim/stat_control.hh" 69#include "sim/system.hh" 70 71#if THE_ISA == ALPHA_ISA 72#include "arch/alpha/osfpal.hh" 73#include "debug/Activity.hh" 74 75#endif 76 77struct BaseCPUParams; 78 79using namespace TheISA; 80using namespace std; 81 82BaseO3CPU::BaseO3CPU(BaseCPUParams *params) 83 : BaseCPU(params) 84{ 85} 86 87void 88BaseO3CPU::regStats() 89{ 90 BaseCPU::regStats(); 91} 92 93template<class Impl> 94bool 95FullO3CPU<Impl>::IcachePort::recvTimingResp(PacketPtr pkt) 96{ 97 DPRINTF(O3CPU, "Fetch unit received timing\n"); 98 // We shouldn't ever get a cacheable block in Modified state 99 assert(pkt->req->isUncacheable() || 100 !(pkt->cacheResponding() && !pkt->hasSharers())); 101 fetch->processCacheCompletion(pkt); 102 103 return true; 104} 105 106template<class Impl> 107void 108FullO3CPU<Impl>::IcachePort::recvReqRetry() 109{ 110 fetch->recvReqRetry(); 111} 112 113template <class Impl> 114bool 115FullO3CPU<Impl>::DcachePort::recvTimingResp(PacketPtr pkt) 116{ 117 return lsq->recvTimingResp(pkt); 118} 119 120template <class Impl> 121void 122FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt) 123{ 124 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { 125 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { 126 cpu->wakeup(tid); 127 } 128 } 129 lsq->recvTimingSnoopReq(pkt); 130} 131 132template <class Impl> 133void 134FullO3CPU<Impl>::DcachePort::recvReqRetry() 135{ 136 lsq->recvReqRetry(); 137} 138 139template <class Impl> 140FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params) 141 : BaseO3CPU(params), 142 itb(params->itb), 143 dtb(params->dtb), 144 tickEvent([this]{ tick(); }, "FullO3CPU tick", 145 false, Event::CPU_Tick_Pri), 146 threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads", 147 false, Event::CPU_Exit_Pri), 148#ifndef NDEBUG 149 instcount(0), 150#endif 151 removeInstsThisCycle(false), 152 fetch(this, params), 153 decode(this, params), 154 rename(this, params), 155 iew(this, params), 156 commit(this, params), 157 158 /* It is mandatory that all SMT threads use the same renaming mode as 159 * they are sharing registers and rename */ 160 vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])), 161 regFile(params->numPhysIntRegs, 162 params->numPhysFloatRegs, 163 params->numPhysVecRegs, 164 params->numPhysVecPredRegs, 165 params->numPhysCCRegs, 166 vecMode), 167 168 freeList(name() + ".freelist", ®File), 169 170 rob(this, params), 171 172 scoreboard(name() + ".scoreboard", 173 regFile.totalNumPhysRegs()), 174 175 isa(numThreads, NULL), 176 177 icachePort(&fetch, this), 178 dcachePort(&iew.ldstQueue, this), 179 180 timeBuffer(params->backComSize, params->forwardComSize), 181 fetchQueue(params->backComSize, params->forwardComSize), 182 decodeQueue(params->backComSize, params->forwardComSize), 183 renameQueue(params->backComSize, params->forwardComSize), 184 iewQueue(params->backComSize, params->forwardComSize), 185 activityRec(name(), NumStages, 186 params->backComSize + params->forwardComSize, 187 params->activity), 188 189 globalSeqNum(1), 190 system(params->system), 191 lastRunningCycle(curCycle()) 192{ 193 if (!params->switched_out) { 194 _status = Running; 195 } else { 196 _status = SwitchedOut; 197 } 198 199 if (params->checker) { 200 BaseCPU *temp_checker = params->checker; 201 checker = dynamic_cast<Checker<Impl> *>(temp_checker); 202 checker->setIcachePort(&icachePort); 203 checker->setSystem(params->system); 204 } else { 205 checker = NULL; 206 } 207 208 if (!FullSystem) { 209 thread.resize(numThreads); 210 tids.resize(numThreads); 211 } 212 213 // The stages also need their CPU pointer setup. However this 214 // must be done at the upper level CPU because they have pointers 215 // to the upper level CPU, and not this FullO3CPU. 216 217 // Set up Pointers to the activeThreads list for each stage 218 fetch.setActiveThreads(&activeThreads); 219 decode.setActiveThreads(&activeThreads); 220 rename.setActiveThreads(&activeThreads); 221 iew.setActiveThreads(&activeThreads); 222 commit.setActiveThreads(&activeThreads); 223 224 // Give each of the stages the time buffer they will use. 225 fetch.setTimeBuffer(&timeBuffer); 226 decode.setTimeBuffer(&timeBuffer); 227 rename.setTimeBuffer(&timeBuffer); 228 iew.setTimeBuffer(&timeBuffer); 229 commit.setTimeBuffer(&timeBuffer); 230 231 // Also setup each of the stages' queues. 232 fetch.setFetchQueue(&fetchQueue); 233 decode.setFetchQueue(&fetchQueue); 234 commit.setFetchQueue(&fetchQueue); 235 decode.setDecodeQueue(&decodeQueue); 236 rename.setDecodeQueue(&decodeQueue); 237 rename.setRenameQueue(&renameQueue); 238 iew.setRenameQueue(&renameQueue); 239 iew.setIEWQueue(&iewQueue); 240 commit.setIEWQueue(&iewQueue); 241 commit.setRenameQueue(&renameQueue); 242 243 commit.setIEWStage(&iew); 244 rename.setIEWStage(&iew); 245 rename.setCommitStage(&commit); 246 247 ThreadID active_threads; 248 if (FullSystem) { 249 active_threads = 1; 250 } else { 251 active_threads = params->workload.size(); 252 253 if (active_threads > Impl::MaxThreads) { 254 panic("Workload Size too large. Increase the 'MaxThreads' " 255 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) " 256 "or edit your workload size."); 257 } 258 } 259 260 //Make Sure That this a Valid Architeture 261 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); 262 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); 263 assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); 264 assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); 265 assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); 266 267 rename.setScoreboard(&scoreboard); 268 iew.setScoreboard(&scoreboard); 269 270 // Setup the rename map for whichever stages need it. 271 for (ThreadID tid = 0; tid < numThreads; tid++) { 272 isa[tid] = params->isa[tid]; 273 assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0])); 274 275 // Only Alpha has an FP zero register, so for other ISAs we 276 // use an invalid FP register index to avoid special treatment 277 // of any valid FP reg. 278 RegIndex invalidFPReg = TheISA::NumFloatRegs + 1; 279 RegIndex fpZeroReg = 280 (THE_ISA == ALPHA_ISA) ? TheISA::ZeroReg : invalidFPReg; 281 282 commitRenameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 283 &freeList, 284 vecMode); 285 286 renameMap[tid].init(®File, TheISA::ZeroReg, fpZeroReg, 287 &freeList, vecMode); 288 } 289 290 // Initialize rename map to assign physical registers to the 291 // architectural registers for active threads only. 292 for (ThreadID tid = 0; tid < active_threads; tid++) { 293 for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) { 294 // Note that we can't use the rename() method because we don't 295 // want special treatment for the zero register at this point 296 PhysRegIdPtr phys_reg = freeList.getIntReg(); 297 renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 298 commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg); 299 } 300 301 for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) { 302 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 303 renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg); 304 commitRenameMap[tid].setEntry( 305 RegId(FloatRegClass, ridx), phys_reg); 306 } 307 308 /* Here we need two 'interfaces' the 'whole register' and the 309 * 'register element'. At any point only one of them will be 310 * active. */ 311 if (vecMode == Enums::Full) { 312 /* Initialize the full-vector interface */ 313 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 314 RegId rid = RegId(VecRegClass, ridx); 315 PhysRegIdPtr phys_reg = freeList.getVecReg(); 316 renameMap[tid].setEntry(rid, phys_reg); 317 commitRenameMap[tid].setEntry(rid, phys_reg); 318 } 319 } else { 320 /* Initialize the vector-element interface */ 321 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) { 322 for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg; 323 ++ldx) { 324 RegId lrid = RegId(VecElemClass, ridx, ldx); 325 PhysRegIdPtr phys_elem = freeList.getVecElem(); 326 renameMap[tid].setEntry(lrid, phys_elem); 327 commitRenameMap[tid].setEntry(lrid, phys_elem); 328 } 329 } 330 } 331 332 for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { 333 PhysRegIdPtr phys_reg = freeList.getVecPredReg(); 334 renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); 335 commitRenameMap[tid].setEntry( 336 RegId(VecPredRegClass, ridx), phys_reg); 337 } 338 339 for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) { 340 PhysRegIdPtr phys_reg = freeList.getCCReg(); 341 renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 342 commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); 343 } 344 } 345 346 rename.setRenameMap(renameMap); 347 commit.setRenameMap(commitRenameMap); 348 rename.setFreeList(&freeList); 349 350 // Setup the ROB for whichever stages need it. 351 commit.setROB(&rob); 352 353 lastActivatedCycle = 0; 354#if 0 355 // Give renameMap & rename stage access to the freeList; 356 for (ThreadID tid = 0; tid < numThreads; tid++) 357 globalSeqNum[tid] = 1; 358#endif 359 360 DPRINTF(O3CPU, "Creating O3CPU object.\n"); 361 362 // Setup any thread state. 363 this->thread.resize(this->numThreads); 364 365 for (ThreadID tid = 0; tid < this->numThreads; ++tid) { 366 if (FullSystem) { 367 // SMT is not supported in FS mode yet. 368 assert(this->numThreads == 1); 369 this->thread[tid] = new Thread(this, 0, NULL); 370 } else { 371 if (tid < params->workload.size()) { 372 DPRINTF(O3CPU, "Workload[%i] process is %#x", 373 tid, this->thread[tid]); 374 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 375 (typename Impl::O3CPU *)(this), 376 tid, params->workload[tid]); 377 378 //usedTids[tid] = true; 379 //threadMap[tid] = tid; 380 } else { 381 //Allocate Empty thread so M5 can use later 382 //when scheduling threads to CPU 383 Process* dummy_proc = NULL; 384 385 this->thread[tid] = new typename FullO3CPU<Impl>::Thread( 386 (typename Impl::O3CPU *)(this), 387 tid, dummy_proc); 388 //usedTids[tid] = false; 389 } 390 } 391 392 ThreadContext *tc; 393 394 // Setup the TC that will serve as the interface to the threads/CPU. 395 O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>; 396 397 tc = o3_tc; 398 399 // If we're using a checker, then the TC should be the 400 // CheckerThreadContext. 401 if (params->checker) { 402 tc = new CheckerThreadContext<O3ThreadContext<Impl> >( 403 o3_tc, this->checker); 404 } 405 406 o3_tc->cpu = (typename Impl::O3CPU *)(this); 407 assert(o3_tc->cpu); 408 o3_tc->thread = this->thread[tid]; 409 410 // Setup quiesce event. 411 this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc); 412 413 // Give the thread the TC. 414 this->thread[tid]->tc = tc; 415 416 // Add the TC to the CPU's list of TC's. 417 this->threadContexts.push_back(tc); 418 } 419 420 // FullO3CPU always requires an interrupt controller. 421 if (!params->switched_out && interrupts.empty()) { 422 fatal("FullO3CPU %s has no interrupt controller.\n" 423 "Ensure createInterruptController() is called.\n", name()); 424 } 425 426 for (ThreadID tid = 0; tid < this->numThreads; tid++) 427 this->thread[tid]->setFuncExeInst(0); 428} 429 430template <class Impl> 431FullO3CPU<Impl>::~FullO3CPU() 432{ 433} 434 435template <class Impl> 436void 437FullO3CPU<Impl>::regProbePoints() 438{ 439 BaseCPU::regProbePoints(); 440 441 ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete"); 442 ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete"); 443 444 fetch.regProbePoints(); 445 rename.regProbePoints(); 446 iew.regProbePoints(); 447 commit.regProbePoints(); 448} 449 450template <class Impl> 451void 452FullO3CPU<Impl>::regStats() 453{ 454 BaseO3CPU::regStats(); 455 456 // Register any of the O3CPU's stats here. 457 timesIdled 458 .name(name() + ".timesIdled") 459 .desc("Number of times that the entire CPU went into an idle state and" 460 " unscheduled itself") 461 .prereq(timesIdled); 462 463 idleCycles 464 .name(name() + ".idleCycles") 465 .desc("Total number of cycles that the CPU has spent unscheduled due " 466 "to idling") 467 .prereq(idleCycles); 468 469 quiesceCycles 470 .name(name() + ".quiesceCycles") 471 .desc("Total number of cycles that CPU has spent quiesced or waiting " 472 "for an interrupt") 473 .prereq(quiesceCycles); 474 475 // Number of Instructions simulated 476 // -------------------------------- 477 // Should probably be in Base CPU but need templated 478 // MaxThreads so put in here instead 479 committedInsts 480 .init(numThreads) 481 .name(name() + ".committedInsts") 482 .desc("Number of Instructions Simulated") 483 .flags(Stats::total); 484 485 committedOps 486 .init(numThreads) 487 .name(name() + ".committedOps") 488 .desc("Number of Ops (including micro ops) Simulated") 489 .flags(Stats::total); 490 491 cpi 492 .name(name() + ".cpi") 493 .desc("CPI: Cycles Per Instruction") 494 .precision(6); 495 cpi = numCycles / committedInsts; 496 497 totalCpi 498 .name(name() + ".cpi_total") 499 .desc("CPI: Total CPI of All Threads") 500 .precision(6); 501 totalCpi = numCycles / sum(committedInsts); 502 503 ipc 504 .name(name() + ".ipc") 505 .desc("IPC: Instructions Per Cycle") 506 .precision(6); 507 ipc = committedInsts / numCycles; 508 509 totalIpc 510 .name(name() + ".ipc_total") 511 .desc("IPC: Total IPC of All Threads") 512 .precision(6); 513 totalIpc = sum(committedInsts) / numCycles; 514 515 this->fetch.regStats(); 516 this->decode.regStats(); 517 this->rename.regStats(); 518 this->iew.regStats(); 519 this->commit.regStats(); 520 this->rob.regStats(); 521 522 intRegfileReads 523 .name(name() + ".int_regfile_reads") 524 .desc("number of integer regfile reads") 525 .prereq(intRegfileReads); 526 527 intRegfileWrites 528 .name(name() + ".int_regfile_writes") 529 .desc("number of integer regfile writes") 530 .prereq(intRegfileWrites); 531 532 fpRegfileReads 533 .name(name() + ".fp_regfile_reads") 534 .desc("number of floating regfile reads") 535 .prereq(fpRegfileReads); 536 537 fpRegfileWrites 538 .name(name() + ".fp_regfile_writes") 539 .desc("number of floating regfile writes") 540 .prereq(fpRegfileWrites); 541 542 vecRegfileReads 543 .name(name() + ".vec_regfile_reads") 544 .desc("number of vector regfile reads") 545 .prereq(vecRegfileReads); 546 547 vecRegfileWrites 548 .name(name() + ".vec_regfile_writes") 549 .desc("number of vector regfile writes") 550 .prereq(vecRegfileWrites); 551 552 vecPredRegfileReads 553 .name(name() + ".pred_regfile_reads") 554 .desc("number of predicate regfile reads") 555 .prereq(vecPredRegfileReads); 556 557 vecPredRegfileWrites 558 .name(name() + ".pred_regfile_writes") 559 .desc("number of predicate regfile writes") 560 .prereq(vecPredRegfileWrites); 561 562 ccRegfileReads 563 .name(name() + ".cc_regfile_reads") 564 .desc("number of cc regfile reads") 565 .prereq(ccRegfileReads); 566 567 ccRegfileWrites 568 .name(name() + ".cc_regfile_writes") 569 .desc("number of cc regfile writes") 570 .prereq(ccRegfileWrites); 571 572 miscRegfileReads 573 .name(name() + ".misc_regfile_reads") 574 .desc("number of misc regfile reads") 575 .prereq(miscRegfileReads); 576 577 miscRegfileWrites 578 .name(name() + ".misc_regfile_writes") 579 .desc("number of misc regfile writes") 580 .prereq(miscRegfileWrites); 581} 582 583template <class Impl> 584void 585FullO3CPU<Impl>::tick() 586{ 587 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); 588 assert(!switchedOut()); 589 assert(drainState() != DrainState::Drained); 590 591 ++numCycles; 592 updateCycleCounters(BaseCPU::CPU_STATE_ON); 593 594// activity = false; 595 596 //Tick each of the stages 597 fetch.tick(); 598 599 decode.tick(); 600 601 rename.tick(); 602 603 iew.tick(); 604 605 commit.tick(); 606 607 // Now advance the time buffers 608 timeBuffer.advance(); 609 610 fetchQueue.advance(); 611 decodeQueue.advance(); 612 renameQueue.advance(); 613 iewQueue.advance(); 614 615 activityRec.advance(); 616 617 if (removeInstsThisCycle) { 618 cleanUpRemovedInsts(); 619 } 620 621 if (!tickEvent.scheduled()) { 622 if (_status == SwitchedOut) { 623 DPRINTF(O3CPU, "Switched out!\n"); 624 // increment stat 625 lastRunningCycle = curCycle(); 626 } else if (!activityRec.active() || _status == Idle) { 627 DPRINTF(O3CPU, "Idle!\n"); 628 lastRunningCycle = curCycle(); 629 timesIdled++; 630 } else { 631 schedule(tickEvent, clockEdge(Cycles(1))); 632 DPRINTF(O3CPU, "Scheduling next tick!\n"); 633 } 634 } 635 636 if (!FullSystem) 637 updateThreadPriority(); 638 639 tryDrain(); 640} 641 642template <class Impl> 643void 644FullO3CPU<Impl>::init() 645{ 646 BaseCPU::init(); 647 648 for (ThreadID tid = 0; tid < numThreads; ++tid) { 649 // Set noSquashFromTC so that the CPU doesn't squash when initially 650 // setting up registers. 651 thread[tid]->noSquashFromTC = true; 652 // Initialise the ThreadContext's memory proxies 653 thread[tid]->initMemProxies(thread[tid]->getTC()); 654 } 655 656 if (FullSystem && !params()->switched_out) { 657 for (ThreadID tid = 0; tid < numThreads; tid++) { 658 ThreadContext *src_tc = threadContexts[tid]; 659 TheISA::initCPU(src_tc, src_tc->contextId()); 660 } 661 } 662 663 // Clear noSquashFromTC. 664 for (int tid = 0; tid < numThreads; ++tid) 665 thread[tid]->noSquashFromTC = false; 666 667 commit.setThreads(thread); 668} 669 670template <class Impl> 671void 672FullO3CPU<Impl>::startup() 673{ 674 BaseCPU::startup(); 675 for (int tid = 0; tid < numThreads; ++tid) 676 isa[tid]->startup(threadContexts[tid]); 677 678 fetch.startupStage(); 679 decode.startupStage(); 680 iew.startupStage(); 681 rename.startupStage(); 682 commit.startupStage(); 683} 684 685template <class Impl> 686void 687FullO3CPU<Impl>::activateThread(ThreadID tid) 688{ 689 list<ThreadID>::iterator isActive = 690 std::find(activeThreads.begin(), activeThreads.end(), tid); 691 692 DPRINTF(O3CPU, "[tid:%i]: Calling activate thread.\n", tid); 693 assert(!switchedOut()); 694 695 if (isActive == activeThreads.end()) { 696 DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", 697 tid); 698 699 activeThreads.push_back(tid); 700 } 701} 702 703template <class Impl> 704void 705FullO3CPU<Impl>::deactivateThread(ThreadID tid) 706{ 707 //Remove From Active List, if Active 708 list<ThreadID>::iterator thread_it = 709 std::find(activeThreads.begin(), activeThreads.end(), tid); 710 711 DPRINTF(O3CPU, "[tid:%i]: Calling deactivate thread.\n", tid); 712 assert(!switchedOut()); 713 714 if (thread_it != activeThreads.end()) { 715 DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", 716 tid); 717 activeThreads.erase(thread_it); 718 } 719 720 fetch.deactivateThread(tid); 721 commit.deactivateThread(tid); 722} 723 724template <class Impl> 725Counter 726FullO3CPU<Impl>::totalInsts() const 727{ 728 Counter total(0); 729 730 ThreadID size = thread.size(); 731 for (ThreadID i = 0; i < size; i++) 732 total += thread[i]->numInst; 733 734 return total; 735} 736 737template <class Impl> 738Counter 739FullO3CPU<Impl>::totalOps() const 740{ 741 Counter total(0); 742 743 ThreadID size = thread.size(); 744 for (ThreadID i = 0; i < size; i++) 745 total += thread[i]->numOp; 746 747 return total; 748} 749 750template <class Impl> 751void 752FullO3CPU<Impl>::activateContext(ThreadID tid) 753{ 754 assert(!switchedOut()); 755 756 // Needs to set each stage to running as well. 757 activateThread(tid); 758 759 // We don't want to wake the CPU if it is drained. In that case, 760 // we just want to flag the thread as active and schedule the tick 761 // event from drainResume() instead. 762 if (drainState() == DrainState::Drained) 763 return; 764 765 // If we are time 0 or if the last activation time is in the past, 766 // schedule the next tick and wake up the fetch unit 767 if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) { 768 scheduleTickEvent(Cycles(0)); 769 770 // Be sure to signal that there's some activity so the CPU doesn't 771 // deschedule itself. 772 activityRec.activity(); 773 fetch.wakeFromQuiesce(); 774 775 Cycles cycles(curCycle() - lastRunningCycle); 776 // @todo: This is an oddity that is only here to match the stats 777 if (cycles != 0) 778 --cycles; 779 quiesceCycles += cycles; 780 781 lastActivatedCycle = curTick(); 782 783 _status = Running; 784 785 BaseCPU::activateContext(tid); 786 } 787} 788 789template <class Impl> 790void 791FullO3CPU<Impl>::suspendContext(ThreadID tid) 792{ 793 DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); 794 assert(!switchedOut()); 795 796 deactivateThread(tid); 797 798 // If this was the last thread then unschedule the tick event. 799 if (activeThreads.size() == 0) { 800 unscheduleTickEvent(); 801 lastRunningCycle = curCycle(); 802 _status = Idle; 803 } 804 805 DPRINTF(Quiesce, "Suspending Context\n"); 806 807 BaseCPU::suspendContext(tid); 808} 809 810template <class Impl> 811void 812FullO3CPU<Impl>::haltContext(ThreadID tid) 813{ 814 //For now, this is the same as deallocate 815 DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating\n", tid); 816 assert(!switchedOut()); 817 818 deactivateThread(tid); 819 removeThread(tid); 820 821 updateCycleCounters(BaseCPU::CPU_STATE_SLEEP); 822} 823 824template <class Impl> 825void 826FullO3CPU<Impl>::insertThread(ThreadID tid) 827{ 828 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); 829 // Will change now that the PC and thread state is internal to the CPU 830 // and not in the ThreadContext. 831 ThreadContext *src_tc; 832 if (FullSystem) 833 src_tc = system->threadContexts[tid]; 834 else 835 src_tc = tcBase(tid); 836 837 //Bind Int Regs to Rename Map 838 839 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs; 840 reg_id.index()++) { 841 PhysRegIdPtr phys_reg = freeList.getIntReg(); 842 renameMap[tid].setEntry(reg_id, phys_reg); 843 scoreboard.setReg(phys_reg); 844 } 845 846 //Bind Float Regs to Rename Map 847 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs; 848 reg_id.index()++) { 849 PhysRegIdPtr phys_reg = freeList.getFloatReg(); 850 renameMap[tid].setEntry(reg_id, phys_reg); 851 scoreboard.setReg(phys_reg); 852 } 853 854 //Bind condition-code Regs to Rename Map 855 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; 856 reg_id.index()++) { 857 PhysRegIdPtr phys_reg = freeList.getCCReg(); 858 renameMap[tid].setEntry(reg_id, phys_reg); 859 scoreboard.setReg(phys_reg); 860 } 861 862 //Copy Thread Data Into RegFile 863 //this->copyFromTC(tid); 864 865 //Set PC/NPC/NNPC 866 pcState(src_tc->pcState(), tid); 867 868 src_tc->setStatus(ThreadContext::Active); 869 870 activateContext(tid); 871 872 //Reset ROB/IQ/LSQ Entries 873 commit.rob->resetEntries(); 874} 875 876template <class Impl> 877void 878FullO3CPU<Impl>::removeThread(ThreadID tid) 879{ 880 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); 881 882 // Copy Thread Data From RegFile 883 // If thread is suspended, it might be re-allocated 884 // this->copyToTC(tid); 885 886 887 // @todo: 2-27-2008: Fix how we free up rename mappings 888 // here to alleviate the case for double-freeing registers 889 // in SMT workloads. 890 891 // clear all thread-specific states in each stage of the pipeline 892 // since this thread is going to be completely removed from the CPU 893 commit.clearStates(tid); 894 fetch.clearStates(tid); 895 decode.clearStates(tid); 896 rename.clearStates(tid); 897 iew.clearStates(tid); 898 899 // at this step, all instructions in the pipeline should be already 900 // either committed successfully or squashed. All thread-specific 901 // queues in the pipeline must be empty. 902 assert(iew.instQueue.getCount(tid) == 0); 903 assert(iew.ldstQueue.getCount(tid) == 0); 904 assert(commit.rob->isEmpty(tid)); 905 906 // Reset ROB/IQ/LSQ Entries 907 908 // Commented out for now. This should be possible to do by 909 // telling all the pipeline stages to drain first, and then 910 // checking until the drain completes. Once the pipeline is 911 // drained, call resetEntries(). - 10-09-06 ktlim 912/* 913 if (activeThreads.size() >= 1) { 914 commit.rob->resetEntries(); 915 iew.resetEntries(); 916 } 917*/ 918} 919 920template <class Impl> 921Fault 922FullO3CPU<Impl>::hwrei(ThreadID tid) 923{ 924#if THE_ISA == ALPHA_ISA 925 // Need to clear the lock flag upon returning from an interrupt. 926 this->setMiscRegNoEffect(AlphaISA::MISCREG_LOCKFLAG, false, tid); 927 928 this->thread[tid]->kernelStats->hwrei(); 929 930 // FIXME: XXX check for interrupts? XXX 931#endif 932 return NoFault; 933} 934 935template <class Impl> 936bool 937FullO3CPU<Impl>::simPalCheck(int palFunc, ThreadID tid) 938{ 939#if THE_ISA == ALPHA_ISA 940 if (this->thread[tid]->kernelStats) 941 this->thread[tid]->kernelStats->callpal(palFunc, 942 this->threadContexts[tid]); 943 944 switch (palFunc) { 945 case PAL::halt: 946 halt(); 947 if (--System::numSystemsRunning == 0) 948 exitSimLoop("all cpus halted"); 949 break; 950 951 case PAL::bpt: 952 case PAL::bugchk: 953 if (this->system->breakpoint()) 954 return false; 955 break; 956 } 957#endif 958 return true; 959} 960 961template <class Impl> 962void 963FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist) 964{ 965 auto pc = this->pcState(tid); 966 967 // new_mode is the new vector renaming mode 968 auto new_mode = RenameMode<TheISA::ISA>::mode(pc); 969 970 // We update vecMode only if there has been a change 971 if (new_mode != vecMode) { 972 vecMode = new_mode; 973 974 renameMap[tid].switchMode(vecMode); 975 commitRenameMap[tid].switchMode(vecMode); 976 renameMap[tid].switchFreeList(freelist); 977 } 978} 979 980template <class Impl> 981Fault 982FullO3CPU<Impl>::getInterrupts() 983{ 984 // Check if there are any outstanding interrupts 985 return this->interrupts[0]->getInterrupt(this->threadContexts[0]); 986} 987 988template <class Impl> 989void 990FullO3CPU<Impl>::processInterrupts(const Fault &interrupt) 991{ 992 // Check for interrupts here. For now can copy the code that 993 // exists within isa_fullsys_traits.hh. Also assume that thread 0 994 // is the one that handles the interrupts. 995 // @todo: Possibly consolidate the interrupt checking code. 996 // @todo: Allow other threads to handle interrupts. 997 998 assert(interrupt != NoFault); 999 this->interrupts[0]->updateIntrInfo(this->threadContexts[0]); 1000 1001 DPRINTF(O3CPU, "Interrupt %s being handled\n", interrupt->name()); 1002 this->trap(interrupt, 0, nullptr); 1003} 1004 1005template <class Impl> 1006void 1007FullO3CPU<Impl>::trap(const Fault &fault, ThreadID tid, 1008 const StaticInstPtr &inst) 1009{ 1010 // Pass the thread's TC into the invoke method. 1011 fault->invoke(this->threadContexts[tid], inst); 1012} 1013 1014template <class Impl> 1015void 1016FullO3CPU<Impl>::syscall(int64_t callnum, ThreadID tid, Fault *fault) 1017{ 1018 DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); 1019 1020 DPRINTF(Activity,"Activity: syscall() called.\n"); 1021 1022 // Temporarily increase this by one to account for the syscall 1023 // instruction. 1024 ++(this->thread[tid]->funcExeInst); 1025 1026 // Execute the actual syscall. 1027 this->thread[tid]->syscall(callnum, fault); 1028 1029 // Decrease funcExeInst by one as the normal commit will handle 1030 // incrementing it. 1031 --(this->thread[tid]->funcExeInst); 1032} 1033 1034template <class Impl> 1035void 1036FullO3CPU<Impl>::serializeThread(CheckpointOut &cp, ThreadID tid) const 1037{ 1038 thread[tid]->serialize(cp); 1039} 1040 1041template <class Impl> 1042void 1043FullO3CPU<Impl>::unserializeThread(CheckpointIn &cp, ThreadID tid) 1044{ 1045 thread[tid]->unserialize(cp); 1046} 1047 1048template <class Impl> 1049DrainState 1050FullO3CPU<Impl>::drain() 1051{ 1052 // Deschedule any power gating event (if any) 1053 deschedulePowerGatingEvent(); 1054 1055 // If the CPU isn't doing anything, then return immediately. 1056 if (switchedOut()) 1057 return DrainState::Drained; 1058 1059 DPRINTF(Drain, "Draining...\n"); 1060 1061 // We only need to signal a drain to the commit stage as this 1062 // initiates squashing controls the draining. Once the commit 1063 // stage commits an instruction where it is safe to stop, it'll 1064 // squash the rest of the instructions in the pipeline and force 1065 // the fetch stage to stall. The pipeline will be drained once all 1066 // in-flight instructions have retired. 1067 commit.drain(); 1068 1069 // Wake the CPU and record activity so everything can drain out if 1070 // the CPU was not able to immediately drain. 1071 if (!isDrained()) { 1072 // If a thread is suspended, wake it up so it can be drained 1073 for (auto t : threadContexts) { 1074 if (t->status() == ThreadContext::Suspended){ 1075 DPRINTF(Drain, "Currently suspended so activate %i \n", 1076 t->threadId()); 1077 t->activate(); 1078 // As the thread is now active, change the power state as well 1079 activateContext(t->threadId()); 1080 } 1081 } 1082 1083 wakeCPU(); 1084 activityRec.activity(); 1085 1086 DPRINTF(Drain, "CPU not drained\n"); 1087 1088 return DrainState::Draining; 1089 } else { 1090 DPRINTF(Drain, "CPU is already drained\n"); 1091 if (tickEvent.scheduled()) 1092 deschedule(tickEvent); 1093 1094 // Flush out any old data from the time buffers. In 1095 // particular, there might be some data in flight from the 1096 // fetch stage that isn't visible in any of the CPU buffers we 1097 // test in isDrained(). 1098 for (int i = 0; i < timeBuffer.getSize(); ++i) { 1099 timeBuffer.advance(); 1100 fetchQueue.advance(); 1101 decodeQueue.advance(); 1102 renameQueue.advance(); 1103 iewQueue.advance(); 1104 } 1105 1106 drainSanityCheck(); 1107 return DrainState::Drained; 1108 } 1109} 1110 1111template <class Impl> 1112bool 1113FullO3CPU<Impl>::tryDrain() 1114{ 1115 if (drainState() != DrainState::Draining || !isDrained()) 1116 return false; 1117 1118 if (tickEvent.scheduled()) 1119 deschedule(tickEvent); 1120 1121 DPRINTF(Drain, "CPU done draining, processing drain event\n"); 1122 signalDrainDone(); 1123 1124 return true; 1125} 1126 1127template <class Impl> 1128void 1129FullO3CPU<Impl>::drainSanityCheck() const 1130{ 1131 assert(isDrained()); 1132 fetch.drainSanityCheck(); 1133 decode.drainSanityCheck(); 1134 rename.drainSanityCheck(); 1135 iew.drainSanityCheck(); 1136 commit.drainSanityCheck(); 1137} 1138 1139template <class Impl> 1140bool 1141FullO3CPU<Impl>::isDrained() const 1142{ 1143 bool drained(true); 1144 1145 if (!instList.empty() || !removeList.empty()) { 1146 DPRINTF(Drain, "Main CPU structures not drained.\n"); 1147 drained = false; 1148 } 1149 1150 if (!fetch.isDrained()) { 1151 DPRINTF(Drain, "Fetch not drained.\n"); 1152 drained = false; 1153 } 1154 1155 if (!decode.isDrained()) { 1156 DPRINTF(Drain, "Decode not drained.\n"); 1157 drained = false; 1158 } 1159 1160 if (!rename.isDrained()) { 1161 DPRINTF(Drain, "Rename not drained.\n"); 1162 drained = false; 1163 } 1164 1165 if (!iew.isDrained()) { 1166 DPRINTF(Drain, "IEW not drained.\n"); 1167 drained = false; 1168 } 1169 1170 if (!commit.isDrained()) { 1171 DPRINTF(Drain, "Commit not drained.\n"); 1172 drained = false; 1173 } 1174 1175 return drained; 1176} 1177 1178template <class Impl> 1179void 1180FullO3CPU<Impl>::commitDrained(ThreadID tid) 1181{ 1182 fetch.drainStall(tid); 1183} 1184 1185template <class Impl> 1186void 1187FullO3CPU<Impl>::drainResume() 1188{ 1189 if (switchedOut()) 1190 return; 1191 1192 DPRINTF(Drain, "Resuming...\n"); 1193 verifyMemoryMode(); 1194 1195 fetch.drainResume(); 1196 commit.drainResume(); 1197 1198 _status = Idle; 1199 for (ThreadID i = 0; i < thread.size(); i++) { 1200 if (thread[i]->status() == ThreadContext::Active) { 1201 DPRINTF(Drain, "Activating thread: %i\n", i); 1202 activateThread(i); 1203 _status = Running; 1204 } 1205 } 1206 1207 assert(!tickEvent.scheduled()); 1208 if (_status == Running) 1209 schedule(tickEvent, nextCycle()); 1210 1211 // Reschedule any power gating event (if any) 1212 schedulePowerGatingEvent(); 1213} 1214 1215template <class Impl> 1216void 1217FullO3CPU<Impl>::switchOut() 1218{ 1219 DPRINTF(O3CPU, "Switching out\n"); 1220 BaseCPU::switchOut(); 1221 1222 activityRec.reset(); 1223 1224 _status = SwitchedOut; 1225 1226 if (checker) 1227 checker->switchOut(); 1228} 1229 1230template <class Impl> 1231void 1232FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) 1233{ 1234 BaseCPU::takeOverFrom(oldCPU); 1235 1236 fetch.takeOverFrom(); 1237 decode.takeOverFrom(); 1238 rename.takeOverFrom(); 1239 iew.takeOverFrom(); 1240 commit.takeOverFrom(); 1241 1242 assert(!tickEvent.scheduled()); 1243 1244 FullO3CPU<Impl> *oldO3CPU = dynamic_cast<FullO3CPU<Impl>*>(oldCPU); 1245 if (oldO3CPU) 1246 globalSeqNum = oldO3CPU->globalSeqNum; 1247 1248 lastRunningCycle = curCycle(); 1249 _status = Idle; 1250} 1251 1252template <class Impl> 1253void 1254FullO3CPU<Impl>::verifyMemoryMode() const 1255{ 1256 if (!system->isTimingMode()) { 1257 fatal("The O3 CPU requires the memory system to be in " 1258 "'timing' mode.\n"); 1259 } 1260} 1261 1262template <class Impl> 1263RegVal 1264FullO3CPU<Impl>::readMiscRegNoEffect(int misc_reg, ThreadID tid) const 1265{ 1266 return this->isa[tid]->readMiscRegNoEffect(misc_reg); 1267} 1268 1269template <class Impl> 1270RegVal 1271FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid) 1272{ 1273 miscRegfileReads++; 1274 return this->isa[tid]->readMiscReg(misc_reg, tcBase(tid)); 1275} 1276 1277template <class Impl> 1278void 1279FullO3CPU<Impl>::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) 1280{ 1281 this->isa[tid]->setMiscRegNoEffect(misc_reg, val); 1282} 1283 1284template <class Impl> 1285void 1286FullO3CPU<Impl>::setMiscReg(int misc_reg, RegVal val, ThreadID tid) 1287{ 1288 miscRegfileWrites++; 1289 this->isa[tid]->setMiscReg(misc_reg, val, tcBase(tid)); 1290} 1291 1292template <class Impl> 1293RegVal 1294FullO3CPU<Impl>::readIntReg(PhysRegIdPtr phys_reg) 1295{ 1296 intRegfileReads++; 1297 return regFile.readIntReg(phys_reg); 1298} 1299 1300template <class Impl> 1301RegVal 1302FullO3CPU<Impl>::readFloatReg(PhysRegIdPtr phys_reg) 1303{ 1304 fpRegfileReads++; 1305 return regFile.readFloatReg(phys_reg); 1306} 1307 1308template <class Impl> 1309auto 1310FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const 1311 -> const VecRegContainer& 1312{ 1313 vecRegfileReads++; 1314 return regFile.readVecReg(phys_reg); 1315} 1316 1317template <class Impl> 1318auto 1319FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg) 1320 -> VecRegContainer& 1321{ 1322 vecRegfileWrites++; 1323 return regFile.getWritableVecReg(phys_reg); 1324} 1325 1326template <class Impl> 1327auto 1328FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& 1329{ 1330 vecRegfileReads++; 1331 return regFile.readVecElem(phys_reg); 1332} 1333 1334template <class Impl> 1335auto 1336FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const 1337 -> const VecPredRegContainer& 1338{ 1339 vecPredRegfileReads++; 1340 return regFile.readVecPredReg(phys_reg); 1341} 1342 1343template <class Impl> 1344auto 1345FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg) 1346 -> VecPredRegContainer& 1347{ 1348 vecPredRegfileWrites++; 1349 return regFile.getWritableVecPredReg(phys_reg); 1350} 1351 1352template <class Impl> 1353RegVal 1354FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg) 1355{ 1356 ccRegfileReads++; 1357 return regFile.readCCReg(phys_reg); 1358} 1359 1360template <class Impl> 1361void 1362FullO3CPU<Impl>::setIntReg(PhysRegIdPtr phys_reg, RegVal val) 1363{ 1364 intRegfileWrites++; 1365 regFile.setIntReg(phys_reg, val); 1366} 1367 1368template <class Impl> 1369void 1370FullO3CPU<Impl>::setFloatReg(PhysRegIdPtr phys_reg, RegVal val) 1371{ 1372 fpRegfileWrites++; 1373 regFile.setFloatReg(phys_reg, val); 1374} 1375 1376template <class Impl> 1377void 1378FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) 1379{ 1380 vecRegfileWrites++; 1381 regFile.setVecReg(phys_reg, val); 1382} 1383 1384template <class Impl> 1385void 1386FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) 1387{ 1388 vecRegfileWrites++; 1389 regFile.setVecElem(phys_reg, val); 1390} 1391 1392template <class Impl> 1393void 1394FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg, 1395 const VecPredRegContainer& val) 1396{ 1397 vecPredRegfileWrites++; 1398 regFile.setVecPredReg(phys_reg, val); 1399} 1400 1401template <class Impl> 1402void 1403FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, RegVal val) 1404{ 1405 ccRegfileWrites++; 1406 regFile.setCCReg(phys_reg, val); 1407} 1408 1409template <class Impl> 1410RegVal 1411FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) 1412{ 1413 intRegfileReads++; 1414 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1415 RegId(IntRegClass, reg_idx)); 1416 1417 return regFile.readIntReg(phys_reg); 1418} 1419 1420template <class Impl> 1421RegVal 1422FullO3CPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid) 1423{ 1424 fpRegfileReads++; 1425 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1426 RegId(FloatRegClass, reg_idx)); 1427 1428 return regFile.readFloatReg(phys_reg); 1429} 1430 1431template <class Impl> 1432auto 1433FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const 1434 -> const VecRegContainer& 1435{ 1436 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1437 RegId(VecRegClass, reg_idx)); 1438 return readVecReg(phys_reg); 1439} 1440 1441template <class Impl> 1442auto 1443FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid) 1444 -> VecRegContainer& 1445{ 1446 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1447 RegId(VecRegClass, reg_idx)); 1448 return getWritableVecReg(phys_reg); 1449} 1450 1451template <class Impl> 1452auto 1453FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1454 ThreadID tid) const -> const VecElem& 1455{ 1456 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1457 RegId(VecElemClass, reg_idx, ldx)); 1458 return readVecElem(phys_reg); 1459} 1460 1461template <class Impl> 1462auto 1463FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const 1464 -> const VecPredRegContainer& 1465{ 1466 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1467 RegId(VecPredRegClass, reg_idx)); 1468 return readVecPredReg(phys_reg); 1469} 1470 1471template <class Impl> 1472auto 1473FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid) 1474 -> VecPredRegContainer& 1475{ 1476 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1477 RegId(VecPredRegClass, reg_idx)); 1478 return getWritableVecPredReg(phys_reg); 1479} 1480 1481template <class Impl> 1482RegVal 1483FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid) 1484{ 1485 ccRegfileReads++; 1486 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1487 RegId(CCRegClass, reg_idx)); 1488 1489 return regFile.readCCReg(phys_reg); 1490} 1491 1492template <class Impl> 1493void 1494FullO3CPU<Impl>::setArchIntReg(int reg_idx, RegVal val, ThreadID tid) 1495{ 1496 intRegfileWrites++; 1497 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1498 RegId(IntRegClass, reg_idx)); 1499 1500 regFile.setIntReg(phys_reg, val); 1501} 1502 1503template <class Impl> 1504void 1505FullO3CPU<Impl>::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid) 1506{ 1507 fpRegfileWrites++; 1508 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1509 RegId(FloatRegClass, reg_idx)); 1510 1511 regFile.setFloatReg(phys_reg, val); 1512} 1513 1514template <class Impl> 1515void 1516FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val, 1517 ThreadID tid) 1518{ 1519 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1520 RegId(VecRegClass, reg_idx)); 1521 setVecReg(phys_reg, val); 1522} 1523 1524template <class Impl> 1525void 1526FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, 1527 const VecElem& val, ThreadID tid) 1528{ 1529 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1530 RegId(VecElemClass, reg_idx, ldx)); 1531 setVecElem(phys_reg, val); 1532} 1533 1534template <class Impl> 1535void 1536FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, 1537 ThreadID tid) 1538{ 1539 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1540 RegId(VecPredRegClass, reg_idx)); 1541 setVecPredReg(phys_reg, val); 1542} 1543 1544template <class Impl> 1545void 1546FullO3CPU<Impl>::setArchCCReg(int reg_idx, RegVal val, ThreadID tid) 1547{ 1548 ccRegfileWrites++; 1549 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 1550 RegId(CCRegClass, reg_idx)); 1551 1552 regFile.setCCReg(phys_reg, val); 1553} 1554 1555template <class Impl> 1556TheISA::PCState 1557FullO3CPU<Impl>::pcState(ThreadID tid) 1558{ 1559 return commit.pcState(tid); 1560} 1561 1562template <class Impl> 1563void 1564FullO3CPU<Impl>::pcState(const TheISA::PCState &val, ThreadID tid) 1565{ 1566 commit.pcState(val, tid); 1567} 1568 1569template <class Impl> 1570Addr 1571FullO3CPU<Impl>::instAddr(ThreadID tid) 1572{ 1573 return commit.instAddr(tid); 1574} 1575 1576template <class Impl> 1577Addr 1578FullO3CPU<Impl>::nextInstAddr(ThreadID tid) 1579{ 1580 return commit.nextInstAddr(tid); 1581} 1582 1583template <class Impl> 1584MicroPC 1585FullO3CPU<Impl>::microPC(ThreadID tid) 1586{ 1587 return commit.microPC(tid); 1588} 1589 1590template <class Impl> 1591void 1592FullO3CPU<Impl>::squashFromTC(ThreadID tid) 1593{ 1594 this->thread[tid]->noSquashFromTC = true; 1595 this->commit.generateTCEvent(tid); 1596} 1597 1598template <class Impl> 1599typename FullO3CPU<Impl>::ListIt 1600FullO3CPU<Impl>::addInst(const DynInstPtr &inst) 1601{ 1602 instList.push_back(inst); 1603 1604 return --(instList.end()); 1605} 1606 1607template <class Impl> 1608void 1609FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst) 1610{ 1611 // Keep an instruction count. 1612 if (!inst->isMicroop() || inst->isLastMicroop()) { 1613 thread[tid]->numInst++; 1614 thread[tid]->numInsts++; 1615 committedInsts[tid]++; 1616 system->totalNumInsts++; 1617 1618 // Check for instruction-count-based events. 1619 comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); 1620 system->instEventQueue.serviceEvents(system->totalNumInsts); 1621 } 1622 thread[tid]->numOp++; 1623 thread[tid]->numOps++; 1624 committedOps[tid]++; 1625 1626 probeInstCommit(inst->staticInst); 1627} 1628 1629template <class Impl> 1630void 1631FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst) 1632{ 1633 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s " 1634 "[sn:%lli]\n", 1635 inst->threadNumber, inst->pcState(), inst->seqNum); 1636 1637 removeInstsThisCycle = true; 1638 1639 // Remove the front instruction. 1640 removeList.push(inst->getInstListIt()); 1641} 1642 1643template <class Impl> 1644void 1645FullO3CPU<Impl>::removeInstsNotInROB(ThreadID tid) 1646{ 1647 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" 1648 " list.\n", tid); 1649 1650 ListIt end_it; 1651 1652 bool rob_empty = false; 1653 1654 if (instList.empty()) { 1655 return; 1656 } else if (rob.isEmpty(tid)) { 1657 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); 1658 end_it = instList.begin(); 1659 rob_empty = true; 1660 } else { 1661 end_it = (rob.readTailInst(tid))->getInstListIt(); 1662 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); 1663 } 1664 1665 removeInstsThisCycle = true; 1666 1667 ListIt inst_it = instList.end(); 1668 1669 inst_it--; 1670 1671 // Walk through the instruction list, removing any instructions 1672 // that were inserted after the given instruction iterator, end_it. 1673 while (inst_it != end_it) { 1674 assert(!instList.empty()); 1675 1676 squashInstIt(inst_it, tid); 1677 1678 inst_it--; 1679 } 1680 1681 // If the ROB was empty, then we actually need to remove the first 1682 // instruction as well. 1683 if (rob_empty) { 1684 squashInstIt(inst_it, tid); 1685 } 1686} 1687 1688template <class Impl> 1689void 1690FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) 1691{ 1692 assert(!instList.empty()); 1693 1694 removeInstsThisCycle = true; 1695 1696 ListIt inst_iter = instList.end(); 1697 1698 inst_iter--; 1699 1700 DPRINTF(O3CPU, "Deleting instructions from instruction " 1701 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", 1702 tid, seq_num, (*inst_iter)->seqNum); 1703 1704 while ((*inst_iter)->seqNum > seq_num) { 1705 1706 bool break_loop = (inst_iter == instList.begin()); 1707 1708 squashInstIt(inst_iter, tid); 1709 1710 inst_iter--; 1711 1712 if (break_loop) 1713 break; 1714 } 1715} 1716 1717template <class Impl> 1718inline void 1719FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, ThreadID tid) 1720{ 1721 if ((*instIt)->threadNumber == tid) { 1722 DPRINTF(O3CPU, "Squashing instruction, " 1723 "[tid:%i] [sn:%lli] PC %s\n", 1724 (*instIt)->threadNumber, 1725 (*instIt)->seqNum, 1726 (*instIt)->pcState()); 1727 1728 // Mark it as squashed. 1729 (*instIt)->setSquashed(); 1730 1731 // @todo: Formulate a consistent method for deleting 1732 // instructions from the instruction list 1733 // Remove the instruction from the list. 1734 removeList.push(instIt); 1735 } 1736} 1737 1738template <class Impl> 1739void 1740FullO3CPU<Impl>::cleanUpRemovedInsts() 1741{ 1742 while (!removeList.empty()) { 1743 DPRINTF(O3CPU, "Removing instruction, " 1744 "[tid:%i] [sn:%lli] PC %s\n", 1745 (*removeList.front())->threadNumber, 1746 (*removeList.front())->seqNum, 1747 (*removeList.front())->pcState()); 1748 1749 instList.erase(removeList.front()); 1750 1751 removeList.pop(); 1752 } 1753 1754 removeInstsThisCycle = false; 1755} 1756/* 1757template <class Impl> 1758void 1759FullO3CPU<Impl>::removeAllInsts() 1760{ 1761 instList.clear(); 1762} 1763*/ 1764template <class Impl> 1765void 1766FullO3CPU<Impl>::dumpInsts() 1767{ 1768 int num = 0; 1769 1770 ListIt inst_list_it = instList.begin(); 1771 1772 cprintf("Dumping Instruction List\n"); 1773 1774 while (inst_list_it != instList.end()) { 1775 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" 1776 "Squashed:%i\n\n", 1777 num, (*inst_list_it)->instAddr(), (*inst_list_it)->threadNumber, 1778 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), 1779 (*inst_list_it)->isSquashed()); 1780 inst_list_it++; 1781 ++num; 1782 } 1783} 1784/* 1785template <class Impl> 1786void 1787FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst) 1788{ 1789 iew.wakeDependents(inst); 1790} 1791*/ 1792template <class Impl> 1793void 1794FullO3CPU<Impl>::wakeCPU() 1795{ 1796 if (activityRec.active() || tickEvent.scheduled()) { 1797 DPRINTF(Activity, "CPU already running.\n"); 1798 return; 1799 } 1800 1801 DPRINTF(Activity, "Waking up CPU\n"); 1802 1803 Cycles cycles(curCycle() - lastRunningCycle); 1804 // @todo: This is an oddity that is only here to match the stats 1805 if (cycles > 1) { 1806 --cycles; 1807 idleCycles += cycles; 1808 numCycles += cycles; 1809 } 1810 1811 schedule(tickEvent, clockEdge()); 1812} 1813 1814template <class Impl> 1815void 1816FullO3CPU<Impl>::wakeup(ThreadID tid) 1817{ 1818 if (this->thread[tid]->status() != ThreadContext::Suspended) 1819 return; 1820 1821 this->wakeCPU(); 1822 1823 DPRINTF(Quiesce, "Suspended Processor woken\n"); 1824 this->threadContexts[tid]->activate(); 1825} 1826 1827template <class Impl> 1828ThreadID 1829FullO3CPU<Impl>::getFreeTid() 1830{ 1831 for (ThreadID tid = 0; tid < numThreads; tid++) { 1832 if (!tids[tid]) { 1833 tids[tid] = true; 1834 return tid; 1835 } 1836 } 1837 1838 return InvalidThreadID; 1839} 1840 1841template <class Impl> 1842void 1843FullO3CPU<Impl>::updateThreadPriority() 1844{ 1845 if (activeThreads.size() > 1) { 1846 //DEFAULT TO ROUND ROBIN SCHEME 1847 //e.g. Move highest priority to end of thread list 1848 list<ThreadID>::iterator list_begin = activeThreads.begin(); 1849 1850 unsigned high_thread = *list_begin; 1851 1852 activeThreads.erase(list_begin); 1853 1854 activeThreads.push_back(high_thread); 1855 } 1856} 1857 1858template <class Impl> 1859void 1860FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid) 1861{ 1862 DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid); 1863 1864 // the thread trying to exit can't be already halted 1865 assert(tcBase(tid)->status() != ThreadContext::Halted); 1866 1867 // make sure the thread has not been added to the list yet 1868 assert(exitingThreads.count(tid) == 0); 1869 1870 // add the thread to exitingThreads list to mark that this thread is 1871 // trying to exit. The boolean value in the pair denotes if a thread is 1872 // ready to exit. The thread is not ready to exit until the corresponding 1873 // exit trap event is processed in the future. Until then, it'll be still 1874 // an active thread that is trying to exit. 1875 exitingThreads.emplace(std::make_pair(tid, false)); 1876} 1877 1878template <class Impl> 1879bool 1880FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const 1881{ 1882 return exitingThreads.count(tid) == 1; 1883} 1884 1885template <class Impl> 1886void 1887FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid) 1888{ 1889 assert(exitingThreads.count(tid) == 1); 1890 1891 // exit trap event has been processed. Now, the thread is ready to exit 1892 // and be removed from the CPU. 1893 exitingThreads[tid] = true; 1894 1895 // we schedule a threadExitEvent in the next cycle to properly clean 1896 // up the thread's states in the pipeline. threadExitEvent has lower 1897 // priority than tickEvent, so the cleanup will happen at the very end 1898 // of the next cycle after all pipeline stages complete their operations. 1899 // We want all stages to complete squashing instructions before doing 1900 // the cleanup. 1901 if (!threadExitEvent.scheduled()) { 1902 schedule(threadExitEvent, nextCycle()); 1903 } 1904} 1905 1906template <class Impl> 1907void 1908FullO3CPU<Impl>::exitThreads() 1909{ 1910 // there must be at least one thread trying to exit 1911 assert(exitingThreads.size() > 0); 1912 1913 // terminate all threads that are ready to exit 1914 auto it = exitingThreads.begin(); 1915 while (it != exitingThreads.end()) { 1916 ThreadID thread_id = it->first; 1917 bool readyToExit = it->second; 1918 1919 if (readyToExit) { 1920 DPRINTF(O3CPU, "Exiting thread %d\n", thread_id); 1921 haltContext(thread_id); 1922 tcBase(thread_id)->setStatus(ThreadContext::Halted); 1923 it = exitingThreads.erase(it); 1924 } else { 1925 it++; 1926 } 1927 } 1928} 1929 1930// Forward declaration of FullO3CPU. 1931template class FullO3CPU<O3CPUImpl>; 1932