cpu.cc revision 3125:febd811bccc6
/*
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * 28 * Authors: Kevin Lim 29 * Korey Sewell 30 */ 31 32#include "config/full_system.hh" 33#include "config/use_checker.hh" 34 35#if FULL_SYSTEM 36#include "cpu/quiesce_event.hh" 37#include "sim/system.hh" 38#else 39#include "sim/process.hh" 40#endif 41 42#include "cpu/activity.hh" 43#include "cpu/simple_thread.hh" 44#include "cpu/thread_context.hh" 45#include "cpu/o3/isa_specific.hh" 46#include "cpu/o3/cpu.hh" 47 48#include "sim/root.hh" 49#include "sim/stat_control.hh" 50 51#if USE_CHECKER 52#include "cpu/checker/cpu.hh" 53#endif 54 55using namespace std; 56using namespace TheISA; 57 58BaseO3CPU::BaseO3CPU(Params *params) 59 : BaseCPU(params), cpu_id(0) 60{ 61} 62 63void 64BaseO3CPU::regStats() 65{ 66 BaseCPU::regStats(); 67} 68 69template <class Impl> 70FullO3CPU<Impl>::TickEvent::TickEvent(FullO3CPU<Impl> *c) 71 : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) 72{ 73} 74 75template <class Impl> 76void 77FullO3CPU<Impl>::TickEvent::process() 78{ 79 cpu->tick(); 80} 81 82template <class Impl> 83const char * 84FullO3CPU<Impl>::TickEvent::description() 85{ 86 return "FullO3CPU tick event"; 87} 88 89template <class Impl> 90FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent() 91 : Event(&mainEventQueue, CPU_Tick_Pri) 92{ 93} 94 95template <class Impl> 96void 97FullO3CPU<Impl>::ActivateThreadEvent::init(int thread_num, 98 FullO3CPU<Impl> *thread_cpu) 99{ 100 tid = thread_num; 101 cpu = thread_cpu; 102} 103 104template <class Impl> 105void 106FullO3CPU<Impl>::ActivateThreadEvent::process() 107{ 108 cpu->activateThread(tid); 109} 110 111template <class Impl> 112const char * 113FullO3CPU<Impl>::ActivateThreadEvent::description() 114{ 115 return "FullO3CPU \"Activate Thread\" event"; 116} 117 118template <class Impl> 119FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent() 120 : Event(&mainEventQueue, CPU_Tick_Pri) 121{ 122} 123 124template <class Impl> 125void 126FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num, 127 FullO3CPU<Impl> 
*thread_cpu) 128{ 129 tid = thread_num; 130 cpu = thread_cpu; 131} 132 133template <class Impl> 134void 135FullO3CPU<Impl>::DeallocateContextEvent::process() 136{ 137 cpu->deactivateThread(tid); 138 cpu->removeThread(tid); 139} 140 141template <class Impl> 142const char * 143FullO3CPU<Impl>::DeallocateContextEvent::description() 144{ 145 return "FullO3CPU \"Deallocate Context\" event"; 146} 147 148template <class Impl> 149FullO3CPU<Impl>::FullO3CPU(Params *params) 150 : BaseO3CPU(params), 151 tickEvent(this), 152 removeInstsThisCycle(false), 153 fetch(params), 154 decode(params), 155 rename(params), 156 iew(params), 157 commit(params), 158 159 regFile(params->numPhysIntRegs, params->numPhysFloatRegs), 160 161 freeList(params->numberOfThreads, 162 TheISA::NumIntRegs, params->numPhysIntRegs, 163 TheISA::NumFloatRegs, params->numPhysFloatRegs), 164 165 rob(params->numROBEntries, params->squashWidth, 166 params->smtROBPolicy, params->smtROBThreshold, 167 params->numberOfThreads), 168 169 scoreboard(params->numberOfThreads, 170 TheISA::NumIntRegs, params->numPhysIntRegs, 171 TheISA::NumFloatRegs, params->numPhysFloatRegs, 172 TheISA::NumMiscRegs * number_of_threads, 173 TheISA::ZeroReg), 174 175 timeBuffer(params->backComSize, params->forwardComSize), 176 fetchQueue(params->backComSize, params->forwardComSize), 177 decodeQueue(params->backComSize, params->forwardComSize), 178 renameQueue(params->backComSize, params->forwardComSize), 179 iewQueue(params->backComSize, params->forwardComSize), 180 activityRec(NumStages, 181 params->backComSize + params->forwardComSize, 182 params->activity), 183 184 globalSeqNum(1), 185#if FULL_SYSTEM 186 system(params->system), 187 physmem(system->physmem), 188#endif // FULL_SYSTEM 189 mem(params->mem), 190 drainCount(0), 191 deferRegistration(params->deferRegistration), 192 numThreads(number_of_threads) 193{ 194 _status = Idle; 195 196 checker = NULL; 197 198 if (params->checker) { 199#if USE_CHECKER 200 BaseCPU *temp_checker = 
params->checker; 201 checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker); 202 checker->setMemory(mem); 203#if FULL_SYSTEM 204 checker->setSystem(params->system); 205#endif 206#else 207 panic("Checker enabled but not compiled in!"); 208#endif // USE_CHECKER 209 } 210 211#if !FULL_SYSTEM 212 thread.resize(number_of_threads); 213 tids.resize(number_of_threads); 214#endif 215 216 // The stages also need their CPU pointer setup. However this 217 // must be done at the upper level CPU because they have pointers 218 // to the upper level CPU, and not this FullO3CPU. 219 220 // Set up Pointers to the activeThreads list for each stage 221 fetch.setActiveThreads(&activeThreads); 222 decode.setActiveThreads(&activeThreads); 223 rename.setActiveThreads(&activeThreads); 224 iew.setActiveThreads(&activeThreads); 225 commit.setActiveThreads(&activeThreads); 226 227 // Give each of the stages the time buffer they will use. 228 fetch.setTimeBuffer(&timeBuffer); 229 decode.setTimeBuffer(&timeBuffer); 230 rename.setTimeBuffer(&timeBuffer); 231 iew.setTimeBuffer(&timeBuffer); 232 commit.setTimeBuffer(&timeBuffer); 233 234 // Also setup each of the stages' queues. 235 fetch.setFetchQueue(&fetchQueue); 236 decode.setFetchQueue(&fetchQueue); 237 commit.setFetchQueue(&fetchQueue); 238 decode.setDecodeQueue(&decodeQueue); 239 rename.setDecodeQueue(&decodeQueue); 240 rename.setRenameQueue(&renameQueue); 241 iew.setRenameQueue(&renameQueue); 242 iew.setIEWQueue(&iewQueue); 243 commit.setIEWQueue(&iewQueue); 244 commit.setRenameQueue(&renameQueue); 245 246 commit.setIEWStage(&iew); 247 rename.setIEWStage(&iew); 248 rename.setCommitStage(&commit); 249 250#if !FULL_SYSTEM 251 int active_threads = params->workload.size(); 252 253 if (active_threads > Impl::MaxThreads) { 254 panic("Workload Size too large. Increase the 'MaxThreads'" 255 "constant in your O3CPU impl. file (e.g. 
o3/alpha/impl.hh) or " 256 "edit your workload size."); 257 } 258#else 259 int active_threads = 1; 260#endif 261 262 //Make Sure That this a Valid Architeture 263 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); 264 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); 265 266 rename.setScoreboard(&scoreboard); 267 iew.setScoreboard(&scoreboard); 268 269 // Setup the rename map for whichever stages need it. 270 PhysRegIndex lreg_idx = 0; 271 PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs 272 273 for (int tid=0; tid < numThreads; tid++) { 274 bool bindRegs = (tid <= active_threads - 1); 275 276 commitRenameMap[tid].init(TheISA::NumIntRegs, 277 params->numPhysIntRegs, 278 lreg_idx, //Index for Logical. Regs 279 280 TheISA::NumFloatRegs, 281 params->numPhysFloatRegs, 282 freg_idx, //Index for Float Regs 283 284 TheISA::NumMiscRegs, 285 286 TheISA::ZeroReg, 287 TheISA::ZeroReg, 288 289 tid, 290 false); 291 292 renameMap[tid].init(TheISA::NumIntRegs, 293 params->numPhysIntRegs, 294 lreg_idx, //Index for Logical. Regs 295 296 TheISA::NumFloatRegs, 297 params->numPhysFloatRegs, 298 freg_idx, //Index for Float Regs 299 300 TheISA::NumMiscRegs, 301 302 TheISA::ZeroReg, 303 TheISA::ZeroReg, 304 305 tid, 306 bindRegs); 307 } 308 309 rename.setRenameMap(renameMap); 310 commit.setRenameMap(commitRenameMap); 311 312 // Give renameMap & rename stage access to the freeList; 313 for (int i=0; i < numThreads; i++) { 314 renameMap[i].setFreeList(&freeList); 315 } 316 rename.setFreeList(&freeList); 317 318 // Setup the ROB for whichever stages need it. 
319 commit.setROB(&rob); 320 321 lastRunningCycle = curTick; 322 323 lastActivatedCycle = -1; 324 325 // Give renameMap & rename stage access to the freeList; 326 //for (int i=0; i < numThreads; i++) { 327 //globalSeqNum[i] = 1; 328 //} 329 330 contextSwitch = false; 331} 332 333template <class Impl> 334FullO3CPU<Impl>::~FullO3CPU() 335{ 336} 337 338template <class Impl> 339void 340FullO3CPU<Impl>::fullCPURegStats() 341{ 342 BaseO3CPU::regStats(); 343 344 // Register any of the O3CPU's stats here. 345 timesIdled 346 .name(name() + ".timesIdled") 347 .desc("Number of times that the entire CPU went into an idle state and" 348 " unscheduled itself") 349 .prereq(timesIdled); 350 351 idleCycles 352 .name(name() + ".idleCycles") 353 .desc("Total number of cycles that the CPU has spent unscheduled due " 354 "to idling") 355 .prereq(idleCycles); 356 357 // Number of Instructions simulated 358 // -------------------------------- 359 // Should probably be in Base CPU but need templated 360 // MaxThreads so put in here instead 361 committedInsts 362 .init(numThreads) 363 .name(name() + ".committedInsts") 364 .desc("Number of Instructions Simulated"); 365 366 totalCommittedInsts 367 .name(name() + ".committedInsts_total") 368 .desc("Number of Instructions Simulated"); 369 370 cpi 371 .name(name() + ".cpi") 372 .desc("CPI: Cycles Per Instruction") 373 .precision(6); 374 cpi = simTicks / committedInsts; 375 376 totalCpi 377 .name(name() + ".cpi_total") 378 .desc("CPI: Total CPI of All Threads") 379 .precision(6); 380 totalCpi = simTicks / totalCommittedInsts; 381 382 ipc 383 .name(name() + ".ipc") 384 .desc("IPC: Instructions Per Cycle") 385 .precision(6); 386 ipc = committedInsts / simTicks; 387 388 totalIpc 389 .name(name() + ".ipc_total") 390 .desc("IPC: Total IPC of All Threads") 391 .precision(6); 392 totalIpc = totalCommittedInsts / simTicks; 393 394} 395 396template <class Impl> 397Port * 398FullO3CPU<Impl>::getPort(const std::string &if_name, int idx) 399{ 400 if 
(if_name == "dcache_port") 401 return iew.getDcachePort(); 402 else if (if_name == "icache_port") 403 return fetch.getIcachePort(); 404 else 405 panic("No Such Port\n"); 406} 407 408template <class Impl> 409void 410FullO3CPU<Impl>::tick() 411{ 412 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); 413 414 ++numCycles; 415 416// activity = false; 417 418 //Tick each of the stages 419 fetch.tick(); 420 421 decode.tick(); 422 423 rename.tick(); 424 425 iew.tick(); 426 427 commit.tick(); 428 429#if !FULL_SYSTEM 430 doContextSwitch(); 431#endif 432 433 // Now advance the time buffers 434 timeBuffer.advance(); 435 436 fetchQueue.advance(); 437 decodeQueue.advance(); 438 renameQueue.advance(); 439 iewQueue.advance(); 440 441 activityRec.advance(); 442 443 if (removeInstsThisCycle) { 444 cleanUpRemovedInsts(); 445 } 446 447 if (!tickEvent.scheduled()) { 448 if (_status == SwitchedOut || 449 getState() == SimObject::Drained) { 450 // increment stat 451 lastRunningCycle = curTick; 452 } else if (!activityRec.active()) { 453 lastRunningCycle = curTick; 454 timesIdled++; 455 } else { 456 tickEvent.schedule(curTick + cycles(1)); 457 } 458 } 459 460#if !FULL_SYSTEM 461 updateThreadPriority(); 462#endif 463 464} 465 466template <class Impl> 467void 468FullO3CPU<Impl>::init() 469{ 470 if (!deferRegistration) { 471 registerThreadContexts(); 472 } 473 474 // Set inSyscall so that the CPU doesn't squash when initially 475 // setting up registers. 476 for (int i = 0; i < number_of_threads; ++i) 477 thread[i]->inSyscall = true; 478 479 for (int tid=0; tid < number_of_threads; tid++) { 480#if FULL_SYSTEM 481 ThreadContext *src_tc = threadContexts[tid]; 482#else 483 ThreadContext *src_tc = thread[tid]->getTC(); 484#endif 485 // Threads start in the Suspended State 486 if (src_tc->status() != ThreadContext::Suspended) { 487 continue; 488 } 489 490#if FULL_SYSTEM 491 TheISA::initCPU(src_tc, src_tc->readCpuId()); 492#endif 493 } 494 495 // Clear inSyscall. 
496 for (int i = 0; i < number_of_threads; ++i) 497 thread[i]->inSyscall = false; 498 499 // Initialize stages. 500 fetch.initStage(); 501 iew.initStage(); 502 rename.initStage(); 503 commit.initStage(); 504 505 commit.setThreads(thread); 506} 507 508template <class Impl> 509void 510FullO3CPU<Impl>::activateThread(unsigned tid) 511{ 512 list<unsigned>::iterator isActive = find( 513 activeThreads.begin(), activeThreads.end(), tid); 514 515 if (isActive == activeThreads.end()) { 516 DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", 517 tid); 518 519 activeThreads.push_back(tid); 520 } 521} 522 523template <class Impl> 524void 525FullO3CPU<Impl>::deactivateThread(unsigned tid) 526{ 527 //Remove From Active List, if Active 528 list<unsigned>::iterator thread_it = 529 find(activeThreads.begin(), activeThreads.end(), tid); 530 531 if (thread_it != activeThreads.end()) { 532 DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", 533 tid); 534 activeThreads.erase(thread_it); 535 } 536} 537 538template <class Impl> 539void 540FullO3CPU<Impl>::activateContext(int tid, int delay) 541{ 542 // Needs to set each stage to running as well. 543 if (delay){ 544 DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " 545 "on cycle %d\n", tid, curTick + cycles(delay)); 546 scheduleActivateThreadEvent(tid, delay); 547 } else { 548 activateThread(tid); 549 } 550 551 if(lastActivatedCycle < curTick) { 552 scheduleTickEvent(delay); 553 554 // Be sure to signal that there's some activity so the CPU doesn't 555 // deschedule itself. 
556 activityRec.activity(); 557 fetch.wakeFromQuiesce(); 558 559 lastActivatedCycle = curTick; 560 561 _status = Running; 562 } 563} 564 565template <class Impl> 566void 567FullO3CPU<Impl>::deallocateContext(int tid, int delay) 568{ 569 // Schedule removal of thread data from CPU 570 if (delay){ 571 DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " 572 "on cycle %d\n", tid, curTick + cycles(delay)); 573 scheduleDeallocateContextEvent(tid, delay); 574 } else { 575 deactivateThread(tid); 576 removeThread(tid); 577 } 578} 579 580template <class Impl> 581void 582FullO3CPU<Impl>::suspendContext(int tid) 583{ 584 DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); 585 deactivateThread(tid); 586 if (activeThreads.size() == 0) 587 unscheduleTickEvent(); 588 _status = Idle; 589} 590 591template <class Impl> 592void 593FullO3CPU<Impl>::haltContext(int tid) 594{ 595 //For now, this is the same as deallocate 596 DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); 597 deallocateContext(tid, 1); 598} 599 600template <class Impl> 601void 602FullO3CPU<Impl>::insertThread(unsigned tid) 603{ 604 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); 605 // Will change now that the PC and thread state is internal to the CPU 606 // and not in the ThreadContext. 
607#if FULL_SYSTEM 608 ThreadContext *src_tc = system->threadContexts[tid]; 609#else 610 ThreadContext *src_tc = tcBase(tid); 611#endif 612 613 //Bind Int Regs to Rename Map 614 for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { 615 PhysRegIndex phys_reg = freeList.getIntReg(); 616 617 renameMap[tid].setEntry(ireg,phys_reg); 618 scoreboard.setReg(phys_reg); 619 } 620 621 //Bind Float Regs to Rename Map 622 for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { 623 PhysRegIndex phys_reg = freeList.getFloatReg(); 624 625 renameMap[tid].setEntry(freg,phys_reg); 626 scoreboard.setReg(phys_reg); 627 } 628 629 //Copy Thread Data Into RegFile 630 //this->copyFromTC(tid); 631 632 //Set PC/NPC/NNPC 633 setPC(src_tc->readPC(), tid); 634 setNextPC(src_tc->readNextPC(), tid); 635#if ISA_HAS_DELAY_SLOT 636 setNextNPC(src_tc->readNextNPC(), tid); 637#endif 638 639 src_tc->setStatus(ThreadContext::Active); 640 641 activateContext(tid,1); 642 643 //Reset ROB/IQ/LSQ Entries 644 commit.rob->resetEntries(); 645 iew.resetEntries(); 646} 647 648template <class Impl> 649void 650FullO3CPU<Impl>::removeThread(unsigned tid) 651{ 652 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); 653 654 // Copy Thread Data From RegFile 655 // If thread is suspended, it might be re-allocated 656 //this->copyToTC(tid); 657 658 // Unbind Int Regs from Rename Map 659 for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { 660 PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); 661 662 scoreboard.unsetReg(phys_reg); 663 freeList.addReg(phys_reg); 664 } 665 666 // Unbind Float Regs from Rename Map 667 for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { 668 PhysRegIndex phys_reg = renameMap[tid].lookup(freg); 669 670 scoreboard.unsetReg(phys_reg); 671 freeList.addReg(phys_reg); 672 } 673 674 // Squash Throughout Pipeline 675 InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; 676 fetch.squash(0, squash_seq_num, true, tid); 677 decode.squash(tid); 678 
rename.squash(squash_seq_num, tid); 679 iew.squash(tid); 680 commit.rob->squash(squash_seq_num, tid); 681 682 assert(iew.ldstQueue.getCount(tid) == 0); 683 684 // Reset ROB/IQ/LSQ Entries 685 if (activeThreads.size() >= 1) { 686 commit.rob->resetEntries(); 687 iew.resetEntries(); 688 } 689} 690 691 692template <class Impl> 693void 694FullO3CPU<Impl>::activateWhenReady(int tid) 695{ 696 DPRINTF(O3CPU,"[tid:%i]: Checking if resources are available for incoming" 697 "(e.g. PhysRegs/ROB/IQ/LSQ) \n", 698 tid); 699 700 bool ready = true; 701 702 if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) { 703 DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " 704 "Phys. Int. Regs.\n", 705 tid); 706 ready = false; 707 } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) { 708 DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " 709 "Phys. Float. Regs.\n", 710 tid); 711 ready = false; 712 } else if (commit.rob->numFreeEntries() >= 713 commit.rob->entryAmount(activeThreads.size() + 1)) { 714 DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " 715 "ROB entries.\n", 716 tid); 717 ready = false; 718 } else if (iew.instQueue.numFreeEntries() >= 719 iew.instQueue.entryAmount(activeThreads.size() + 1)) { 720 DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " 721 "IQ entries.\n", 722 tid); 723 ready = false; 724 } else if (iew.ldstQueue.numFreeEntries() >= 725 iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { 726 DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " 727 "LSQ entries.\n", 728 tid); 729 ready = false; 730 } 731 732 if (ready) { 733 insertThread(tid); 734 735 contextSwitch = false; 736 737 cpuWaitList.remove(tid); 738 } else { 739 suspendContext(tid); 740 741 //blocks fetch 742 contextSwitch = true; 743 744 //@todo: dont always add to waitlist 745 //do waitlist 746 cpuWaitList.push_back(tid); 747 } 748} 749 750template <class Impl> 751void 752FullO3CPU<Impl>::serialize(std::ostream &os) 753{ 754 
SimObject::State so_state = SimObject::getState(); 755 SERIALIZE_ENUM(so_state); 756 BaseCPU::serialize(os); 757 nameOut(os, csprintf("%s.tickEvent", name())); 758 tickEvent.serialize(os); 759 760 // Use SimpleThread's ability to checkpoint to make it easier to 761 // write out the registers. Also make this static so it doesn't 762 // get instantiated multiple times (causes a panic in statistics). 763 static SimpleThread temp; 764 765 for (int i = 0; i < thread.size(); i++) { 766 nameOut(os, csprintf("%s.xc.%i", name(), i)); 767 temp.copyTC(thread[i]->getTC()); 768 temp.serialize(os); 769 } 770} 771 772template <class Impl> 773void 774FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) 775{ 776 SimObject::State so_state; 777 UNSERIALIZE_ENUM(so_state); 778 BaseCPU::unserialize(cp, section); 779 tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); 780 781 // Use SimpleThread's ability to checkpoint to make it easier to 782 // read in the registers. Also make this static so it doesn't 783 // get instantiated multiple times (causes a panic in statistics). 784 static SimpleThread temp; 785 786 for (int i = 0; i < thread.size(); i++) { 787 temp.copyTC(thread[i]->getTC()); 788 temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); 789 thread[i]->getTC()->copyArchRegs(temp.getTC()); 790 } 791} 792 793template <class Impl> 794unsigned int 795FullO3CPU<Impl>::drain(Event *drain_event) 796{ 797 DPRINTF(O3CPU, "Switching out\n"); 798 BaseCPU::switchOut(_sampler); 799 drainCount = 0; 800 fetch.drain(); 801 decode.drain(); 802 rename.drain(); 803 iew.drain(); 804 commit.drain(); 805 806 // Wake the CPU and record activity so everything can drain out if 807 // the CPU was not able to immediately drain. 
808 if (getState() != SimObject::Drained) { 809 // A bit of a hack...set the drainEvent after all the drain() 810 // calls have been made, that way if all of the stages drain 811 // immediately, the signalDrained() function knows not to call 812 // process on the drain event. 813 drainEvent = drain_event; 814 815 wakeCPU(); 816 activityRec.activity(); 817 818 return 1; 819 } else { 820 return 0; 821 } 822} 823 824template <class Impl> 825void 826FullO3CPU<Impl>::resume() 827{ 828 assert(system->getMemoryMode() == System::Timing); 829 fetch.resume(); 830 decode.resume(); 831 rename.resume(); 832 iew.resume(); 833 commit.resume(); 834 835 changeState(SimObject::Running); 836 837 if (_status == SwitchedOut || _status == Idle) 838 return; 839 840 if (!tickEvent.scheduled()) 841 tickEvent.schedule(curTick); 842 _status = Running; 843} 844 845template <class Impl> 846void 847FullO3CPU<Impl>::signalDrained() 848{ 849 if (++drainCount == NumStages) { 850 if (tickEvent.scheduled()) 851 tickEvent.squash(); 852 853 changeState(SimObject::Drained); 854 855 if (drainEvent) { 856 drainEvent->process(); 857 drainEvent = NULL; 858 } 859 } 860 assert(drainCount <= 5); 861} 862 863template <class Impl> 864void 865FullO3CPU<Impl>::switchOut() 866{ 867 fetch.switchOut(); 868 rename.switchOut(); 869 iew.switchOut(); 870 commit.switchOut(); 871 instList.clear(); 872 while (!removeList.empty()) { 873 removeList.pop(); 874 } 875 876 _status = SwitchedOut; 877#if USE_CHECKER 878 if (checker) 879 checker->switchOut(); 880#endif 881} 882 883template <class Impl> 884void 885FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) 886{ 887 // Flush out any old data from the time buffers. 
888 for (int i = 0; i < timeBuffer.getSize(); ++i) { 889 timeBuffer.advance(); 890 fetchQueue.advance(); 891 decodeQueue.advance(); 892 renameQueue.advance(); 893 iewQueue.advance(); 894 } 895 896 activityRec.reset(); 897 898 BaseCPU::takeOverFrom(oldCPU); 899 900 fetch.takeOverFrom(); 901 decode.takeOverFrom(); 902 rename.takeOverFrom(); 903 iew.takeOverFrom(); 904 commit.takeOverFrom(); 905 906 assert(!tickEvent.scheduled()); 907 908 // @todo: Figure out how to properly select the tid to put onto 909 // the active threads list. 910 int tid = 0; 911 912 list<unsigned>::iterator isActive = find( 913 activeThreads.begin(), activeThreads.end(), tid); 914 915 if (isActive == activeThreads.end()) { 916 //May Need to Re-code this if the delay variable is the delay 917 //needed for thread to activate 918 DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", 919 tid); 920 921 activeThreads.push_back(tid); 922 } 923 924 // Set all statuses to active, schedule the CPU's tick event. 925 // @todo: Fix up statuses so this is handled properly 926 for (int i = 0; i < threadContexts.size(); ++i) { 927 ThreadContext *tc = threadContexts[i]; 928 if (tc->status() == ThreadContext::Active && _status != Running) { 929 _status = Running; 930 tickEvent.schedule(curTick); 931 } 932 } 933 if (!tickEvent.scheduled()) 934 tickEvent.schedule(curTick); 935} 936 937template <class Impl> 938void 939FullO3CPU<Impl>::serialize(std::ostream &os) 940{ 941 BaseCPU::serialize(os); 942 nameOut(os, csprintf("%s.tickEvent", name())); 943 tickEvent.serialize(os); 944 945 // Use SimpleThread's ability to checkpoint to make it easier to 946 // write out the registers. Also make this static so it doesn't 947 // get instantiated multiple times (causes a panic in statistics). 
948 static CPUExecContext temp; 949 950 for (int i = 0; i < thread.size(); i++) { 951 nameOut(os, csprintf("%s.xc.%i", name(), i)); 952 temp.copyXC(thread[i]->getXCProxy()); 953 temp.serialize(os); 954 } 955} 956 957template <class Impl> 958void 959FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) 960{ 961 BaseCPU::unserialize(cp, section); 962 tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); 963 964 // Use SimpleThread's ability to checkpoint to make it easier to 965 // read in the registers. Also make this static so it doesn't 966 // get instantiated multiple times (causes a panic in statistics). 967 static CPUExecContext temp; 968 969 for (int i = 0; i < thread.size(); i++) { 970 temp.copyXC(thread[i]->getXCProxy()); 971 temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); 972 thread[i]->getXCProxy()->copyArchRegs(temp.getProxy()); 973 } 974} 975 976template <class Impl> 977uint64_t 978FullO3CPU<Impl>::readIntReg(int reg_idx) 979{ 980 return regFile.readIntReg(reg_idx); 981} 982 983template <class Impl> 984FloatReg 985FullO3CPU<Impl>::readFloatReg(int reg_idx, int width) 986{ 987 return regFile.readFloatReg(reg_idx, width); 988} 989 990template <class Impl> 991FloatReg 992FullO3CPU<Impl>::readFloatReg(int reg_idx) 993{ 994 return regFile.readFloatReg(reg_idx); 995} 996 997template <class Impl> 998FloatRegBits 999FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width) 1000{ 1001 return regFile.readFloatRegBits(reg_idx, width); 1002} 1003 1004template <class Impl> 1005FloatRegBits 1006FullO3CPU<Impl>::readFloatRegBits(int reg_idx) 1007{ 1008 return regFile.readFloatRegBits(reg_idx); 1009} 1010 1011template <class Impl> 1012void 1013FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val) 1014{ 1015 regFile.setIntReg(reg_idx, val); 1016} 1017 1018template <class Impl> 1019void 1020FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val, int width) 1021{ 1022 regFile.setFloatReg(reg_idx, val, width); 1023} 1024 1025template <class 
Impl> 1026void 1027FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val) 1028{ 1029 regFile.setFloatReg(reg_idx, val); 1030} 1031 1032template <class Impl> 1033void 1034FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val, int width) 1035{ 1036 regFile.setFloatRegBits(reg_idx, val, width); 1037} 1038 1039template <class Impl> 1040void 1041FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val) 1042{ 1043 regFile.setFloatRegBits(reg_idx, val); 1044} 1045 1046template <class Impl> 1047uint64_t 1048FullO3CPU<Impl>::readArchIntReg(int reg_idx, unsigned tid) 1049{ 1050 PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); 1051 1052 return regFile.readIntReg(phys_reg); 1053} 1054 1055template <class Impl> 1056float 1057FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid) 1058{ 1059 int idx = reg_idx + TheISA::FP_Base_DepTag; 1060 PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); 1061 1062 return regFile.readFloatReg(phys_reg); 1063} 1064 1065template <class Impl> 1066double 1067FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid) 1068{ 1069 int idx = reg_idx + TheISA::FP_Base_DepTag; 1070 PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); 1071 1072 return regFile.readFloatReg(phys_reg, 64); 1073} 1074 1075template <class Impl> 1076uint64_t 1077FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid) 1078{ 1079 int idx = reg_idx + TheISA::FP_Base_DepTag; 1080 PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); 1081 1082 return regFile.readFloatRegBits(phys_reg); 1083} 1084 1085template <class Impl> 1086void 1087FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, unsigned tid) 1088{ 1089 PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); 1090 1091 regFile.setIntReg(phys_reg, val); 1092} 1093 1094template <class Impl> 1095void 1096FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) 1097{ 1098 int idx = reg_idx + TheISA::FP_Base_DepTag; 1099 
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); 1100 1101 regFile.setFloatReg(phys_reg, val); 1102} 1103 1104template <class Impl> 1105void 1106FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) 1107{ 1108 int idx = reg_idx + TheISA::FP_Base_DepTag; 1109 PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); 1110 1111 regFile.setFloatReg(phys_reg, val, 64); 1112} 1113 1114template <class Impl> 1115void 1116FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) 1117{ 1118 int idx = reg_idx + TheISA::FP_Base_DepTag; 1119 PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); 1120 1121 regFile.setFloatRegBits(phys_reg, val); 1122} 1123 1124template <class Impl> 1125uint64_t 1126FullO3CPU<Impl>::readPC(unsigned tid) 1127{ 1128 return commit.readPC(tid); 1129} 1130 1131template <class Impl> 1132void 1133FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid) 1134{ 1135 commit.setPC(new_PC, tid); 1136} 1137 1138template <class Impl> 1139uint64_t 1140FullO3CPU<Impl>::readNextPC(unsigned tid) 1141{ 1142 return commit.readNextPC(tid); 1143} 1144 1145template <class Impl> 1146void 1147FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid) 1148{ 1149 commit.setNextPC(val, tid); 1150} 1151 1152template <class Impl> 1153uint64_t 1154FullO3CPU<Impl>::readNextNPC(unsigned tid) 1155{ 1156 return commit.readNextNPC(tid); 1157} 1158 1159template <class Impl> 1160void 1161FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid) 1162{ 1163 commit.setNextNPC(val, tid); 1164} 1165 1166template <class Impl> 1167typename FullO3CPU<Impl>::ListIt 1168FullO3CPU<Impl>::addInst(DynInstPtr &inst) 1169{ 1170 instList.push_back(inst); 1171 1172 return --(instList.end()); 1173} 1174 1175template <class Impl> 1176void 1177FullO3CPU<Impl>::instDone(unsigned tid) 1178{ 1179 // Keep an instruction count. 
1180 thread[tid]->numInst++; 1181 thread[tid]->numInsts++; 1182 committedInsts[tid]++; 1183 totalCommittedInsts++; 1184 1185 // Check for instruction-count-based events. 1186 comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); 1187} 1188 1189template <class Impl> 1190void 1191FullO3CPU<Impl>::addToRemoveList(DynInstPtr &inst) 1192{ 1193 removeInstsThisCycle = true; 1194 1195 removeList.push(inst->getInstListIt()); 1196} 1197 1198template <class Impl> 1199void 1200FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) 1201{ 1202 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %#x " 1203 "[sn:%lli]\n", 1204 inst->threadNumber, inst->readPC(), inst->seqNum); 1205 1206 removeInstsThisCycle = true; 1207 1208 // Remove the front instruction. 1209 removeList.push(inst->getInstListIt()); 1210} 1211 1212template <class Impl> 1213void 1214FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid, 1215 bool squash_delay_slot, 1216 const InstSeqNum &delay_slot_seq_num) 1217{ 1218 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" 1219 " list.\n", tid); 1220 1221 ListIt end_it; 1222 1223 bool rob_empty = false; 1224 1225 if (instList.empty()) { 1226 return; 1227 } else if (rob.isEmpty(/*tid*/)) { 1228 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); 1229 end_it = instList.begin(); 1230 rob_empty = true; 1231 } else { 1232 end_it = (rob.readTailInst(tid))->getInstListIt(); 1233 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); 1234 } 1235 1236 removeInstsThisCycle = true; 1237 1238 ListIt inst_it = instList.end(); 1239 1240 inst_it--; 1241 1242 // Walk through the instruction list, removing any instructions 1243 // that were inserted after the given instruction iterator, end_it. 
1244 while (inst_it != end_it) { 1245 assert(!instList.empty()); 1246 1247#if ISA_HAS_DELAY_SLOT 1248 if(!squash_delay_slot && 1249 delay_slot_seq_num >= (*inst_it)->seqNum) { 1250 break; 1251 } 1252#endif 1253 squashInstIt(inst_it, tid); 1254 1255 inst_it--; 1256 } 1257 1258 // If the ROB was empty, then we actually need to remove the first 1259 // instruction as well. 1260 if (rob_empty) { 1261 squashInstIt(inst_it, tid); 1262 } 1263} 1264 1265template <class Impl> 1266void 1267FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, 1268 unsigned tid) 1269{ 1270 assert(!instList.empty()); 1271 1272 removeInstsThisCycle = true; 1273 1274 ListIt inst_iter = instList.end(); 1275 1276 inst_iter--; 1277 1278 DPRINTF(O3CPU, "Deleting instructions from instruction " 1279 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", 1280 tid, seq_num, (*inst_iter)->seqNum); 1281 1282 while ((*inst_iter)->seqNum > seq_num) { 1283 1284 bool break_loop = (inst_iter == instList.begin()); 1285 1286 squashInstIt(inst_iter, tid); 1287 1288 inst_iter--; 1289 1290 if (break_loop) 1291 break; 1292 } 1293} 1294 1295template <class Impl> 1296inline void 1297FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid) 1298{ 1299 if ((*instIt)->threadNumber == tid) { 1300 DPRINTF(O3CPU, "Squashing instruction, " 1301 "[tid:%i] [sn:%lli] PC %#x\n", 1302 (*instIt)->threadNumber, 1303 (*instIt)->seqNum, 1304 (*instIt)->readPC()); 1305 1306 // Mark it as squashed. 1307 (*instIt)->setSquashed(); 1308 1309 // @todo: Formulate a consistent method for deleting 1310 // instructions from the instruction list 1311 // Remove the instruction from the list. 
1312 removeList.push(instIt); 1313 } 1314} 1315 1316template <class Impl> 1317void 1318FullO3CPU<Impl>::cleanUpRemovedInsts() 1319{ 1320 while (!removeList.empty()) { 1321 DPRINTF(O3CPU, "Removing instruction, " 1322 "[tid:%i] [sn:%lli] PC %#x\n", 1323 (*removeList.front())->threadNumber, 1324 (*removeList.front())->seqNum, 1325 (*removeList.front())->readPC()); 1326 1327 instList.erase(removeList.front()); 1328 1329 removeList.pop(); 1330 } 1331 1332 removeInstsThisCycle = false; 1333} 1334/* 1335template <class Impl> 1336void 1337FullO3CPU<Impl>::removeAllInsts() 1338{ 1339 instList.clear(); 1340} 1341*/ 1342template <class Impl> 1343void 1344FullO3CPU<Impl>::dumpInsts() 1345{ 1346 int num = 0; 1347 1348 ListIt inst_list_it = instList.begin(); 1349 1350 cprintf("Dumping Instruction List\n"); 1351 1352 while (inst_list_it != instList.end()) { 1353 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" 1354 "Squashed:%i\n\n", 1355 num, (*inst_list_it)->readPC(), (*inst_list_it)->threadNumber, 1356 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), 1357 (*inst_list_it)->isSquashed()); 1358 inst_list_it++; 1359 ++num; 1360 } 1361} 1362/* 1363template <class Impl> 1364void 1365FullO3CPU<Impl>::wakeDependents(DynInstPtr &inst) 1366{ 1367 iew.wakeDependents(inst); 1368} 1369*/ 1370template <class Impl> 1371void 1372FullO3CPU<Impl>::wakeCPU() 1373{ 1374 if (activityRec.active() || tickEvent.scheduled()) { 1375 DPRINTF(Activity, "CPU already running.\n"); 1376 return; 1377 } 1378 1379 DPRINTF(Activity, "Waking up CPU\n"); 1380 1381 idleCycles += (curTick - 1) - lastRunningCycle; 1382 1383 tickEvent.schedule(curTick); 1384} 1385 1386template <class Impl> 1387int 1388FullO3CPU<Impl>::getFreeTid() 1389{ 1390 for (int i=0; i < numThreads; i++) { 1391 if (!tids[i]) { 1392 tids[i] = true; 1393 return i; 1394 } 1395 } 1396 1397 return -1; 1398} 1399 1400template <class Impl> 1401void 1402FullO3CPU<Impl>::doContextSwitch() 1403{ 1404 if (contextSwitch) { 1405 1406 
//ADD CODE TO DEACTIVE THREAD HERE (???) 1407 1408 for (int tid=0; tid < cpuWaitList.size(); tid++) { 1409 activateWhenReady(tid); 1410 } 1411 1412 if (cpuWaitList.size() == 0) 1413 contextSwitch = true; 1414 } 1415} 1416 1417template <class Impl> 1418void 1419FullO3CPU<Impl>::updateThreadPriority() 1420{ 1421 if (activeThreads.size() > 1) 1422 { 1423 //DEFAULT TO ROUND ROBIN SCHEME 1424 //e.g. Move highest priority to end of thread list 1425 list<unsigned>::iterator list_begin = activeThreads.begin(); 1426 list<unsigned>::iterator list_end = activeThreads.end(); 1427 1428 unsigned high_thread = *list_begin; 1429 1430 activeThreads.erase(list_begin); 1431 1432 activeThreads.push_back(high_thread); 1433 } 1434} 1435 1436// Forward declaration of FullO3CPU. 1437template class FullO3CPU<O3CPUImpl>; 1438