inst_queue.hh revision 9184
1/* 2 * Copyright (c) 2011 ARM Limited 3 * All rights reserved. 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2004-2006 The Regents of The University of Michigan 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Kevin Lim 41 */ 42 43#ifndef __CPU_O3_INST_QUEUE_HH__ 44#define __CPU_O3_INST_QUEUE_HH__ 45 46#include <list> 47#include <map> 48#include <queue> 49#include <vector> 50 51#include "base/statistics.hh" 52#include "base/types.hh" 53#include "cpu/o3/dep_graph.hh" 54#include "cpu/inst_seq.hh" 55#include "cpu/op_class.hh" 56#include "cpu/timebuf.hh" 57#include "sim/eventq.hh" 58 59struct DerivO3CPUParams; 60class FUPool; 61class MemInterface; 62 63/** 64 * A standard instruction queue class. It holds ready instructions, in 65 * order, in seperate priority queues to facilitate the scheduling of 66 * instructions. The IQ uses a separate linked list to track dependencies. 67 * Similar to the rename map and the free list, it expects that 68 * floating point registers have their indices start after the integer 69 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 70 * and 96-191 are fp). This remains true even for both logical and 71 * physical register indices. The IQ depends on the memory dependence unit to 72 * track when memory operations are ready in terms of ordering; register 73 * dependencies are tracked normally. Right now the IQ also handles the 74 * execution timing; this is mainly to allow back-to-back scheduling without 75 * requiring IEW to be able to peek into the IQ. At the end of the execution 76 * latency, the instruction is put into the queue to execute, where it will 77 * have the execute() function called on it. 78 * @todo: Make IQ able to handle multiple FU pools. 79 */ 80template <class Impl> 81class InstructionQueue 82{ 83 public: 84 //Typedefs from the Impl. 85 typedef typename Impl::O3CPU O3CPU; 86 typedef typename Impl::DynInstPtr DynInstPtr; 87 88 typedef typename Impl::CPUPol::IEW IEW; 89 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 90 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 91 typedef typename Impl::CPUPol::TimeStruct TimeStruct; 92 93 // Typedef of iterator through the list of instructions. 94 typedef typename std::list<DynInstPtr>::iterator ListIt; 95 96 /** FU completion event class. */ 97 class FUCompletion : public Event { 98 private: 99 /** Executing instruction. */ 100 DynInstPtr inst; 101 102 /** Index of the FU used for executing. */ 103 int fuIdx; 104 105 /** Pointer back to the instruction queue. */ 106 InstructionQueue<Impl> *iqPtr; 107 108 /** Should the FU be added to the list to be freed upon 109 * completing this event. 110 */ 111 bool freeFU; 112 113 public: 114 /** Construct a FU completion event. */ 115 FUCompletion(DynInstPtr &_inst, int fu_idx, 116 InstructionQueue<Impl> *iq_ptr); 117 118 virtual void process(); 119 virtual const char *description() const; 120 void setFreeFU() { freeFU = true; } 121 }; 122 123 /** Constructs an IQ. */ 124 InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params); 125 126 /** Destructs the IQ. */ 127 ~InstructionQueue(); 128 129 /** Returns the name of the IQ. */ 130 std::string name() const; 131 132 /** Registers statistics. */ 133 void regStats(); 134 135 /** Resets all instruction queue state. */ 136 void resetState(); 137 138 /** Sets active threads list. */ 139 void setActiveThreads(std::list<ThreadID> *at_ptr); 140 141 /** Sets the timer buffer between issue and execute. */ 142 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 143 144 /** Sets the global time buffer. */ 145 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 146 147 /** Switches out the instruction queue. */ 148 void switchOut(); 149 150 /** Takes over execution from another CPU's thread. */ 151 void takeOverFrom(); 152 153 /** Returns if the IQ is switched out. */ 154 bool isSwitchedOut() { return switchedOut; } 155 156 /** Number of entries needed for given amount of threads. */ 157 int entryAmount(ThreadID num_threads); 158 159 /** Resets max entries for all threads. */ 160 void resetEntries(); 161 162 /** Returns total number of free entries. */ 163 unsigned numFreeEntries(); 164 165 /** Returns number of free entries for a thread. */ 166 unsigned numFreeEntries(ThreadID tid); 167 168 /** Returns whether or not the IQ is full. */ 169 bool isFull(); 170 171 /** Returns whether or not the IQ is full for a specific thread. */ 172 bool isFull(ThreadID tid); 173 174 /** Returns if there are any ready instructions in the IQ. */ 175 bool hasReadyInsts(); 176 177 /** Inserts a new instruction into the IQ. */ 178 void insert(DynInstPtr &new_inst); 179 180 /** Inserts a new, non-speculative instruction into the IQ. */ 181 void insertNonSpec(DynInstPtr &new_inst); 182 183 /** Inserts a memory or write barrier into the IQ to make sure 184 * loads and stores are ordered properly. 185 */ 186 void insertBarrier(DynInstPtr &barr_inst); 187 188 /** Returns the oldest scheduled instruction, and removes it from 189 * the list of instructions waiting to execute. 190 */ 191 DynInstPtr getInstToExecute(); 192 193 /** Returns a memory instruction that was referred due to a delayed DTB 194 * translation if it is now ready to execute. 195 */ 196 DynInstPtr getDeferredMemInstToExecute(); 197 198 /** 199 * Records the instruction as the producer of a register without 200 * adding it to the rest of the IQ. 201 */ 202 void recordProducer(DynInstPtr &inst) 203 { addToProducers(inst); } 204 205 /** Process FU completion event. */ 206 void processFUCompletion(DynInstPtr &inst, int fu_idx); 207 208 /** 209 * Schedules ready instructions, adding the ready ones (oldest first) to 210 * the queue to execute. 211 */ 212 void scheduleReadyInsts(); 213 214 /** Schedules a single specific non-speculative instruction. */ 215 void scheduleNonSpec(const InstSeqNum &inst); 216 217 /** 218 * Commits all instructions up to and including the given sequence number, 219 * for a specific thread. 220 */ 221 void commit(const InstSeqNum &inst, ThreadID tid = 0); 222 223 /** Wakes all dependents of a completed instruction. */ 224 int wakeDependents(DynInstPtr &completed_inst); 225 226 /** Adds a ready memory instruction to the ready list. */ 227 void addReadyMemInst(DynInstPtr &ready_inst); 228 229 /** 230 * Reschedules a memory instruction. It will be ready to issue once 231 * replayMemInst() is called. 232 */ 233 void rescheduleMemInst(DynInstPtr &resched_inst); 234 235 /** Replays a memory instruction. It must be rescheduled first. */ 236 void replayMemInst(DynInstPtr &replay_inst); 237 238 /** Completes a memory operation. */ 239 void completeMemInst(DynInstPtr &completed_inst); 240 241 /** 242 * Defers a memory instruction when its DTB translation incurs a hw 243 * page table walk. 244 */ 245 void deferMemInst(DynInstPtr &deferred_inst); 246 247 /** Indicates an ordering violation between a store and a load. */ 248 void violation(DynInstPtr &store, DynInstPtr &faulting_load); 249 250 /** 251 * Squashes instructions for a thread. Squashing information is obtained 252 * from the time buffer. 253 */ 254 void squash(ThreadID tid); 255 256 /** Returns the number of used entries for a thread. */ 257 unsigned getCount(ThreadID tid) { return count[tid]; }; 258 259 /** Debug function to print all instructions. */ 260 void printInsts(); 261 262 private: 263 /** Does the actual squashing. */ 264 void doSquash(ThreadID tid); 265 266 ///////////////////////// 267 // Various pointers 268 ///////////////////////// 269 270 /** Pointer to the CPU. */ 271 O3CPU *cpu; 272 273 /** Cache interface. */ 274 MemInterface *dcacheInterface; 275 276 /** Pointer to IEW stage. */ 277 IEW *iewStage; 278 279 /** The memory dependence unit, which tracks/predicts memory dependences 280 * between instructions. 281 */ 282 MemDepUnit memDepUnit[Impl::MaxThreads]; 283 284 /** The queue to the execute stage. Issued instructions will be written 285 * into it. 286 */ 287 TimeBuffer<IssueStruct> *issueToExecuteQueue; 288 289 /** The backwards time buffer. */ 290 TimeBuffer<TimeStruct> *timeBuffer; 291 292 /** Wire to read information from timebuffer. */ 293 typename TimeBuffer<TimeStruct>::wire fromCommit; 294 295 /** Function unit pool. */ 296 FUPool *fuPool; 297 298 ////////////////////////////////////// 299 // Instruction lists, ready queues, and ordering 300 ////////////////////////////////////// 301 302 /** List of all the instructions in the IQ (some of which may be issued). */ 303 std::list<DynInstPtr> instList[Impl::MaxThreads]; 304 305 /** List of instructions that are ready to be executed. */ 306 std::list<DynInstPtr> instsToExecute; 307 308 /** List of instructions waiting for their DTB translation to 309 * complete (hw page table walk in progress). 310 */ 311 std::list<DynInstPtr> deferredMemInsts; 312 313 /** 314 * Struct for comparing entries to be added to the priority queue. 315 * This gives reverse ordering to the instructions in terms of 316 * sequence numbers: the instructions with smaller sequence 317 * numbers (and hence are older) will be at the top of the 318 * priority queue. 319 */ 320 struct pqCompare { 321 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 322 { 323 return lhs->seqNum > rhs->seqNum; 324 } 325 }; 326 327 typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> 328 ReadyInstQueue; 329 330 /** List of ready instructions, per op class. They are separated by op 331 * class to allow for easy mapping to FUs. 332 */ 333 ReadyInstQueue readyInsts[Num_OpClasses]; 334 335 /** List of non-speculative instructions that will be scheduled 336 * once the IQ gets a signal from commit. While it's redundant to 337 * have the key be a part of the value (the sequence number is stored 338 * inside of DynInst), when these instructions are woken up only 339 * the sequence number will be available. Thus it is most efficient to be 340 * able to search by the sequence number alone. 341 */ 342 std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 343 344 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt; 345 346 /** Entry for the list age ordering by op class. */ 347 struct ListOrderEntry { 348 OpClass queueType; 349 InstSeqNum oldestInst; 350 }; 351 352 /** List that contains the age order of the oldest instruction of each 353 * ready queue. Used to select the oldest instruction available 354 * among op classes. 355 * @todo: Might be better to just move these entries around instead 356 * of creating new ones every time the position changes due to an 357 * instruction issuing. Not sure std::list supports this. 358 */ 359 std::list<ListOrderEntry> listOrder; 360 361 typedef typename std::list<ListOrderEntry>::iterator ListOrderIt; 362 363 /** Tracks if each ready queue is on the age order list. */ 364 bool queueOnList[Num_OpClasses]; 365 366 /** Iterators of each ready queue. Points to their spot in the age order 367 * list. 368 */ 369 ListOrderIt readyIt[Num_OpClasses]; 370 371 /** Add an op class to the age order list. */ 372 void addToOrderList(OpClass op_class); 373 374 /** 375 * Called when the oldest instruction has been removed from a ready queue; 376 * this places that ready queue into the proper spot in the age order list. 377 */ 378 void moveToYoungerInst(ListOrderIt age_order_it); 379 380 DependencyGraph<DynInstPtr> dependGraph; 381 382 ////////////////////////////////////// 383 // Various parameters 384 ////////////////////////////////////// 385 386 /** IQ Resource Sharing Policy */ 387 enum IQPolicy { 388 Dynamic, 389 Partitioned, 390 Threshold 391 }; 392 393 /** IQ sharing policy for SMT. */ 394 IQPolicy iqPolicy; 395 396 /** Number of Total Threads*/ 397 ThreadID numThreads; 398 399 /** Pointer to list of active threads. */ 400 std::list<ThreadID> *activeThreads; 401 402 /** Per Thread IQ count */ 403 unsigned count[Impl::MaxThreads]; 404 405 /** Max IQ Entries Per Thread */ 406 unsigned maxEntries[Impl::MaxThreads]; 407 408 /** Number of free IQ entries left. */ 409 unsigned freeEntries; 410 411 /** The number of entries in the instruction queue. */ 412 unsigned numEntries; 413 414 /** The total number of instructions that can be issued in one cycle. */ 415 unsigned totalWidth; 416 417 /** The number of physical registers in the CPU. */ 418 unsigned numPhysRegs; 419 420 /** The number of physical integer registers in the CPU. */ 421 unsigned numPhysIntRegs; 422 423 /** The number of floating point registers in the CPU. */ 424 unsigned numPhysFloatRegs; 425 426 /** Delay between commit stage and the IQ. 427 * @todo: Make there be a distinction between the delays within IEW. 428 */ 429 Cycles commitToIEWDelay; 430 431 /** Is the IQ switched out. */ 432 bool switchedOut; 433 434 /** The sequence number of the squashed instruction. */ 435 InstSeqNum squashedSeqNum[Impl::MaxThreads]; 436 437 /** A cache of the recently woken registers. It is 1 if the register 438 * has been woken up recently, and 0 if the register has been added 439 * to the dependency graph and has not yet received its value. It 440 * is basically a secondary scoreboard, and should pretty much mirror 441 * the scoreboard that exists in the rename map. 442 */ 443 std::vector<bool> regScoreboard; 444 445 /** Adds an instruction to the dependency graph, as a consumer. */ 446 bool addToDependents(DynInstPtr &new_inst); 447 448 /** Adds an instruction to the dependency graph, as a producer. */ 449 void addToProducers(DynInstPtr &new_inst); 450 451 /** Moves an instruction to the ready queue if it is ready. */ 452 void addIfReady(DynInstPtr &inst); 453 454 /** Debugging function to count how many entries are in the IQ. It does 455 * a linear walk through the instructions, so do not call this function 456 * during normal execution. 457 */ 458 int countInsts(); 459 460 /** Debugging function to dump all the list sizes, as well as print 461 * out the list of nonspeculative instructions. Should not be used 462 * in any other capacity, but it has no harmful sideaffects. 463 */ 464 void dumpLists(); 465 466 /** Debugging function to dump out all instructions that are in the 467 * IQ. 468 */ 469 void dumpInsts(); 470 471 /** Stat for number of instructions added. */ 472 Stats::Scalar iqInstsAdded; 473 /** Stat for number of non-speculative instructions added. */ 474 Stats::Scalar iqNonSpecInstsAdded; 475 476 Stats::Scalar iqInstsIssued; 477 /** Stat for number of integer instructions issued. */ 478 Stats::Scalar iqIntInstsIssued; 479 /** Stat for number of floating point instructions issued. */ 480 Stats::Scalar iqFloatInstsIssued; 481 /** Stat for number of branch instructions issued. */ 482 Stats::Scalar iqBranchInstsIssued; 483 /** Stat for number of memory instructions issued. */ 484 Stats::Scalar iqMemInstsIssued; 485 /** Stat for number of miscellaneous instructions issued. */ 486 Stats::Scalar iqMiscInstsIssued; 487 /** Stat for number of squashed instructions that were ready to issue. */ 488 Stats::Scalar iqSquashedInstsIssued; 489 /** Stat for number of squashed instructions examined when squashing. */ 490 Stats::Scalar iqSquashedInstsExamined; 491 /** Stat for number of squashed instruction operands examined when 492 * squashing. 493 */ 494 Stats::Scalar iqSquashedOperandsExamined; 495 /** Stat for number of non-speculative instructions removed due to a squash. 496 */ 497 Stats::Scalar iqSquashedNonSpecRemoved; 498 // Also include number of instructions rescheduled and replayed. 499 500 /** Distribution of number of instructions in the queue. 501 * @todo: Need to create struct to track the entry time for each 502 * instruction. */ 503// Stats::VectorDistribution queueResDist; 504 /** Distribution of the number of instructions issued. */ 505 Stats::Distribution numIssuedDist; 506 /** Distribution of the cycles it takes to issue an instruction. 507 * @todo: Need to create struct to track the ready time for each 508 * instruction. */ 509// Stats::VectorDistribution issueDelayDist; 510 511 /** Number of times an instruction could not be issued because a 512 * FU was busy. 513 */ 514 Stats::Vector statFuBusy; 515// Stats::Vector dist_unissued; 516 /** Stat for total number issued for each instruction type. */ 517 Stats::Vector2d statIssuedInstType; 518 519 /** Number of instructions issued per cycle. */ 520 Stats::Formula issueRate; 521 522 /** Number of times the FU was busy. */ 523 Stats::Vector fuBusy; 524 /** Number of times the FU was busy per instruction issued. */ 525 Stats::Formula fuBusyRate; 526 public: 527 Stats::Scalar intInstQueueReads; 528 Stats::Scalar intInstQueueWrites; 529 Stats::Scalar intInstQueueWakeupAccesses; 530 Stats::Scalar fpInstQueueReads; 531 Stats::Scalar fpInstQueueWrites; 532 Stats::Scalar fpInstQueueWakeupQccesses; 533 534 Stats::Scalar intAluAccesses; 535 Stats::Scalar fpAluAccesses; 536}; 537 538#endif //__CPU_O3_INST_QUEUE_HH__ 539