inst_queue.hh revision 5999
1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Kevin Lim 29 */ 30 31#ifndef __CPU_O3_INST_QUEUE_HH__ 32#define __CPU_O3_INST_QUEUE_HH__ 33 34#include <list> 35#include <map> 36#include <queue> 37#include <vector> 38 39#include "base/statistics.hh" 40#include "base/timebuf.hh" 41#include "cpu/inst_seq.hh" 42#include "cpu/o3/dep_graph.hh" 43#include "cpu/op_class.hh" 44#include "sim/eventq.hh" 45#include "sim/host.hh" 46 47class DerivO3CPUParams; 48class FUPool; 49class MemInterface; 50 51/** 52 * A standard instruction queue class. It holds ready instructions, in 53 * order, in seperate priority queues to facilitate the scheduling of 54 * instructions. The IQ uses a separate linked list to track dependencies. 55 * Similar to the rename map and the free list, it expects that 56 * floating point registers have their indices start after the integer 57 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 58 * and 96-191 are fp). This remains true even for both logical and 59 * physical register indices. The IQ depends on the memory dependence unit to 60 * track when memory operations are ready in terms of ordering; register 61 * dependencies are tracked normally. Right now the IQ also handles the 62 * execution timing; this is mainly to allow back-to-back scheduling without 63 * requiring IEW to be able to peek into the IQ. At the end of the execution 64 * latency, the instruction is put into the queue to execute, where it will 65 * have the execute() function called on it. 66 * @todo: Make IQ able to handle multiple FU pools. 67 */ 68template <class Impl> 69class InstructionQueue 70{ 71 public: 72 //Typedefs from the Impl. 73 typedef typename Impl::O3CPU O3CPU; 74 typedef typename Impl::DynInstPtr DynInstPtr; 75 76 typedef typename Impl::CPUPol::IEW IEW; 77 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 78 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 79 typedef typename Impl::CPUPol::TimeStruct TimeStruct; 80 81 // Typedef of iterator through the list of instructions. 82 typedef typename std::list<DynInstPtr>::iterator ListIt; 83 84 friend class Impl::O3CPU; 85 86 /** FU completion event class. */ 87 class FUCompletion : public Event { 88 private: 89 /** Executing instruction. */ 90 DynInstPtr inst; 91 92 /** Index of the FU used for executing. */ 93 int fuIdx; 94 95 /** Pointer back to the instruction queue. */ 96 InstructionQueue<Impl> *iqPtr; 97 98 /** Should the FU be added to the list to be freed upon 99 * completing this event. 100 */ 101 bool freeFU; 102 103 public: 104 /** Construct a FU completion event. */ 105 FUCompletion(DynInstPtr &_inst, int fu_idx, 106 InstructionQueue<Impl> *iq_ptr); 107 108 virtual void process(); 109 virtual const char *description() const; 110 void setFreeFU() { freeFU = true; } 111 }; 112 113 /** Constructs an IQ. */ 114 InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params); 115 116 /** Destructs the IQ. */ 117 ~InstructionQueue(); 118 119 /** Returns the name of the IQ. */ 120 std::string name() const; 121 122 /** Registers statistics. */ 123 void regStats(); 124 125 /** Resets all instruction queue state. */ 126 void resetState(); 127 128 /** Sets active threads list. */ 129 void setActiveThreads(std::list<unsigned> *at_ptr); 130 131 /** Sets the timer buffer between issue and execute. */ 132 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 133 134 /** Sets the global time buffer. */ 135 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 136 137 /** Switches out the instruction queue. */ 138 void switchOut(); 139 140 /** Takes over execution from another CPU's thread. */ 141 void takeOverFrom(); 142 143 /** Returns if the IQ is switched out. */ 144 bool isSwitchedOut() { return switchedOut; } 145 146 /** Number of entries needed for given amount of threads. */ 147 int entryAmount(int num_threads); 148 149 /** Resets max entries for all threads. */ 150 void resetEntries(); 151 152 /** Returns total number of free entries. */ 153 unsigned numFreeEntries(); 154 155 /** Returns number of free entries for a thread. */ 156 unsigned numFreeEntries(unsigned tid); 157 158 /** Returns whether or not the IQ is full. */ 159 bool isFull(); 160 161 /** Returns whether or not the IQ is full for a specific thread. */ 162 bool isFull(unsigned tid); 163 164 /** Returns if there are any ready instructions in the IQ. */ 165 bool hasReadyInsts(); 166 167 /** Inserts a new instruction into the IQ. */ 168 void insert(DynInstPtr &new_inst); 169 170 /** Inserts a new, non-speculative instruction into the IQ. */ 171 void insertNonSpec(DynInstPtr &new_inst); 172 173 /** Inserts a memory or write barrier into the IQ to make sure 174 * loads and stores are ordered properly. 175 */ 176 void insertBarrier(DynInstPtr &barr_inst); 177 178 /** Returns the oldest scheduled instruction, and removes it from 179 * the list of instructions waiting to execute. 180 */ 181 DynInstPtr getInstToExecute(); 182 183 /** 184 * Records the instruction as the producer of a register without 185 * adding it to the rest of the IQ. 186 */ 187 void recordProducer(DynInstPtr &inst) 188 { addToProducers(inst); } 189 190 /** Process FU completion event. */ 191 void processFUCompletion(DynInstPtr &inst, int fu_idx); 192 193 /** 194 * Schedules ready instructions, adding the ready ones (oldest first) to 195 * the queue to execute. 196 */ 197 void scheduleReadyInsts(); 198 199 /** Schedules a single specific non-speculative instruction. */ 200 void scheduleNonSpec(const InstSeqNum &inst); 201 202 /** 203 * Commits all instructions up to and including the given sequence number, 204 * for a specific thread. 205 */ 206 void commit(const InstSeqNum &inst, unsigned tid = 0); 207 208 /** Wakes all dependents of a completed instruction. */ 209 int wakeDependents(DynInstPtr &completed_inst); 210 211 /** Adds a ready memory instruction to the ready list. */ 212 void addReadyMemInst(DynInstPtr &ready_inst); 213 214 /** 215 * Reschedules a memory instruction. It will be ready to issue once 216 * replayMemInst() is called. 217 */ 218 void rescheduleMemInst(DynInstPtr &resched_inst); 219 220 /** Replays a memory instruction. It must be rescheduled first. */ 221 void replayMemInst(DynInstPtr &replay_inst); 222 223 /** Completes a memory operation. */ 224 void completeMemInst(DynInstPtr &completed_inst); 225 226 /** Indicates an ordering violation between a store and a load. */ 227 void violation(DynInstPtr &store, DynInstPtr &faulting_load); 228 229 /** 230 * Squashes instructions for a thread. Squashing information is obtained 231 * from the time buffer. 232 */ 233 void squash(unsigned tid); 234 235 /** Returns the number of used entries for a thread. */ 236 unsigned getCount(unsigned tid) { return count[tid]; }; 237 238 /** Debug function to print all instructions. */ 239 void printInsts(); 240 241 private: 242 /** Does the actual squashing. */ 243 void doSquash(unsigned tid); 244 245 ///////////////////////// 246 // Various pointers 247 ///////////////////////// 248 249 /** Pointer to the CPU. */ 250 O3CPU *cpu; 251 252 /** Cache interface. */ 253 MemInterface *dcacheInterface; 254 255 /** Pointer to IEW stage. */ 256 IEW *iewStage; 257 258 /** The memory dependence unit, which tracks/predicts memory dependences 259 * between instructions. 260 */ 261 MemDepUnit memDepUnit[Impl::MaxThreads]; 262 263 /** The queue to the execute stage. Issued instructions will be written 264 * into it. 265 */ 266 TimeBuffer<IssueStruct> *issueToExecuteQueue; 267 268 /** The backwards time buffer. */ 269 TimeBuffer<TimeStruct> *timeBuffer; 270 271 /** Wire to read information from timebuffer. */ 272 typename TimeBuffer<TimeStruct>::wire fromCommit; 273 274 /** Function unit pool. */ 275 FUPool *fuPool; 276 277 ////////////////////////////////////// 278 // Instruction lists, ready queues, and ordering 279 ////////////////////////////////////// 280 281 /** List of all the instructions in the IQ (some of which may be issued). */ 282 std::list<DynInstPtr> instList[Impl::MaxThreads]; 283 284 /** List of instructions that are ready to be executed. */ 285 std::list<DynInstPtr> instsToExecute; 286 287 /** 288 * Struct for comparing entries to be added to the priority queue. 289 * This gives reverse ordering to the instructions in terms of 290 * sequence numbers: the instructions with smaller sequence 291 * numbers (and hence are older) will be at the top of the 292 * priority queue. 293 */ 294 struct pqCompare { 295 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 296 { 297 return lhs->seqNum > rhs->seqNum; 298 } 299 }; 300 301 typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> 302 ReadyInstQueue; 303 304 /** List of ready instructions, per op class. They are separated by op 305 * class to allow for easy mapping to FUs. 306 */ 307 ReadyInstQueue readyInsts[Num_OpClasses]; 308 309 /** List of non-speculative instructions that will be scheduled 310 * once the IQ gets a signal from commit. While it's redundant to 311 * have the key be a part of the value (the sequence number is stored 312 * inside of DynInst), when these instructions are woken up only 313 * the sequence number will be available. Thus it is most efficient to be 314 * able to search by the sequence number alone. 315 */ 316 std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 317 318 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt; 319 320 /** Entry for the list age ordering by op class. */ 321 struct ListOrderEntry { 322 OpClass queueType; 323 InstSeqNum oldestInst; 324 }; 325 326 /** List that contains the age order of the oldest instruction of each 327 * ready queue. Used to select the oldest instruction available 328 * among op classes. 329 * @todo: Might be better to just move these entries around instead 330 * of creating new ones every time the position changes due to an 331 * instruction issuing. Not sure std::list supports this. 332 */ 333 std::list<ListOrderEntry> listOrder; 334 335 typedef typename std::list<ListOrderEntry>::iterator ListOrderIt; 336 337 /** Tracks if each ready queue is on the age order list. */ 338 bool queueOnList[Num_OpClasses]; 339 340 /** Iterators of each ready queue. Points to their spot in the age order 341 * list. 342 */ 343 ListOrderIt readyIt[Num_OpClasses]; 344 345 /** Add an op class to the age order list. */ 346 void addToOrderList(OpClass op_class); 347 348 /** 349 * Called when the oldest instruction has been removed from a ready queue; 350 * this places that ready queue into the proper spot in the age order list. 351 */ 352 void moveToYoungerInst(ListOrderIt age_order_it); 353 354 DependencyGraph<DynInstPtr> dependGraph; 355 356 ////////////////////////////////////// 357 // Various parameters 358 ////////////////////////////////////// 359 360 /** IQ Resource Sharing Policy */ 361 enum IQPolicy { 362 Dynamic, 363 Partitioned, 364 Threshold 365 }; 366 367 /** IQ sharing policy for SMT. */ 368 IQPolicy iqPolicy; 369 370 /** Number of Total Threads*/ 371 unsigned numThreads; 372 373 /** Pointer to list of active threads. */ 374 std::list<unsigned> *activeThreads; 375 376 /** Per Thread IQ count */ 377 unsigned count[Impl::MaxThreads]; 378 379 /** Max IQ Entries Per Thread */ 380 unsigned maxEntries[Impl::MaxThreads]; 381 382 /** Number of free IQ entries left. */ 383 unsigned freeEntries; 384 385 /** The number of entries in the instruction queue. */ 386 unsigned numEntries; 387 388 /** The total number of instructions that can be issued in one cycle. */ 389 unsigned totalWidth; 390 391 /** The number of physical registers in the CPU. */ 392 unsigned numPhysRegs; 393 394 /** The number of physical integer registers in the CPU. */ 395 unsigned numPhysIntRegs; 396 397 /** The number of floating point registers in the CPU. */ 398 unsigned numPhysFloatRegs; 399 400 /** Delay between commit stage and the IQ. 401 * @todo: Make there be a distinction between the delays within IEW. 402 */ 403 unsigned commitToIEWDelay; 404 405 /** Is the IQ switched out. */ 406 bool switchedOut; 407 408 /** The sequence number of the squashed instruction. */ 409 InstSeqNum squashedSeqNum[Impl::MaxThreads]; 410 411 /** A cache of the recently woken registers. It is 1 if the register 412 * has been woken up recently, and 0 if the register has been added 413 * to the dependency graph and has not yet received its value. It 414 * is basically a secondary scoreboard, and should pretty much mirror 415 * the scoreboard that exists in the rename map. 416 */ 417 std::vector<bool> regScoreboard; 418 419 /** Adds an instruction to the dependency graph, as a consumer. */ 420 bool addToDependents(DynInstPtr &new_inst); 421 422 /** Adds an instruction to the dependency graph, as a producer. */ 423 void addToProducers(DynInstPtr &new_inst); 424 425 /** Moves an instruction to the ready queue if it is ready. */ 426 void addIfReady(DynInstPtr &inst); 427 428 /** Debugging function to count how many entries are in the IQ. It does 429 * a linear walk through the instructions, so do not call this function 430 * during normal execution. 431 */ 432 int countInsts(); 433 434 /** Debugging function to dump all the list sizes, as well as print 435 * out the list of nonspeculative instructions. Should not be used 436 * in any other capacity, but it has no harmful sideaffects. 437 */ 438 void dumpLists(); 439 440 /** Debugging function to dump out all instructions that are in the 441 * IQ. 442 */ 443 void dumpInsts(); 444 445 /** Stat for number of instructions added. */ 446 Stats::Scalar iqInstsAdded; 447 /** Stat for number of non-speculative instructions added. */ 448 Stats::Scalar iqNonSpecInstsAdded; 449 450 Stats::Scalar iqInstsIssued; 451 /** Stat for number of integer instructions issued. */ 452 Stats::Scalar iqIntInstsIssued; 453 /** Stat for number of floating point instructions issued. */ 454 Stats::Scalar iqFloatInstsIssued; 455 /** Stat for number of branch instructions issued. */ 456 Stats::Scalar iqBranchInstsIssued; 457 /** Stat for number of memory instructions issued. */ 458 Stats::Scalar iqMemInstsIssued; 459 /** Stat for number of miscellaneous instructions issued. */ 460 Stats::Scalar iqMiscInstsIssued; 461 /** Stat for number of squashed instructions that were ready to issue. */ 462 Stats::Scalar iqSquashedInstsIssued; 463 /** Stat for number of squashed instructions examined when squashing. */ 464 Stats::Scalar iqSquashedInstsExamined; 465 /** Stat for number of squashed instruction operands examined when 466 * squashing. 467 */ 468 Stats::Scalar iqSquashedOperandsExamined; 469 /** Stat for number of non-speculative instructions removed due to a squash. 470 */ 471 Stats::Scalar iqSquashedNonSpecRemoved; 472 // Also include number of instructions rescheduled and replayed. 473 474 /** Distribution of number of instructions in the queue. 475 * @todo: Need to create struct to track the entry time for each 476 * instruction. */ 477// Stats::VectorDistribution queueResDist; 478 /** Distribution of the number of instructions issued. */ 479 Stats::Distribution numIssuedDist; 480 /** Distribution of the cycles it takes to issue an instruction. 481 * @todo: Need to create struct to track the ready time for each 482 * instruction. */ 483// Stats::VectorDistribution issueDelayDist; 484 485 /** Number of times an instruction could not be issued because a 486 * FU was busy. 487 */ 488 Stats::Vector statFuBusy; 489// Stats::Vector dist_unissued; 490 /** Stat for total number issued for each instruction type. */ 491 Stats::Vector2d statIssuedInstType; 492 493 /** Number of instructions issued per cycle. */ 494 Stats::Formula issueRate; 495 496 /** Number of times the FU was busy. */ 497 Stats::Vector fuBusy; 498 /** Number of times the FU was busy per instruction issued. */ 499 Stats::Formula fuBusyRate; 500}; 501 502#endif //__CPU_O3_INST_QUEUE_HH__ 503