// inst_queue.hh revision 2301
1/* 2 * Copyright (c) 2004-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#ifndef __CPU_O3_INST_QUEUE_HH__ 30#define __CPU_O3_INST_QUEUE_HH__ 31 32#include <list> 33#include <map> 34#include <queue> 35#include <vector> 36 37#include "base/statistics.hh" 38#include "base/timebuf.hh" 39#include "cpu/inst_seq.hh" 40#include "encumbered/cpu/full/op_class.hh" 41#include "sim/host.hh" 42 43class FUPool; 44class MemInterface; 45 46/** 47 * A standard instruction queue class. It holds ready instructions, in 48 * order, in seperate priority queues to facilitate the scheduling of 49 * instructions. The IQ uses a separate linked list to track dependencies. 50 * Similar to the rename map and the free list, it expects that 51 * floating point registers have their indices start after the integer 52 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 53 * and 96-191 are fp). This remains true even for both logical and 54 * physical register indices. The IQ depends on the memory dependence unit to 55 * track when memory operations are ready in terms of ordering; register 56 * dependencies are tracked normally. Right now the IQ also handles the 57 * execution timing; this is mainly to allow back-to-back scheduling without 58 * requiring IEW to be able to peek into the IQ. At the end of the execution 59 * latency, the instruction is put into the queue to execute, where it will 60 * have the execute() function called on it. 61 * @todo: Make IQ able to handle multiple FU pools. 62 */ 63template <class Impl> 64class InstructionQueue 65{ 66 public: 67 //Typedefs from the Impl. 68 typedef typename Impl::FullCPU FullCPU; 69 typedef typename Impl::DynInstPtr DynInstPtr; 70 typedef typename Impl::Params Params; 71 72 typedef typename Impl::CPUPol::IEW IEW; 73 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 74 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 75 typedef typename Impl::CPUPol::TimeStruct TimeStruct; 76 77 // Typedef of iterator through the list of instructions. 
78 typedef typename std::list<DynInstPtr>::iterator ListIt; 79 80 friend class Impl::FullCPU; 81 82 /** FU completion event class. */ 83 class FUCompletion : public Event { 84 private: 85 /** Executing instruction. */ 86 DynInstPtr inst; 87 88 /** Index of the FU used for executing. */ 89 int fuIdx; 90 91 /** Pointer back to the instruction queue. */ 92 InstructionQueue<Impl> *iqPtr; 93 94 public: 95 /** Construct a FU completion event. */ 96 FUCompletion(DynInstPtr &_inst, int fu_idx, 97 InstructionQueue<Impl> *iq_ptr); 98 99 virtual void process(); 100 virtual const char *description(); 101 }; 102 103 /** Constructs an IQ. */ 104 InstructionQueue(Params *params); 105 106 /** Destructs the IQ. */ 107 ~InstructionQueue(); 108 109 /** Returns the name of the IQ. */ 110 std::string name() const; 111 112 /** Registers statistics. */ 113 void regStats(); 114 115 /** Sets CPU pointer. */ 116 void setCPU(FullCPU *_cpu) { cpu = _cpu; } 117 118 /** Sets active threads list. */ 119 void setActiveThreads(std::list<unsigned> *at_ptr); 120 121 /** Sets the IEW pointer. */ 122 void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } 123 124 /** Sets the timer buffer between issue and execute. */ 125 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 126 127 /** Sets the global time buffer. */ 128 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 129 130 /** Number of entries needed for given amount of threads. */ 131 int entryAmount(int num_threads); 132 133 /** Resets max entries for all threads. */ 134 void resetEntries(); 135 136 /** Returns total number of free entries. */ 137 unsigned numFreeEntries(); 138 139 /** Returns number of free entries for a thread. */ 140 unsigned numFreeEntries(unsigned tid); 141 142 /** Returns whether or not the IQ is full. */ 143 bool isFull(); 144 145 /** Returns whether or not the IQ is full for a specific thread. */ 146 bool isFull(unsigned tid); 147 148 /** Returns if there are any ready instructions in the IQ. 
*/ 149 bool hasReadyInsts(); 150 151 /** Inserts a new instruction into the IQ. */ 152 void insert(DynInstPtr &new_inst); 153 154 /** Inserts a new, non-speculative instruction into the IQ. */ 155 void insertNonSpec(DynInstPtr &new_inst); 156 157 /** Inserts a memory or write barrier into the IQ to make sure 158 * loads and stores are ordered properly. 159 */ 160 void insertBarrier(DynInstPtr &barr_inst); 161 162 /** 163 * Advances the tail of the IQ, used if an instruction is not added to the 164 * IQ for scheduling. 165 * @todo: Rename this function. 166 */ 167 void advanceTail(DynInstPtr &inst); 168 169 /** Process FU completion event. */ 170 void processFUCompletion(DynInstPtr &inst, int fu_idx); 171 172 /** 173 * Schedules ready instructions, adding the ready ones (oldest first) to 174 * the queue to execute. 175 */ 176 void scheduleReadyInsts(); 177 178 /** Schedules a single specific non-speculative instruction. */ 179 void scheduleNonSpec(const InstSeqNum &inst); 180 181 /** 182 * Commits all instructions up to and including the given sequence number, 183 * for a specific thread. 184 */ 185 void commit(const InstSeqNum &inst, unsigned tid = 0); 186 187 /** Wakes all dependents of a completed instruction. */ 188 int wakeDependents(DynInstPtr &completed_inst); 189 190 /** Adds a ready memory instruction to the ready list. */ 191 void addReadyMemInst(DynInstPtr &ready_inst); 192 193 /** 194 * Reschedules a memory instruction. It will be ready to issue once 195 * replayMemInst() is called. 196 */ 197 void rescheduleMemInst(DynInstPtr &resched_inst); 198 199 /** Replays a memory instruction. It must be rescheduled first. */ 200 void replayMemInst(DynInstPtr &replay_inst); 201 202 /** Completes a memory operation. */ 203 void completeMemInst(DynInstPtr &completed_inst); 204 205 /** Indicates an ordering violation between a store and a load. */ 206 void violation(DynInstPtr &store, DynInstPtr &faulting_load); 207 208 /** 209 * Squashes instructions for a thread. 
Squashing information is obtained 210 * from the time buffer. 211 */ 212 void squash(unsigned tid); 213 214 /** Returns the number of used entries for a thread. */ 215 unsigned getCount(unsigned tid) { return count[tid]; }; 216 217 /** Updates the number of free entries. */ 218 void updateFreeEntries(int num) { freeEntries += num; } 219 220 /** Debug function to print all instructions. */ 221 void printInsts(); 222 223 private: 224 /** Does the actual squashing. */ 225 void doSquash(unsigned tid); 226 227 ///////////////////////// 228 // Various pointers 229 ///////////////////////// 230 231 /** Pointer to the CPU. */ 232 FullCPU *cpu; 233 234 /** Cache interface. */ 235 MemInterface *dcacheInterface; 236 237 /** Pointer to IEW stage. */ 238 IEW *iewStage; 239 240 /** The memory dependence unit, which tracks/predicts memory dependences 241 * between instructions. 242 */ 243 MemDepUnit memDepUnit[Impl::MaxThreads]; 244 245 /** The queue to the execute stage. Issued instructions will be written 246 * into it. 247 */ 248 TimeBuffer<IssueStruct> *issueToExecuteQueue; 249 250 /** The backwards time buffer. */ 251 TimeBuffer<TimeStruct> *timeBuffer; 252 253 /** Wire to read information from timebuffer. */ 254 typename TimeBuffer<TimeStruct>::wire fromCommit; 255 256 /** Function unit pool. */ 257 FUPool *fuPool; 258 259 ////////////////////////////////////// 260 // Instruction lists, ready queues, and ordering 261 ////////////////////////////////////// 262 263 /** List of all the instructions in the IQ (some of which may be issued). */ 264 std::list<DynInstPtr> instList[Impl::MaxThreads]; 265 266 /** 267 * Struct for comparing entries to be added to the priority queue. This 268 * gives reverse ordering to the instructions in terms of sequence 269 * numbers: the instructions with smaller sequence numbers (and hence 270 * are older) will be at the top of the priority queue. 
271 */ 272 struct pqCompare { 273 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 274 { 275 return lhs->seqNum > rhs->seqNum; 276 } 277 }; 278 279 /** 280 * Struct for an IQ entry. It includes the instruction and an iterator 281 * to the instruction's spot in the IQ. 282 */ 283 struct IQEntry { 284 DynInstPtr inst; 285 ListIt iqIt; 286 }; 287 288 typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> 289 ReadyInstQueue; 290 291 /** List of ready instructions, per op class. They are separated by op 292 * class to allow for easy mapping to FUs. 293 */ 294 ReadyInstQueue readyInsts[Num_OpClasses]; 295 296 /** List of non-speculative instructions that will be scheduled 297 * once the IQ gets a signal from commit. While it's redundant to 298 * have the key be a part of the value (the sequence number is stored 299 * inside of DynInst), when these instructions are woken up only 300 * the sequence number will be available. Thus it is most efficient to be 301 * able to search by the sequence number alone. 302 * @todo: Maybe change this to a priority queue per thread. 303 */ 304 std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 305 306 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt; 307 308 /** Entry for the list age ordering by op class. */ 309 struct ListOrderEntry { 310 OpClass queueType; 311 InstSeqNum oldestInst; 312 }; 313 314 /** List that contains the age order of the oldest instruction of each 315 * ready queue. Used to select the oldest instruction available 316 * among op classes. 317 */ 318 std::list<ListOrderEntry> listOrder; 319 320 typedef typename std::list<ListOrderEntry>::iterator ListOrderIt; 321 322 /** Tracks if each ready queue is on the age order list. */ 323 bool queueOnList[Num_OpClasses]; 324 325 /** Iterators of each ready queue. Points to their spot in the age order 326 * list. 327 */ 328 ListOrderIt readyIt[Num_OpClasses]; 329 330 /** Add an op class to the age order list. 
*/ 331 void addToOrderList(OpClass op_class); 332 333 /** 334 * Called when the oldest instruction has been removed from a ready queue; 335 * this places that ready queue into the proper spot in the age order list. 336 */ 337 void moveToYoungerInst(ListOrderIt age_order_it); 338 339 ////////////////////////////////////// 340 // Various parameters 341 ////////////////////////////////////// 342 343 /** IQ Resource Sharing Policy */ 344 enum IQPolicy { 345 Dynamic, 346 Partitioned, 347 Threshold 348 }; 349 350 /** IQ sharing policy for SMT. */ 351 IQPolicy iqPolicy; 352 353 /** Number of Total Threads*/ 354 unsigned numThreads; 355 356 /** Pointer to list of active threads. */ 357 std::list<unsigned> *activeThreads; 358 359 /** Per Thread IQ count */ 360 unsigned count[Impl::MaxThreads]; 361 362 /** Max IQ Entries Per Thread */ 363 unsigned maxEntries[Impl::MaxThreads]; 364 365 /** Number of free IQ entries left. */ 366 unsigned freeEntries; 367 368 /** The number of entries in the instruction queue. */ 369 unsigned numEntries; 370 371 /** The total number of instructions that can be issued in one cycle. */ 372 unsigned totalWidth; 373 374 /** The number of physical registers in the CPU. */ 375 unsigned numPhysRegs; 376 377 /** The number of physical integer registers in the CPU. */ 378 unsigned numPhysIntRegs; 379 380 /** The number of floating point registers in the CPU. */ 381 unsigned numPhysFloatRegs; 382 383 /** Delay between commit stage and the IQ. 384 * @todo: Make there be a distinction between the delays within IEW. 385 */ 386 unsigned commitToIEWDelay; 387 388 ////////////////////////////////// 389 // Variables needed for squashing 390 ////////////////////////////////// 391 392 /** The sequence number of the squashed instruction. */ 393 InstSeqNum squashedSeqNum[Impl::MaxThreads]; 394 395 /** Iterator that points to the last instruction that has been squashed. 396 * This will not be valid unless the IQ is in the process of squashing. 
397 */ 398 ListIt squashIt[Impl::MaxThreads]; 399 400 /////////////////////////////////// 401 // Dependency graph stuff 402 /////////////////////////////////// 403 404 class DependencyEntry 405 { 406 public: 407 DependencyEntry() 408 : inst(NULL), next(NULL) 409 { } 410 411 DynInstPtr inst; 412 //Might want to include data about what arch. register the 413 //dependence is waiting on. 414 DependencyEntry *next; 415 416 //This function, and perhaps this whole class, stand out a little 417 //bit as they don't fit a classification well. I want access 418 //to the underlying structure of the linked list, yet at 419 //the same time it feels like this should be something abstracted 420 //away. So for now it will sit here, within the IQ, until 421 //a better implementation is decided upon. 422 // This function probably shouldn't be within the entry... 423 void insert(DynInstPtr &new_inst); 424 425 void remove(DynInstPtr &inst_to_remove); 426 427 // Debug variable, remove when done testing. 428 static unsigned mem_alloc_counter; 429 }; 430 431 /** Array of linked lists. Each linked list is a list of all the 432 * instructions that depend upon a given register. The actual 433 * register's index is used to index into the graph; ie all 434 * instructions in flight that are dependent upon r34 will be 435 * in the linked list of dependGraph[34]. 436 */ 437 DependencyEntry *dependGraph; 438 439 /** A cache of the recently woken registers. It is 1 if the register 440 * has been woken up recently, and 0 if the register has been added 441 * to the dependency graph and has not yet received its value. It 442 * is basically a secondary scoreboard, and should pretty much mirror 443 * the scoreboard that exists in the rename map. 444 */ 445 std::vector<bool> regScoreboard; 446 447 /** Adds an instruction to the dependency graph, as a producer. */ 448 bool addToDependents(DynInstPtr &new_inst); 449 450 /** Adds an instruction to the dependency graph, as a consumer. 
*/ 451 void createDependency(DynInstPtr &new_inst); 452 453 /** Moves an instruction to the ready queue if it is ready. */ 454 void addIfReady(DynInstPtr &inst); 455 456 /** Debugging function to count how many entries are in the IQ. It does 457 * a linear walk through the instructions, so do not call this function 458 * during normal execution. 459 */ 460 int countInsts(); 461 462 /** Debugging function to dump out the dependency graph. 463 */ 464 void dumpDependGraph(); 465 466 /** Debugging function to dump all the list sizes, as well as print 467 * out the list of nonspeculative instructions. Should not be used 468 * in any other capacity, but it has no harmful sideaffects. 469 */ 470 void dumpLists(); 471 472 /** Debugging function to dump out all instructions that are in the 473 * IQ. 474 */ 475 void dumpInsts(); 476 477 /** Stat for number of instructions added. */ 478 Stats::Scalar<> iqInstsAdded; 479 /** Stat for number of non-speculative instructions added. */ 480 Stats::Scalar<> iqNonSpecInstsAdded; 481// Stats::Scalar<> iqIntInstsAdded; 482 Stats::Scalar<> iqInstsIssued; 483 /** Stat for number of integer instructions issued. */ 484 Stats::Scalar<> iqIntInstsIssued; 485// Stats::Scalar<> iqFloatInstsAdded; 486 /** Stat for number of floating point instructions issued. */ 487 Stats::Scalar<> iqFloatInstsIssued; 488// Stats::Scalar<> iqBranchInstsAdded; 489 /** Stat for number of branch instructions issued. */ 490 Stats::Scalar<> iqBranchInstsIssued; 491// Stats::Scalar<> iqMemInstsAdded; 492 /** Stat for number of memory instructions issued. */ 493 Stats::Scalar<> iqMemInstsIssued; 494// Stats::Scalar<> iqMiscInstsAdded; 495 /** Stat for number of miscellaneous instructions issued. */ 496 Stats::Scalar<> iqMiscInstsIssued; 497 /** Stat for number of squashed instructions that were ready to issue. */ 498 Stats::Scalar<> iqSquashedInstsIssued; 499 /** Stat for number of squashed instructions examined when squashing. 
*/ 500 Stats::Scalar<> iqSquashedInstsExamined; 501 /** Stat for number of squashed instruction operands examined when 502 * squashing. 503 */ 504 Stats::Scalar<> iqSquashedOperandsExamined; 505 /** Stat for number of non-speculative instructions removed due to a squash. 506 */ 507 Stats::Scalar<> iqSquashedNonSpecRemoved; 508 509 Stats::VectorDistribution<> queue_res_dist; 510 Stats::Vector<> n_issued_dist; 511 Stats::VectorDistribution<> issue_delay_dist; 512 513 Stats::Vector<> stat_fu_busy; 514// Stats::Vector<> dist_unissued; 515 Stats::Vector2d<> stat_issued_inst_type; 516 517 Stats::Formula issue_rate; 518// Stats::Formula issue_stores; 519// Stats::Formula issue_op_rate; 520 Stats::Vector<> fu_busy; //cumulative fu busy 521 522 Stats::Formula fu_busy_rate; 523}; 524 525#endif //__CPU_O3_INST_QUEUE_HH__ 526