inst_queue.hh revision 2307
1/* 2 * Copyright (c) 2004-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#ifndef __CPU_O3_INST_QUEUE_HH__ 30#define __CPU_O3_INST_QUEUE_HH__ 31 32#include <list> 33#include <map> 34#include <queue> 35#include <vector> 36 37#include "base/statistics.hh" 38#include "base/timebuf.hh" 39#include "cpu/inst_seq.hh" 40#include "encumbered/cpu/full/op_class.hh" 41#include "sim/host.hh" 42 43class FUPool; 44class MemInterface; 45 46/** 47 * A standard instruction queue class. It holds ready instructions, in 48 * order, in seperate priority queues to facilitate the scheduling of 49 * instructions. The IQ uses a separate linked list to track dependencies. 50 * Similar to the rename map and the free list, it expects that 51 * floating point registers have their indices start after the integer 52 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 53 * and 96-191 are fp). This remains true even for both logical and 54 * physical register indices. The IQ depends on the memory dependence unit to 55 * track when memory operations are ready in terms of ordering; register 56 * dependencies are tracked normally. Right now the IQ also handles the 57 * execution timing; this is mainly to allow back-to-back scheduling without 58 * requiring IEW to be able to peek into the IQ. At the end of the execution 59 * latency, the instruction is put into the queue to execute, where it will 60 * have the execute() function called on it. 61 * @todo: Make IQ able to handle multiple FU pools. 62 */ 63template <class Impl> 64class InstructionQueue 65{ 66 public: 67 //Typedefs from the Impl. 68 typedef typename Impl::FullCPU FullCPU; 69 typedef typename Impl::DynInstPtr DynInstPtr; 70 typedef typename Impl::Params Params; 71 72 typedef typename Impl::CPUPol::IEW IEW; 73 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 74 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 75 typedef typename Impl::CPUPol::TimeStruct TimeStruct; 76 77 // Typedef of iterator through the list of instructions. 78 typedef typename std::list<DynInstPtr>::iterator ListIt; 79 80 friend class Impl::FullCPU; 81 82 /** FU completion event class. */ 83 class FUCompletion : public Event { 84 private: 85 /** Executing instruction. */ 86 DynInstPtr inst; 87 88 /** Index of the FU used for executing. */ 89 int fuIdx; 90 91 /** Pointer back to the instruction queue. */ 92 InstructionQueue<Impl> *iqPtr; 93 94 public: 95 /** Construct a FU completion event. */ 96 FUCompletion(DynInstPtr &_inst, int fu_idx, 97 InstructionQueue<Impl> *iq_ptr); 98 99 virtual void process(); 100 virtual const char *description(); 101 }; 102 103 /** Constructs an IQ. */ 104 InstructionQueue(Params *params); 105 106 /** Destructs the IQ. */ 107 ~InstructionQueue(); 108 109 /** Returns the name of the IQ. */ 110 std::string name() const; 111 112 /** Registers statistics. */ 113 void regStats(); 114 115 void resetState(); 116 117 void resetDependencyGraph(); 118 119 /** Sets CPU pointer. */ 120 void setCPU(FullCPU *_cpu) { cpu = _cpu; } 121 122 /** Sets active threads list. */ 123 void setActiveThreads(std::list<unsigned> *at_ptr); 124 125 /** Sets the IEW pointer. */ 126 void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } 127 128 /** Sets the timer buffer between issue and execute. */ 129 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 130 131 /** Sets the global time buffer. */ 132 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 133 134 void switchOut(); 135 136 void takeOverFrom(); 137 138 bool isSwitchedOut() { return switchedOut; } 139 140 /** Number of entries needed for given amount of threads. */ 141 int entryAmount(int num_threads); 142 143 /** Resets max entries for all threads. */ 144 void resetEntries(); 145 146 /** Returns total number of free entries. */ 147 unsigned numFreeEntries(); 148 149 /** Returns number of free entries for a thread. */ 150 unsigned numFreeEntries(unsigned tid); 151 152 /** Returns whether or not the IQ is full. */ 153 bool isFull(); 154 155 /** Returns whether or not the IQ is full for a specific thread. */ 156 bool isFull(unsigned tid); 157 158 /** Returns if there are any ready instructions in the IQ. */ 159 bool hasReadyInsts(); 160 161 /** Inserts a new instruction into the IQ. */ 162 void insert(DynInstPtr &new_inst); 163 164 /** Inserts a new, non-speculative instruction into the IQ. */ 165 void insertNonSpec(DynInstPtr &new_inst); 166 167 /** Inserts a memory or write barrier into the IQ to make sure 168 * loads and stores are ordered properly. 169 */ 170 void insertBarrier(DynInstPtr &barr_inst); 171 172 /** 173 * Advances the tail of the IQ, used if an instruction is not added to the 174 * IQ for scheduling. 175 * @todo: Rename this function. 176 */ 177 void advanceTail(DynInstPtr &inst); 178 179 /** Process FU completion event. */ 180 void processFUCompletion(DynInstPtr &inst, int fu_idx); 181 182 /** 183 * Schedules ready instructions, adding the ready ones (oldest first) to 184 * the queue to execute. 185 */ 186 void scheduleReadyInsts(); 187 188 /** Schedules a single specific non-speculative instruction. */ 189 void scheduleNonSpec(const InstSeqNum &inst); 190 191 /** 192 * Commits all instructions up to and including the given sequence number, 193 * for a specific thread. 194 */ 195 void commit(const InstSeqNum &inst, unsigned tid = 0); 196 197 /** Wakes all dependents of a completed instruction. */ 198 int wakeDependents(DynInstPtr &completed_inst); 199 200 /** Adds a ready memory instruction to the ready list. */ 201 void addReadyMemInst(DynInstPtr &ready_inst); 202 203 /** 204 * Reschedules a memory instruction. It will be ready to issue once 205 * replayMemInst() is called. 206 */ 207 void rescheduleMemInst(DynInstPtr &resched_inst); 208 209 /** Replays a memory instruction. It must be rescheduled first. */ 210 void replayMemInst(DynInstPtr &replay_inst); 211 212 /** Completes a memory operation. */ 213 void completeMemInst(DynInstPtr &completed_inst); 214 215 /** Indicates an ordering violation between a store and a load. */ 216 void violation(DynInstPtr &store, DynInstPtr &faulting_load); 217 218 /** 219 * Squashes instructions for a thread. Squashing information is obtained 220 * from the time buffer. 221 */ 222 void squash(unsigned tid); 223 224 /** Returns the number of used entries for a thread. */ 225 unsigned getCount(unsigned tid) { return count[tid]; }; 226 227 /** Updates the number of free entries. */ 228 void updateFreeEntries(int num) { freeEntries += num; } 229 230 /** Debug function to print all instructions. */ 231 void printInsts(); 232 233 private: 234 /** Does the actual squashing. */ 235 void doSquash(unsigned tid); 236 237 ///////////////////////// 238 // Various pointers 239 ///////////////////////// 240 241 /** Pointer to the CPU. */ 242 FullCPU *cpu; 243 244 /** Cache interface. */ 245 MemInterface *dcacheInterface; 246 247 /** Pointer to IEW stage. */ 248 IEW *iewStage; 249 250 /** The memory dependence unit, which tracks/predicts memory dependences 251 * between instructions. 252 */ 253 MemDepUnit memDepUnit[Impl::MaxThreads]; 254 255 /** The queue to the execute stage. Issued instructions will be written 256 * into it. 257 */ 258 TimeBuffer<IssueStruct> *issueToExecuteQueue; 259 260 /** The backwards time buffer. */ 261 TimeBuffer<TimeStruct> *timeBuffer; 262 263 /** Wire to read information from timebuffer. */ 264 typename TimeBuffer<TimeStruct>::wire fromCommit; 265 266 /** Function unit pool. */ 267 FUPool *fuPool; 268 269 ////////////////////////////////////// 270 // Instruction lists, ready queues, and ordering 271 ////////////////////////////////////// 272 273 /** List of all the instructions in the IQ (some of which may be issued). */ 274 std::list<DynInstPtr> instList[Impl::MaxThreads]; 275 276 /** 277 * Struct for comparing entries to be added to the priority queue. This 278 * gives reverse ordering to the instructions in terms of sequence 279 * numbers: the instructions with smaller sequence numbers (and hence 280 * are older) will be at the top of the priority queue. 281 */ 282 struct pqCompare { 283 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 284 { 285 return lhs->seqNum > rhs->seqNum; 286 } 287 }; 288 289 /** 290 * Struct for an IQ entry. It includes the instruction and an iterator 291 * to the instruction's spot in the IQ. 292 */ 293 struct IQEntry { 294 DynInstPtr inst; 295 ListIt iqIt; 296 }; 297 298 typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> 299 ReadyInstQueue; 300 301 /** List of ready instructions, per op class. They are separated by op 302 * class to allow for easy mapping to FUs. 303 */ 304 ReadyInstQueue readyInsts[Num_OpClasses]; 305 306 /** List of non-speculative instructions that will be scheduled 307 * once the IQ gets a signal from commit. While it's redundant to 308 * have the key be a part of the value (the sequence number is stored 309 * inside of DynInst), when these instructions are woken up only 310 * the sequence number will be available. Thus it is most efficient to be 311 * able to search by the sequence number alone. 312 * @todo: Maybe change this to a priority queue per thread. 313 */ 314 std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 315 316 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt; 317 318 /** Entry for the list age ordering by op class. */ 319 struct ListOrderEntry { 320 OpClass queueType; 321 InstSeqNum oldestInst; 322 }; 323 324 /** List that contains the age order of the oldest instruction of each 325 * ready queue. Used to select the oldest instruction available 326 * among op classes. 327 */ 328 std::list<ListOrderEntry> listOrder; 329 330 typedef typename std::list<ListOrderEntry>::iterator ListOrderIt; 331 332 /** Tracks if each ready queue is on the age order list. */ 333 bool queueOnList[Num_OpClasses]; 334 335 /** Iterators of each ready queue. Points to their spot in the age order 336 * list. 337 */ 338 ListOrderIt readyIt[Num_OpClasses]; 339 340 /** Add an op class to the age order list. */ 341 void addToOrderList(OpClass op_class); 342 343 /** 344 * Called when the oldest instruction has been removed from a ready queue; 345 * this places that ready queue into the proper spot in the age order list. 346 */ 347 void moveToYoungerInst(ListOrderIt age_order_it); 348 349 ////////////////////////////////////// 350 // Various parameters 351 ////////////////////////////////////// 352 353 /** IQ Resource Sharing Policy */ 354 enum IQPolicy { 355 Dynamic, 356 Partitioned, 357 Threshold 358 }; 359 360 /** IQ sharing policy for SMT. */ 361 IQPolicy iqPolicy; 362 363 /** Number of Total Threads*/ 364 unsigned numThreads; 365 366 /** Pointer to list of active threads. */ 367 std::list<unsigned> *activeThreads; 368 369 /** Per Thread IQ count */ 370 unsigned count[Impl::MaxThreads]; 371 372 /** Max IQ Entries Per Thread */ 373 unsigned maxEntries[Impl::MaxThreads]; 374 375 /** Number of free IQ entries left. */ 376 unsigned freeEntries; 377 378 /** The number of entries in the instruction queue. */ 379 unsigned numEntries; 380 381 /** The total number of instructions that can be issued in one cycle. */ 382 unsigned totalWidth; 383 384 /** The number of physical registers in the CPU. */ 385 unsigned numPhysRegs; 386 387 /** The number of physical integer registers in the CPU. */ 388 unsigned numPhysIntRegs; 389 390 /** The number of floating point registers in the CPU. */ 391 unsigned numPhysFloatRegs; 392 393 /** Delay between commit stage and the IQ. 394 * @todo: Make there be a distinction between the delays within IEW. 395 */ 396 unsigned commitToIEWDelay; 397 398 bool switchedOut; 399 400 ////////////////////////////////// 401 // Variables needed for squashing 402 ////////////////////////////////// 403 404 /** The sequence number of the squashed instruction. */ 405 InstSeqNum squashedSeqNum[Impl::MaxThreads]; 406 407 /** Iterator that points to the last instruction that has been squashed. 408 * This will not be valid unless the IQ is in the process of squashing. 409 */ 410 ListIt squashIt[Impl::MaxThreads]; 411 412 /////////////////////////////////// 413 // Dependency graph stuff 414 /////////////////////////////////// 415 416 class DependencyEntry 417 { 418 public: 419 DependencyEntry() 420 : inst(NULL), next(NULL) 421 { } 422 423 DynInstPtr inst; 424 //Might want to include data about what arch. register the 425 //dependence is waiting on. 426 DependencyEntry *next; 427 428 //This function, and perhaps this whole class, stand out a little 429 //bit as they don't fit a classification well. I want access 430 //to the underlying structure of the linked list, yet at 431 //the same time it feels like this should be something abstracted 432 //away. So for now it will sit here, within the IQ, until 433 //a better implementation is decided upon. 434 // This function probably shouldn't be within the entry... 435 void insert(DynInstPtr &new_inst); 436 437 void remove(DynInstPtr &inst_to_remove); 438 439 // Debug variable, remove when done testing. 440 static unsigned mem_alloc_counter; 441 }; 442 443 /** Array of linked lists. Each linked list is a list of all the 444 * instructions that depend upon a given register. The actual 445 * register's index is used to index into the graph; ie all 446 * instructions in flight that are dependent upon r34 will be 447 * in the linked list of dependGraph[34]. 448 */ 449 DependencyEntry *dependGraph; 450 451 /** A cache of the recently woken registers. It is 1 if the register 452 * has been woken up recently, and 0 if the register has been added 453 * to the dependency graph and has not yet received its value. It 454 * is basically a secondary scoreboard, and should pretty much mirror 455 * the scoreboard that exists in the rename map. 456 */ 457 std::vector<bool> regScoreboard; 458 459 /** Adds an instruction to the dependency graph, as a producer. */ 460 bool addToDependents(DynInstPtr &new_inst); 461 462 /** Adds an instruction to the dependency graph, as a consumer. */ 463 void createDependency(DynInstPtr &new_inst); 464 465 /** Moves an instruction to the ready queue if it is ready. */ 466 void addIfReady(DynInstPtr &inst); 467 468 /** Debugging function to count how many entries are in the IQ. It does 469 * a linear walk through the instructions, so do not call this function 470 * during normal execution. 471 */ 472 int countInsts(); 473 474 /** Debugging function to dump out the dependency graph. 475 */ 476 void dumpDependGraph(); 477 478 /** Debugging function to dump all the list sizes, as well as print 479 * out the list of nonspeculative instructions. Should not be used 480 * in any other capacity, but it has no harmful sideaffects. 481 */ 482 void dumpLists(); 483 484 /** Debugging function to dump out all instructions that are in the 485 * IQ. 486 */ 487 void dumpInsts(); 488 489 /** Stat for number of instructions added. */ 490 Stats::Scalar<> iqInstsAdded; 491 /** Stat for number of non-speculative instructions added. */ 492 Stats::Scalar<> iqNonSpecInstsAdded; 493// Stats::Scalar<> iqIntInstsAdded; 494 Stats::Scalar<> iqInstsIssued; 495 /** Stat for number of integer instructions issued. */ 496 Stats::Scalar<> iqIntInstsIssued; 497// Stats::Scalar<> iqFloatInstsAdded; 498 /** Stat for number of floating point instructions issued. */ 499 Stats::Scalar<> iqFloatInstsIssued; 500// Stats::Scalar<> iqBranchInstsAdded; 501 /** Stat for number of branch instructions issued. */ 502 Stats::Scalar<> iqBranchInstsIssued; 503// Stats::Scalar<> iqMemInstsAdded; 504 /** Stat for number of memory instructions issued. */ 505 Stats::Scalar<> iqMemInstsIssued; 506// Stats::Scalar<> iqMiscInstsAdded; 507 /** Stat for number of miscellaneous instructions issued. */ 508 Stats::Scalar<> iqMiscInstsIssued; 509 /** Stat for number of squashed instructions that were ready to issue. */ 510 Stats::Scalar<> iqSquashedInstsIssued; 511 /** Stat for number of squashed instructions examined when squashing. */ 512 Stats::Scalar<> iqSquashedInstsExamined; 513 /** Stat for number of squashed instruction operands examined when 514 * squashing. 515 */ 516 Stats::Scalar<> iqSquashedOperandsExamined; 517 /** Stat for number of non-speculative instructions removed due to a squash. 518 */ 519 Stats::Scalar<> iqSquashedNonSpecRemoved; 520 521 Stats::VectorDistribution<> queue_res_dist; 522 Stats::Distribution<> n_issued_dist; 523 Stats::VectorDistribution<> issue_delay_dist; 524 525 Stats::Vector<> stat_fu_busy; 526// Stats::Vector<> dist_unissued; 527 Stats::Vector2d<> stat_issued_inst_type; 528 529 Stats::Formula issue_rate; 530// Stats::Formula issue_stores; 531// Stats::Formula issue_op_rate; 532 Stats::Vector<> fu_busy; //cumulative fu busy 533 534 Stats::Formula fu_busy_rate; 535}; 536 537#endif //__CPU_O3_INST_QUEUE_HH__ 538