inst_queue.hh revision 9444:ab47fe7f03f0
1/* 2 * Copyright (c) 2011-2012 ARM Limited 3 * All rights reserved. 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2004-2006 The Regents of The University of Michigan 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Kevin Lim 41 */ 42 43#ifndef __CPU_O3_INST_QUEUE_HH__ 44#define __CPU_O3_INST_QUEUE_HH__ 45 46#include <list> 47#include <map> 48#include <queue> 49#include <vector> 50 51#include "base/statistics.hh" 52#include "base/types.hh" 53#include "cpu/o3/dep_graph.hh" 54#include "cpu/inst_seq.hh" 55#include "cpu/op_class.hh" 56#include "cpu/timebuf.hh" 57#include "sim/eventq.hh" 58 59struct DerivO3CPUParams; 60class FUPool; 61class MemInterface; 62 63/** 64 * A standard instruction queue class. It holds ready instructions, in 65 * order, in seperate priority queues to facilitate the scheduling of 66 * instructions. The IQ uses a separate linked list to track dependencies. 67 * Similar to the rename map and the free list, it expects that 68 * floating point registers have their indices start after the integer 69 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 70 * and 96-191 are fp). This remains true even for both logical and 71 * physical register indices. The IQ depends on the memory dependence unit to 72 * track when memory operations are ready in terms of ordering; register 73 * dependencies are tracked normally. Right now the IQ also handles the 74 * execution timing; this is mainly to allow back-to-back scheduling without 75 * requiring IEW to be able to peek into the IQ. At the end of the execution 76 * latency, the instruction is put into the queue to execute, where it will 77 * have the execute() function called on it. 78 * @todo: Make IQ able to handle multiple FU pools. 79 */ 80template <class Impl> 81class InstructionQueue 82{ 83 public: 84 //Typedefs from the Impl. 85 typedef typename Impl::O3CPU O3CPU; 86 typedef typename Impl::DynInstPtr DynInstPtr; 87 88 typedef typename Impl::CPUPol::IEW IEW; 89 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 90 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 91 typedef typename Impl::CPUPol::TimeStruct TimeStruct; 92 93 // Typedef of iterator through the list of instructions. 94 typedef typename std::list<DynInstPtr>::iterator ListIt; 95 96 /** FU completion event class. */ 97 class FUCompletion : public Event { 98 private: 99 /** Executing instruction. */ 100 DynInstPtr inst; 101 102 /** Index of the FU used for executing. */ 103 int fuIdx; 104 105 /** Pointer back to the instruction queue. */ 106 InstructionQueue<Impl> *iqPtr; 107 108 /** Should the FU be added to the list to be freed upon 109 * completing this event. 110 */ 111 bool freeFU; 112 113 public: 114 /** Construct a FU completion event. */ 115 FUCompletion(DynInstPtr &_inst, int fu_idx, 116 InstructionQueue<Impl> *iq_ptr); 117 118 virtual void process(); 119 virtual const char *description() const; 120 void setFreeFU() { freeFU = true; } 121 }; 122 123 /** Constructs an IQ. */ 124 InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params); 125 126 /** Destructs the IQ. */ 127 ~InstructionQueue(); 128 129 /** Returns the name of the IQ. */ 130 std::string name() const; 131 132 /** Registers statistics. */ 133 void regStats(); 134 135 /** Resets all instruction queue state. */ 136 void resetState(); 137 138 /** Sets active threads list. */ 139 void setActiveThreads(std::list<ThreadID> *at_ptr); 140 141 /** Sets the timer buffer between issue and execute. */ 142 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 143 144 /** Sets the global time buffer. */ 145 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 146 147 /** Perform sanity checks after a drain. */ 148 void drainSanityCheck() const; 149 150 /** Takes over execution from another CPU's thread. */ 151 void takeOverFrom(); 152 153 /** Number of entries needed for given amount of threads. */ 154 int entryAmount(ThreadID num_threads); 155 156 /** Resets max entries for all threads. */ 157 void resetEntries(); 158 159 /** Returns total number of free entries. */ 160 unsigned numFreeEntries(); 161 162 /** Returns number of free entries for a thread. */ 163 unsigned numFreeEntries(ThreadID tid); 164 165 /** Returns whether or not the IQ is full. */ 166 bool isFull(); 167 168 /** Returns whether or not the IQ is full for a specific thread. */ 169 bool isFull(ThreadID tid); 170 171 /** Returns if there are any ready instructions in the IQ. */ 172 bool hasReadyInsts(); 173 174 /** Inserts a new instruction into the IQ. */ 175 void insert(DynInstPtr &new_inst); 176 177 /** Inserts a new, non-speculative instruction into the IQ. */ 178 void insertNonSpec(DynInstPtr &new_inst); 179 180 /** Inserts a memory or write barrier into the IQ to make sure 181 * loads and stores are ordered properly. 182 */ 183 void insertBarrier(DynInstPtr &barr_inst); 184 185 /** Returns the oldest scheduled instruction, and removes it from 186 * the list of instructions waiting to execute. 187 */ 188 DynInstPtr getInstToExecute(); 189 190 /** Returns a memory instruction that was referred due to a delayed DTB 191 * translation if it is now ready to execute. 192 */ 193 DynInstPtr getDeferredMemInstToExecute(); 194 195 /** 196 * Records the instruction as the producer of a register without 197 * adding it to the rest of the IQ. 198 */ 199 void recordProducer(DynInstPtr &inst) 200 { addToProducers(inst); } 201 202 /** Process FU completion event. */ 203 void processFUCompletion(DynInstPtr &inst, int fu_idx); 204 205 /** 206 * Schedules ready instructions, adding the ready ones (oldest first) to 207 * the queue to execute. 208 */ 209 void scheduleReadyInsts(); 210 211 /** Schedules a single specific non-speculative instruction. */ 212 void scheduleNonSpec(const InstSeqNum &inst); 213 214 /** 215 * Commits all instructions up to and including the given sequence number, 216 * for a specific thread. 217 */ 218 void commit(const InstSeqNum &inst, ThreadID tid = 0); 219 220 /** Wakes all dependents of a completed instruction. */ 221 int wakeDependents(DynInstPtr &completed_inst); 222 223 /** Adds a ready memory instruction to the ready list. */ 224 void addReadyMemInst(DynInstPtr &ready_inst); 225 226 /** 227 * Reschedules a memory instruction. It will be ready to issue once 228 * replayMemInst() is called. 229 */ 230 void rescheduleMemInst(DynInstPtr &resched_inst); 231 232 /** Replays a memory instruction. It must be rescheduled first. */ 233 void replayMemInst(DynInstPtr &replay_inst); 234 235 /** Completes a memory operation. */ 236 void completeMemInst(DynInstPtr &completed_inst); 237 238 /** 239 * Defers a memory instruction when its DTB translation incurs a hw 240 * page table walk. 241 */ 242 void deferMemInst(DynInstPtr &deferred_inst); 243 244 /** Indicates an ordering violation between a store and a load. */ 245 void violation(DynInstPtr &store, DynInstPtr &faulting_load); 246 247 /** 248 * Squashes instructions for a thread. Squashing information is obtained 249 * from the time buffer. 250 */ 251 void squash(ThreadID tid); 252 253 /** Returns the number of used entries for a thread. */ 254 unsigned getCount(ThreadID tid) { return count[tid]; }; 255 256 /** Debug function to print all instructions. */ 257 void printInsts(); 258 259 private: 260 /** Does the actual squashing. */ 261 void doSquash(ThreadID tid); 262 263 ///////////////////////// 264 // Various pointers 265 ///////////////////////// 266 267 /** Pointer to the CPU. */ 268 O3CPU *cpu; 269 270 /** Cache interface. */ 271 MemInterface *dcacheInterface; 272 273 /** Pointer to IEW stage. */ 274 IEW *iewStage; 275 276 /** The memory dependence unit, which tracks/predicts memory dependences 277 * between instructions. 278 */ 279 MemDepUnit memDepUnit[Impl::MaxThreads]; 280 281 /** The queue to the execute stage. Issued instructions will be written 282 * into it. 283 */ 284 TimeBuffer<IssueStruct> *issueToExecuteQueue; 285 286 /** The backwards time buffer. */ 287 TimeBuffer<TimeStruct> *timeBuffer; 288 289 /** Wire to read information from timebuffer. */ 290 typename TimeBuffer<TimeStruct>::wire fromCommit; 291 292 /** Function unit pool. */ 293 FUPool *fuPool; 294 295 ////////////////////////////////////// 296 // Instruction lists, ready queues, and ordering 297 ////////////////////////////////////// 298 299 /** List of all the instructions in the IQ (some of which may be issued). */ 300 std::list<DynInstPtr> instList[Impl::MaxThreads]; 301 302 /** List of instructions that are ready to be executed. */ 303 std::list<DynInstPtr> instsToExecute; 304 305 /** List of instructions waiting for their DTB translation to 306 * complete (hw page table walk in progress). 307 */ 308 std::list<DynInstPtr> deferredMemInsts; 309 310 /** 311 * Struct for comparing entries to be added to the priority queue. 312 * This gives reverse ordering to the instructions in terms of 313 * sequence numbers: the instructions with smaller sequence 314 * numbers (and hence are older) will be at the top of the 315 * priority queue. 316 */ 317 struct pqCompare { 318 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 319 { 320 return lhs->seqNum > rhs->seqNum; 321 } 322 }; 323 324 typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> 325 ReadyInstQueue; 326 327 /** List of ready instructions, per op class. They are separated by op 328 * class to allow for easy mapping to FUs. 329 */ 330 ReadyInstQueue readyInsts[Num_OpClasses]; 331 332 /** List of non-speculative instructions that will be scheduled 333 * once the IQ gets a signal from commit. While it's redundant to 334 * have the key be a part of the value (the sequence number is stored 335 * inside of DynInst), when these instructions are woken up only 336 * the sequence number will be available. Thus it is most efficient to be 337 * able to search by the sequence number alone. 338 */ 339 std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 340 341 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt; 342 343 /** Entry for the list age ordering by op class. */ 344 struct ListOrderEntry { 345 OpClass queueType; 346 InstSeqNum oldestInst; 347 }; 348 349 /** List that contains the age order of the oldest instruction of each 350 * ready queue. Used to select the oldest instruction available 351 * among op classes. 352 * @todo: Might be better to just move these entries around instead 353 * of creating new ones every time the position changes due to an 354 * instruction issuing. Not sure std::list supports this. 355 */ 356 std::list<ListOrderEntry> listOrder; 357 358 typedef typename std::list<ListOrderEntry>::iterator ListOrderIt; 359 360 /** Tracks if each ready queue is on the age order list. */ 361 bool queueOnList[Num_OpClasses]; 362 363 /** Iterators of each ready queue. Points to their spot in the age order 364 * list. 365 */ 366 ListOrderIt readyIt[Num_OpClasses]; 367 368 /** Add an op class to the age order list. */ 369 void addToOrderList(OpClass op_class); 370 371 /** 372 * Called when the oldest instruction has been removed from a ready queue; 373 * this places that ready queue into the proper spot in the age order list. 374 */ 375 void moveToYoungerInst(ListOrderIt age_order_it); 376 377 DependencyGraph<DynInstPtr> dependGraph; 378 379 ////////////////////////////////////// 380 // Various parameters 381 ////////////////////////////////////// 382 383 /** IQ Resource Sharing Policy */ 384 enum IQPolicy { 385 Dynamic, 386 Partitioned, 387 Threshold 388 }; 389 390 /** IQ sharing policy for SMT. */ 391 IQPolicy iqPolicy; 392 393 /** Number of Total Threads*/ 394 ThreadID numThreads; 395 396 /** Pointer to list of active threads. */ 397 std::list<ThreadID> *activeThreads; 398 399 /** Per Thread IQ count */ 400 unsigned count[Impl::MaxThreads]; 401 402 /** Max IQ Entries Per Thread */ 403 unsigned maxEntries[Impl::MaxThreads]; 404 405 /** Number of free IQ entries left. */ 406 unsigned freeEntries; 407 408 /** The number of entries in the instruction queue. */ 409 unsigned numEntries; 410 411 /** The total number of instructions that can be issued in one cycle. */ 412 unsigned totalWidth; 413 414 /** The number of physical registers in the CPU. */ 415 unsigned numPhysRegs; 416 417 /** The number of physical integer registers in the CPU. */ 418 unsigned numPhysIntRegs; 419 420 /** The number of floating point registers in the CPU. */ 421 unsigned numPhysFloatRegs; 422 423 /** Delay between commit stage and the IQ. 424 * @todo: Make there be a distinction between the delays within IEW. 425 */ 426 Cycles commitToIEWDelay; 427 428 /** The sequence number of the squashed instruction. */ 429 InstSeqNum squashedSeqNum[Impl::MaxThreads]; 430 431 /** A cache of the recently woken registers. It is 1 if the register 432 * has been woken up recently, and 0 if the register has been added 433 * to the dependency graph and has not yet received its value. It 434 * is basically a secondary scoreboard, and should pretty much mirror 435 * the scoreboard that exists in the rename map. 436 */ 437 std::vector<bool> regScoreboard; 438 439 /** Adds an instruction to the dependency graph, as a consumer. */ 440 bool addToDependents(DynInstPtr &new_inst); 441 442 /** Adds an instruction to the dependency graph, as a producer. */ 443 void addToProducers(DynInstPtr &new_inst); 444 445 /** Moves an instruction to the ready queue if it is ready. */ 446 void addIfReady(DynInstPtr &inst); 447 448 /** Debugging function to count how many entries are in the IQ. It does 449 * a linear walk through the instructions, so do not call this function 450 * during normal execution. 451 */ 452 int countInsts(); 453 454 /** Debugging function to dump all the list sizes, as well as print 455 * out the list of nonspeculative instructions. Should not be used 456 * in any other capacity, but it has no harmful sideaffects. 457 */ 458 void dumpLists(); 459 460 /** Debugging function to dump out all instructions that are in the 461 * IQ. 462 */ 463 void dumpInsts(); 464 465 /** Stat for number of instructions added. */ 466 Stats::Scalar iqInstsAdded; 467 /** Stat for number of non-speculative instructions added. */ 468 Stats::Scalar iqNonSpecInstsAdded; 469 470 Stats::Scalar iqInstsIssued; 471 /** Stat for number of integer instructions issued. */ 472 Stats::Scalar iqIntInstsIssued; 473 /** Stat for number of floating point instructions issued. */ 474 Stats::Scalar iqFloatInstsIssued; 475 /** Stat for number of branch instructions issued. */ 476 Stats::Scalar iqBranchInstsIssued; 477 /** Stat for number of memory instructions issued. */ 478 Stats::Scalar iqMemInstsIssued; 479 /** Stat for number of miscellaneous instructions issued. */ 480 Stats::Scalar iqMiscInstsIssued; 481 /** Stat for number of squashed instructions that were ready to issue. */ 482 Stats::Scalar iqSquashedInstsIssued; 483 /** Stat for number of squashed instructions examined when squashing. */ 484 Stats::Scalar iqSquashedInstsExamined; 485 /** Stat for number of squashed instruction operands examined when 486 * squashing. 487 */ 488 Stats::Scalar iqSquashedOperandsExamined; 489 /** Stat for number of non-speculative instructions removed due to a squash. 490 */ 491 Stats::Scalar iqSquashedNonSpecRemoved; 492 // Also include number of instructions rescheduled and replayed. 493 494 /** Distribution of number of instructions in the queue. 495 * @todo: Need to create struct to track the entry time for each 496 * instruction. */ 497// Stats::VectorDistribution queueResDist; 498 /** Distribution of the number of instructions issued. */ 499 Stats::Distribution numIssuedDist; 500 /** Distribution of the cycles it takes to issue an instruction. 501 * @todo: Need to create struct to track the ready time for each 502 * instruction. */ 503// Stats::VectorDistribution issueDelayDist; 504 505 /** Number of times an instruction could not be issued because a 506 * FU was busy. 507 */ 508 Stats::Vector statFuBusy; 509// Stats::Vector dist_unissued; 510 /** Stat for total number issued for each instruction type. */ 511 Stats::Vector2d statIssuedInstType; 512 513 /** Number of instructions issued per cycle. */ 514 Stats::Formula issueRate; 515 516 /** Number of times the FU was busy. */ 517 Stats::Vector fuBusy; 518 /** Number of times the FU was busy per instruction issued. */ 519 Stats::Formula fuBusyRate; 520 public: 521 Stats::Scalar intInstQueueReads; 522 Stats::Scalar intInstQueueWrites; 523 Stats::Scalar intInstQueueWakeupAccesses; 524 Stats::Scalar fpInstQueueReads; 525 Stats::Scalar fpInstQueueWrites; 526 Stats::Scalar fpInstQueueWakeupQccesses; 527 528 Stats::Scalar intAluAccesses; 529 Stats::Scalar fpAluAccesses; 530}; 531 532#endif //__CPU_O3_INST_QUEUE_HH__ 533