inst_queue.hh revision 2831:0a42b294727c
17760SGiacomo.Gabrielli@arm.com/* 27760SGiacomo.Gabrielli@arm.com * Copyright (c) 2004-2006 The Regents of The University of Michigan 37760SGiacomo.Gabrielli@arm.com * All rights reserved. 47760SGiacomo.Gabrielli@arm.com * 57760SGiacomo.Gabrielli@arm.com * Redistribution and use in source and binary forms, with or without 67760SGiacomo.Gabrielli@arm.com * modification, are permitted provided that the following conditions are 77760SGiacomo.Gabrielli@arm.com * met: redistributions of source code must retain the above copyright 87760SGiacomo.Gabrielli@arm.com * notice, this list of conditions and the following disclaimer; 97760SGiacomo.Gabrielli@arm.com * redistributions in binary form must reproduce the above copyright 107760SGiacomo.Gabrielli@arm.com * notice, this list of conditions and the following disclaimer in the 117760SGiacomo.Gabrielli@arm.com * documentation and/or other materials provided with the distribution; 127760SGiacomo.Gabrielli@arm.com * neither the name of the copyright holders nor the names of its 134486Sbinkertn@umich.edu * contributors may be used to endorse or promote products derived from 144486Sbinkertn@umich.edu * this software without specific prior written permission. 154486Sbinkertn@umich.edu * 164486Sbinkertn@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 174486Sbinkertn@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 184486Sbinkertn@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 194486Sbinkertn@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 204486Sbinkertn@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 214486Sbinkertn@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 224486Sbinkertn@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 234486Sbinkertn@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 244486Sbinkertn@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 254486Sbinkertn@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 264486Sbinkertn@umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 274486Sbinkertn@umich.edu * 284486Sbinkertn@umich.edu * Authors: Kevin Lim 294486Sbinkertn@umich.edu * Korey Sewell 304486Sbinkertn@umich.edu */ 314486Sbinkertn@umich.edu 324486Sbinkertn@umich.edu#ifndef __CPU_O3_INST_QUEUE_HH__ 334486Sbinkertn@umich.edu#define __CPU_O3_INST_QUEUE_HH__ 344486Sbinkertn@umich.edu 354486Sbinkertn@umich.edu#include <list> 364486Sbinkertn@umich.edu#include <map> 374486Sbinkertn@umich.edu#include <queue> 384486Sbinkertn@umich.edu#include <vector> 394486Sbinkertn@umich.edu 404486Sbinkertn@umich.edu#include "base/statistics.hh" 413102SN/A#include "base/timebuf.hh" 423102SN/A#include "cpu/inst_seq.hh" 432736SN/A#include "cpu/o3/dep_graph.hh" 444556Sbinkertn@umich.edu#include "cpu/op_class.hh" 454556Sbinkertn@umich.edu#include "sim/host.hh" 462736SN/A 477760SGiacomo.Gabrielli@arm.comclass FUPool; 487760SGiacomo.Gabrielli@arm.comclass MemInterface; 497760SGiacomo.Gabrielli@arm.com 507760SGiacomo.Gabrielli@arm.com/** 517760SGiacomo.Gabrielli@arm.com * A standard instruction queue class. It holds ready instructions, in 522736SN/A * order, in seperate priority queues to facilitate the scheduling of 532736SN/A * instructions. The IQ uses a separate linked list to track dependencies. 542736SN/A * Similar to the rename map and the free list, it expects that 552736SN/A * floating point registers have their indices start after the integer 562736SN/A * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 574556Sbinkertn@umich.edu * and 96-191 are fp). This remains true even for both logical and 582736SN/A * physical register indices. The IQ depends on the memory dependence unit to 592736SN/A * track when memory operations are ready in terms of ordering; register 602736SN/A * dependencies are tracked normally. Right now the IQ also handles the 612736SN/A * execution timing; this is mainly to allow back-to-back scheduling without 622736SN/A * requiring IEW to be able to peek into the IQ. At the end of the execution 632736SN/A * latency, the instruction is put into the queue to execute, where it will 64 * have the execute() function called on it. 65 * @todo: Make IQ able to handle multiple FU pools. 66 */ 67template <class Impl> 68class InstructionQueue 69{ 70 public: 71 //Typedefs from the Impl. 72 typedef typename Impl::O3CPU O3CPU; 73 typedef typename Impl::DynInstPtr DynInstPtr; 74 typedef typename Impl::Params Params; 75 76 typedef typename Impl::CPUPol::IEW IEW; 77 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 78 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 79 typedef typename Impl::CPUPol::TimeStruct TimeStruct; 80 81 // Typedef of iterator through the list of instructions. 82 typedef typename std::list<DynInstPtr>::iterator ListIt; 83 84 friend class Impl::O3CPU; 85 86 /** FU completion event class. */ 87 class FUCompletion : public Event { 88 private: 89 /** Executing instruction. */ 90 DynInstPtr inst; 91 92 /** Index of the FU used for executing. */ 93 int fuIdx; 94 95 /** Pointer back to the instruction queue. */ 96 InstructionQueue<Impl> *iqPtr; 97 98 /** Should the FU be added to the list to be freed upon 99 * completing this event. 100 */ 101 bool freeFU; 102 103 public: 104 /** Construct a FU completion event. */ 105 FUCompletion(DynInstPtr &_inst, int fu_idx, 106 InstructionQueue<Impl> *iq_ptr); 107 108 virtual void process(); 109 virtual const char *description(); 110 void setFreeFU() { freeFU = true; } 111 }; 112 113 /** Constructs an IQ. */ 114 InstructionQueue(Params *params); 115 116 /** Destructs the IQ. */ 117 ~InstructionQueue(); 118 119 /** Returns the name of the IQ. */ 120 std::string name() const; 121 122 /** Registers statistics. */ 123 void regStats(); 124 125 /** Resets all instruction queue state. */ 126 void resetState(); 127 128 /** Sets CPU pointer. */ 129 void setCPU(O3CPU *_cpu) { cpu = _cpu; } 130 131 /** Sets active threads list. */ 132 void setActiveThreads(std::list<unsigned> *at_ptr); 133 134 /** Sets the IEW pointer. */ 135 void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } 136 137 /** Sets the timer buffer between issue and execute. */ 138 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 139 140 /** Sets the global time buffer. */ 141 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 142 143 /** Switches out the instruction queue. */ 144 void switchOut(); 145 146 /** Takes over execution from another CPU's thread. */ 147 void takeOverFrom(); 148 149 /** Returns if the IQ is switched out. */ 150 bool isSwitchedOut() { return switchedOut; } 151 152 /** Number of entries needed for given amount of threads. */ 153 int entryAmount(int num_threads); 154 155 /** Resets max entries for all threads. */ 156 void resetEntries(); 157 158 /** Returns total number of free entries. */ 159 unsigned numFreeEntries(); 160 161 /** Returns number of free entries for a thread. */ 162 unsigned numFreeEntries(unsigned tid); 163 164 /** Returns whether or not the IQ is full. */ 165 bool isFull(); 166 167 /** Returns whether or not the IQ is full for a specific thread. */ 168 bool isFull(unsigned tid); 169 170 /** Returns if there are any ready instructions in the IQ. */ 171 bool hasReadyInsts(); 172 173 /** Inserts a new instruction into the IQ. */ 174 void insert(DynInstPtr &new_inst); 175 176 /** Inserts a new, non-speculative instruction into the IQ. */ 177 void insertNonSpec(DynInstPtr &new_inst); 178 179 /** Inserts a memory or write barrier into the IQ to make sure 180 * loads and stores are ordered properly. 181 */ 182 void insertBarrier(DynInstPtr &barr_inst); 183 184 /** Returns the oldest scheduled instruction, and removes it from 185 * the list of instructions waiting to execute. 186 */ 187 DynInstPtr getInstToExecute(); 188 189 /** 190 * Records the instruction as the producer of a register without 191 * adding it to the rest of the IQ. 192 */ 193 void recordProducer(DynInstPtr &inst) 194 { addToProducers(inst); } 195 196 /** Process FU completion event. */ 197 void processFUCompletion(DynInstPtr &inst, int fu_idx); 198 199 /** 200 * Schedules ready instructions, adding the ready ones (oldest first) to 201 * the queue to execute. 202 */ 203 void scheduleReadyInsts(); 204 205 /** Schedules a single specific non-speculative instruction. */ 206 void scheduleNonSpec(const InstSeqNum &inst); 207 208 /** 209 * Commits all instructions up to and including the given sequence number, 210 * for a specific thread. 211 */ 212 void commit(const InstSeqNum &inst, unsigned tid = 0); 213 214 /** Wakes all dependents of a completed instruction. */ 215 int wakeDependents(DynInstPtr &completed_inst); 216 217 /** Adds a ready memory instruction to the ready list. */ 218 void addReadyMemInst(DynInstPtr &ready_inst); 219 220 /** 221 * Reschedules a memory instruction. It will be ready to issue once 222 * replayMemInst() is called. 223 */ 224 void rescheduleMemInst(DynInstPtr &resched_inst); 225 226 /** Replays a memory instruction. It must be rescheduled first. */ 227 void replayMemInst(DynInstPtr &replay_inst); 228 229 /** Completes a memory operation. */ 230 void completeMemInst(DynInstPtr &completed_inst); 231 232 /** Indicates an ordering violation between a store and a load. */ 233 void violation(DynInstPtr &store, DynInstPtr &faulting_load); 234 235 /** 236 * Squashes instructions for a thread. Squashing information is obtained 237 * from the time buffer. 238 */ 239 void squash(unsigned tid); 240 241 /** Returns the number of used entries for a thread. */ 242 unsigned getCount(unsigned tid) { return count[tid]; }; 243 244 /** Debug function to print all instructions. */ 245 void printInsts(); 246 247 private: 248 /** Does the actual squashing. */ 249 void doSquash(unsigned tid); 250 251 ///////////////////////// 252 // Various pointers 253 ///////////////////////// 254 255 /** Pointer to the CPU. */ 256 O3CPU *cpu; 257 258 /** Cache interface. */ 259 MemInterface *dcacheInterface; 260 261 /** Pointer to IEW stage. */ 262 IEW *iewStage; 263 264 /** The memory dependence unit, which tracks/predicts memory dependences 265 * between instructions. 266 */ 267 MemDepUnit memDepUnit[Impl::MaxThreads]; 268 269 /** The queue to the execute stage. Issued instructions will be written 270 * into it. 271 */ 272 TimeBuffer<IssueStruct> *issueToExecuteQueue; 273 274 /** The backwards time buffer. */ 275 TimeBuffer<TimeStruct> *timeBuffer; 276 277 /** Wire to read information from timebuffer. */ 278 typename TimeBuffer<TimeStruct>::wire fromCommit; 279 280 /** Function unit pool. */ 281 FUPool *fuPool; 282 283 ////////////////////////////////////// 284 // Instruction lists, ready queues, and ordering 285 ////////////////////////////////////// 286 287 /** List of all the instructions in the IQ (some of which may be issued). */ 288 std::list<DynInstPtr> instList[Impl::MaxThreads]; 289 290 /** List of instructions that are ready to be executed. */ 291 std::list<DynInstPtr> instsToExecute; 292 293 /** 294 * Struct for comparing entries to be added to the priority queue. 295 * This gives reverse ordering to the instructions in terms of 296 * sequence numbers: the instructions with smaller sequence 297 * numbers (and hence are older) will be at the top of the 298 * priority queue. 299 */ 300 struct pqCompare { 301 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 302 { 303 return lhs->seqNum > rhs->seqNum; 304 } 305 }; 306 307 typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> 308 ReadyInstQueue; 309 310 /** List of ready instructions, per op class. They are separated by op 311 * class to allow for easy mapping to FUs. 312 */ 313 ReadyInstQueue readyInsts[Num_OpClasses]; 314 315 /** List of non-speculative instructions that will be scheduled 316 * once the IQ gets a signal from commit. While it's redundant to 317 * have the key be a part of the value (the sequence number is stored 318 * inside of DynInst), when these instructions are woken up only 319 * the sequence number will be available. Thus it is most efficient to be 320 * able to search by the sequence number alone. 321 */ 322 std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 323 324 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt; 325 326 /** Entry for the list age ordering by op class. */ 327 struct ListOrderEntry { 328 OpClass queueType; 329 InstSeqNum oldestInst; 330 }; 331 332 /** List that contains the age order of the oldest instruction of each 333 * ready queue. Used to select the oldest instruction available 334 * among op classes. 335 * @todo: Might be better to just move these entries around instead 336 * of creating new ones every time the position changes due to an 337 * instruction issuing. Not sure std::list supports this. 338 */ 339 std::list<ListOrderEntry> listOrder; 340 341 typedef typename std::list<ListOrderEntry>::iterator ListOrderIt; 342 343 /** Tracks if each ready queue is on the age order list. */ 344 bool queueOnList[Num_OpClasses]; 345 346 /** Iterators of each ready queue. Points to their spot in the age order 347 * list. 348 */ 349 ListOrderIt readyIt[Num_OpClasses]; 350 351 /** Add an op class to the age order list. */ 352 void addToOrderList(OpClass op_class); 353 354 /** 355 * Called when the oldest instruction has been removed from a ready queue; 356 * this places that ready queue into the proper spot in the age order list. 357 */ 358 void moveToYoungerInst(ListOrderIt age_order_it); 359 360 DependencyGraph<DynInstPtr> dependGraph; 361 362 ////////////////////////////////////// 363 // Various parameters 364 ////////////////////////////////////// 365 366 /** IQ Resource Sharing Policy */ 367 enum IQPolicy { 368 Dynamic, 369 Partitioned, 370 Threshold 371 }; 372 373 /** IQ sharing policy for SMT. */ 374 IQPolicy iqPolicy; 375 376 /** Number of Total Threads*/ 377 unsigned numThreads; 378 379 /** Pointer to list of active threads. */ 380 std::list<unsigned> *activeThreads; 381 382 /** Per Thread IQ count */ 383 unsigned count[Impl::MaxThreads]; 384 385 /** Max IQ Entries Per Thread */ 386 unsigned maxEntries[Impl::MaxThreads]; 387 388 /** Number of free IQ entries left. */ 389 unsigned freeEntries; 390 391 /** The number of entries in the instruction queue. */ 392 unsigned numEntries; 393 394 /** The total number of instructions that can be issued in one cycle. */ 395 unsigned totalWidth; 396 397 /** The number of physical registers in the CPU. */ 398 unsigned numPhysRegs; 399 400 /** The number of physical integer registers in the CPU. */ 401 unsigned numPhysIntRegs; 402 403 /** The number of floating point registers in the CPU. */ 404 unsigned numPhysFloatRegs; 405 406 /** Delay between commit stage and the IQ. 407 * @todo: Make there be a distinction between the delays within IEW. 408 */ 409 unsigned commitToIEWDelay; 410 411 /** Is the IQ switched out. */ 412 bool switchedOut; 413 414 /** The sequence number of the squashed instruction. */ 415 InstSeqNum squashedSeqNum[Impl::MaxThreads]; 416 417 /** A cache of the recently woken registers. It is 1 if the register 418 * has been woken up recently, and 0 if the register has been added 419 * to the dependency graph and has not yet received its value. It 420 * is basically a secondary scoreboard, and should pretty much mirror 421 * the scoreboard that exists in the rename map. 422 */ 423 std::vector<bool> regScoreboard; 424 425 /** Adds an instruction to the dependency graph, as a consumer. */ 426 bool addToDependents(DynInstPtr &new_inst); 427 428 /** Adds an instruction to the dependency graph, as a producer. */ 429 void addToProducers(DynInstPtr &new_inst); 430 431 /** Moves an instruction to the ready queue if it is ready. */ 432 void addIfReady(DynInstPtr &inst); 433 434 /** Debugging function to count how many entries are in the IQ. It does 435 * a linear walk through the instructions, so do not call this function 436 * during normal execution. 437 */ 438 int countInsts(); 439 440 /** Debugging function to dump all the list sizes, as well as print 441 * out the list of nonspeculative instructions. Should not be used 442 * in any other capacity, but it has no harmful sideaffects. 443 */ 444 void dumpLists(); 445 446 /** Debugging function to dump out all instructions that are in the 447 * IQ. 448 */ 449 void dumpInsts(); 450 451 /** Stat for number of instructions added. */ 452 Stats::Scalar<> iqInstsAdded; 453 /** Stat for number of non-speculative instructions added. */ 454 Stats::Scalar<> iqNonSpecInstsAdded; 455 456 Stats::Scalar<> iqInstsIssued; 457 /** Stat for number of integer instructions issued. */ 458 Stats::Scalar<> iqIntInstsIssued; 459 /** Stat for number of floating point instructions issued. */ 460 Stats::Scalar<> iqFloatInstsIssued; 461 /** Stat for number of branch instructions issued. */ 462 Stats::Scalar<> iqBranchInstsIssued; 463 /** Stat for number of memory instructions issued. */ 464 Stats::Scalar<> iqMemInstsIssued; 465 /** Stat for number of miscellaneous instructions issued. */ 466 Stats::Scalar<> iqMiscInstsIssued; 467 /** Stat for number of squashed instructions that were ready to issue. */ 468 Stats::Scalar<> iqSquashedInstsIssued; 469 /** Stat for number of squashed instructions examined when squashing. */ 470 Stats::Scalar<> iqSquashedInstsExamined; 471 /** Stat for number of squashed instruction operands examined when 472 * squashing. 473 */ 474 Stats::Scalar<> iqSquashedOperandsExamined; 475 /** Stat for number of non-speculative instructions removed due to a squash. 476 */ 477 Stats::Scalar<> iqSquashedNonSpecRemoved; 478 // Also include number of instructions rescheduled and replayed. 479 480 /** Distribution of number of instructions in the queue. 481 * @todo: Need to create struct to track the entry time for each 482 * instruction. */ 483 Stats::VectorDistribution<> queueResDist; 484 /** Distribution of the number of instructions issued. */ 485 Stats::Distribution<> numIssuedDist; 486 /** Distribution of the cycles it takes to issue an instruction. 487 * @todo: Need to create struct to track the ready time for each 488 * instruction. */ 489 Stats::VectorDistribution<> issueDelayDist; 490 491 /** Number of times an instruction could not be issued because a 492 * FU was busy. 493 */ 494 Stats::Vector<> statFuBusy; 495// Stats::Vector<> dist_unissued; 496 /** Stat for total number issued for each instruction type. */ 497 Stats::Vector2d<> statIssuedInstType; 498 499 /** Number of instructions issued per cycle. */ 500 Stats::Formula issueRate; 501 502 /** Number of times the FU was busy. */ 503 Stats::Vector<> fuBusy; 504 /** Number of times the FU was busy per instruction issued. */ 505 Stats::Formula fuBusyRate; 506}; 507 508#endif //__CPU_O3_INST_QUEUE_HH__ 509