inst_queue.hh revision 2669
11689SN/A/* 22326SN/A * Copyright (c) 2004-2006 The Regents of The University of Michigan 31689SN/A * All rights reserved. 41689SN/A * 51689SN/A * Redistribution and use in source and binary forms, with or without 61689SN/A * modification, are permitted provided that the following conditions are 71689SN/A * met: redistributions of source code must retain the above copyright 81689SN/A * notice, this list of conditions and the following disclaimer; 91689SN/A * redistributions in binary form must reproduce the above copyright 101689SN/A * notice, this list of conditions and the following disclaimer in the 111689SN/A * documentation and/or other materials provided with the distribution; 121689SN/A * neither the name of the copyright holders nor the names of its 131689SN/A * contributors may be used to endorse or promote products derived from 141689SN/A * this software without specific prior written permission. 151689SN/A * 161689SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 171689SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 181689SN/A * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 191689SN/A * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 201689SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 211689SN/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 221689SN/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 231689SN/A * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 241689SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 251689SN/A * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 261689SN/A * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 271689SN/A */ 281689SN/A 292292SN/A#ifndef __CPU_O3_INST_QUEUE_HH__ 302292SN/A#define __CPU_O3_INST_QUEUE_HH__ 311060SN/A 321060SN/A#include <list> 331061SN/A#include <map> 341060SN/A#include <queue> 351061SN/A#include <vector> 361060SN/A 371062SN/A#include "base/statistics.hh" 381060SN/A#include "base/timebuf.hh" 391061SN/A#include "cpu/inst_seq.hh" 402326SN/A#include "cpu/o3/dep_graph.hh" 412669Sktlim@umich.edu#include "cpu/op_class.hh" 421710SN/A#include "sim/host.hh" 431060SN/A 442292SN/Aclass FUPool; 452292SN/Aclass MemInterface; 462292SN/A 471060SN/A/** 481689SN/A * A standard instruction queue class. It holds ready instructions, in 491689SN/A * order, in seperate priority queues to facilitate the scheduling of 501689SN/A * instructions. The IQ uses a separate linked list to track dependencies. 511689SN/A * Similar to the rename map and the free list, it expects that 521060SN/A * floating point registers have their indices start after the integer 531060SN/A * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 541060SN/A * and 96-191 are fp). This remains true even for both logical and 552292SN/A * physical register indices. The IQ depends on the memory dependence unit to 562292SN/A * track when memory operations are ready in terms of ordering; register 572292SN/A * dependencies are tracked normally. Right now the IQ also handles the 582292SN/A * execution timing; this is mainly to allow back-to-back scheduling without 592292SN/A * requiring IEW to be able to peek into the IQ. At the end of the execution 602292SN/A * latency, the instruction is put into the queue to execute, where it will 612292SN/A * have the execute() function called on it. 622292SN/A * @todo: Make IQ able to handle multiple FU pools. 631060SN/A */ 641061SN/Atemplate <class Impl> 651060SN/Aclass InstructionQueue 661060SN/A{ 671060SN/A public: 681060SN/A //Typedefs from the Impl. 691060SN/A typedef typename Impl::FullCPU FullCPU; 701061SN/A typedef typename Impl::DynInstPtr DynInstPtr; 711060SN/A typedef typename Impl::Params Params; 721060SN/A 732292SN/A typedef typename Impl::CPUPol::IEW IEW; 741061SN/A typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 751061SN/A typedef typename Impl::CPUPol::IssueStruct IssueStruct; 761061SN/A typedef typename Impl::CPUPol::TimeStruct TimeStruct; 771060SN/A 782292SN/A // Typedef of iterator through the list of instructions. 791061SN/A typedef typename std::list<DynInstPtr>::iterator ListIt; 801060SN/A 812292SN/A friend class Impl::FullCPU; 822292SN/A 832292SN/A /** FU completion event class. */ 842292SN/A class FUCompletion : public Event { 852292SN/A private: 862292SN/A /** Executing instruction. */ 872292SN/A DynInstPtr inst; 882292SN/A 892292SN/A /** Index of the FU used for executing. */ 902292SN/A int fuIdx; 912292SN/A 922292SN/A /** Pointer back to the instruction queue. */ 932292SN/A InstructionQueue<Impl> *iqPtr; 942292SN/A 952326SN/A bool freeFU; 962326SN/A 972292SN/A public: 982292SN/A /** Construct a FU completion event. */ 992292SN/A FUCompletion(DynInstPtr &_inst, int fu_idx, 1002292SN/A InstructionQueue<Impl> *iq_ptr); 1012292SN/A 1022292SN/A virtual void process(); 1032292SN/A virtual const char *description(); 1042326SN/A void setFreeFU() { freeFU = true; } 1051060SN/A }; 1061060SN/A 1072292SN/A /** Constructs an IQ. */ 1082292SN/A InstructionQueue(Params *params); 1091061SN/A 1102292SN/A /** Destructs the IQ. */ 1112292SN/A ~InstructionQueue(); 1121061SN/A 1132292SN/A /** Returns the name of the IQ. */ 1142292SN/A std::string name() const; 1151060SN/A 1162292SN/A /** Registers statistics. */ 1171062SN/A void regStats(); 1181062SN/A 1192307SN/A void resetState(); 1201060SN/A 1212292SN/A /** Sets CPU pointer. */ 1222292SN/A void setCPU(FullCPU *_cpu) { cpu = _cpu; } 1231755SN/A 1242292SN/A /** Sets active threads list. */ 1252292SN/A void setActiveThreads(std::list<unsigned> *at_ptr); 1262292SN/A 1272292SN/A /** Sets the IEW pointer. */ 1282292SN/A void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } 1292292SN/A 1302292SN/A /** Sets the timer buffer between issue and execute. */ 1311060SN/A void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 1321060SN/A 1332292SN/A /** Sets the global time buffer. */ 1341060SN/A void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 1351060SN/A 1362307SN/A void switchOut(); 1372307SN/A 1382307SN/A void takeOverFrom(); 1392307SN/A 1402307SN/A bool isSwitchedOut() { return switchedOut; } 1412307SN/A 1422292SN/A /** Number of entries needed for given amount of threads. */ 1432292SN/A int entryAmount(int num_threads); 1442292SN/A 1452292SN/A /** Resets max entries for all threads. */ 1462292SN/A void resetEntries(); 1472292SN/A 1482292SN/A /** Returns total number of free entries. */ 1491060SN/A unsigned numFreeEntries(); 1501060SN/A 1512292SN/A /** Returns number of free entries for a thread. */ 1522292SN/A unsigned numFreeEntries(unsigned tid); 1532292SN/A 1542292SN/A /** Returns whether or not the IQ is full. */ 1551060SN/A bool isFull(); 1561060SN/A 1572292SN/A /** Returns whether or not the IQ is full for a specific thread. */ 1582292SN/A bool isFull(unsigned tid); 1592292SN/A 1602292SN/A /** Returns if there are any ready instructions in the IQ. */ 1612292SN/A bool hasReadyInsts(); 1622292SN/A 1632292SN/A /** Inserts a new instruction into the IQ. */ 1641061SN/A void insert(DynInstPtr &new_inst); 1651060SN/A 1662292SN/A /** Inserts a new, non-speculative instruction into the IQ. */ 1671061SN/A void insertNonSpec(DynInstPtr &new_inst); 1681061SN/A 1692292SN/A /** Inserts a memory or write barrier into the IQ to make sure 1702292SN/A * loads and stores are ordered properly. 1712292SN/A */ 1722292SN/A void insertBarrier(DynInstPtr &barr_inst); 1731060SN/A 1742333SN/A DynInstPtr getInstToExecute(); 1752333SN/A 1762292SN/A /** 1772326SN/A * Records the instruction as the producer of a register without 1782326SN/A * adding it to the rest of the IQ. 1792292SN/A */ 1802326SN/A void recordProducer(DynInstPtr &inst) 1812326SN/A { addToProducers(inst); } 1821755SN/A 1832292SN/A /** Process FU completion event. */ 1842292SN/A void processFUCompletion(DynInstPtr &inst, int fu_idx); 1852292SN/A 1862292SN/A /** 1872292SN/A * Schedules ready instructions, adding the ready ones (oldest first) to 1882292SN/A * the queue to execute. 1892292SN/A */ 1901060SN/A void scheduleReadyInsts(); 1911060SN/A 1922292SN/A /** Schedules a single specific non-speculative instruction. */ 1931061SN/A void scheduleNonSpec(const InstSeqNum &inst); 1941061SN/A 1952292SN/A /** 1962292SN/A * Commits all instructions up to and including the given sequence number, 1972292SN/A * for a specific thread. 1982292SN/A */ 1992292SN/A void commit(const InstSeqNum &inst, unsigned tid = 0); 2001061SN/A 2012292SN/A /** Wakes all dependents of a completed instruction. */ 2022301SN/A int wakeDependents(DynInstPtr &completed_inst); 2031755SN/A 2042292SN/A /** Adds a ready memory instruction to the ready list. */ 2052292SN/A void addReadyMemInst(DynInstPtr &ready_inst); 2062292SN/A 2072292SN/A /** 2082292SN/A * Reschedules a memory instruction. It will be ready to issue once 2092292SN/A * replayMemInst() is called. 2102292SN/A */ 2112292SN/A void rescheduleMemInst(DynInstPtr &resched_inst); 2122292SN/A 2132292SN/A /** Replays a memory instruction. It must be rescheduled first. */ 2142292SN/A void replayMemInst(DynInstPtr &replay_inst); 2152292SN/A 2162292SN/A /** Completes a memory operation. */ 2172292SN/A void completeMemInst(DynInstPtr &completed_inst); 2182292SN/A 2192292SN/A /** Indicates an ordering violation between a store and a load. */ 2201061SN/A void violation(DynInstPtr &store, DynInstPtr &faulting_load); 2211061SN/A 2222292SN/A /** 2232292SN/A * Squashes instructions for a thread. Squashing information is obtained 2242292SN/A * from the time buffer. 2252292SN/A */ 2262292SN/A void squash(unsigned tid); 2271060SN/A 2282292SN/A /** Returns the number of used entries for a thread. */ 2292292SN/A unsigned getCount(unsigned tid) { return count[tid]; }; 2301060SN/A 2312292SN/A /** Debug function to print all instructions. */ 2322292SN/A void printInsts(); 2331060SN/A 2341060SN/A private: 2352292SN/A /** Does the actual squashing. */ 2362292SN/A void doSquash(unsigned tid); 2372292SN/A 2382292SN/A ///////////////////////// 2392292SN/A // Various pointers 2402292SN/A ///////////////////////// 2412292SN/A 2421060SN/A /** Pointer to the CPU. */ 2431060SN/A FullCPU *cpu; 2441060SN/A 2452292SN/A /** Cache interface. */ 2462292SN/A MemInterface *dcacheInterface; 2472292SN/A 2482292SN/A /** Pointer to IEW stage. */ 2492292SN/A IEW *iewStage; 2502292SN/A 2511061SN/A /** The memory dependence unit, which tracks/predicts memory dependences 2521061SN/A * between instructions. 2531061SN/A */ 2542292SN/A MemDepUnit memDepUnit[Impl::MaxThreads]; 2551061SN/A 2561060SN/A /** The queue to the execute stage. Issued instructions will be written 2571060SN/A * into it. 2581060SN/A */ 2591060SN/A TimeBuffer<IssueStruct> *issueToExecuteQueue; 2601060SN/A 2611060SN/A /** The backwards time buffer. */ 2621060SN/A TimeBuffer<TimeStruct> *timeBuffer; 2631060SN/A 2641060SN/A /** Wire to read information from timebuffer. */ 2651060SN/A typename TimeBuffer<TimeStruct>::wire fromCommit; 2661060SN/A 2672292SN/A /** Function unit pool. */ 2682292SN/A FUPool *fuPool; 2692292SN/A 2702292SN/A ////////////////////////////////////// 2712292SN/A // Instruction lists, ready queues, and ordering 2722292SN/A ////////////////////////////////////// 2732292SN/A 2742292SN/A /** List of all the instructions in the IQ (some of which may be issued). */ 2752292SN/A std::list<DynInstPtr> instList[Impl::MaxThreads]; 2762292SN/A 2772333SN/A std::list<DynInstPtr> instsToExecute; 2782333SN/A 2792292SN/A /** 2802292SN/A * Struct for comparing entries to be added to the priority queue. This 2812292SN/A * gives reverse ordering to the instructions in terms of sequence 2822292SN/A * numbers: the instructions with smaller sequence numbers (and hence 2832292SN/A * are older) will be at the top of the priority queue. 2842292SN/A */ 2852292SN/A struct pqCompare { 2862292SN/A bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 2872292SN/A { 2882292SN/A return lhs->seqNum > rhs->seqNum; 2892292SN/A } 2901060SN/A }; 2911060SN/A 2922292SN/A typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> 2932292SN/A ReadyInstQueue; 2941755SN/A 2952292SN/A /** List of ready instructions, per op class. They are separated by op 2962292SN/A * class to allow for easy mapping to FUs. 2971061SN/A */ 2982292SN/A ReadyInstQueue readyInsts[Num_OpClasses]; 2991061SN/A 3001061SN/A /** List of non-speculative instructions that will be scheduled 3011061SN/A * once the IQ gets a signal from commit. While it's redundant to 3021061SN/A * have the key be a part of the value (the sequence number is stored 3031061SN/A * inside of DynInst), when these instructions are woken up only 3041681SN/A * the sequence number will be available. Thus it is most efficient to be 3051061SN/A * able to search by the sequence number alone. 3061061SN/A */ 3071061SN/A std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 3081061SN/A 3092292SN/A typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt; 3102292SN/A 3112292SN/A /** Entry for the list age ordering by op class. */ 3122292SN/A struct ListOrderEntry { 3132292SN/A OpClass queueType; 3142292SN/A InstSeqNum oldestInst; 3152292SN/A }; 3162292SN/A 3172292SN/A /** List that contains the age order of the oldest instruction of each 3182292SN/A * ready queue. Used to select the oldest instruction available 3192292SN/A * among op classes. 3202326SN/A * @todo: Might be better to just move these entries around instead 3212326SN/A * of creating new ones every time the position changes due to an 3222326SN/A * instruction issuing. Not sure std::list supports this. 3232292SN/A */ 3242292SN/A std::list<ListOrderEntry> listOrder; 3252292SN/A 3262292SN/A typedef typename std::list<ListOrderEntry>::iterator ListOrderIt; 3272292SN/A 3282292SN/A /** Tracks if each ready queue is on the age order list. */ 3292292SN/A bool queueOnList[Num_OpClasses]; 3302292SN/A 3312292SN/A /** Iterators of each ready queue. Points to their spot in the age order 3322292SN/A * list. 3332292SN/A */ 3342292SN/A ListOrderIt readyIt[Num_OpClasses]; 3352292SN/A 3362292SN/A /** Add an op class to the age order list. */ 3372292SN/A void addToOrderList(OpClass op_class); 3382292SN/A 3392292SN/A /** 3402292SN/A * Called when the oldest instruction has been removed from a ready queue; 3412292SN/A * this places that ready queue into the proper spot in the age order list. 3422292SN/A */ 3432292SN/A void moveToYoungerInst(ListOrderIt age_order_it); 3442292SN/A 3452326SN/A DependencyGraph<DynInstPtr> dependGraph; 3462326SN/A 3472292SN/A ////////////////////////////////////// 3482292SN/A // Various parameters 3492292SN/A ////////////////////////////////////// 3502292SN/A 3512292SN/A /** IQ Resource Sharing Policy */ 3522292SN/A enum IQPolicy { 3532292SN/A Dynamic, 3542292SN/A Partitioned, 3552292SN/A Threshold 3562292SN/A }; 3572292SN/A 3582292SN/A /** IQ sharing policy for SMT. */ 3592292SN/A IQPolicy iqPolicy; 3602292SN/A 3612292SN/A /** Number of Total Threads*/ 3622292SN/A unsigned numThreads; 3632292SN/A 3642292SN/A /** Pointer to list of active threads. */ 3652292SN/A std::list<unsigned> *activeThreads; 3662292SN/A 3672292SN/A /** Per Thread IQ count */ 3682292SN/A unsigned count[Impl::MaxThreads]; 3692292SN/A 3702292SN/A /** Max IQ Entries Per Thread */ 3712292SN/A unsigned maxEntries[Impl::MaxThreads]; 3721060SN/A 3731060SN/A /** Number of free IQ entries left. */ 3741060SN/A unsigned freeEntries; 3751060SN/A 3761060SN/A /** The number of entries in the instruction queue. */ 3771060SN/A unsigned numEntries; 3781060SN/A 3791060SN/A /** The total number of instructions that can be issued in one cycle. */ 3801060SN/A unsigned totalWidth; 3811060SN/A 3822292SN/A /** The number of physical registers in the CPU. */ 3831060SN/A unsigned numPhysRegs; 3841060SN/A 3851060SN/A /** The number of physical integer registers in the CPU. */ 3861060SN/A unsigned numPhysIntRegs; 3871060SN/A 3881060SN/A /** The number of floating point registers in the CPU. */ 3891060SN/A unsigned numPhysFloatRegs; 3901060SN/A 3911060SN/A /** Delay between commit stage and the IQ. 3921060SN/A * @todo: Make there be a distinction between the delays within IEW. 3931060SN/A */ 3941060SN/A unsigned commitToIEWDelay; 3951060SN/A 3962307SN/A bool switchedOut; 3971060SN/A 3981060SN/A /** The sequence number of the squashed instruction. */ 3992292SN/A InstSeqNum squashedSeqNum[Impl::MaxThreads]; 4001060SN/A 4011060SN/A /** A cache of the recently woken registers. It is 1 if the register 4021060SN/A * has been woken up recently, and 0 if the register has been added 4031060SN/A * to the dependency graph and has not yet received its value. It 4041060SN/A * is basically a secondary scoreboard, and should pretty much mirror 4051060SN/A * the scoreboard that exists in the rename map. 4061060SN/A */ 4072292SN/A std::vector<bool> regScoreboard; 4081060SN/A 4092326SN/A /** Adds an instruction to the dependency graph, as a consumer. */ 4101061SN/A bool addToDependents(DynInstPtr &new_inst); 4111684SN/A 4122326SN/A /** Adds an instruction to the dependency graph, as a producer. */ 4132326SN/A void addToProducers(DynInstPtr &new_inst); 4141755SN/A 4152292SN/A /** Moves an instruction to the ready queue if it is ready. */ 4161684SN/A void addIfReady(DynInstPtr &inst); 4171684SN/A 4181684SN/A /** Debugging function to count how many entries are in the IQ. It does 4191684SN/A * a linear walk through the instructions, so do not call this function 4201684SN/A * during normal execution. 4211684SN/A */ 4221684SN/A int countInsts(); 4231684SN/A 4241684SN/A /** Debugging function to dump all the list sizes, as well as print 4251684SN/A * out the list of nonspeculative instructions. Should not be used 4261684SN/A * in any other capacity, but it has no harmful sideaffects. 4271684SN/A */ 4281684SN/A void dumpLists(); 4291062SN/A 4302292SN/A /** Debugging function to dump out all instructions that are in the 4312292SN/A * IQ. 4322292SN/A */ 4332292SN/A void dumpInsts(); 4342292SN/A 4352292SN/A /** Stat for number of instructions added. */ 4361062SN/A Stats::Scalar<> iqInstsAdded; 4372292SN/A /** Stat for number of non-speculative instructions added. */ 4381062SN/A Stats::Scalar<> iqNonSpecInstsAdded; 4392326SN/A 4402301SN/A Stats::Scalar<> iqInstsIssued; 4412292SN/A /** Stat for number of integer instructions issued. */ 4421062SN/A Stats::Scalar<> iqIntInstsIssued; 4432292SN/A /** Stat for number of floating point instructions issued. */ 4441062SN/A Stats::Scalar<> iqFloatInstsIssued; 4452292SN/A /** Stat for number of branch instructions issued. */ 4461062SN/A Stats::Scalar<> iqBranchInstsIssued; 4472292SN/A /** Stat for number of memory instructions issued. */ 4481062SN/A Stats::Scalar<> iqMemInstsIssued; 4492292SN/A /** Stat for number of miscellaneous instructions issued. */ 4501062SN/A Stats::Scalar<> iqMiscInstsIssued; 4512292SN/A /** Stat for number of squashed instructions that were ready to issue. */ 4521062SN/A Stats::Scalar<> iqSquashedInstsIssued; 4532292SN/A /** Stat for number of squashed instructions examined when squashing. */ 4541062SN/A Stats::Scalar<> iqSquashedInstsExamined; 4552292SN/A /** Stat for number of squashed instruction operands examined when 4562292SN/A * squashing. 4572292SN/A */ 4581062SN/A Stats::Scalar<> iqSquashedOperandsExamined; 4592292SN/A /** Stat for number of non-speculative instructions removed due to a squash. 4602292SN/A */ 4611062SN/A Stats::Scalar<> iqSquashedNonSpecRemoved; 4621062SN/A 4632326SN/A Stats::VectorDistribution<> queueResDist; 4642326SN/A Stats::Distribution<> numIssuedDist; 4652326SN/A Stats::VectorDistribution<> issueDelayDist; 4662301SN/A 4672326SN/A Stats::Vector<> statFuBusy; 4682301SN/A// Stats::Vector<> dist_unissued; 4692326SN/A Stats::Vector2d<> statIssuedInstType; 4702301SN/A 4712326SN/A Stats::Formula issueRate; 4722301SN/A// Stats::Formula issue_stores; 4732301SN/A// Stats::Formula issue_op_rate; 4742326SN/A Stats::Vector<> fuBusy; //cumulative fu busy 4752301SN/A 4762326SN/A Stats::Formula fuBusyRate; 4771060SN/A}; 4781060SN/A 4792292SN/A#endif //__CPU_O3_INST_QUEUE_HH__ 480