// inst_queue.hh revision 2669
/*
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __CPU_O3_INST_QUEUE_HH__
#define __CPU_O3_INST_QUEUE_HH__

#include <list>
#include <map>
#include <queue>
#include <string>
#include <vector>

#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dep_graph.hh"
#include "cpu/op_class.hh"
#include "sim/host.hh"

class FUPool;
class MemInterface;

471060SN/A/**
481689SN/A * A standard instruction queue class.  It holds ready instructions, in
491689SN/A * order, in seperate priority queues to facilitate the scheduling of
501689SN/A * instructions.  The IQ uses a separate linked list to track dependencies.
511689SN/A * Similar to the rename map and the free list, it expects that
521060SN/A * floating point registers have their indices start after the integer
531060SN/A * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
541060SN/A * and 96-191 are fp).  This remains true even for both logical and
552292SN/A * physical register indices. The IQ depends on the memory dependence unit to
562292SN/A * track when memory operations are ready in terms of ordering; register
572292SN/A * dependencies are tracked normally. Right now the IQ also handles the
582292SN/A * execution timing; this is mainly to allow back-to-back scheduling without
592292SN/A * requiring IEW to be able to peek into the IQ. At the end of the execution
602292SN/A * latency, the instruction is put into the queue to execute, where it will
612292SN/A * have the execute() function called on it.
622292SN/A * @todo: Make IQ able to handle multiple FU pools.
631060SN/A */
641061SN/Atemplate <class Impl>
651060SN/Aclass InstructionQueue
661060SN/A{
671060SN/A  public:
681060SN/A    //Typedefs from the Impl.
691060SN/A    typedef typename Impl::FullCPU FullCPU;
701061SN/A    typedef typename Impl::DynInstPtr DynInstPtr;
711060SN/A    typedef typename Impl::Params Params;
721060SN/A
732292SN/A    typedef typename Impl::CPUPol::IEW IEW;
741061SN/A    typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
751061SN/A    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
761061SN/A    typedef typename Impl::CPUPol::TimeStruct TimeStruct;
771060SN/A
782292SN/A    // Typedef of iterator through the list of instructions.
791061SN/A    typedef typename std::list<DynInstPtr>::iterator ListIt;
801060SN/A
812292SN/A    friend class Impl::FullCPU;
822292SN/A
832292SN/A    /** FU completion event class. */
842292SN/A    class FUCompletion : public Event {
852292SN/A      private:
862292SN/A        /** Executing instruction. */
872292SN/A        DynInstPtr inst;
882292SN/A
892292SN/A        /** Index of the FU used for executing. */
902292SN/A        int fuIdx;
912292SN/A
922292SN/A        /** Pointer back to the instruction queue. */
932292SN/A        InstructionQueue<Impl> *iqPtr;
942292SN/A
952326SN/A        bool freeFU;
962326SN/A
972292SN/A      public:
982292SN/A        /** Construct a FU completion event. */
992292SN/A        FUCompletion(DynInstPtr &_inst, int fu_idx,
1002292SN/A                     InstructionQueue<Impl> *iq_ptr);
1012292SN/A
1022292SN/A        virtual void process();
1032292SN/A        virtual const char *description();
1042326SN/A        void setFreeFU() { freeFU = true; }
1051060SN/A    };
1061060SN/A
1072292SN/A    /** Constructs an IQ. */
1082292SN/A    InstructionQueue(Params *params);
1091061SN/A
1102292SN/A    /** Destructs the IQ. */
1112292SN/A    ~InstructionQueue();
1121061SN/A
1132292SN/A    /** Returns the name of the IQ. */
1142292SN/A    std::string name() const;
1151060SN/A
1162292SN/A    /** Registers statistics. */
1171062SN/A    void regStats();
1181062SN/A
1192307SN/A    void resetState();
1201060SN/A
1212292SN/A    /** Sets CPU pointer. */
1222292SN/A    void setCPU(FullCPU *_cpu) { cpu = _cpu; }
1231755SN/A
1242292SN/A    /** Sets active threads list. */
1252292SN/A    void setActiveThreads(std::list<unsigned> *at_ptr);
1262292SN/A
1272292SN/A    /** Sets the IEW pointer. */
1282292SN/A    void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
1292292SN/A
1302292SN/A    /** Sets the timer buffer between issue and execute. */
1311060SN/A    void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
1321060SN/A
1332292SN/A    /** Sets the global time buffer. */
1341060SN/A    void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
1351060SN/A
1362307SN/A    void switchOut();
1372307SN/A
1382307SN/A    void takeOverFrom();
1392307SN/A
1402307SN/A    bool isSwitchedOut() { return switchedOut; }
1412307SN/A
1422292SN/A    /** Number of entries needed for given amount of threads. */
1432292SN/A    int entryAmount(int num_threads);
1442292SN/A
1452292SN/A    /** Resets max entries for all threads. */
1462292SN/A    void resetEntries();
1472292SN/A
1482292SN/A    /** Returns total number of free entries. */
1491060SN/A    unsigned numFreeEntries();
1501060SN/A
1512292SN/A    /** Returns number of free entries for a thread. */
1522292SN/A    unsigned numFreeEntries(unsigned tid);
1532292SN/A
1542292SN/A    /** Returns whether or not the IQ is full. */
1551060SN/A    bool isFull();
1561060SN/A
1572292SN/A    /** Returns whether or not the IQ is full for a specific thread. */
1582292SN/A    bool isFull(unsigned tid);
1592292SN/A
1602292SN/A    /** Returns if there are any ready instructions in the IQ. */
1612292SN/A    bool hasReadyInsts();
1622292SN/A
1632292SN/A    /** Inserts a new instruction into the IQ. */
1641061SN/A    void insert(DynInstPtr &new_inst);
1651060SN/A
1662292SN/A    /** Inserts a new, non-speculative instruction into the IQ. */
1671061SN/A    void insertNonSpec(DynInstPtr &new_inst);
1681061SN/A
1692292SN/A    /** Inserts a memory or write barrier into the IQ to make sure
1702292SN/A     *  loads and stores are ordered properly.
1712292SN/A     */
1722292SN/A    void insertBarrier(DynInstPtr &barr_inst);
1731060SN/A
1742333SN/A    DynInstPtr getInstToExecute();
1752333SN/A
1762292SN/A    /**
1772326SN/A     * Records the instruction as the producer of a register without
1782326SN/A     * adding it to the rest of the IQ.
1792292SN/A     */
1802326SN/A    void recordProducer(DynInstPtr &inst)
1812326SN/A    { addToProducers(inst); }
1821755SN/A
1832292SN/A    /** Process FU completion event. */
1842292SN/A    void processFUCompletion(DynInstPtr &inst, int fu_idx);
1852292SN/A
1862292SN/A    /**
1872292SN/A     * Schedules ready instructions, adding the ready ones (oldest first) to
1882292SN/A     * the queue to execute.
1892292SN/A     */
1901060SN/A    void scheduleReadyInsts();
1911060SN/A
1922292SN/A    /** Schedules a single specific non-speculative instruction. */
1931061SN/A    void scheduleNonSpec(const InstSeqNum &inst);
1941061SN/A
1952292SN/A    /**
1962292SN/A     * Commits all instructions up to and including the given sequence number,
1972292SN/A     * for a specific thread.
1982292SN/A     */
1992292SN/A    void commit(const InstSeqNum &inst, unsigned tid = 0);
2001061SN/A
2012292SN/A    /** Wakes all dependents of a completed instruction. */
2022301SN/A    int wakeDependents(DynInstPtr &completed_inst);
2031755SN/A
2042292SN/A    /** Adds a ready memory instruction to the ready list. */
2052292SN/A    void addReadyMemInst(DynInstPtr &ready_inst);
2062292SN/A
2072292SN/A    /**
2082292SN/A     * Reschedules a memory instruction. It will be ready to issue once
2092292SN/A     * replayMemInst() is called.
2102292SN/A     */
2112292SN/A    void rescheduleMemInst(DynInstPtr &resched_inst);
2122292SN/A
2132292SN/A    /** Replays a memory instruction. It must be rescheduled first. */
2142292SN/A    void replayMemInst(DynInstPtr &replay_inst);
2152292SN/A
2162292SN/A    /** Completes a memory operation. */
2172292SN/A    void completeMemInst(DynInstPtr &completed_inst);
2182292SN/A
2192292SN/A    /** Indicates an ordering violation between a store and a load. */
2201061SN/A    void violation(DynInstPtr &store, DynInstPtr &faulting_load);
2211061SN/A
2222292SN/A    /**
2232292SN/A     * Squashes instructions for a thread. Squashing information is obtained
2242292SN/A     * from the time buffer.
2252292SN/A     */
2262292SN/A    void squash(unsigned tid);
2271060SN/A
2282292SN/A    /** Returns the number of used entries for a thread. */
2292292SN/A    unsigned getCount(unsigned tid) { return count[tid]; };
2301060SN/A
2312292SN/A    /** Debug function to print all instructions. */
2322292SN/A    void printInsts();
2331060SN/A
2341060SN/A  private:
2352292SN/A    /** Does the actual squashing. */
2362292SN/A    void doSquash(unsigned tid);
2372292SN/A
2382292SN/A    /////////////////////////
2392292SN/A    // Various pointers
2402292SN/A    /////////////////////////
2412292SN/A
2421060SN/A    /** Pointer to the CPU. */
2431060SN/A    FullCPU *cpu;
2441060SN/A
2452292SN/A    /** Cache interface. */
2462292SN/A    MemInterface *dcacheInterface;
2472292SN/A
2482292SN/A    /** Pointer to IEW stage. */
2492292SN/A    IEW *iewStage;
2502292SN/A
2511061SN/A    /** The memory dependence unit, which tracks/predicts memory dependences
2521061SN/A     *  between instructions.
2531061SN/A     */
2542292SN/A    MemDepUnit memDepUnit[Impl::MaxThreads];
2551061SN/A
2561060SN/A    /** The queue to the execute stage.  Issued instructions will be written
2571060SN/A     *  into it.
2581060SN/A     */
2591060SN/A    TimeBuffer<IssueStruct> *issueToExecuteQueue;
2601060SN/A
2611060SN/A    /** The backwards time buffer. */
2621060SN/A    TimeBuffer<TimeStruct> *timeBuffer;
2631060SN/A
2641060SN/A    /** Wire to read information from timebuffer. */
2651060SN/A    typename TimeBuffer<TimeStruct>::wire fromCommit;
2661060SN/A
2672292SN/A    /** Function unit pool. */
2682292SN/A    FUPool *fuPool;
2692292SN/A
2702292SN/A    //////////////////////////////////////
2712292SN/A    // Instruction lists, ready queues, and ordering
2722292SN/A    //////////////////////////////////////
2732292SN/A
2742292SN/A    /** List of all the instructions in the IQ (some of which may be issued). */
2752292SN/A    std::list<DynInstPtr> instList[Impl::MaxThreads];
2762292SN/A
2772333SN/A    std::list<DynInstPtr> instsToExecute;
2782333SN/A
2792292SN/A    /**
2802292SN/A     * Struct for comparing entries to be added to the priority queue.  This
2812292SN/A     * gives reverse ordering to the instructions in terms of sequence
2822292SN/A     * numbers: the instructions with smaller sequence numbers (and hence
2832292SN/A     * are older) will be at the top of the priority queue.
2842292SN/A     */
2852292SN/A    struct pqCompare {
2862292SN/A        bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
2872292SN/A        {
2882292SN/A            return lhs->seqNum > rhs->seqNum;
2892292SN/A        }
2901060SN/A    };
2911060SN/A
2922292SN/A    typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
2932292SN/A    ReadyInstQueue;
2941755SN/A
2952292SN/A    /** List of ready instructions, per op class.  They are separated by op
2962292SN/A     *  class to allow for easy mapping to FUs.
2971061SN/A     */
2982292SN/A    ReadyInstQueue readyInsts[Num_OpClasses];
2991061SN/A
3001061SN/A    /** List of non-speculative instructions that will be scheduled
3011061SN/A     *  once the IQ gets a signal from commit.  While it's redundant to
3021061SN/A     *  have the key be a part of the value (the sequence number is stored
3031061SN/A     *  inside of DynInst), when these instructions are woken up only
3041681SN/A     *  the sequence number will be available.  Thus it is most efficient to be
3051061SN/A     *  able to search by the sequence number alone.
3061061SN/A     */
3071061SN/A    std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
3081061SN/A
3092292SN/A    typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
3102292SN/A
3112292SN/A    /** Entry for the list age ordering by op class. */
3122292SN/A    struct ListOrderEntry {
3132292SN/A        OpClass queueType;
3142292SN/A        InstSeqNum oldestInst;
3152292SN/A    };
3162292SN/A
3172292SN/A    /** List that contains the age order of the oldest instruction of each
3182292SN/A     *  ready queue.  Used to select the oldest instruction available
3192292SN/A     *  among op classes.
3202326SN/A     *  @todo: Might be better to just move these entries around instead
3212326SN/A     *  of creating new ones every time the position changes due to an
3222326SN/A     *  instruction issuing.  Not sure std::list supports this.
3232292SN/A     */
3242292SN/A    std::list<ListOrderEntry> listOrder;
3252292SN/A
3262292SN/A    typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
3272292SN/A
3282292SN/A    /** Tracks if each ready queue is on the age order list. */
3292292SN/A    bool queueOnList[Num_OpClasses];
3302292SN/A
3312292SN/A    /** Iterators of each ready queue.  Points to their spot in the age order
3322292SN/A     *  list.
3332292SN/A     */
3342292SN/A    ListOrderIt readyIt[Num_OpClasses];
3352292SN/A
3362292SN/A    /** Add an op class to the age order list. */
3372292SN/A    void addToOrderList(OpClass op_class);
3382292SN/A
3392292SN/A    /**
3402292SN/A     * Called when the oldest instruction has been removed from a ready queue;
3412292SN/A     * this places that ready queue into the proper spot in the age order list.
3422292SN/A     */
3432292SN/A    void moveToYoungerInst(ListOrderIt age_order_it);
3442292SN/A
3452326SN/A    DependencyGraph<DynInstPtr> dependGraph;
3462326SN/A
3472292SN/A    //////////////////////////////////////
3482292SN/A    // Various parameters
3492292SN/A    //////////////////////////////////////
3502292SN/A
3512292SN/A    /** IQ Resource Sharing Policy */
3522292SN/A    enum IQPolicy {
3532292SN/A        Dynamic,
3542292SN/A        Partitioned,
3552292SN/A        Threshold
3562292SN/A    };
3572292SN/A
3582292SN/A    /** IQ sharing policy for SMT. */
3592292SN/A    IQPolicy iqPolicy;
3602292SN/A
3612292SN/A    /** Number of Total Threads*/
3622292SN/A    unsigned numThreads;
3632292SN/A
3642292SN/A    /** Pointer to list of active threads. */
3652292SN/A    std::list<unsigned> *activeThreads;
3662292SN/A
3672292SN/A    /** Per Thread IQ count */
3682292SN/A    unsigned count[Impl::MaxThreads];
3692292SN/A
3702292SN/A    /** Max IQ Entries Per Thread */
3712292SN/A    unsigned maxEntries[Impl::MaxThreads];
3721060SN/A
3731060SN/A    /** Number of free IQ entries left. */
3741060SN/A    unsigned freeEntries;
3751060SN/A
3761060SN/A    /** The number of entries in the instruction queue. */
3771060SN/A    unsigned numEntries;
3781060SN/A
3791060SN/A    /** The total number of instructions that can be issued in one cycle. */
3801060SN/A    unsigned totalWidth;
3811060SN/A
3822292SN/A    /** The number of physical registers in the CPU. */
3831060SN/A    unsigned numPhysRegs;
3841060SN/A
3851060SN/A    /** The number of physical integer registers in the CPU. */
3861060SN/A    unsigned numPhysIntRegs;
3871060SN/A
3881060SN/A    /** The number of floating point registers in the CPU. */
3891060SN/A    unsigned numPhysFloatRegs;
3901060SN/A
3911060SN/A    /** Delay between commit stage and the IQ.
3921060SN/A     *  @todo: Make there be a distinction between the delays within IEW.
3931060SN/A     */
3941060SN/A    unsigned commitToIEWDelay;
3951060SN/A
3962307SN/A    bool switchedOut;
3971060SN/A
3981060SN/A    /** The sequence number of the squashed instruction. */
3992292SN/A    InstSeqNum squashedSeqNum[Impl::MaxThreads];
4001060SN/A
4011060SN/A    /** A cache of the recently woken registers.  It is 1 if the register
4021060SN/A     *  has been woken up recently, and 0 if the register has been added
4031060SN/A     *  to the dependency graph and has not yet received its value.  It
4041060SN/A     *  is basically a secondary scoreboard, and should pretty much mirror
4051060SN/A     *  the scoreboard that exists in the rename map.
4061060SN/A     */
4072292SN/A    std::vector<bool> regScoreboard;
4081060SN/A
4092326SN/A    /** Adds an instruction to the dependency graph, as a consumer. */
4101061SN/A    bool addToDependents(DynInstPtr &new_inst);
4111684SN/A
4122326SN/A    /** Adds an instruction to the dependency graph, as a producer. */
4132326SN/A    void addToProducers(DynInstPtr &new_inst);
4141755SN/A
4152292SN/A    /** Moves an instruction to the ready queue if it is ready. */
4161684SN/A    void addIfReady(DynInstPtr &inst);
4171684SN/A
4181684SN/A    /** Debugging function to count how many entries are in the IQ.  It does
4191684SN/A     *  a linear walk through the instructions, so do not call this function
4201684SN/A     *  during normal execution.
4211684SN/A     */
4221684SN/A    int countInsts();
4231684SN/A
4241684SN/A    /** Debugging function to dump all the list sizes, as well as print
4251684SN/A     *  out the list of nonspeculative instructions.  Should not be used
4261684SN/A     *  in any other capacity, but it has no harmful sideaffects.
4271684SN/A     */
4281684SN/A    void dumpLists();
4291062SN/A
4302292SN/A    /** Debugging function to dump out all instructions that are in the
4312292SN/A     *  IQ.
4322292SN/A     */
4332292SN/A    void dumpInsts();
4342292SN/A
4352292SN/A    /** Stat for number of instructions added. */
4361062SN/A    Stats::Scalar<> iqInstsAdded;
4372292SN/A    /** Stat for number of non-speculative instructions added. */
4381062SN/A    Stats::Scalar<> iqNonSpecInstsAdded;
4392326SN/A
4402301SN/A    Stats::Scalar<> iqInstsIssued;
4412292SN/A    /** Stat for number of integer instructions issued. */
4421062SN/A    Stats::Scalar<> iqIntInstsIssued;
4432292SN/A    /** Stat for number of floating point instructions issued. */
4441062SN/A    Stats::Scalar<> iqFloatInstsIssued;
4452292SN/A    /** Stat for number of branch instructions issued. */
4461062SN/A    Stats::Scalar<> iqBranchInstsIssued;
4472292SN/A    /** Stat for number of memory instructions issued. */
4481062SN/A    Stats::Scalar<> iqMemInstsIssued;
4492292SN/A    /** Stat for number of miscellaneous instructions issued. */
4501062SN/A    Stats::Scalar<> iqMiscInstsIssued;
4512292SN/A    /** Stat for number of squashed instructions that were ready to issue. */
4521062SN/A    Stats::Scalar<> iqSquashedInstsIssued;
4532292SN/A    /** Stat for number of squashed instructions examined when squashing. */
4541062SN/A    Stats::Scalar<> iqSquashedInstsExamined;
4552292SN/A    /** Stat for number of squashed instruction operands examined when
4562292SN/A     * squashing.
4572292SN/A     */
4581062SN/A    Stats::Scalar<> iqSquashedOperandsExamined;
4592292SN/A    /** Stat for number of non-speculative instructions removed due to a squash.
4602292SN/A     */
4611062SN/A    Stats::Scalar<> iqSquashedNonSpecRemoved;
4621062SN/A
4632326SN/A    Stats::VectorDistribution<> queueResDist;
4642326SN/A    Stats::Distribution<> numIssuedDist;
4652326SN/A    Stats::VectorDistribution<> issueDelayDist;
4662301SN/A
4672326SN/A    Stats::Vector<> statFuBusy;
4682301SN/A//    Stats::Vector<> dist_unissued;
4692326SN/A    Stats::Vector2d<> statIssuedInstType;
4702301SN/A
4712326SN/A    Stats::Formula issueRate;
4722301SN/A//    Stats::Formula issue_stores;
4732301SN/A//    Stats::Formula issue_op_rate;
4742326SN/A    Stats::Vector<> fuBusy;  //cumulative fu busy
4752301SN/A
4762326SN/A    Stats::Formula fuBusyRate;
4771060SN/A};
4781060SN/A
4792292SN/A#endif //__CPU_O3_INST_QUEUE_HH__
480