inst_queue.hh revision 2333
/*
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
286285Snate@binkert.org
296285Snate@binkert.org#ifndef __CPU_O3_INST_QUEUE_HH__
306285Snate@binkert.org#define __CPU_O3_INST_QUEUE_HH__
316881SBrad.Beckmann@amd.com
326285Snate@binkert.org#include <list>
336876Ssteve.reinhardt@amd.com#include <map>
346876Ssteve.reinhardt@amd.com#include <queue>
356285Snate@binkert.org#include <vector>
366876Ssteve.reinhardt@amd.com
376876Ssteve.reinhardt@amd.com#include "base/statistics.hh"
386876Ssteve.reinhardt@amd.com#include "base/timebuf.hh"
396876Ssteve.reinhardt@amd.com#include "cpu/inst_seq.hh"
406876Ssteve.reinhardt@amd.com#include "cpu/o3/dep_graph.hh"
416876Ssteve.reinhardt@amd.com#include "encumbered/cpu/full/op_class.hh"
426876Ssteve.reinhardt@amd.com#include "sim/host.hh"
436876Ssteve.reinhardt@amd.com
446881SBrad.Beckmann@amd.comclass FUPool;
456881SBrad.Beckmann@amd.comclass MemInterface;
466881SBrad.Beckmann@amd.com
/**
 * A standard instruction queue class.  It holds ready instructions, in
 * order, in separate priority queues to facilitate the scheduling of
 * instructions.  The IQ uses a separate linked list to track dependencies.
 * Similar to the rename map and the free list, it expects that
 * floating point registers have their indices start after the integer
 * registers (i.e. with 96 int and 96 fp registers, regs 0-95 are integer
 * and 96-191 are fp).  This remains true for both logical and
 * physical register indices. The IQ depends on the memory dependence unit to
 * track when memory operations are ready in terms of ordering; register
 * dependencies are tracked normally. Right now the IQ also handles the
 * execution timing; this is mainly to allow back-to-back scheduling without
 * requiring IEW to be able to peek into the IQ. At the end of the execution
 * latency, the instruction is put into the queue to execute, where it will
 * have the execute() function called on it.
 * @todo: Make IQ able to handle multiple FU pools.
 */
646493STushar.Krishna@amd.comtemplate <class Impl>
656493STushar.Krishna@amd.comclass InstructionQueue
666493STushar.Krishna@amd.com{
676493STushar.Krishna@amd.com  public:
686493STushar.Krishna@amd.com    //Typedefs from the Impl.
696493STushar.Krishna@amd.com    typedef typename Impl::FullCPU FullCPU;
706493STushar.Krishna@amd.com    typedef typename Impl::DynInstPtr DynInstPtr;
716493STushar.Krishna@amd.com    typedef typename Impl::Params Params;
726493STushar.Krishna@amd.com
736493STushar.Krishna@amd.com    typedef typename Impl::CPUPol::IEW IEW;
746493STushar.Krishna@amd.com    typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
756493STushar.Krishna@amd.com    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
766493STushar.Krishna@amd.com    typedef typename Impl::CPUPol::TimeStruct TimeStruct;
776493STushar.Krishna@amd.com
786493STushar.Krishna@amd.com    // Typedef of iterator through the list of instructions.
796493STushar.Krishna@amd.com    typedef typename std::list<DynInstPtr>::iterator ListIt;
806493STushar.Krishna@amd.com
816493STushar.Krishna@amd.com    friend class Impl::FullCPU;
826493STushar.Krishna@amd.com
836493STushar.Krishna@amd.com    /** FU completion event class. */
846493STushar.Krishna@amd.com    class FUCompletion : public Event {
856493STushar.Krishna@amd.com      private:
866493STushar.Krishna@amd.com        /** Executing instruction. */
876493STushar.Krishna@amd.com        DynInstPtr inst;
886493STushar.Krishna@amd.com
896493STushar.Krishna@amd.com        /** Index of the FU used for executing. */
906493STushar.Krishna@amd.com        int fuIdx;
916493STushar.Krishna@amd.com
926493STushar.Krishna@amd.com        /** Pointer back to the instruction queue. */
936493STushar.Krishna@amd.com        InstructionQueue<Impl> *iqPtr;
946493STushar.Krishna@amd.com
956493STushar.Krishna@amd.com        bool freeFU;
96
97      public:
98        /** Construct a FU completion event. */
99        FUCompletion(DynInstPtr &_inst, int fu_idx,
100                     InstructionQueue<Impl> *iq_ptr);
101
102        virtual void process();
103        virtual const char *description();
104        void setFreeFU() { freeFU = true; }
105    };
106
107    /** Constructs an IQ. */
108    InstructionQueue(Params *params);
109
110    /** Destructs the IQ. */
111    ~InstructionQueue();
112
113    /** Returns the name of the IQ. */
114    std::string name() const;
115
116    /** Registers statistics. */
117    void regStats();
118
119    void resetState();
120
121    /** Sets CPU pointer. */
122    void setCPU(FullCPU *_cpu) { cpu = _cpu; }
123
124    /** Sets active threads list. */
125    void setActiveThreads(std::list<unsigned> *at_ptr);
126
127    /** Sets the IEW pointer. */
128    void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
129
130    /** Sets the timer buffer between issue and execute. */
131    void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
132
133    /** Sets the global time buffer. */
134    void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
135
136    void switchOut();
137
138    void takeOverFrom();
139
140    bool isSwitchedOut() { return switchedOut; }
141
142    /** Number of entries needed for given amount of threads. */
143    int entryAmount(int num_threads);
144
145    /** Resets max entries for all threads. */
146    void resetEntries();
147
148    /** Returns total number of free entries. */
149    unsigned numFreeEntries();
150
151    /** Returns number of free entries for a thread. */
152    unsigned numFreeEntries(unsigned tid);
153
154    /** Returns whether or not the IQ is full. */
155    bool isFull();
156
157    /** Returns whether or not the IQ is full for a specific thread. */
158    bool isFull(unsigned tid);
159
160    /** Returns if there are any ready instructions in the IQ. */
161    bool hasReadyInsts();
162
163    /** Inserts a new instruction into the IQ. */
164    void insert(DynInstPtr &new_inst);
165
166    /** Inserts a new, non-speculative instruction into the IQ. */
167    void insertNonSpec(DynInstPtr &new_inst);
168
169    /** Inserts a memory or write barrier into the IQ to make sure
170     *  loads and stores are ordered properly.
171     */
172    void insertBarrier(DynInstPtr &barr_inst);
173
174    DynInstPtr getInstToExecute();
175
176    /**
177     * Records the instruction as the producer of a register without
178     * adding it to the rest of the IQ.
179     */
180    void recordProducer(DynInstPtr &inst)
181    { addToProducers(inst); }
182
183    /** Process FU completion event. */
184    void processFUCompletion(DynInstPtr &inst, int fu_idx);
185
186    /**
187     * Schedules ready instructions, adding the ready ones (oldest first) to
188     * the queue to execute.
189     */
190    void scheduleReadyInsts();
191
192    /** Schedules a single specific non-speculative instruction. */
193    void scheduleNonSpec(const InstSeqNum &inst);
194
195    /**
196     * Commits all instructions up to and including the given sequence number,
197     * for a specific thread.
198     */
199    void commit(const InstSeqNum &inst, unsigned tid = 0);
200
201    /** Wakes all dependents of a completed instruction. */
202    int wakeDependents(DynInstPtr &completed_inst);
203
204    /** Adds a ready memory instruction to the ready list. */
205    void addReadyMemInst(DynInstPtr &ready_inst);
206
207    /**
208     * Reschedules a memory instruction. It will be ready to issue once
209     * replayMemInst() is called.
210     */
211    void rescheduleMemInst(DynInstPtr &resched_inst);
212
213    /** Replays a memory instruction. It must be rescheduled first. */
214    void replayMemInst(DynInstPtr &replay_inst);
215
216    /** Completes a memory operation. */
217    void completeMemInst(DynInstPtr &completed_inst);
218
219    /** Indicates an ordering violation between a store and a load. */
220    void violation(DynInstPtr &store, DynInstPtr &faulting_load);
221
222    /**
223     * Squashes instructions for a thread. Squashing information is obtained
224     * from the time buffer.
225     */
226    void squash(unsigned tid);
227
228    /** Returns the number of used entries for a thread. */
229    unsigned getCount(unsigned tid) { return count[tid]; };
230
231    /** Debug function to print all instructions. */
232    void printInsts();
233
234  private:
235    /** Does the actual squashing. */
236    void doSquash(unsigned tid);
237
238    /////////////////////////
239    // Various pointers
240    /////////////////////////
241
242    /** Pointer to the CPU. */
243    FullCPU *cpu;
244
245    /** Cache interface. */
246    MemInterface *dcacheInterface;
247
248    /** Pointer to IEW stage. */
249    IEW *iewStage;
250
251    /** The memory dependence unit, which tracks/predicts memory dependences
252     *  between instructions.
253     */
254    MemDepUnit memDepUnit[Impl::MaxThreads];
255
256    /** The queue to the execute stage.  Issued instructions will be written
257     *  into it.
258     */
259    TimeBuffer<IssueStruct> *issueToExecuteQueue;
260
261    /** The backwards time buffer. */
262    TimeBuffer<TimeStruct> *timeBuffer;
263
264    /** Wire to read information from timebuffer. */
265    typename TimeBuffer<TimeStruct>::wire fromCommit;
266
267    /** Function unit pool. */
268    FUPool *fuPool;
269
270    //////////////////////////////////////
271    // Instruction lists, ready queues, and ordering
272    //////////////////////////////////////
273
274    /** List of all the instructions in the IQ (some of which may be issued). */
275    std::list<DynInstPtr> instList[Impl::MaxThreads];
276
277    std::list<DynInstPtr> instsToExecute;
278
279    /**
280     * Struct for comparing entries to be added to the priority queue.  This
281     * gives reverse ordering to the instructions in terms of sequence
282     * numbers: the instructions with smaller sequence numbers (and hence
283     * are older) will be at the top of the priority queue.
284     */
285    struct pqCompare {
286        bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
287        {
288            return lhs->seqNum > rhs->seqNum;
289        }
290    };
291
292    typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
293    ReadyInstQueue;
294
295    /** List of ready instructions, per op class.  They are separated by op
296     *  class to allow for easy mapping to FUs.
297     */
298    ReadyInstQueue readyInsts[Num_OpClasses];
299
300    /** List of non-speculative instructions that will be scheduled
301     *  once the IQ gets a signal from commit.  While it's redundant to
302     *  have the key be a part of the value (the sequence number is stored
303     *  inside of DynInst), when these instructions are woken up only
304     *  the sequence number will be available.  Thus it is most efficient to be
305     *  able to search by the sequence number alone.
306     */
307    std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
308
309    typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
310
311    /** Entry for the list age ordering by op class. */
312    struct ListOrderEntry {
313        OpClass queueType;
314        InstSeqNum oldestInst;
315    };
316
317    /** List that contains the age order of the oldest instruction of each
318     *  ready queue.  Used to select the oldest instruction available
319     *  among op classes.
320     *  @todo: Might be better to just move these entries around instead
321     *  of creating new ones every time the position changes due to an
322     *  instruction issuing.  Not sure std::list supports this.
323     */
324    std::list<ListOrderEntry> listOrder;
325
326    typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
327
328    /** Tracks if each ready queue is on the age order list. */
329    bool queueOnList[Num_OpClasses];
330
331    /** Iterators of each ready queue.  Points to their spot in the age order
332     *  list.
333     */
334    ListOrderIt readyIt[Num_OpClasses];
335
336    /** Add an op class to the age order list. */
337    void addToOrderList(OpClass op_class);
338
339    /**
340     * Called when the oldest instruction has been removed from a ready queue;
341     * this places that ready queue into the proper spot in the age order list.
342     */
343    void moveToYoungerInst(ListOrderIt age_order_it);
344
345    DependencyGraph<DynInstPtr> dependGraph;
346
347    //////////////////////////////////////
348    // Various parameters
349    //////////////////////////////////////
350
351    /** IQ Resource Sharing Policy */
352    enum IQPolicy {
353        Dynamic,
354        Partitioned,
355        Threshold
356    };
357
358    /** IQ sharing policy for SMT. */
359    IQPolicy iqPolicy;
360
361    /** Number of Total Threads*/
362    unsigned numThreads;
363
364    /** Pointer to list of active threads. */
365    std::list<unsigned> *activeThreads;
366
367    /** Per Thread IQ count */
368    unsigned count[Impl::MaxThreads];
369
370    /** Max IQ Entries Per Thread */
371    unsigned maxEntries[Impl::MaxThreads];
372
373    /** Number of free IQ entries left. */
374    unsigned freeEntries;
375
376    /** The number of entries in the instruction queue. */
377    unsigned numEntries;
378
379    /** The total number of instructions that can be issued in one cycle. */
380    unsigned totalWidth;
381
382    /** The number of physical registers in the CPU. */
383    unsigned numPhysRegs;
384
385    /** The number of physical integer registers in the CPU. */
386    unsigned numPhysIntRegs;
387
388    /** The number of floating point registers in the CPU. */
389    unsigned numPhysFloatRegs;
390
391    /** Delay between commit stage and the IQ.
392     *  @todo: Make there be a distinction between the delays within IEW.
393     */
394    unsigned commitToIEWDelay;
395
396    bool switchedOut;
397
398    /** The sequence number of the squashed instruction. */
399    InstSeqNum squashedSeqNum[Impl::MaxThreads];
400
401    /** A cache of the recently woken registers.  It is 1 if the register
402     *  has been woken up recently, and 0 if the register has been added
403     *  to the dependency graph and has not yet received its value.  It
404     *  is basically a secondary scoreboard, and should pretty much mirror
405     *  the scoreboard that exists in the rename map.
406     */
407    std::vector<bool> regScoreboard;
408
409    /** Adds an instruction to the dependency graph, as a consumer. */
410    bool addToDependents(DynInstPtr &new_inst);
411
412    /** Adds an instruction to the dependency graph, as a producer. */
413    void addToProducers(DynInstPtr &new_inst);
414
415    /** Moves an instruction to the ready queue if it is ready. */
416    void addIfReady(DynInstPtr &inst);
417
418    /** Debugging function to count how many entries are in the IQ.  It does
419     *  a linear walk through the instructions, so do not call this function
420     *  during normal execution.
421     */
422    int countInsts();
423
424    /** Debugging function to dump all the list sizes, as well as print
425     *  out the list of nonspeculative instructions.  Should not be used
426     *  in any other capacity, but it has no harmful sideaffects.
427     */
428    void dumpLists();
429
430    /** Debugging function to dump out all instructions that are in the
431     *  IQ.
432     */
433    void dumpInsts();
434
435    /** Stat for number of instructions added. */
436    Stats::Scalar<> iqInstsAdded;
437    /** Stat for number of non-speculative instructions added. */
438    Stats::Scalar<> iqNonSpecInstsAdded;
439
440    Stats::Scalar<> iqInstsIssued;
441    /** Stat for number of integer instructions issued. */
442    Stats::Scalar<> iqIntInstsIssued;
443    /** Stat for number of floating point instructions issued. */
444    Stats::Scalar<> iqFloatInstsIssued;
445    /** Stat for number of branch instructions issued. */
446    Stats::Scalar<> iqBranchInstsIssued;
447    /** Stat for number of memory instructions issued. */
448    Stats::Scalar<> iqMemInstsIssued;
449    /** Stat for number of miscellaneous instructions issued. */
450    Stats::Scalar<> iqMiscInstsIssued;
451    /** Stat for number of squashed instructions that were ready to issue. */
452    Stats::Scalar<> iqSquashedInstsIssued;
453    /** Stat for number of squashed instructions examined when squashing. */
454    Stats::Scalar<> iqSquashedInstsExamined;
455    /** Stat for number of squashed instruction operands examined when
456     * squashing.
457     */
458    Stats::Scalar<> iqSquashedOperandsExamined;
459    /** Stat for number of non-speculative instructions removed due to a squash.
460     */
461    Stats::Scalar<> iqSquashedNonSpecRemoved;
462
463    Stats::VectorDistribution<> queueResDist;
464    Stats::Distribution<> numIssuedDist;
465    Stats::VectorDistribution<> issueDelayDist;
466
467    Stats::Vector<> statFuBusy;
468//    Stats::Vector<> dist_unissued;
469    Stats::Vector2d<> statIssuedInstType;
470
471    Stats::Formula issueRate;
472//    Stats::Formula issue_stores;
473//    Stats::Formula issue_op_rate;
474    Stats::Vector<> fuBusy;  //cumulative fu busy
475
476    Stats::Formula fuBusyRate;
477};
478
479#endif //__CPU_O3_INST_QUEUE_HH__
480