2c2
< * Copyright (c) 2004-2005 The Regents of The University of Michigan
---
> * Copyright (c) 2004-2006 The Regents of The University of Michigan
29,30c29,30
< #ifndef __CPU_O3_CPU_INST_QUEUE_HH__
< #define __CPU_O3_CPU_INST_QUEUE_HH__
---
> #ifndef __CPU_O3_INST_QUEUE_HH__
> #define __CPU_O3_INST_QUEUE_HH__
39a40,41
> #include "cpu/o3/dep_graph.hh"
> #include "encumbered/cpu/full/op_class.hh"
41a44,46
> class FUPool;
> class MemInterface;
>
50c55,62
< * physical register indices.
---
> * physical register indices. The IQ depends on the memory dependence unit to
> * track when memory operations are ready in terms of ordering; register
> * dependencies are tracked normally. Right now the IQ also handles the
> * execution timing; this is mainly to allow back-to-back scheduling without
> * requiring IEW to be able to peek into the IQ. At the end of the execution
> * latency, the instruction is put into the queue to execute, where it will
> * have the execute() function called on it.
> * @todo: Make IQ able to handle multiple FU pools.
60a73
> typedef typename Impl::CPUPol::IEW IEW;
65,67c78
< // Typedef of iterator through the list of instructions. Might be
< // better to untie this from the FullCPU or pass its information to
< // the stages.
---
> // Typedef of iterator through the list of instructions.
70,82c81
< /**
< * Struct for comparing entries to be added to the priority queue. This
< * gives reverse ordering to the instructions in terms of sequence
< * numbers: the instructions with smaller sequence numbers (and hence
< * are older) will be at the top of the priority queue.
< */
< struct pqCompare
< {
< bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
< {
< return lhs->seqNum > rhs->seqNum;
< }
< };
---
> friend class Impl::FullCPU;
84,93c83,104
< /**
< * Struct for comparing entries to be added to the set. This gives
< * standard ordering in terms of sequence numbers.
< */
< struct setCompare
< {
< bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
< {
< return lhs->seqNum < rhs->seqNum;
< }
---
> /** FU completion event class. */
> class FUCompletion : public Event {
> private:
> /** Executing instruction. */
> DynInstPtr inst;
>
> /** Index of the FU used for executing. */
> int fuIdx;
>
> /** Pointer back to the instruction queue. */
> InstructionQueue<Impl> *iqPtr;
>
> bool freeFU;
>
> public:
> /** Construct a FU completion event. */
> FUCompletion(DynInstPtr &_inst, int fu_idx,
> InstructionQueue<Impl> *iq_ptr);
>
> virtual void process();
> virtual const char *description();
> void setFreeFU() { freeFU = true; }
96,97c107,108
< typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare>
< ReadyInstQueue;
---
> /** Constructs an IQ. */
> InstructionQueue(Params *params);
99c110,111
< InstructionQueue(Params &params);
---
> /** Destructs the IQ. */
> ~InstructionQueue();
100a113,116
> /** Returns the name of the IQ. */
> std::string name() const;
>
> /** Registers statistics. */
103c119
< void setCPU(FullCPU *cpu);
---
> void resetState();
104a121,130
> /** Sets CPU pointer. */
> void setCPU(FullCPU *_cpu) { cpu = _cpu; }
>
> /** Sets active threads list. */
> void setActiveThreads(std::list<unsigned> *at_ptr);
>
> /** Sets the IEW pointer. */
> void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
>
> /** Sets the time buffer between issue and execute. */
106a133
> /** Sets the global time buffer. */
108a136,148
> void switchOut();
>
> void takeOverFrom();
>
> bool isSwitchedOut() { return switchedOut; }
>
> /** Number of entries needed for a given number of threads. */
> int entryAmount(int num_threads);
>
> /** Resets max entries for all threads. */
> void resetEntries();
>
> /** Returns total number of free entries. */
110a151,154
> /** Returns number of free entries for a thread. */
> unsigned numFreeEntries(unsigned tid);
>
> /** Returns whether or not the IQ is full. */
112a157,163
> /** Returns whether or not the IQ is full for a specific thread. */
> bool isFull(unsigned tid);
>
> /** Returns if there are any ready instructions in the IQ. */
> bool hasReadyInsts();
>
> /** Inserts a new instruction into the IQ. */
114a166
> /** Inserts a new, non-speculative instruction into the IQ. */
117c169,172
< void advanceTail(DynInstPtr &inst);
---
> /** Inserts a memory or write barrier into the IQ to make sure
> * loads and stores are ordered properly.
> */
> void insertBarrier(DynInstPtr &barr_inst);
118a174,189
> DynInstPtr getInstToExecute();
>
> /**
> * Records the instruction as the producer of a register without
> * adding it to the rest of the IQ.
> */
> void recordProducer(DynInstPtr &inst)
> { addToProducers(inst); }
>
> /** Process FU completion event. */
> void processFUCompletion(DynInstPtr &inst, int fu_idx);
>
> /**
> * Schedules ready instructions, adding the ready ones (oldest first) to
> * the queue to execute.
> */
120a192
> /** Schedules a single specific non-speculative instruction. */
123c195,199
< void wakeDependents(DynInstPtr &completed_inst);
---
> /**
> * Commits all instructions up to and including the given sequence number,
> * for a specific thread.
> */
> void commit(const InstSeqNum &inst, unsigned tid = 0);
124a201,219
> /** Wakes all dependents of a completed instruction. */
> int wakeDependents(DynInstPtr &completed_inst);
>
> /** Adds a ready memory instruction to the ready list. */
> void addReadyMemInst(DynInstPtr &ready_inst);
>
> /**
> * Reschedules a memory instruction. It will be ready to issue once
> * replayMemInst() is called.
> */
> void rescheduleMemInst(DynInstPtr &resched_inst);
>
> /** Replays a memory instruction. It must be rescheduled first. */
> void replayMemInst(DynInstPtr &replay_inst);
>
> /** Completes a memory operation. */
> void completeMemInst(DynInstPtr &completed_inst);
>
> /** Indicates an ordering violation between a store and a load. */
127,128c222,226
< // Change this to take in the sequence number
< void squash();
---
> /**
> * Squashes instructions for a thread. Squashing information is obtained
> * from the time buffer.
> */
> void squash(unsigned tid);
130c228,229
< void doSquash();
---
> /** Returns the number of used entries for a thread. */
> unsigned getCount(unsigned tid) { return count[tid]; };
132c231,232
< void stopSquash();
---
> /** Debug function to print all instructions. */
> void printInsts();
134a235,241
> /** Does the actual squashing. */
> void doSquash(unsigned tid);
>
> /////////////////////////
> // Various pointers
> /////////////////////////
>
137a245,250
> /** Cache interface. */
> MemInterface *dcacheInterface;
>
> /** Pointer to IEW stage. */
> IEW *iewStage;
>
141c254
< MemDepUnit memDepUnit;
---
> MemDepUnit memDepUnit[Impl::MaxThreads];
154,162c267,268
< enum InstList {
< Int,
< Float,
< Branch,
< Memory,
< Misc,
< Squashed,
< None
< };
---
> /** Function unit pool. */
> FUPool *fuPool;
164,167c270,272
< /** List of ready int instructions. Used to keep track of the order in
< * which instructions should issue.
< */
< ReadyInstQueue readyIntInsts;
---
> //////////////////////////////////////
> // Instruction lists, ready queues, and ordering
> //////////////////////////////////////
169,170c274,275
< /** List of ready floating point instructions. */
< ReadyInstQueue readyFloatInsts;
---
> /** List of all the instructions in the IQ (some of which may be issued). */
> std::list<DynInstPtr> instList[Impl::MaxThreads];
172,173c277
< /** List of ready branch instructions. */
< ReadyInstQueue readyBranchInsts;
---
> std::list<DynInstPtr> instsToExecute;
175,176c279,290
< /** List of ready miscellaneous instructions. */
< ReadyInstQueue readyMiscInsts;
---
> /**
> * Struct for comparing entries to be added to the priority queue. This
> * gives reverse ordering to the instructions in terms of sequence
> * numbers: instructions with smaller sequence numbers (which are
> * therefore older) will be at the top of the priority queue.
> */
> struct pqCompare {
> bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
> {
> return lhs->seqNum > rhs->seqNum;
> }
> };
178,181c292,296
< /** List of squashed instructions (which are still valid and in IQ).
< * Implemented using a priority queue; the entries must contain both
< * the IQ index and sequence number of each instruction so that
< * ordering based on sequence numbers can be used.
---
> typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
> ReadyInstQueue;
>
> /** List of ready instructions, per op class. They are separated by op
> * class to allow for easy mapping to FUs.
183c298
< ReadyInstQueue squashedInsts;
---
> ReadyInstQueue readyInsts[Num_OpClasses];
194c309
< typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t;
---
> typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
196,197c311,315
< /** Number of free IQ entries left. */
< unsigned freeEntries;
---
> /** Entry for the list age ordering by op class. */
> struct ListOrderEntry {
> OpClass queueType;
> InstSeqNum oldestInst;
> };
199,200c317,324
< /** The number of entries in the instruction queue. */
< unsigned numEntries;
---
> /** List that contains the age order of the oldest instruction of each
> * ready queue. Used to select the oldest instruction available
> * among op classes.
> * @todo: Might be better to just move these entries around instead
> * of creating new ones every time the position changes due to an
> * instruction issuing. Not sure std::list supports this.
> */
> std::list<ListOrderEntry> listOrder;
202,203c326,332
< /** The number of integer instructions that can be issued in one
< * cycle.
---
> typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
>
> /** Tracks if each ready queue is on the age order list. */
> bool queueOnList[Num_OpClasses];
>
> /** Iterator for each ready queue, pointing to that queue's spot in the
> * age order list.
205c334
< unsigned intWidth;
---
> ListOrderIt readyIt[Num_OpClasses];
207,208c336,341
< /** The number of floating point instructions that can be issued
< * in one cycle.
---
> /** Add an op class to the age order list. */
> void addToOrderList(OpClass op_class);
>
> /**
> * Called when the oldest instruction has been removed from a ready queue;
> * this places that ready queue into the proper spot in the age order list.
210c343
< unsigned floatWidth;
---
> void moveToYoungerInst(ListOrderIt age_order_it);
212,213c345
< /** The number of branches that can be issued in one cycle. */
< unsigned branchWidth;
---
> DependencyGraph<DynInstPtr> dependGraph;
215,216c347,349
< /** The number of memory instructions that can be issued in one cycle. */
< unsigned memoryWidth;
---
> //////////////////////////////////////
> // Various parameters
> //////////////////////////////////////
217a351,378
> /** IQ Resource Sharing Policy */
> enum IQPolicy {
> Dynamic,
> Partitioned,
> Threshold
> };
>
> /** IQ sharing policy for SMT. */
> IQPolicy iqPolicy;
>
> /** Total number of threads. */
> unsigned numThreads;
>
> /** Pointer to list of active threads. */
> std::list<unsigned> *activeThreads;
>
> /** Per Thread IQ count */
> unsigned count[Impl::MaxThreads];
>
> /** Max IQ Entries Per Thread */
> unsigned maxEntries[Impl::MaxThreads];
>
> /** Number of free IQ entries left. */
> unsigned freeEntries;
>
> /** The number of entries in the instruction queue. */
> unsigned numEntries;
>
221c382
< //The number of physical registers in the CPU.
---
> /** The number of physical registers in the CPU. */
235,237c396
< //////////////////////////////////
< // Variables needed for squashing
< //////////////////////////////////
---
> bool switchedOut;
240c399
< InstSeqNum squashedSeqNum;
---
> InstSeqNum squashedSeqNum[Impl::MaxThreads];
242,284d400
< /** Iterator that points to the youngest instruction in the IQ. */
< ListIt tail;
<
< /** Iterator that points to the last instruction that has been squashed.
< * This will not be valid unless the IQ is in the process of squashing.
< */
< ListIt squashIt;
<
< ///////////////////////////////////
< // Dependency graph stuff
< ///////////////////////////////////
<
< class DependencyEntry
< {
< public:
< DynInstPtr inst;
< //Might want to include data about what arch. register the
< //dependence is waiting on.
< DependencyEntry *next;
<
< //This function, and perhaps this whole class, stand out a little
< //bit as they don't fit a classification well. I want access
< //to the underlying structure of the linked list, yet at
< //the same time it feels like this should be something abstracted
< //away. So for now it will sit here, within the IQ, until
< //a better implementation is decided upon.
< // This function probably shouldn't be within the entry...
< void insert(DynInstPtr &new_inst);
<
< void remove(DynInstPtr &inst_to_remove);
<
< // Debug variable, remove when done testing.
< static unsigned mem_alloc_counter;
< };
<
< /** Array of linked lists. Each linked list is a list of all the
< * instructions that depend upon a given register. The actual
< * register's index is used to index into the graph; ie all
< * instructions in flight that are dependent upon r34 will be
< * in the linked list of dependGraph[34].
< */
< DependencyEntry *dependGraph;
<
291c407
< vector<bool> regScoreboard;
---
> std::vector<bool> regScoreboard;
292a409
> /** Adds an instruction to the dependency graph, as a consumer. */
294,295d410
< void insertDependency(DynInstPtr &new_inst);
< void createDependency(DynInstPtr &new_inst);
296a412,415
> /** Adds an instruction to the dependency graph, as a producer. */
> void addToProducers(DynInstPtr &new_inst);
>
> /** Moves an instruction to the ready queue if it is ready. */
299d417
< private:
306,309d423
< /** Debugging function to dump out the dependency graph.
< */
< void dumpDependGraph();
<
315a430,435
> /** Debugging function to dump out all instructions that are in the
> * IQ.
> */
> void dumpInsts();
>
> /** Stat for number of instructions added. */
316a437
> /** Stat for number of non-speculative instructions added. */
318c439,441
< // Stats::Scalar<> iqIntInstsAdded;
---
>
> Stats::Scalar<> iqInstsIssued;
> /** Stat for number of integer instructions issued. */
320c443
< // Stats::Scalar<> iqFloatInstsAdded;
---
> /** Stat for number of floating point instructions issued. */
322c445
< // Stats::Scalar<> iqBranchInstsAdded;
---
> /** Stat for number of branch instructions issued. */
324c447
< // Stats::Scalar<> iqMemInstsAdded;
---
> /** Stat for number of memory instructions issued. */
326c449
< // Stats::Scalar<> iqMiscInstsAdded;
---
> /** Stat for number of miscellaneous instructions issued. */
327a451
> /** Stat for number of squashed instructions that were ready to issue. */
329c453
< Stats::Scalar<> iqLoopSquashStalls;
---
> /** Stat for number of squashed instructions examined when squashing. */
330a455,457
> /** Stat for number of squashed instruction operands examined when
> * squashing.
> */
331a459,460
> /** Stat for number of non-speculative instructions removed due to a squash.
> */
333a463,476
> Stats::VectorDistribution<> queueResDist;
> Stats::Distribution<> numIssuedDist;
> Stats::VectorDistribution<> issueDelayDist;
>
> Stats::Vector<> statFuBusy;
> // Stats::Vector<> dist_unissued;
> Stats::Vector2d<> statIssuedInstType;
>
> Stats::Formula issueRate;
> // Stats::Formula issue_stores;
> // Stats::Formula issue_op_rate;
> Stats::Vector<> fuBusy; //cumulative fu busy
>
> Stats::Formula fuBusyRate;
336c479
< #endif //__CPU_O3_CPU_INST_QUEUE_HH__
---
> #endif //__CPU_O3_INST_QUEUE_HH__