1/*
|
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
|
2 * Copyright (c) 2004-2005 The Regents of The University of Michigan |
3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
27 * 28 * Authors: Kevin Lim |
29 */ 30
|
29#ifndef __CPU_O3_INST_QUEUE_HH__
30#define __CPU_O3_INST_QUEUE_HH__
|
31#ifndef __CPU_O3_CPU_INST_QUEUE_HH__ 32#define __CPU_O3_CPU_INST_QUEUE_HH__ |
33 34#include <list> 35#include <map> 36#include <queue> 37#include <vector> 38 39#include "base/statistics.hh" 40#include "base/timebuf.hh" 41#include "cpu/inst_seq.hh"
|
40#include "cpu/o3/dep_graph.hh"
41#include "encumbered/cpu/full/op_class.hh"
|
42#include "sim/host.hh" 43
|
44class FUPool;
45class MemInterface;
46
|
44/** 45 * A standard instruction queue class. It holds ready instructions, in 46 * order, in seperate priority queues to facilitate the scheduling of 47 * instructions. The IQ uses a separate linked list to track dependencies. 48 * Similar to the rename map and the free list, it expects that 49 * floating point registers have their indices start after the integer 50 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 51 * and 96-191 are fp). This remains true even for both logical and
|
55 * physical register indices. The IQ depends on the memory dependence unit to
56 * track when memory operations are ready in terms of ordering; register
57 * dependencies are tracked normally. Right now the IQ also handles the
58 * execution timing; this is mainly to allow back-to-back scheduling without
59 * requiring IEW to be able to peek into the IQ. At the end of the execution
60 * latency, the instruction is put into the queue to execute, where it will
61 * have the execute() function called on it.
62 * @todo: Make IQ able to handle multiple FU pools.
|
52 * physical register indices. |
53 */ 54template <class Impl> 55class InstructionQueue 56{ 57 public: 58 //Typedefs from the Impl. 59 typedef typename Impl::FullCPU FullCPU; 60 typedef typename Impl::DynInstPtr DynInstPtr; 61 typedef typename Impl::Params Params; 62
|
73 typedef typename Impl::CPUPol::IEW IEW;
|
63 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 64 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 65 typedef typename Impl::CPUPol::TimeStruct TimeStruct; 66
|
78 // Typedef of iterator through the list of instructions.
|
67 // Typedef of iterator through the list of instructions. Might be 68 // better to untie this from the FullCPU or pass its information to 69 // the stages. |
70 typedef typename std::list<DynInstPtr>::iterator ListIt; 71
|
81 friend class Impl::FullCPU;
|
72 /** 73 * Struct for comparing entries to be added to the priority queue. This 74 * gives reverse ordering to the instructions in terms of sequence 75 * numbers: the instructions with smaller sequence numbers (and hence 76 * are older) will be at the top of the priority queue. 77 */ 78 struct pqCompare 79 { 80 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 81 { 82 return lhs->seqNum > rhs->seqNum; 83 } 84 }; |
85
|
83 /** FU completion event class. */
84 class FUCompletion : public Event {
85 private:
86 /** Executing instruction. */
87 DynInstPtr inst;
88
89 /** Index of the FU used for executing. */
90 int fuIdx;
91
92 /** Pointer back to the instruction queue. */
93 InstructionQueue<Impl> *iqPtr;
94
95 bool freeFU;
96
97 public:
98 /** Construct a FU completion event. */
99 FUCompletion(DynInstPtr &_inst, int fu_idx,
100 InstructionQueue<Impl> *iq_ptr);
101
102 virtual void process();
103 virtual const char *description();
104 void setFreeFU() { freeFU = true; }
|
86 /** 87 * Struct for comparing entries to be added to the set. This gives 88 * standard ordering in terms of sequence numbers. 89 */ 90 struct setCompare 91 { 92 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 93 { 94 return lhs->seqNum < rhs->seqNum; 95 } |
96 }; 97
|
107 /** Constructs an IQ. */
108 InstructionQueue(Params *params);
|
98 typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare> 99 ReadyInstQueue; |
100
|
110 /** Destructs the IQ. */
111 ~InstructionQueue();
|
101 InstructionQueue(Params ¶ms); |
102
|
113 /** Returns the name of the IQ. */
114 std::string name() const;
115
116 /** Registers statistics. */
|
103 void regStats(); 104
|
119 void resetState();
|
105 void setCPU(FullCPU *cpu); |
106
|
121 /** Sets CPU pointer. */
122 void setCPU(FullCPU *_cpu) { cpu = _cpu; }
123
124 /** Sets active threads list. */
125 void setActiveThreads(std::list<unsigned> *at_ptr);
126
127 /** Sets the IEW pointer. */
128 void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
129
130 /** Sets the timer buffer between issue and execute. */
|
107 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 108
|
133 /** Sets the global time buffer. */
|
109 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 110
|
136 void switchOut();
137
138 void takeOverFrom();
139
140 bool isSwitchedOut() { return switchedOut; }
141
142 /** Number of entries needed for given amount of threads. */
143 int entryAmount(int num_threads);
144
145 /** Resets max entries for all threads. */
146 void resetEntries();
147
148 /** Returns total number of free entries. */
|
111 unsigned numFreeEntries(); 112
|
151 /** Returns number of free entries for a thread. */
152 unsigned numFreeEntries(unsigned tid);
153
154 /** Returns whether or not the IQ is full. */
|
113 bool isFull(); 114
|
157 /** Returns whether or not the IQ is full for a specific thread. */
158 bool isFull(unsigned tid);
159
160 /** Returns if there are any ready instructions in the IQ. */
161 bool hasReadyInsts();
162
163 /** Inserts a new instruction into the IQ. */
|
115 void insert(DynInstPtr &new_inst); 116
|
166 /** Inserts a new, non-speculative instruction into the IQ. */
|
117 void insertNonSpec(DynInstPtr &new_inst); 118
|
169 /** Inserts a memory or write barrier into the IQ to make sure
170 * loads and stores are ordered properly.
171 */
172 void insertBarrier(DynInstPtr &barr_inst);
|
119 void advanceTail(DynInstPtr &inst); |
120
|
174 DynInstPtr getInstToExecute();
175
176 /**
177 * Records the instruction as the producer of a register without
178 * adding it to the rest of the IQ.
179 */
180 void recordProducer(DynInstPtr &inst)
181 { addToProducers(inst); }
182
183 /** Process FU completion event. */
184 void processFUCompletion(DynInstPtr &inst, int fu_idx);
185
186 /**
187 * Schedules ready instructions, adding the ready ones (oldest first) to
188 * the queue to execute.
189 */
|
121 void scheduleReadyInsts(); 122
|
192 /** Schedules a single specific non-speculative instruction. */
|
123 void scheduleNonSpec(const InstSeqNum &inst); 124
|
195 /**
196 * Commits all instructions up to and including the given sequence number,
197 * for a specific thread.
198 */
199 void commit(const InstSeqNum &inst, unsigned tid = 0);
|
125 void wakeDependents(DynInstPtr &completed_inst); |
126
|
201 /** Wakes all dependents of a completed instruction. */
202 int wakeDependents(DynInstPtr &completed_inst);
203
204 /** Adds a ready memory instruction to the ready list. */
205 void addReadyMemInst(DynInstPtr &ready_inst);
206
207 /**
208 * Reschedules a memory instruction. It will be ready to issue once
209 * replayMemInst() is called.
210 */
211 void rescheduleMemInst(DynInstPtr &resched_inst);
212
213 /** Replays a memory instruction. It must be rescheduled first. */
214 void replayMemInst(DynInstPtr &replay_inst);
215
216 /** Completes a memory operation. */
217 void completeMemInst(DynInstPtr &completed_inst);
218
219 /** Indicates an ordering violation between a store and a load. */
|
127 void violation(DynInstPtr &store, DynInstPtr &faulting_load); 128
|
222 /**
223 * Squashes instructions for a thread. Squashing information is obtained
224 * from the time buffer.
225 */
226 void squash(unsigned tid);
|
129 // Change this to take in the sequence number 130 void squash(); |
131
|
228 /** Returns the number of used entries for a thread. */
229 unsigned getCount(unsigned tid) { return count[tid]; };
|
132 void doSquash(); |
133
|
231 /** Debug function to print all instructions. */
232 void printInsts();
|
134 void stopSquash(); |
135 136 private:
|
235 /** Does the actual squashing. */
236 void doSquash(unsigned tid);
237
238 /////////////////////////
239 // Various pointers
240 /////////////////////////
241
|
137 /** Pointer to the CPU. */ 138 FullCPU *cpu; 139
|
245 /** Cache interface. */
246 MemInterface *dcacheInterface;
247
248 /** Pointer to IEW stage. */
249 IEW *iewStage;
250
|
140 /** The memory dependence unit, which tracks/predicts memory dependences 141 * between instructions. 142 */
|
254 MemDepUnit memDepUnit[Impl::MaxThreads];
|
143 MemDepUnit memDepUnit; |
144 145 /** The queue to the execute stage. Issued instructions will be written 146 * into it. 147 */ 148 TimeBuffer<IssueStruct> *issueToExecuteQueue; 149 150 /** The backwards time buffer. */ 151 TimeBuffer<TimeStruct> *timeBuffer; 152 153 /** Wire to read information from timebuffer. */ 154 typename TimeBuffer<TimeStruct>::wire fromCommit; 155
|
267 /** Function unit pool. */
268 FUPool *fuPool;
|
156 enum InstList { 157 Int, 158 Float, 159 Branch, 160 Memory, 161 Misc, 162 Squashed, 163 None 164 }; |
165
|
270 //////////////////////////////////////
271 // Instruction lists, ready queues, and ordering
272 //////////////////////////////////////
|
166 /** List of ready int instructions. Used to keep track of the order in 167 * which instructions should issue. 168 */ 169 ReadyInstQueue readyIntInsts; |
170
|
274 /** List of all the instructions in the IQ (some of which may be issued). */
275 std::list<DynInstPtr> instList[Impl::MaxThreads];
|
171 /** List of ready floating point instructions. */ 172 ReadyInstQueue readyFloatInsts; |
173
|
277 std::list<DynInstPtr> instsToExecute;
|
174 /** List of ready branch instructions. */ 175 ReadyInstQueue readyBranchInsts; |
176
|
279 /**
280 * Struct for comparing entries to be added to the priority queue. This
281 * gives reverse ordering to the instructions in terms of sequence
282 * numbers: the instructions with smaller sequence numbers (and hence
283 * are older) will be at the top of the priority queue.
284 */
285 struct pqCompare {
286 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
287 {
288 return lhs->seqNum > rhs->seqNum;
289 }
290 };
|
177 /** List of ready miscellaneous instructions. */ 178 ReadyInstQueue readyMiscInsts; |
179
|
292 typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
293 ReadyInstQueue;
294
295 /** List of ready instructions, per op class. They are separated by op
296 * class to allow for easy mapping to FUs.
|
180 /** List of squashed instructions (which are still valid and in IQ). 181 * Implemented using a priority queue; the entries must contain both 182 * the IQ index and sequence number of each instruction so that 183 * ordering based on sequence numbers can be used. |
184 */
|
298 ReadyInstQueue readyInsts[Num_OpClasses];
|
185 ReadyInstQueue squashedInsts; |
186 187 /** List of non-speculative instructions that will be scheduled 188 * once the IQ gets a signal from commit. While it's redundant to 189 * have the key be a part of the value (the sequence number is stored 190 * inside of DynInst), when these instructions are woken up only 191 * the sequence number will be available. Thus it is most efficient to be 192 * able to search by the sequence number alone. 193 */ 194 std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 195
|
309 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
|
196 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t; |
197
|
311 /** Entry for the list age ordering by op class. */
312 struct ListOrderEntry {
313 OpClass queueType;
314 InstSeqNum oldestInst;
315 };
|
198 /** Number of free IQ entries left. */ 199 unsigned freeEntries; |
200
|
317 /** List that contains the age order of the oldest instruction of each
318 * ready queue. Used to select the oldest instruction available
319 * among op classes.
320 * @todo: Might be better to just move these entries around instead
321 * of creating new ones every time the position changes due to an
322 * instruction issuing. Not sure std::list supports this.
323 */
324 std::list<ListOrderEntry> listOrder;
|
201 /** The number of entries in the instruction queue. */ 202 unsigned numEntries; |
203
|
326 typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
327
328 /** Tracks if each ready queue is on the age order list. */
329 bool queueOnList[Num_OpClasses];
330
331 /** Iterators of each ready queue. Points to their spot in the age order
332 * list.
|
204 /** The number of integer instructions that can be issued in one 205 * cycle. |
206 */
|
334 ListOrderIt readyIt[Num_OpClasses];
|
207 unsigned intWidth; |
208
|
336 /** Add an op class to the age order list. */
337 void addToOrderList(OpClass op_class);
338
339 /**
340 * Called when the oldest instruction has been removed from a ready queue;
341 * this places that ready queue into the proper spot in the age order list.
|
209 /** The number of floating point instructions that can be issued 210 * in one cycle. |
211 */
|
343 void moveToYoungerInst(ListOrderIt age_order_it);
|
212 unsigned floatWidth; |
213
|
345 DependencyGraph<DynInstPtr> dependGraph;
|
214 /** The number of branches that can be issued in one cycle. */ 215 unsigned branchWidth; |
216
|
347 //////////////////////////////////////
348 // Various parameters
349 //////////////////////////////////////
|
217 /** The number of memory instructions that can be issued in one cycle. */ 218 unsigned memoryWidth; |
219
|
351 /** IQ Resource Sharing Policy */
352 enum IQPolicy {
353 Dynamic,
354 Partitioned,
355 Threshold
356 };
357
358 /** IQ sharing policy for SMT. */
359 IQPolicy iqPolicy;
360
361 /** Number of Total Threads*/
362 unsigned numThreads;
363
364 /** Pointer to list of active threads. */
365 std::list<unsigned> *activeThreads;
366
367 /** Per Thread IQ count */
368 unsigned count[Impl::MaxThreads];
369
370 /** Max IQ Entries Per Thread */
371 unsigned maxEntries[Impl::MaxThreads];
372
373 /** Number of free IQ entries left. */
374 unsigned freeEntries;
375
376 /** The number of entries in the instruction queue. */
377 unsigned numEntries;
378
|
220 /** The total number of instructions that can be issued in one cycle. */ 221 unsigned totalWidth; 222
|
382 /** The number of physical registers in the CPU. */
|
223 //The number of physical registers in the CPU. |
224 unsigned numPhysRegs; 225 226 /** The number of physical integer registers in the CPU. */ 227 unsigned numPhysIntRegs; 228 229 /** The number of floating point registers in the CPU. */ 230 unsigned numPhysFloatRegs; 231 232 /** Delay between commit stage and the IQ. 233 * @todo: Make there be a distinction between the delays within IEW. 234 */ 235 unsigned commitToIEWDelay; 236
|
396 bool switchedOut;
|
237 ////////////////////////////////// 238 // Variables needed for squashing 239 ////////////////////////////////// |
240 241 /** The sequence number of the squashed instruction. */
|
399 InstSeqNum squashedSeqNum[Impl::MaxThreads];
|
242 InstSeqNum squashedSeqNum; |
243
|
244 /** Iterator that points to the youngest instruction in the IQ. */ 245 ListIt tail; 246 247 /** Iterator that points to the last instruction that has been squashed. 248 * This will not be valid unless the IQ is in the process of squashing. 249 */ 250 ListIt squashIt; 251 252 /////////////////////////////////// 253 // Dependency graph stuff 254 /////////////////////////////////// 255 256 class DependencyEntry 257 { 258 public: 259 DynInstPtr inst; 260 //Might want to include data about what arch. register the 261 //dependence is waiting on. 262 DependencyEntry *next; 263 264 //This function, and perhaps this whole class, stand out a little 265 //bit as they don't fit a classification well. I want access 266 //to the underlying structure of the linked list, yet at 267 //the same time it feels like this should be something abstracted 268 //away. So for now it will sit here, within the IQ, until 269 //a better implementation is decided upon. 270 // This function probably shouldn't be within the entry... 271 void insert(DynInstPtr &new_inst); 272 273 void remove(DynInstPtr &inst_to_remove); 274 275 // Debug variable, remove when done testing. 276 static unsigned mem_alloc_counter; 277 }; 278 279 /** Array of linked lists. Each linked list is a list of all the 280 * instructions that depend upon a given register. The actual 281 * register's index is used to index into the graph; ie all 282 * instructions in flight that are dependent upon r34 will be 283 * in the linked list of dependGraph[34]. 284 */ 285 DependencyEntry *dependGraph; 286 |
287 /** A cache of the recently woken registers. It is 1 if the register 288 * has been woken up recently, and 0 if the register has been added 289 * to the dependency graph and has not yet received its value. It 290 * is basically a secondary scoreboard, and should pretty much mirror 291 * the scoreboard that exists in the rename map. 292 */
|
407 std::vector<bool> regScoreboard;
|
293 vector regScoreboard; |
294
|
409 /** Adds an instruction to the dependency graph, as a consumer. */
|
295 bool addToDependents(DynInstPtr &new_inst);
|
296 void insertDependency(DynInstPtr &new_inst); 297 void createDependency(DynInstPtr &new_inst); |
298
|
412 /** Adds an instruction to the dependency graph, as a producer. */
413 void addToProducers(DynInstPtr &new_inst);
414
415 /** Moves an instruction to the ready queue if it is ready. */
|
299 void addIfReady(DynInstPtr &inst); 300
|
301 private: |
302 /** Debugging function to count how many entries are in the IQ. It does 303 * a linear walk through the instructions, so do not call this function 304 * during normal execution. 305 */ 306 int countInsts(); 307
|
308 /** Debugging function to dump out the dependency graph. 309 */ 310 void dumpDependGraph(); 311 |
312 /** Debugging function to dump all the list sizes, as well as print 313 * out the list of nonspeculative instructions. Should not be used 314 * in any other capacity, but it has no harmful sideaffects. 315 */ 316 void dumpLists(); 317
|
430 /** Debugging function to dump out all instructions that are in the
431 * IQ.
432 */
433 void dumpInsts();
434
435 /** Stat for number of instructions added. */
|
318 Stats::Scalar<> iqInstsAdded;
|
437 /** Stat for number of non-speculative instructions added. */
|
319 Stats::Scalar<> iqNonSpecInstsAdded;
|
439
440 Stats::Scalar<> iqInstsIssued;
441 /** Stat for number of integer instructions issued. */
|
320// Stats::Scalar<> iqIntInstsAdded; |
321 Stats::Scalar<> iqIntInstsIssued;
|
443 /** Stat for number of floating point instructions issued. */
|
322// Stats::Scalar<> iqFloatInstsAdded; |
323 Stats::Scalar<> iqFloatInstsIssued;
|
445 /** Stat for number of branch instructions issued. */
|
324// Stats::Scalar<> iqBranchInstsAdded; |
325 Stats::Scalar<> iqBranchInstsIssued;
|
447 /** Stat for number of memory instructions issued. */
|
326// Stats::Scalar<> iqMemInstsAdded; |
327 Stats::Scalar<> iqMemInstsIssued;
|
449 /** Stat for number of miscellaneous instructions issued. */
|
328// Stats::Scalar<> iqMiscInstsAdded; |
329 Stats::Scalar<> iqMiscInstsIssued;
|
451 /** Stat for number of squashed instructions that were ready to issue. */
|
330 Stats::Scalar<> iqSquashedInstsIssued;
|
453 /** Stat for number of squashed instructions examined when squashing. */
|
331 Stats::Scalar<> iqLoopSquashStalls; |
332 Stats::Scalar<> iqSquashedInstsExamined;
|
455 /** Stat for number of squashed instruction operands examined when
456 * squashing.
457 */
|
333 Stats::Scalar<> iqSquashedOperandsExamined;
|
459 /** Stat for number of non-speculative instructions removed due to a squash.
460 */
|
334 Stats::Scalar<> iqSquashedNonSpecRemoved; 335
|
463 Stats::VectorDistribution<> queueResDist;
464 Stats::Distribution<> numIssuedDist;
465 Stats::VectorDistribution<> issueDelayDist;
466
467 Stats::Vector<> statFuBusy;
468// Stats::Vector<> dist_unissued;
469 Stats::Vector2d<> statIssuedInstType;
470
471 Stats::Formula issueRate;
472// Stats::Formula issue_stores;
473// Stats::Formula issue_op_rate;
474 Stats::Vector<> fuBusy; //cumulative fu busy
475
476 Stats::Formula fuBusyRate;
|
336}; 337
|
479#endif //__CPU_O3_INST_QUEUE_HH__
|
338#endif //__CPU_O3_CPU_INST_QUEUE_HH__ |
|