Deleted Added
sdiff udiff text old ( 2654:9559cfa91b9d ) new ( 2665:a124942bacb8 )
full compact
1/*
2 * Copyright (c) 2004-2005 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the

--- 8 unchanged lines hidden (view full) ---

19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 */
30
31#ifndef __CPU_O3_CPU_INST_QUEUE_HH__
32#define __CPU_O3_CPU_INST_QUEUE_HH__
33
34#include <list>
35#include <map>
36#include <queue>
37#include <vector>
38
39#include "base/statistics.hh"
40#include "base/timebuf.hh"
41#include "cpu/inst_seq.hh"
42#include "sim/host.hh"
43
44/**
45 * A standard instruction queue class. It holds ready instructions, in
46 * order, in seperate priority queues to facilitate the scheduling of
47 * instructions. The IQ uses a separate linked list to track dependencies.
48 * Similar to the rename map and the free list, it expects that
49 * floating point registers have their indices start after the integer
50 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
51 * and 96-191 are fp). This remains true even for both logical and
52 * physical register indices.
53 */
54template <class Impl>
55class InstructionQueue
56{
57 public:
58 //Typedefs from the Impl.
59 typedef typename Impl::FullCPU FullCPU;
60 typedef typename Impl::DynInstPtr DynInstPtr;
61 typedef typename Impl::Params Params;
62
63 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
64 typedef typename Impl::CPUPol::IssueStruct IssueStruct;
65 typedef typename Impl::CPUPol::TimeStruct TimeStruct;
66
67 // Typedef of iterator through the list of instructions. Might be
68 // better to untie this from the FullCPU or pass its information to
69 // the stages.
70 typedef typename std::list<DynInstPtr>::iterator ListIt;
71
72 /**
73 * Struct for comparing entries to be added to the priority queue. This
74 * gives reverse ordering to the instructions in terms of sequence
75 * numbers: the instructions with smaller sequence numbers (and hence
76 * are older) will be at the top of the priority queue.
77 */
78 struct pqCompare
79 {
80 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
81 {
82 return lhs->seqNum > rhs->seqNum;
83 }
84 };
85
86 /**
87 * Struct for comparing entries to be added to the set. This gives
88 * standard ordering in terms of sequence numbers.
89 */
90 struct setCompare
91 {
92 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
93 {
94 return lhs->seqNum < rhs->seqNum;
95 }
96 };
97
98 typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare>
99 ReadyInstQueue;
100
101 InstructionQueue(Params &params);
102
103 void regStats();
104
105 void setCPU(FullCPU *cpu);
106
107 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
108
109 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
110
111 unsigned numFreeEntries();
112
113 bool isFull();
114
115 void insert(DynInstPtr &new_inst);
116
117 void insertNonSpec(DynInstPtr &new_inst);
118
119 void advanceTail(DynInstPtr &inst);
120
121 void scheduleReadyInsts();
122
123 void scheduleNonSpec(const InstSeqNum &inst);
124
125 void wakeDependents(DynInstPtr &completed_inst);
126
127 void violation(DynInstPtr &store, DynInstPtr &faulting_load);
128
129 // Change this to take in the sequence number
130 void squash();
131
132 void doSquash();
133
134 void stopSquash();
135
136 private:
137 /** Pointer to the CPU. */
138 FullCPU *cpu;
139
140 /** The memory dependence unit, which tracks/predicts memory dependences
141 * between instructions.
142 */
143 MemDepUnit memDepUnit;
144
145 /** The queue to the execute stage. Issued instructions will be written
146 * into it.
147 */
148 TimeBuffer<IssueStruct> *issueToExecuteQueue;
149
150 /** The backwards time buffer. */
151 TimeBuffer<TimeStruct> *timeBuffer;
152
153 /** Wire to read information from timebuffer. */
154 typename TimeBuffer<TimeStruct>::wire fromCommit;
155
156 enum InstList {
157 Int,
158 Float,
159 Branch,
160 Memory,
161 Misc,
162 Squashed,
163 None
164 };
165
166 /** List of ready int instructions. Used to keep track of the order in
167 * which instructions should issue.
168 */
169 ReadyInstQueue readyIntInsts;
170
171 /** List of ready floating point instructions. */
172 ReadyInstQueue readyFloatInsts;
173
174 /** List of ready branch instructions. */
175 ReadyInstQueue readyBranchInsts;
176
177 /** List of ready miscellaneous instructions. */
178 ReadyInstQueue readyMiscInsts;
179
180 /** List of squashed instructions (which are still valid and in IQ).
181 * Implemented using a priority queue; the entries must contain both
182 * the IQ index and sequence number of each instruction so that
183 * ordering based on sequence numbers can be used.
184 */
185 ReadyInstQueue squashedInsts;
186
187 /** List of non-speculative instructions that will be scheduled
188 * once the IQ gets a signal from commit. While it's redundant to
189 * have the key be a part of the value (the sequence number is stored
190 * inside of DynInst), when these instructions are woken up only
191 * the sequence number will be available. Thus it is most efficient to be
192 * able to search by the sequence number alone.
193 */
194 std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
195
196 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t;
197
198 /** Number of free IQ entries left. */
199 unsigned freeEntries;
200
201 /** The number of entries in the instruction queue. */
202 unsigned numEntries;
203
204 /** The number of integer instructions that can be issued in one
205 * cycle.
206 */
207 unsigned intWidth;
208
209 /** The number of floating point instructions that can be issued
210 * in one cycle.
211 */
212 unsigned floatWidth;
213
214 /** The number of branches that can be issued in one cycle. */
215 unsigned branchWidth;
216
217 /** The number of memory instructions that can be issued in one cycle. */
218 unsigned memoryWidth;
219
220 /** The total number of instructions that can be issued in one cycle. */
221 unsigned totalWidth;
222
223 //The number of physical registers in the CPU.
224 unsigned numPhysRegs;
225
226 /** The number of physical integer registers in the CPU. */
227 unsigned numPhysIntRegs;
228
229 /** The number of floating point registers in the CPU. */
230 unsigned numPhysFloatRegs;
231
232 /** Delay between commit stage and the IQ.
233 * @todo: Make there be a distinction between the delays within IEW.
234 */
235 unsigned commitToIEWDelay;
236
237 //////////////////////////////////
238 // Variables needed for squashing
239 //////////////////////////////////
240
241 /** The sequence number of the squashed instruction. */
242 InstSeqNum squashedSeqNum;
243
244 /** Iterator that points to the youngest instruction in the IQ. */
245 ListIt tail;
246
247 /** Iterator that points to the last instruction that has been squashed.
248 * This will not be valid unless the IQ is in the process of squashing.
249 */
250 ListIt squashIt;
251
252 ///////////////////////////////////
253 // Dependency graph stuff
254 ///////////////////////////////////
255
256 class DependencyEntry
257 {
258 public:
259 DynInstPtr inst;
260 //Might want to include data about what arch. register the
261 //dependence is waiting on.
262 DependencyEntry *next;
263
264 //This function, and perhaps this whole class, stand out a little
265 //bit as they don't fit a classification well. I want access
266 //to the underlying structure of the linked list, yet at
267 //the same time it feels like this should be something abstracted
268 //away. So for now it will sit here, within the IQ, until
269 //a better implementation is decided upon.
270 // This function probably shouldn't be within the entry...
271 void insert(DynInstPtr &new_inst);
272
273 void remove(DynInstPtr &inst_to_remove);
274
275 // Debug variable, remove when done testing.
276 static unsigned mem_alloc_counter;
277 };
278
279 /** Array of linked lists. Each linked list is a list of all the
280 * instructions that depend upon a given register. The actual
281 * register's index is used to index into the graph; ie all
282 * instructions in flight that are dependent upon r34 will be
283 * in the linked list of dependGraph[34].
284 */
285 DependencyEntry *dependGraph;
286
287 /** A cache of the recently woken registers. It is 1 if the register
288 * has been woken up recently, and 0 if the register has been added
289 * to the dependency graph and has not yet received its value. It
290 * is basically a secondary scoreboard, and should pretty much mirror
291 * the scoreboard that exists in the rename map.
292 */
293 vector regScoreboard;
294
295 bool addToDependents(DynInstPtr &new_inst);
296 void insertDependency(DynInstPtr &new_inst);
297 void createDependency(DynInstPtr &new_inst);
298
299 void addIfReady(DynInstPtr &inst);
300
301 private:
302 /** Debugging function to count how many entries are in the IQ. It does
303 * a linear walk through the instructions, so do not call this function
304 * during normal execution.
305 */
306 int countInsts();
307
308 /** Debugging function to dump out the dependency graph.
309 */
310 void dumpDependGraph();
311
312 /** Debugging function to dump all the list sizes, as well as print
313 * out the list of nonspeculative instructions. Should not be used
314 * in any other capacity, but it has no harmful sideaffects.
315 */
316 void dumpLists();
317
318 Stats::Scalar<> iqInstsAdded;
319 Stats::Scalar<> iqNonSpecInstsAdded;
320// Stats::Scalar<> iqIntInstsAdded;
321 Stats::Scalar<> iqIntInstsIssued;
322// Stats::Scalar<> iqFloatInstsAdded;
323 Stats::Scalar<> iqFloatInstsIssued;
324// Stats::Scalar<> iqBranchInstsAdded;
325 Stats::Scalar<> iqBranchInstsIssued;
326// Stats::Scalar<> iqMemInstsAdded;
327 Stats::Scalar<> iqMemInstsIssued;
328// Stats::Scalar<> iqMiscInstsAdded;
329 Stats::Scalar<> iqMiscInstsIssued;
330 Stats::Scalar<> iqSquashedInstsIssued;
331 Stats::Scalar<> iqLoopSquashStalls;
332 Stats::Scalar<> iqSquashedInstsExamined;
333 Stats::Scalar<> iqSquashedOperandsExamined;
334 Stats::Scalar<> iqSquashedNonSpecRemoved;
335
336};
337
338#endif //__CPU_O3_CPU_INST_QUEUE_HH__