1/* |
2 * Copyright (c) 2004-2005 The Regents of The University of Michigan |
3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the --- 8 unchanged lines hidden (view full) --- 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 * 28 * Authors: Kevin Lim |
29 */ 30 |
31#ifndef __CPU_O3_CPU_INST_QUEUE_HH__ 32#define __CPU_O3_CPU_INST_QUEUE_HH__ |
33 34#include <list> 35#include <map> 36#include <queue> 37#include <vector> 38 39#include "base/statistics.hh" 40#include "base/timebuf.hh" 41#include "cpu/inst_seq.hh" |
42#include "sim/host.hh" 43 |
44/** 45 * A standard instruction queue class. It holds ready instructions, in 46 * order, in seperate priority queues to facilitate the scheduling of 47 * instructions. The IQ uses a separate linked list to track dependencies. 48 * Similar to the rename map and the free list, it expects that 49 * floating point registers have their indices start after the integer 50 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer 51 * and 96-191 are fp). This remains true even for both logical and |
52 * physical register indices. |
53 */ 54template <class Impl> 55class InstructionQueue 56{ 57 public: 58 //Typedefs from the Impl. 59 typedef typename Impl::FullCPU FullCPU; 60 typedef typename Impl::DynInstPtr DynInstPtr; 61 typedef typename Impl::Params Params; 62 |
63 typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; 64 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 65 typedef typename Impl::CPUPol::TimeStruct TimeStruct; 66 |
67 // Typedef of iterator through the list of instructions. Might be 68 // better to untie this from the FullCPU or pass its information to 69 // the stages. |
70 typedef typename std::list<DynInstPtr>::iterator ListIt; 71 |
72 /** 73 * Struct for comparing entries to be added to the priority queue. This 74 * gives reverse ordering to the instructions in terms of sequence 75 * numbers: the instructions with smaller sequence numbers (and hence 76 * are older) will be at the top of the priority queue. 77 */ 78 struct pqCompare 79 { 80 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 81 { 82 return lhs->seqNum > rhs->seqNum; 83 } 84 }; |
85 |
86 /** 87 * Struct for comparing entries to be added to the set. This gives 88 * standard ordering in terms of sequence numbers. 89 */ 90 struct setCompare 91 { 92 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const 93 { 94 return lhs->seqNum < rhs->seqNum; 95 } |
96 }; 97 |
98 typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare> 99 ReadyInstQueue; |
100 |
101 InstructionQueue(Params ¶ms); |
102 |
103 void regStats(); 104 |
105 void setCPU(FullCPU *cpu); |
106 |
107 void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); 108 |
109 void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); 110 |
111 unsigned numFreeEntries(); 112 |
113 bool isFull(); 114 |
115 void insert(DynInstPtr &new_inst); 116 |
117 void insertNonSpec(DynInstPtr &new_inst); 118 |
119 void advanceTail(DynInstPtr &inst); |
120 |
121 void scheduleReadyInsts(); 122 |
123 void scheduleNonSpec(const InstSeqNum &inst); 124 |
125 void wakeDependents(DynInstPtr &completed_inst); |
126 |
127 void violation(DynInstPtr &store, DynInstPtr &faulting_load); 128 |
129 // Change this to take in the sequence number 130 void squash(); |
131 |
132 void doSquash(); |
133 |
134 void stopSquash(); |
135 136 private: |
137 /** Pointer to the CPU. */ 138 FullCPU *cpu; 139 |
140 /** The memory dependence unit, which tracks/predicts memory dependences 141 * between instructions. 142 */ |
143 MemDepUnit memDepUnit; |
144 145 /** The queue to the execute stage. Issued instructions will be written 146 * into it. 147 */ 148 TimeBuffer<IssueStruct> *issueToExecuteQueue; 149 150 /** The backwards time buffer. */ 151 TimeBuffer<TimeStruct> *timeBuffer; 152 153 /** Wire to read information from timebuffer. */ 154 typename TimeBuffer<TimeStruct>::wire fromCommit; 155 |
156 enum InstList { 157 Int, 158 Float, 159 Branch, 160 Memory, 161 Misc, 162 Squashed, 163 None 164 }; |
165 |
166 /** List of ready int instructions. Used to keep track of the order in 167 * which instructions should issue. 168 */ 169 ReadyInstQueue readyIntInsts; |
170 |
171 /** List of ready floating point instructions. */ 172 ReadyInstQueue readyFloatInsts; |
173 |
174 /** List of ready branch instructions. */ 175 ReadyInstQueue readyBranchInsts; |
176 |
177 /** List of ready miscellaneous instructions. */ 178 ReadyInstQueue readyMiscInsts; |
179 |
180 /** List of squashed instructions (which are still valid and in IQ). 181 * Implemented using a priority queue; the entries must contain both 182 * the IQ index and sequence number of each instruction so that 183 * ordering based on sequence numbers can be used. |
184 */ |
185 ReadyInstQueue squashedInsts; |
186 187 /** List of non-speculative instructions that will be scheduled 188 * once the IQ gets a signal from commit. While it's redundant to 189 * have the key be a part of the value (the sequence number is stored 190 * inside of DynInst), when these instructions are woken up only 191 * the sequence number will be available. Thus it is most efficient to be 192 * able to search by the sequence number alone. 193 */ 194 std::map<InstSeqNum, DynInstPtr> nonSpecInsts; 195 |
196 typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t; |
197 |
198 /** Number of free IQ entries left. */ 199 unsigned freeEntries; |
200 |
201 /** The number of entries in the instruction queue. */ 202 unsigned numEntries; |
203 |
204 /** The number of integer instructions that can be issued in one 205 * cycle. |
206 */ |
207 unsigned intWidth; |
208 |
209 /** The number of floating point instructions that can be issued 210 * in one cycle. |
211 */ |
212 unsigned floatWidth; |
213 |
214 /** The number of branches that can be issued in one cycle. */ 215 unsigned branchWidth; |
216 |
217 /** The number of memory instructions that can be issued in one cycle. */ 218 unsigned memoryWidth; |
219 |
220 /** The total number of instructions that can be issued in one cycle. */ 221 unsigned totalWidth; 222 |
223 //The number of physical registers in the CPU. |
224 unsigned numPhysRegs; 225 226 /** The number of physical integer registers in the CPU. */ 227 unsigned numPhysIntRegs; 228 229 /** The number of floating point registers in the CPU. */ 230 unsigned numPhysFloatRegs; 231 232 /** Delay between commit stage and the IQ. 233 * @todo: Make there be a distinction between the delays within IEW. 234 */ 235 unsigned commitToIEWDelay; 236 |
237 ////////////////////////////////// 238 // Variables needed for squashing 239 ////////////////////////////////// |
240 241 /** The sequence number of the squashed instruction. */ |
242 InstSeqNum squashedSeqNum; |
243 |
244 /** Iterator that points to the youngest instruction in the IQ. */ 245 ListIt tail; 246 247 /** Iterator that points to the last instruction that has been squashed. 248 * This will not be valid unless the IQ is in the process of squashing. 249 */ 250 ListIt squashIt; 251 252 /////////////////////////////////// 253 // Dependency graph stuff 254 /////////////////////////////////// 255 256 class DependencyEntry 257 { 258 public: 259 DynInstPtr inst; 260 //Might want to include data about what arch. register the 261 //dependence is waiting on. 262 DependencyEntry *next; 263 264 //This function, and perhaps this whole class, stand out a little 265 //bit as they don't fit a classification well. I want access 266 //to the underlying structure of the linked list, yet at 267 //the same time it feels like this should be something abstracted 268 //away. So for now it will sit here, within the IQ, until 269 //a better implementation is decided upon. 270 // This function probably shouldn't be within the entry... 271 void insert(DynInstPtr &new_inst); 272 273 void remove(DynInstPtr &inst_to_remove); 274 275 // Debug variable, remove when done testing. 276 static unsigned mem_alloc_counter; 277 }; 278 279 /** Array of linked lists. Each linked list is a list of all the 280 * instructions that depend upon a given register. The actual 281 * register's index is used to index into the graph; ie all 282 * instructions in flight that are dependent upon r34 will be 283 * in the linked list of dependGraph[34]. 284 */ 285 DependencyEntry *dependGraph; 286 |
287 /** A cache of the recently woken registers. It is 1 if the register 288 * has been woken up recently, and 0 if the register has been added 289 * to the dependency graph and has not yet received its value. It 290 * is basically a secondary scoreboard, and should pretty much mirror 291 * the scoreboard that exists in the rename map. 292 */ |
293 vector |
294 |
295 bool addToDependents(DynInstPtr &new_inst); |
296 void insertDependency(DynInstPtr &new_inst); 297 void createDependency(DynInstPtr &new_inst); |
298 |
299 void addIfReady(DynInstPtr &inst); 300 |
301 private: |
302 /** Debugging function to count how many entries are in the IQ. It does 303 * a linear walk through the instructions, so do not call this function 304 * during normal execution. 305 */ 306 int countInsts(); 307 |
308 /** Debugging function to dump out the dependency graph. 309 */ 310 void dumpDependGraph(); 311 |
312 /** Debugging function to dump all the list sizes, as well as print 313 * out the list of nonspeculative instructions. Should not be used 314 * in any other capacity, but it has no harmful sideaffects. 315 */ 316 void dumpLists(); 317 |
318 Stats::Scalar<> iqInstsAdded; |
319 Stats::Scalar<> iqNonSpecInstsAdded; |
320// Stats::Scalar<> iqIntInstsAdded; |
321 Stats::Scalar<> iqIntInstsIssued; |
322// Stats::Scalar<> iqFloatInstsAdded; |
323 Stats::Scalar<> iqFloatInstsIssued; |
324// Stats::Scalar<> iqBranchInstsAdded; |
325 Stats::Scalar<> iqBranchInstsIssued; |
326// Stats::Scalar<> iqMemInstsAdded; |
327 Stats::Scalar<> iqMemInstsIssued; |
328// Stats::Scalar<> iqMiscInstsAdded; |
329 Stats::Scalar<> iqMiscInstsIssued; |
330 Stats::Scalar<> iqSquashedInstsIssued; |
331 Stats::Scalar<> iqLoopSquashStalls; |
332 Stats::Scalar<> iqSquashedInstsExamined; |
333 Stats::Scalar<> iqSquashedOperandsExamined; |
334 Stats::Scalar<> iqSquashedNonSpecRemoved; 335 |
336}; 337 |
338#endif //__CPU_O3_CPU_INST_QUEUE_HH__ |