lsq_unit.hh (8591:8f23aeaf6a91) lsq_unit.hh (8727:b3995530319f)
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 * Korey Sewell
30 */
31
32#ifndef __CPU_O3_LSQ_UNIT_HH__
33#define __CPU_O3_LSQ_UNIT_HH__
34
35#include <algorithm>
36#include <cstring>
37#include <map>
38#include <queue>
39
40#include "arch/faults.hh"
41#include "arch/generic/debugfaults.hh"
42#include "arch/isa_traits.hh"
43#include "arch/locked_mem.hh"
44#include "arch/mmapped_ipr.hh"
45#include "base/fast_alloc.hh"
46#include "base/hashmap.hh"
47#include "config/full_system.hh"
48#include "config/the_isa.hh"
49#include "cpu/inst_seq.hh"
50#include "cpu/timebuf.hh"
51#include "debug/LSQUnit.hh"
52#include "mem/packet.hh"
53#include "mem/port.hh"
54
55class DerivO3CPUParams;
56
57/**
58 * Class that implements the actual LQ and SQ for each specific
59 * thread. Both are circular queues; load entries are freed upon
60 * committing, while store entries are freed once they writeback. The
61 * LSQUnit tracks if there are memory ordering violations, and also
62 * detects partial load to store forwarding cases (a store only has
63 * part of a load's data) that requires the load to wait until the
64 * store writes back. In the former case it holds onto the instruction
65 * until the dependence unit looks at it, and in the latter it stalls
66 * the LSQ until the store writes back. At that point the load is
67 * replayed.
68 */
69template <class Impl>
70class LSQUnit {
71 public:
72 typedef typename Impl::O3CPU O3CPU;
73 typedef typename Impl::DynInstPtr DynInstPtr;
74 typedef typename Impl::CPUPol::IEW IEW;
75 typedef typename Impl::CPUPol::LSQ LSQ;
76 typedef typename Impl::CPUPol::IssueStruct IssueStruct;
77
78 public:
79 /** Constructs an LSQ unit. init() must be called prior to use. */
80 LSQUnit();
81
82 /** Initializes the LSQ unit with the specified number of entries. */
83 void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
84 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
85 unsigned id);
86
87 /** Returns the name of the LSQ unit. */
88 std::string name() const;
89
90 /** Registers statistics. */
91 void regStats();
92
93 /** Sets the pointer to the dcache port. */
94 void setDcachePort(Port *dcache_port);
95
96 /** Switches out LSQ unit. */
97 void switchOut();
98
99 /** Takes over from another CPU's thread. */
100 void takeOverFrom();
101
102 /** Returns if the LSQ is switched out. */
103 bool isSwitchedOut() { return switchedOut; }
104
105 /** Ticks the LSQ unit, which in this case only resets the number of
106 * used cache ports.
107 * @todo: Move the number of used ports up to the LSQ level so it can
108 * be shared by all LSQ units.
109 */
110 void tick() { usedPorts = 0; }
111
112 /** Inserts an instruction. */
113 void insert(DynInstPtr &inst);
114 /** Inserts a load instruction. */
115 void insertLoad(DynInstPtr &load_inst);
116 /** Inserts a store instruction. */
117 void insertStore(DynInstPtr &store_inst);
118
119 /** Check for ordering violations in the LSQ. For a store squash if we
120 * ever find a conflicting load. For a load, only squash if
121 * an external snoop invalidate has been seen for that load address.
122 * @param load_idx index to start checking at
123 * @param inst the instruction to check
124 */
125 Fault checkViolations(int load_idx, DynInstPtr &inst);
126
127 /** Check if an incoming invalidate hits in the lsq on a load
128 * that might have issued out of order wrt another load because
129 * of the intermediate invalidate.
130 */
131 void checkSnoop(PacketPtr pkt);
132
133 /** Executes a load instruction. */
134 Fault executeLoad(DynInstPtr &inst);
135
136 Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
137 /** Executes a store instruction. */
138 Fault executeStore(DynInstPtr &inst);
139
140 /** Commits the head load. */
141 void commitLoad();
142 /** Commits loads older than a specific sequence number. */
143 void commitLoads(InstSeqNum &youngest_inst);
144
145 /** Commits stores older than a specific sequence number. */
146 void commitStores(InstSeqNum &youngest_inst);
147
148 /** Writes back stores. */
149 void writebackStores();
150
151 /** Completes the data access that has been returned from the
152 * memory system. */
153 void completeDataAccess(PacketPtr pkt);
154
155 /** Clears all the entries in the LQ. */
156 void clearLQ();
157
158 /** Clears all the entries in the SQ. */
159 void clearSQ();
160
161 /** Resizes the LQ to a given size. */
162 void resizeLQ(unsigned size);
163
164 /** Resizes the SQ to a given size. */
165 void resizeSQ(unsigned size);
166
167 /** Squashes all instructions younger than a specific sequence number. */
168 void squash(const InstSeqNum &squashed_num);
169
170 /** Returns if there is a memory ordering violation. Value is reset upon
171 * call to getMemDepViolator().
172 */
173 bool violation() { return memDepViolator; }
174
175 /** Returns the memory ordering violator. */
176 DynInstPtr getMemDepViolator();
177
178 /** Returns if a load became blocked due to the memory system. */
179 bool loadBlocked()
180 { return isLoadBlocked; }
181
182 /** Clears the signal that a load became blocked. */
183 void clearLoadBlocked()
184 { isLoadBlocked = false; }
185
186 /** Returns if the blocked load was handled. */
187 bool isLoadBlockedHandled()
188 { return loadBlockedHandled; }
189
190 /** Records the blocked load as being handled. */
191 void setLoadBlockedHandled()
192 { loadBlockedHandled = true; }
193
194 /** Returns the number of free entries (min of free LQ and SQ entries). */
195 unsigned numFreeEntries();
196
197 /** Returns the number of loads ready to execute. */
198 int numLoadsReady();
199
200 /** Returns the number of loads in the LQ. */
201 int numLoads() { return loads; }
202
203 /** Returns the number of stores in the SQ. */
204 int numStores() { return stores; }
205
206 /** Returns if either the LQ or SQ is full. */
207 bool isFull() { return lqFull() || sqFull(); }
208
209 /** Returns if the LQ is full. */
210 bool lqFull() { return loads >= (LQEntries - 1); }
211
212 /** Returns if the SQ is full. */
213 bool sqFull() { return stores >= (SQEntries - 1); }
214
215 /** Returns the number of instructions in the LSQ. */
216 unsigned getCount() { return loads + stores; }
217
218 /** Returns if there are any stores to writeback. */
219 bool hasStoresToWB() { return storesToWB; }
220
221 /** Returns the number of stores to writeback. */
222 int numStoresToWB() { return storesToWB; }
223
224 /** Returns if the LSQ unit will writeback on this cycle. */
225 bool willWB() { return storeQueue[storeWBIdx].canWB &&
226 !storeQueue[storeWBIdx].completed &&
227 !isStoreBlocked; }
228
229 /** Handles doing the retry. */
230 void recvRetry();
231
232 private:
233 /** Writes back the instruction, sending it to IEW. */
234 void writeback(DynInstPtr &inst, PacketPtr pkt);
235
236 /** Writes back a store that couldn't be completed the previous cycle. */
237 void writebackPendingStore();
238
239 /** Handles completing the send of a store to memory. */
240 void storePostSend(PacketPtr pkt);
241
242 /** Completes the store at the specified index. */
243 void completeStore(int store_idx);
244
245 /** Attempts to send a store to the cache. */
246 bool sendStore(PacketPtr data_pkt);
247
248 /** Increments the given store index (circular queue). */
249 inline void incrStIdx(int &store_idx);
250 /** Decrements the given store index (circular queue). */
251 inline void decrStIdx(int &store_idx);
252 /** Increments the given load index (circular queue). */
253 inline void incrLdIdx(int &load_idx);
254 /** Decrements the given load index (circular queue). */
255 inline void decrLdIdx(int &load_idx);
256
257 public:
258 /** Debugging function to dump instructions in the LSQ. */
259 void dumpInsts();
260
261 private:
262 /** Pointer to the CPU. */
263 O3CPU *cpu;
264
265 /** Pointer to the IEW stage. */
266 IEW *iewStage;
267
268 /** Pointer to the LSQ. */
269 LSQ *lsq;
270
271 /** Pointer to the dcache port. Used only for sending. */
272 Port *dcachePort;
273
274 /** Derived class to hold any sender state the LSQ needs. */
275 class LSQSenderState : public Packet::SenderState, public FastAlloc
276 {
277 public:
278 /** Default constructor. */
279 LSQSenderState()
280 : noWB(false), isSplit(false), pktToSend(false), outstanding(1),
281 mainPkt(NULL), pendingPacket(NULL)
282 { }
283
284 /** Instruction who initiated the access to memory. */
285 DynInstPtr inst;
286 /** Whether or not it is a load. */
287 bool isLoad;
288 /** The LQ/SQ index of the instruction. */
289 int idx;
290 /** Whether or not the instruction will need to writeback. */
291 bool noWB;
292 /** Whether or not this access is split in two. */
293 bool isSplit;
294 /** Whether or not there is a packet that needs sending. */
295 bool pktToSend;
296 /** Number of outstanding packets to complete. */
297 int outstanding;
298 /** The main packet from a split load, used during writeback. */
299 PacketPtr mainPkt;
300 /** A second packet from a split store that needs sending. */
301 PacketPtr pendingPacket;
302
303 /** Completes a packet and returns whether the access is finished. */
304 inline bool complete() { return --outstanding == 0; }
305 };
306
307 /** Writeback event, specifically for when stores forward data to loads. */
308 class WritebackEvent : public Event {
309 public:
310 /** Constructs a writeback event. */
311 WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
312
313 /** Processes the writeback event. */
314 void process();
315
316 /** Returns the description of this event. */
317 const char *description() const;
318
319 private:
320 /** Instruction whose results are being written back. */
321 DynInstPtr inst;
322
323 /** The packet that would have been sent to memory. */
324 PacketPtr pkt;
325
326 /** The pointer to the LSQ unit that issued the store. */
327 LSQUnit<Impl> *lsqPtr;
328 };
329
330 public:
331 struct SQEntry {
332 /** Constructs an empty store queue entry. */
333 SQEntry()
334 : inst(NULL), req(NULL), size(0),
335 canWB(0), committed(0), completed(0)
336 {
337 std::memset(data, 0, sizeof(data));
338 }
339
340 /** Constructs a store queue entry for a given instruction. */
341 SQEntry(DynInstPtr &_inst)
342 : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
343 isSplit(0), canWB(0), committed(0), completed(0)
344 {
345 std::memset(data, 0, sizeof(data));
346 }
347
348 /** The store instruction. */
349 DynInstPtr inst;
350 /** The request for the store. */
351 RequestPtr req;
352 /** The split requests for the store. */
353 RequestPtr sreqLow;
354 RequestPtr sreqHigh;
355 /** The size of the store. */
356 int size;
357 /** The store data. */
358 char data[16];
359 /** Whether or not the store is split into two requests. */
360 bool isSplit;
361 /** Whether or not the store can writeback. */
362 bool canWB;
363 /** Whether or not the store is committed. */
364 bool committed;
365 /** Whether or not the store is completed. */
366 bool completed;
367 };
368
369 private:
370 /** The LSQUnit thread id. */
371 ThreadID lsqID;
372
373 /** The store queue. */
374 std::vector<SQEntry> storeQueue;
375
376 /** The load queue. */
377 std::vector<DynInstPtr> loadQueue;
378
379 /** The number of LQ entries, plus a sentinel entry (circular queue).
380 * @todo: Consider having var that records the true number of LQ entries.
381 */
382 unsigned LQEntries;
383 /** The number of SQ entries, plus a sentinel entry (circular queue).
384 * @todo: Consider having var that records the true number of SQ entries.
385 */
386 unsigned SQEntries;
387
388 /** The number of places to shift addresses in the LSQ before checking
389 * for dependency violations
390 */
391 unsigned depCheckShift;
392
393 /** Should loads be checked for dependency issues */
394 bool checkLoads;
395
396 /** The number of load instructions in the LQ. */
397 int loads;
398 /** The number of store instructions in the SQ. */
399 int stores;
400 /** The number of store instructions in the SQ waiting to writeback. */
401 int storesToWB;
402
403 /** The index of the head instruction in the LQ. */
404 int loadHead;
405 /** The index of the tail instruction in the LQ. */
406 int loadTail;
407
408 /** The index of the head instruction in the SQ. */
409 int storeHead;
410 /** The index of the first instruction that may be ready to be
411 * written back, and has not yet been written back.
412 */
413 int storeWBIdx;
414 /** The index of the tail instruction in the SQ. */
415 int storeTail;
416
417 /// @todo Consider moving to a more advanced model with write vs read ports
418 /** The number of cache ports available each cycle. */
419 int cachePorts;
420
421 /** The number of used cache ports in this cycle. */
422 int usedPorts;
423
424 /** Is the LSQ switched out. */
425 bool switchedOut;
426
427 //list<InstSeqNum> mshrSeqNums;
428
429 /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
430 Addr cacheBlockMask;
431
432 /** Wire to read information from the issue stage time queue. */
433 typename TimeBuffer<IssueStruct>::wire fromIssue;
434
435 /** Whether or not the LSQ is stalled. */
436 bool stalled;
437 /** The store that causes the stall due to partial store to load
438 * forwarding.
439 */
440 InstSeqNum stallingStoreIsn;
441 /** The index of the above store. */
442 int stallingLoadIdx;
443
444 /** The packet that needs to be retried. */
445 PacketPtr retryPkt;
446
447 /** Whether or not a store is blocked due to the memory system. */
448 bool isStoreBlocked;
449
450 /** Whether or not a load is blocked due to the memory system. */
451 bool isLoadBlocked;
452
453 /** Has the blocked load been handled. */
454 bool loadBlockedHandled;
455
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 * Korey Sewell
30 */
31
32#ifndef __CPU_O3_LSQ_UNIT_HH__
33#define __CPU_O3_LSQ_UNIT_HH__
34
35#include <algorithm>
36#include <cstring>
37#include <map>
38#include <queue>
39
40#include "arch/faults.hh"
41#include "arch/generic/debugfaults.hh"
42#include "arch/isa_traits.hh"
43#include "arch/locked_mem.hh"
44#include "arch/mmapped_ipr.hh"
45#include "base/fast_alloc.hh"
46#include "base/hashmap.hh"
47#include "config/full_system.hh"
48#include "config/the_isa.hh"
49#include "cpu/inst_seq.hh"
50#include "cpu/timebuf.hh"
51#include "debug/LSQUnit.hh"
52#include "mem/packet.hh"
53#include "mem/port.hh"
54
55class DerivO3CPUParams;
56
57/**
58 * Class that implements the actual LQ and SQ for each specific
59 * thread. Both are circular queues; load entries are freed upon
60 * committing, while store entries are freed once they writeback. The
61 * LSQUnit tracks if there are memory ordering violations, and also
62 * detects partial load to store forwarding cases (a store only has
63 * part of a load's data) that requires the load to wait until the
64 * store writes back. In the former case it holds onto the instruction
65 * until the dependence unit looks at it, and in the latter it stalls
66 * the LSQ until the store writes back. At that point the load is
67 * replayed.
68 */
69template <class Impl>
70class LSQUnit {
71 public:
72 typedef typename Impl::O3CPU O3CPU;
73 typedef typename Impl::DynInstPtr DynInstPtr;
74 typedef typename Impl::CPUPol::IEW IEW;
75 typedef typename Impl::CPUPol::LSQ LSQ;
76 typedef typename Impl::CPUPol::IssueStruct IssueStruct;
77
78 public:
79 /** Constructs an LSQ unit. init() must be called prior to use. */
80 LSQUnit();
81
82 /** Initializes the LSQ unit with the specified number of entries. */
83 void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
84 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
85 unsigned id);
86
87 /** Returns the name of the LSQ unit. */
88 std::string name() const;
89
90 /** Registers statistics. */
91 void regStats();
92
93 /** Sets the pointer to the dcache port. */
94 void setDcachePort(Port *dcache_port);
95
96 /** Switches out LSQ unit. */
97 void switchOut();
98
99 /** Takes over from another CPU's thread. */
100 void takeOverFrom();
101
102 /** Returns if the LSQ is switched out. */
103 bool isSwitchedOut() { return switchedOut; }
104
105 /** Ticks the LSQ unit, which in this case only resets the number of
106 * used cache ports.
107 * @todo: Move the number of used ports up to the LSQ level so it can
108 * be shared by all LSQ units.
109 */
110 void tick() { usedPorts = 0; }
111
112 /** Inserts an instruction. */
113 void insert(DynInstPtr &inst);
114 /** Inserts a load instruction. */
115 void insertLoad(DynInstPtr &load_inst);
116 /** Inserts a store instruction. */
117 void insertStore(DynInstPtr &store_inst);
118
119 /** Check for ordering violations in the LSQ. For a store squash if we
120 * ever find a conflicting load. For a load, only squash if
121 * an external snoop invalidate has been seen for that load address.
122 * @param load_idx index to start checking at
123 * @param inst the instruction to check
124 */
125 Fault checkViolations(int load_idx, DynInstPtr &inst);
126
127 /** Check if an incoming invalidate hits in the lsq on a load
128 * that might have issued out of order wrt another load because
129 * of the intermediate invalidate.
130 */
131 void checkSnoop(PacketPtr pkt);
132
133 /** Executes a load instruction. */
134 Fault executeLoad(DynInstPtr &inst);
135
136 Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
137 /** Executes a store instruction. */
138 Fault executeStore(DynInstPtr &inst);
139
140 /** Commits the head load. */
141 void commitLoad();
142 /** Commits loads older than a specific sequence number. */
143 void commitLoads(InstSeqNum &youngest_inst);
144
145 /** Commits stores older than a specific sequence number. */
146 void commitStores(InstSeqNum &youngest_inst);
147
148 /** Writes back stores. */
149 void writebackStores();
150
151 /** Completes the data access that has been returned from the
152 * memory system. */
153 void completeDataAccess(PacketPtr pkt);
154
155 /** Clears all the entries in the LQ. */
156 void clearLQ();
157
158 /** Clears all the entries in the SQ. */
159 void clearSQ();
160
161 /** Resizes the LQ to a given size. */
162 void resizeLQ(unsigned size);
163
164 /** Resizes the SQ to a given size. */
165 void resizeSQ(unsigned size);
166
167 /** Squashes all instructions younger than a specific sequence number. */
168 void squash(const InstSeqNum &squashed_num);
169
170 /** Returns if there is a memory ordering violation. Value is reset upon
171 * call to getMemDepViolator().
172 */
173 bool violation() { return memDepViolator; }
174
175 /** Returns the memory ordering violator. */
176 DynInstPtr getMemDepViolator();
177
178 /** Returns if a load became blocked due to the memory system. */
179 bool loadBlocked()
180 { return isLoadBlocked; }
181
182 /** Clears the signal that a load became blocked. */
183 void clearLoadBlocked()
184 { isLoadBlocked = false; }
185
186 /** Returns if the blocked load was handled. */
187 bool isLoadBlockedHandled()
188 { return loadBlockedHandled; }
189
190 /** Records the blocked load as being handled. */
191 void setLoadBlockedHandled()
192 { loadBlockedHandled = true; }
193
194 /** Returns the number of free entries (min of free LQ and SQ entries). */
195 unsigned numFreeEntries();
196
197 /** Returns the number of loads ready to execute. */
198 int numLoadsReady();
199
200 /** Returns the number of loads in the LQ. */
201 int numLoads() { return loads; }
202
203 /** Returns the number of stores in the SQ. */
204 int numStores() { return stores; }
205
206 /** Returns if either the LQ or SQ is full. */
207 bool isFull() { return lqFull() || sqFull(); }
208
209 /** Returns if the LQ is full. */
210 bool lqFull() { return loads >= (LQEntries - 1); }
211
212 /** Returns if the SQ is full. */
213 bool sqFull() { return stores >= (SQEntries - 1); }
214
215 /** Returns the number of instructions in the LSQ. */
216 unsigned getCount() { return loads + stores; }
217
218 /** Returns if there are any stores to writeback. */
219 bool hasStoresToWB() { return storesToWB; }
220
221 /** Returns the number of stores to writeback. */
222 int numStoresToWB() { return storesToWB; }
223
224 /** Returns if the LSQ unit will writeback on this cycle. */
225 bool willWB() { return storeQueue[storeWBIdx].canWB &&
226 !storeQueue[storeWBIdx].completed &&
227 !isStoreBlocked; }
228
229 /** Handles doing the retry. */
230 void recvRetry();
231
232 private:
233 /** Writes back the instruction, sending it to IEW. */
234 void writeback(DynInstPtr &inst, PacketPtr pkt);
235
236 /** Writes back a store that couldn't be completed the previous cycle. */
237 void writebackPendingStore();
238
239 /** Handles completing the send of a store to memory. */
240 void storePostSend(PacketPtr pkt);
241
242 /** Completes the store at the specified index. */
243 void completeStore(int store_idx);
244
245 /** Attempts to send a store to the cache. */
246 bool sendStore(PacketPtr data_pkt);
247
248 /** Increments the given store index (circular queue). */
249 inline void incrStIdx(int &store_idx);
250 /** Decrements the given store index (circular queue). */
251 inline void decrStIdx(int &store_idx);
252 /** Increments the given load index (circular queue). */
253 inline void incrLdIdx(int &load_idx);
254 /** Decrements the given load index (circular queue). */
255 inline void decrLdIdx(int &load_idx);
256
257 public:
258 /** Debugging function to dump instructions in the LSQ. */
259 void dumpInsts();
260
261 private:
262 /** Pointer to the CPU. */
263 O3CPU *cpu;
264
265 /** Pointer to the IEW stage. */
266 IEW *iewStage;
267
268 /** Pointer to the LSQ. */
269 LSQ *lsq;
270
271 /** Pointer to the dcache port. Used only for sending. */
272 Port *dcachePort;
273
274 /** Derived class to hold any sender state the LSQ needs. */
275 class LSQSenderState : public Packet::SenderState, public FastAlloc
276 {
277 public:
278 /** Default constructor. */
279 LSQSenderState()
280 : noWB(false), isSplit(false), pktToSend(false), outstanding(1),
281 mainPkt(NULL), pendingPacket(NULL)
282 { }
283
284 /** Instruction who initiated the access to memory. */
285 DynInstPtr inst;
286 /** Whether or not it is a load. */
287 bool isLoad;
288 /** The LQ/SQ index of the instruction. */
289 int idx;
290 /** Whether or not the instruction will need to writeback. */
291 bool noWB;
292 /** Whether or not this access is split in two. */
293 bool isSplit;
294 /** Whether or not there is a packet that needs sending. */
295 bool pktToSend;
296 /** Number of outstanding packets to complete. */
297 int outstanding;
298 /** The main packet from a split load, used during writeback. */
299 PacketPtr mainPkt;
300 /** A second packet from a split store that needs sending. */
301 PacketPtr pendingPacket;
302
303 /** Completes a packet and returns whether the access is finished. */
304 inline bool complete() { return --outstanding == 0; }
305 };
306
307 /** Writeback event, specifically for when stores forward data to loads. */
308 class WritebackEvent : public Event {
309 public:
310 /** Constructs a writeback event. */
311 WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
312
313 /** Processes the writeback event. */
314 void process();
315
316 /** Returns the description of this event. */
317 const char *description() const;
318
319 private:
320 /** Instruction whose results are being written back. */
321 DynInstPtr inst;
322
323 /** The packet that would have been sent to memory. */
324 PacketPtr pkt;
325
326 /** The pointer to the LSQ unit that issued the store. */
327 LSQUnit<Impl> *lsqPtr;
328 };
329
330 public:
331 struct SQEntry {
332 /** Constructs an empty store queue entry. */
333 SQEntry()
334 : inst(NULL), req(NULL), size(0),
335 canWB(0), committed(0), completed(0)
336 {
337 std::memset(data, 0, sizeof(data));
338 }
339
340 /** Constructs a store queue entry for a given instruction. */
341 SQEntry(DynInstPtr &_inst)
342 : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
343 isSplit(0), canWB(0), committed(0), completed(0)
344 {
345 std::memset(data, 0, sizeof(data));
346 }
347
348 /** The store instruction. */
349 DynInstPtr inst;
350 /** The request for the store. */
351 RequestPtr req;
352 /** The split requests for the store. */
353 RequestPtr sreqLow;
354 RequestPtr sreqHigh;
355 /** The size of the store. */
356 int size;
357 /** The store data. */
358 char data[16];
359 /** Whether or not the store is split into two requests. */
360 bool isSplit;
361 /** Whether or not the store can writeback. */
362 bool canWB;
363 /** Whether or not the store is committed. */
364 bool committed;
365 /** Whether or not the store is completed. */
366 bool completed;
367 };
368
369 private:
370 /** The LSQUnit thread id. */
371 ThreadID lsqID;
372
373 /** The store queue. */
374 std::vector<SQEntry> storeQueue;
375
376 /** The load queue. */
377 std::vector<DynInstPtr> loadQueue;
378
379 /** The number of LQ entries, plus a sentinel entry (circular queue).
380 * @todo: Consider having var that records the true number of LQ entries.
381 */
382 unsigned LQEntries;
383 /** The number of SQ entries, plus a sentinel entry (circular queue).
384 * @todo: Consider having var that records the true number of SQ entries.
385 */
386 unsigned SQEntries;
387
388 /** The number of places to shift addresses in the LSQ before checking
389 * for dependency violations
390 */
391 unsigned depCheckShift;
392
393 /** Should loads be checked for dependency issues */
394 bool checkLoads;
395
396 /** The number of load instructions in the LQ. */
397 int loads;
398 /** The number of store instructions in the SQ. */
399 int stores;
400 /** The number of store instructions in the SQ waiting to writeback. */
401 int storesToWB;
402
403 /** The index of the head instruction in the LQ. */
404 int loadHead;
405 /** The index of the tail instruction in the LQ. */
406 int loadTail;
407
408 /** The index of the head instruction in the SQ. */
409 int storeHead;
410 /** The index of the first instruction that may be ready to be
411 * written back, and has not yet been written back.
412 */
413 int storeWBIdx;
414 /** The index of the tail instruction in the SQ. */
415 int storeTail;
416
417 /// @todo Consider moving to a more advanced model with write vs read ports
418 /** The number of cache ports available each cycle. */
419 int cachePorts;
420
421 /** The number of used cache ports in this cycle. */
422 int usedPorts;
423
424 /** Is the LSQ switched out. */
425 bool switchedOut;
426
427 //list<InstSeqNum> mshrSeqNums;
428
429 /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
430 Addr cacheBlockMask;
431
432 /** Wire to read information from the issue stage time queue. */
433 typename TimeBuffer<IssueStruct>::wire fromIssue;
434
435 /** Whether or not the LSQ is stalled. */
436 bool stalled;
437 /** The store that causes the stall due to partial store to load
438 * forwarding.
439 */
440 InstSeqNum stallingStoreIsn;
441 /** The index of the above store. */
442 int stallingLoadIdx;
443
444 /** The packet that needs to be retried. */
445 PacketPtr retryPkt;
446
447 /** Whether or not a store is blocked due to the memory system. */
448 bool isStoreBlocked;
449
450 /** Whether or not a load is blocked due to the memory system. */
451 bool isLoadBlocked;
452
453 /** Has the blocked load been handled. */
454 bool loadBlockedHandled;
455
456 /** Whether or not a store is in flight. */
457 bool storeInFlight;
458
456 /** The sequence number of the blocked load. */
457 InstSeqNum blockedLoadSeqNum;
458
459 /** The oldest load that caused a memory ordering violation. */
460 DynInstPtr memDepViolator;
461
462 /** Whether or not there is a packet that couldn't be sent because of
463 * a lack of cache ports. */
464 bool hasPendingPkt;
465
466 /** The packet that is pending free cache ports. */
467 PacketPtr pendingPkt;
468
459 /** The sequence number of the blocked load. */
460 InstSeqNum blockedLoadSeqNum;
461
462 /** The oldest load that caused a memory ordering violation. */
463 DynInstPtr memDepViolator;
464
465 /** Whether or not there is a packet that couldn't be sent because of
466 * a lack of cache ports. */
467 bool hasPendingPkt;
468
469 /** The packet that is pending free cache ports. */
470 PacketPtr pendingPkt;
471
472 /** Flag for memory model. */
473 bool needsTSO;
474
469 // Will also need how many read/write ports the Dcache has. Or keep track
470 // of that in stage that is one level up, and only call executeLoad/Store
471 // the appropriate number of times.
472 /** Total number of loads forwarded from LSQ stores. */
473 Stats::Scalar lsqForwLoads;
474
475 /** Total number of loads ignored due to invalid addresses. */
476 Stats::Scalar invAddrLoads;
477
478 /** Total number of squashed loads. */
479 Stats::Scalar lsqSquashedLoads;
480
481 /** Total number of responses from the memory system that are
482 * ignored due to the instruction already being squashed. */
483 Stats::Scalar lsqIgnoredResponses;
484
485 /** Total number of memory ordering violations. */
486 Stats::Scalar lsqMemOrderViolation;
487
488 /** Total number of squashed stores. */
489 Stats::Scalar lsqSquashedStores;
490
491 /** Total number of software prefetches ignored due to invalid addresses. */
492 Stats::Scalar invAddrSwpfs;
493
494 /** Ready loads blocked due to partial store-forwarding. */
495 Stats::Scalar lsqBlockedLoads;
496
497 /** Number of loads that were rescheduled. */
498 Stats::Scalar lsqRescheduledLoads;
499
500 /** Number of times the LSQ is blocked due to the cache. */
501 Stats::Scalar lsqCacheBlocked;
502
503 public:
504 /** Executes the load at the given index. */
505 Fault read(Request *req, Request *sreqLow, Request *sreqHigh,
506 uint8_t *data, int load_idx);
507
508 /** Executes the store at the given index. */
509 Fault write(Request *req, Request *sreqLow, Request *sreqHigh,
510 uint8_t *data, int store_idx);
511
512 /** Returns the index of the head load instruction. */
513 int getLoadHead() { return loadHead; }
514 /** Returns the sequence number of the head load instruction. */
515 InstSeqNum getLoadHeadSeqNum()
516 {
517 if (loadQueue[loadHead]) {
518 return loadQueue[loadHead]->seqNum;
519 } else {
520 return 0;
521 }
522
523 }
524
525 /** Returns the index of the head store instruction. */
526 int getStoreHead() { return storeHead; }
527 /** Returns the sequence number of the head store instruction. */
528 InstSeqNum getStoreHeadSeqNum()
529 {
530 if (storeQueue[storeHead].inst) {
531 return storeQueue[storeHead].inst->seqNum;
532 } else {
533 return 0;
534 }
535
536 }
537
538 /** Returns whether or not the LSQ unit is stalled. */
539 bool isStalled() { return stalled; }
540};
541
542template <class Impl>
543Fault
544LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
545 uint8_t *data, int load_idx)
546{
547 DynInstPtr load_inst = loadQueue[load_idx];
548
549 assert(load_inst);
550
551 assert(!load_inst->isExecuted());
552
553 // Make sure this isn't an uncacheable access
554 // A bit of a hackish way to get uncached accesses to work only if they're
555 // at the head of the LSQ and are ready to commit (at the head of the ROB
556 // too).
557 if (req->isUncacheable() &&
558 (load_idx != loadHead || !load_inst->isAtCommit())) {
559 iewStage->rescheduleMemInst(load_inst);
560 ++lsqRescheduledLoads;
561 DPRINTF(LSQUnit, "Uncachable load [sn:%lli] PC %s\n",
562 load_inst->seqNum, load_inst->pcState());
563
564 // Must delete request now that it wasn't handed off to
565 // memory. This is quite ugly. @todo: Figure out the proper
566 // place to really handle request deletes.
567 delete req;
568 if (TheISA::HasUnalignedMemAcc && sreqLow) {
569 delete sreqLow;
570 delete sreqHigh;
571 }
572 return new GenericISA::M5PanicFault(
573 "Uncachable load [sn:%llx] PC %s\n",
574 load_inst->seqNum, load_inst->pcState());
575 }
576
577 // Check the SQ for any previous stores that might lead to forwarding
578 int store_idx = load_inst->sqIdx;
579
580 int store_size = 0;
581
582 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
583 "storeHead: %i addr: %#x%s\n",
584 load_idx, store_idx, storeHead, req->getPaddr(),
585 sreqLow ? " split" : "");
586
587 if (req->isLLSC()) {
588 assert(!sreqLow);
589 // Disable recording the result temporarily. Writing to misc
590 // regs normally updates the result, but this is not the
591 // desired behavior when handling store conditionals.
592 load_inst->recordResult = false;
593 TheISA::handleLockedRead(load_inst.get(), req);
594 load_inst->recordResult = true;
595 }
596
597 if (req->isMmappedIpr()) {
598 assert(!load_inst->memData);
599 load_inst->memData = new uint8_t[64];
600
601 ThreadContext *thread = cpu->tcBase(lsqID);
602 Tick delay;
603 PacketPtr data_pkt =
604 new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
605
606 if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
607 data_pkt->dataStatic(load_inst->memData);
608 delay = TheISA::handleIprRead(thread, data_pkt);
609 } else {
610 assert(sreqLow->isMmappedIpr() && sreqHigh->isMmappedIpr());
611 PacketPtr fst_data_pkt =
612 new Packet(sreqLow, MemCmd::ReadReq, Packet::Broadcast);
613 PacketPtr snd_data_pkt =
614 new Packet(sreqHigh, MemCmd::ReadReq, Packet::Broadcast);
615
616 fst_data_pkt->dataStatic(load_inst->memData);
617 snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
618
619 delay = TheISA::handleIprRead(thread, fst_data_pkt);
620 unsigned delay2 = TheISA::handleIprRead(thread, snd_data_pkt);
621 if (delay2 > delay)
622 delay = delay2;
623
624 delete sreqLow;
625 delete sreqHigh;
626 delete fst_data_pkt;
627 delete snd_data_pkt;
628 }
629 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
630 cpu->schedule(wb, curTick() + delay);
631 return NoFault;
632 }
633
634 while (store_idx != -1) {
635 // End once we've reached the top of the LSQ
636 if (store_idx == storeWBIdx) {
637 break;
638 }
639
640 // Move the index to one younger
641 if (--store_idx < 0)
642 store_idx += SQEntries;
643
644 assert(storeQueue[store_idx].inst);
645
646 store_size = storeQueue[store_idx].size;
647
648 if (store_size == 0)
649 continue;
650 else if (storeQueue[store_idx].inst->uncacheable())
651 continue;
652
653 assert(storeQueue[store_idx].inst->effAddrValid);
654
655 // Check if the store data is within the lower and upper bounds of
656 // addresses that the request needs.
657 bool store_has_lower_limit =
658 req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
659 bool store_has_upper_limit =
660 (req->getVaddr() + req->getSize()) <=
661 (storeQueue[store_idx].inst->effAddr + store_size);
662 bool lower_load_has_store_part =
663 req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
664 store_size);
665 bool upper_load_has_store_part =
666 (req->getVaddr() + req->getSize()) >
667 storeQueue[store_idx].inst->effAddr;
668
669 // If the store's data has all of the data needed, we can forward.
670 if ((store_has_lower_limit && store_has_upper_limit)) {
671 // Get shift amount for offset into the store's data.
672 int shift_amt = req->getVaddr() - storeQueue[store_idx].inst->effAddr;
673
674 memcpy(data, storeQueue[store_idx].data + shift_amt,
675 req->getSize());
676
677 assert(!load_inst->memData);
678 load_inst->memData = new uint8_t[64];
679
680 memcpy(load_inst->memData,
681 storeQueue[store_idx].data + shift_amt, req->getSize());
682
683 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
684 "addr %#x, data %#x\n",
685 store_idx, req->getVaddr(), data);
686
687 PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq,
688 Packet::Broadcast);
689 data_pkt->dataStatic(load_inst->memData);
690
691 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
692
693 // We'll say this has a 1 cycle load-store forwarding latency
694 // for now.
695 // @todo: Need to make this a parameter.
696 cpu->schedule(wb, curTick());
697
698 // Don't need to do anything special for split loads.
699 if (TheISA::HasUnalignedMemAcc && sreqLow) {
700 delete sreqLow;
701 delete sreqHigh;
702 }
703
704 ++lsqForwLoads;
705 return NoFault;
706 } else if ((store_has_lower_limit && lower_load_has_store_part) ||
707 (store_has_upper_limit && upper_load_has_store_part) ||
708 (lower_load_has_store_part && upper_load_has_store_part)) {
709 // This is the partial store-load forwarding case where a store
710 // has only part of the load's data.
711
712 // If it's already been written back, then don't worry about
713 // stalling on it.
714 if (storeQueue[store_idx].completed) {
715 panic("Should not check one of these");
716 continue;
717 }
718
719 // Must stall load and force it to retry, so long as it's the oldest
720 // load that needs to do so.
721 if (!stalled ||
722 (stalled &&
723 load_inst->seqNum <
724 loadQueue[stallingLoadIdx]->seqNum)) {
725 stalled = true;
726 stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
727 stallingLoadIdx = load_idx;
728 }
729
730 // Tell IQ/mem dep unit that this instruction will need to be
731 // rescheduled eventually
732 iewStage->rescheduleMemInst(load_inst);
733 iewStage->decrWb(load_inst->seqNum);
734 load_inst->clearIssued();
735 ++lsqRescheduledLoads;
736
737 // Do not generate a writeback event as this instruction is not
738 // complete.
739 DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
740 "Store idx %i to load addr %#x\n",
741 store_idx, req->getVaddr());
742
743 // Must delete request now that it wasn't handed off to
744 // memory. This is quite ugly. @todo: Figure out the
745 // proper place to really handle request deletes.
746 delete req;
747 if (TheISA::HasUnalignedMemAcc && sreqLow) {
748 delete sreqLow;
749 delete sreqHigh;
750 }
751
752 return NoFault;
753 }
754 }
755
756 // If there's no forwarding case, then go access memory
757 DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
758 load_inst->seqNum, load_inst->pcState());
759
760 assert(!load_inst->memData);
761 load_inst->memData = new uint8_t[64];
762
763 ++usedPorts;
764
765 // if we the cache is not blocked, do cache access
766 bool completedFirst = false;
767 if (!lsq->cacheBlocked()) {
768 MemCmd command =
769 req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
770 PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast);
771 PacketPtr fst_data_pkt = NULL;
772 PacketPtr snd_data_pkt = NULL;
773
774 data_pkt->dataStatic(load_inst->memData);
775
776 LSQSenderState *state = new LSQSenderState;
777 state->isLoad = true;
778 state->idx = load_idx;
779 state->inst = load_inst;
780 data_pkt->senderState = state;
781
782 if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
783
784 // Point the first packet at the main data packet.
785 fst_data_pkt = data_pkt;
786 } else {
787
788 // Create the split packets.
789 fst_data_pkt = new Packet(sreqLow, command, Packet::Broadcast);
790 snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast);
791
792 fst_data_pkt->dataStatic(load_inst->memData);
793 snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
794
795 fst_data_pkt->senderState = state;
796 snd_data_pkt->senderState = state;
797
798 state->isSplit = true;
799 state->outstanding = 2;
800 state->mainPkt = data_pkt;
801 }
802
803 if (!dcachePort->sendTiming(fst_data_pkt)) {
804 // Delete state and data packet because a load retry
805 // initiates a pipeline restart; it does not retry.
806 delete state;
807 delete data_pkt->req;
808 delete data_pkt;
809 if (TheISA::HasUnalignedMemAcc && sreqLow) {
810 delete fst_data_pkt->req;
811 delete fst_data_pkt;
812 delete snd_data_pkt->req;
813 delete snd_data_pkt;
814 sreqLow = NULL;
815 sreqHigh = NULL;
816 }
817
818 req = NULL;
819
820 // If the access didn't succeed, tell the LSQ by setting
821 // the retry thread id.
822 lsq->setRetryTid(lsqID);
823 } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
824 completedFirst = true;
825
826 // The first packet was sent without problems, so send this one
827 // too. If there is a problem with this packet then the whole
828 // load will be squashed, so indicate this to the state object.
829 // The first packet will return in completeDataAccess and be
830 // handled there.
831 ++usedPorts;
832 if (!dcachePort->sendTiming(snd_data_pkt)) {
833
834 // The main packet will be deleted in completeDataAccess.
835 delete snd_data_pkt->req;
836 delete snd_data_pkt;
837
838 state->complete();
839
840 req = NULL;
841 sreqHigh = NULL;
842
843 lsq->setRetryTid(lsqID);
844 }
845 }
846 }
847
848 // If the cache was blocked, or has become blocked due to the access,
849 // handle it.
850 if (lsq->cacheBlocked()) {
851 if (req)
852 delete req;
853 if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
854 delete sreqLow;
855 delete sreqHigh;
856 }
857
858 ++lsqCacheBlocked;
859
860 // If the first part of a split access succeeds, then let the LSQ
861 // handle the decrWb when completeDataAccess is called upon return
862 // of the requested first part of data
863 if (!completedFirst)
864 iewStage->decrWb(load_inst->seqNum);
865
866 // There's an older load that's already going to squash.
867 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
868 return NoFault;
869
870 // Record that the load was blocked due to memory. This
871 // load will squash all instructions after it, be
872 // refetched, and re-executed.
873 isLoadBlocked = true;
874 loadBlockedHandled = false;
875 blockedLoadSeqNum = load_inst->seqNum;
876 // No fault occurred, even though the interface is blocked.
877 return NoFault;
878 }
879
880 return NoFault;
881}
882
883template <class Impl>
884Fault
885LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh,
886 uint8_t *data, int store_idx)
887{
888 assert(storeQueue[store_idx].inst);
889
890 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
891 " | storeHead:%i [sn:%i]\n",
892 store_idx, req->getPaddr(), data, storeHead,
893 storeQueue[store_idx].inst->seqNum);
894
895 storeQueue[store_idx].req = req;
896 storeQueue[store_idx].sreqLow = sreqLow;
897 storeQueue[store_idx].sreqHigh = sreqHigh;
898 unsigned size = req->getSize();
899 storeQueue[store_idx].size = size;
900 assert(size <= sizeof(storeQueue[store_idx].data));
901
902 // Split stores can only occur in ISAs with unaligned memory accesses. If
903 // a store request has been split, sreqLow and sreqHigh will be non-null.
904 if (TheISA::HasUnalignedMemAcc && sreqLow) {
905 storeQueue[store_idx].isSplit = true;
906 }
907
908 memcpy(storeQueue[store_idx].data, data, size);
909
910 // This function only writes the data to the store queue, so no fault
911 // can happen here.
912 return NoFault;
913}
914
915#endif // __CPU_O3_LSQ_UNIT_HH__
475 // Will also need how many read/write ports the Dcache has. Or keep track
476 // of that in stage that is one level up, and only call executeLoad/Store
477 // the appropriate number of times.
478 /** Total number of loads forwarded from LSQ stores. */
479 Stats::Scalar lsqForwLoads;
480
481 /** Total number of loads ignored due to invalid addresses. */
482 Stats::Scalar invAddrLoads;
483
484 /** Total number of squashed loads. */
485 Stats::Scalar lsqSquashedLoads;
486
487 /** Total number of responses from the memory system that are
488 * ignored due to the instruction already being squashed. */
489 Stats::Scalar lsqIgnoredResponses;
490
491 /** Total number of memory ordering violations. */
492 Stats::Scalar lsqMemOrderViolation;
493
494 /** Total number of squashed stores. */
495 Stats::Scalar lsqSquashedStores;
496
497 /** Total number of software prefetches ignored due to invalid addresses. */
498 Stats::Scalar invAddrSwpfs;
499
500 /** Ready loads blocked due to partial store-forwarding. */
501 Stats::Scalar lsqBlockedLoads;
502
503 /** Number of loads that were rescheduled. */
504 Stats::Scalar lsqRescheduledLoads;
505
506 /** Number of times the LSQ is blocked due to the cache. */
507 Stats::Scalar lsqCacheBlocked;
508
509 public:
510 /** Executes the load at the given index. */
511 Fault read(Request *req, Request *sreqLow, Request *sreqHigh,
512 uint8_t *data, int load_idx);
513
514 /** Executes the store at the given index. */
515 Fault write(Request *req, Request *sreqLow, Request *sreqHigh,
516 uint8_t *data, int store_idx);
517
518 /** Returns the index of the head load instruction. */
519 int getLoadHead() { return loadHead; }
520 /** Returns the sequence number of the head load instruction. */
521 InstSeqNum getLoadHeadSeqNum()
522 {
523 if (loadQueue[loadHead]) {
524 return loadQueue[loadHead]->seqNum;
525 } else {
526 return 0;
527 }
528
529 }
530
531 /** Returns the index of the head store instruction. */
532 int getStoreHead() { return storeHead; }
533 /** Returns the sequence number of the head store instruction. */
534 InstSeqNum getStoreHeadSeqNum()
535 {
536 if (storeQueue[storeHead].inst) {
537 return storeQueue[storeHead].inst->seqNum;
538 } else {
539 return 0;
540 }
541
542 }
543
544 /** Returns whether or not the LSQ unit is stalled. */
545 bool isStalled() { return stalled; }
546};
547
548template <class Impl>
549Fault
550LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
551 uint8_t *data, int load_idx)
552{
553 DynInstPtr load_inst = loadQueue[load_idx];
554
555 assert(load_inst);
556
557 assert(!load_inst->isExecuted());
558
559 // Make sure this isn't an uncacheable access
560 // A bit of a hackish way to get uncached accesses to work only if they're
561 // at the head of the LSQ and are ready to commit (at the head of the ROB
562 // too).
563 if (req->isUncacheable() &&
564 (load_idx != loadHead || !load_inst->isAtCommit())) {
565 iewStage->rescheduleMemInst(load_inst);
566 ++lsqRescheduledLoads;
567 DPRINTF(LSQUnit, "Uncachable load [sn:%lli] PC %s\n",
568 load_inst->seqNum, load_inst->pcState());
569
570 // Must delete request now that it wasn't handed off to
571 // memory. This is quite ugly. @todo: Figure out the proper
572 // place to really handle request deletes.
573 delete req;
574 if (TheISA::HasUnalignedMemAcc && sreqLow) {
575 delete sreqLow;
576 delete sreqHigh;
577 }
578 return new GenericISA::M5PanicFault(
579 "Uncachable load [sn:%llx] PC %s\n",
580 load_inst->seqNum, load_inst->pcState());
581 }
582
583 // Check the SQ for any previous stores that might lead to forwarding
584 int store_idx = load_inst->sqIdx;
585
586 int store_size = 0;
587
588 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
589 "storeHead: %i addr: %#x%s\n",
590 load_idx, store_idx, storeHead, req->getPaddr(),
591 sreqLow ? " split" : "");
592
593 if (req->isLLSC()) {
594 assert(!sreqLow);
595 // Disable recording the result temporarily. Writing to misc
596 // regs normally updates the result, but this is not the
597 // desired behavior when handling store conditionals.
598 load_inst->recordResult = false;
599 TheISA::handleLockedRead(load_inst.get(), req);
600 load_inst->recordResult = true;
601 }
602
603 if (req->isMmappedIpr()) {
604 assert(!load_inst->memData);
605 load_inst->memData = new uint8_t[64];
606
607 ThreadContext *thread = cpu->tcBase(lsqID);
608 Tick delay;
609 PacketPtr data_pkt =
610 new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
611
612 if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
613 data_pkt->dataStatic(load_inst->memData);
614 delay = TheISA::handleIprRead(thread, data_pkt);
615 } else {
616 assert(sreqLow->isMmappedIpr() && sreqHigh->isMmappedIpr());
617 PacketPtr fst_data_pkt =
618 new Packet(sreqLow, MemCmd::ReadReq, Packet::Broadcast);
619 PacketPtr snd_data_pkt =
620 new Packet(sreqHigh, MemCmd::ReadReq, Packet::Broadcast);
621
622 fst_data_pkt->dataStatic(load_inst->memData);
623 snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
624
625 delay = TheISA::handleIprRead(thread, fst_data_pkt);
626 unsigned delay2 = TheISA::handleIprRead(thread, snd_data_pkt);
627 if (delay2 > delay)
628 delay = delay2;
629
630 delete sreqLow;
631 delete sreqHigh;
632 delete fst_data_pkt;
633 delete snd_data_pkt;
634 }
635 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
636 cpu->schedule(wb, curTick() + delay);
637 return NoFault;
638 }
639
640 while (store_idx != -1) {
641 // End once we've reached the top of the LSQ
642 if (store_idx == storeWBIdx) {
643 break;
644 }
645
646 // Move the index to one younger
647 if (--store_idx < 0)
648 store_idx += SQEntries;
649
650 assert(storeQueue[store_idx].inst);
651
652 store_size = storeQueue[store_idx].size;
653
654 if (store_size == 0)
655 continue;
656 else if (storeQueue[store_idx].inst->uncacheable())
657 continue;
658
659 assert(storeQueue[store_idx].inst->effAddrValid);
660
661 // Check if the store data is within the lower and upper bounds of
662 // addresses that the request needs.
663 bool store_has_lower_limit =
664 req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
665 bool store_has_upper_limit =
666 (req->getVaddr() + req->getSize()) <=
667 (storeQueue[store_idx].inst->effAddr + store_size);
668 bool lower_load_has_store_part =
669 req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
670 store_size);
671 bool upper_load_has_store_part =
672 (req->getVaddr() + req->getSize()) >
673 storeQueue[store_idx].inst->effAddr;
674
675 // If the store's data has all of the data needed, we can forward.
676 if ((store_has_lower_limit && store_has_upper_limit)) {
677 // Get shift amount for offset into the store's data.
678 int shift_amt = req->getVaddr() - storeQueue[store_idx].inst->effAddr;
679
680 memcpy(data, storeQueue[store_idx].data + shift_amt,
681 req->getSize());
682
683 assert(!load_inst->memData);
684 load_inst->memData = new uint8_t[64];
685
686 memcpy(load_inst->memData,
687 storeQueue[store_idx].data + shift_amt, req->getSize());
688
689 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
690 "addr %#x, data %#x\n",
691 store_idx, req->getVaddr(), data);
692
693 PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq,
694 Packet::Broadcast);
695 data_pkt->dataStatic(load_inst->memData);
696
697 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
698
699 // We'll say this has a 1 cycle load-store forwarding latency
700 // for now.
701 // @todo: Need to make this a parameter.
702 cpu->schedule(wb, curTick());
703
704 // Don't need to do anything special for split loads.
705 if (TheISA::HasUnalignedMemAcc && sreqLow) {
706 delete sreqLow;
707 delete sreqHigh;
708 }
709
710 ++lsqForwLoads;
711 return NoFault;
712 } else if ((store_has_lower_limit && lower_load_has_store_part) ||
713 (store_has_upper_limit && upper_load_has_store_part) ||
714 (lower_load_has_store_part && upper_load_has_store_part)) {
715 // This is the partial store-load forwarding case where a store
716 // has only part of the load's data.
717
718 // If it's already been written back, then don't worry about
719 // stalling on it.
720 if (storeQueue[store_idx].completed) {
721 panic("Should not check one of these");
722 continue;
723 }
724
725 // Must stall load and force it to retry, so long as it's the oldest
726 // load that needs to do so.
727 if (!stalled ||
728 (stalled &&
729 load_inst->seqNum <
730 loadQueue[stallingLoadIdx]->seqNum)) {
731 stalled = true;
732 stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
733 stallingLoadIdx = load_idx;
734 }
735
736 // Tell IQ/mem dep unit that this instruction will need to be
737 // rescheduled eventually
738 iewStage->rescheduleMemInst(load_inst);
739 iewStage->decrWb(load_inst->seqNum);
740 load_inst->clearIssued();
741 ++lsqRescheduledLoads;
742
743 // Do not generate a writeback event as this instruction is not
744 // complete.
745 DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
746 "Store idx %i to load addr %#x\n",
747 store_idx, req->getVaddr());
748
749 // Must delete request now that it wasn't handed off to
750 // memory. This is quite ugly. @todo: Figure out the
751 // proper place to really handle request deletes.
752 delete req;
753 if (TheISA::HasUnalignedMemAcc && sreqLow) {
754 delete sreqLow;
755 delete sreqHigh;
756 }
757
758 return NoFault;
759 }
760 }
761
762 // If there's no forwarding case, then go access memory
763 DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
764 load_inst->seqNum, load_inst->pcState());
765
766 assert(!load_inst->memData);
767 load_inst->memData = new uint8_t[64];
768
769 ++usedPorts;
770
771 // if we the cache is not blocked, do cache access
772 bool completedFirst = false;
773 if (!lsq->cacheBlocked()) {
774 MemCmd command =
775 req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
776 PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast);
777 PacketPtr fst_data_pkt = NULL;
778 PacketPtr snd_data_pkt = NULL;
779
780 data_pkt->dataStatic(load_inst->memData);
781
782 LSQSenderState *state = new LSQSenderState;
783 state->isLoad = true;
784 state->idx = load_idx;
785 state->inst = load_inst;
786 data_pkt->senderState = state;
787
788 if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
789
790 // Point the first packet at the main data packet.
791 fst_data_pkt = data_pkt;
792 } else {
793
794 // Create the split packets.
795 fst_data_pkt = new Packet(sreqLow, command, Packet::Broadcast);
796 snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast);
797
798 fst_data_pkt->dataStatic(load_inst->memData);
799 snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
800
801 fst_data_pkt->senderState = state;
802 snd_data_pkt->senderState = state;
803
804 state->isSplit = true;
805 state->outstanding = 2;
806 state->mainPkt = data_pkt;
807 }
808
809 if (!dcachePort->sendTiming(fst_data_pkt)) {
810 // Delete state and data packet because a load retry
811 // initiates a pipeline restart; it does not retry.
812 delete state;
813 delete data_pkt->req;
814 delete data_pkt;
815 if (TheISA::HasUnalignedMemAcc && sreqLow) {
816 delete fst_data_pkt->req;
817 delete fst_data_pkt;
818 delete snd_data_pkt->req;
819 delete snd_data_pkt;
820 sreqLow = NULL;
821 sreqHigh = NULL;
822 }
823
824 req = NULL;
825
826 // If the access didn't succeed, tell the LSQ by setting
827 // the retry thread id.
828 lsq->setRetryTid(lsqID);
829 } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
830 completedFirst = true;
831
832 // The first packet was sent without problems, so send this one
833 // too. If there is a problem with this packet then the whole
834 // load will be squashed, so indicate this to the state object.
835 // The first packet will return in completeDataAccess and be
836 // handled there.
837 ++usedPorts;
838 if (!dcachePort->sendTiming(snd_data_pkt)) {
839
840 // The main packet will be deleted in completeDataAccess.
841 delete snd_data_pkt->req;
842 delete snd_data_pkt;
843
844 state->complete();
845
846 req = NULL;
847 sreqHigh = NULL;
848
849 lsq->setRetryTid(lsqID);
850 }
851 }
852 }
853
854 // If the cache was blocked, or has become blocked due to the access,
855 // handle it.
856 if (lsq->cacheBlocked()) {
857 if (req)
858 delete req;
859 if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
860 delete sreqLow;
861 delete sreqHigh;
862 }
863
864 ++lsqCacheBlocked;
865
866 // If the first part of a split access succeeds, then let the LSQ
867 // handle the decrWb when completeDataAccess is called upon return
868 // of the requested first part of data
869 if (!completedFirst)
870 iewStage->decrWb(load_inst->seqNum);
871
872 // There's an older load that's already going to squash.
873 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
874 return NoFault;
875
876 // Record that the load was blocked due to memory. This
877 // load will squash all instructions after it, be
878 // refetched, and re-executed.
879 isLoadBlocked = true;
880 loadBlockedHandled = false;
881 blockedLoadSeqNum = load_inst->seqNum;
882 // No fault occurred, even though the interface is blocked.
883 return NoFault;
884 }
885
886 return NoFault;
887}
888
889template <class Impl>
890Fault
891LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh,
892 uint8_t *data, int store_idx)
893{
894 assert(storeQueue[store_idx].inst);
895
896 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
897 " | storeHead:%i [sn:%i]\n",
898 store_idx, req->getPaddr(), data, storeHead,
899 storeQueue[store_idx].inst->seqNum);
900
901 storeQueue[store_idx].req = req;
902 storeQueue[store_idx].sreqLow = sreqLow;
903 storeQueue[store_idx].sreqHigh = sreqHigh;
904 unsigned size = req->getSize();
905 storeQueue[store_idx].size = size;
906 assert(size <= sizeof(storeQueue[store_idx].data));
907
908 // Split stores can only occur in ISAs with unaligned memory accesses. If
909 // a store request has been split, sreqLow and sreqHigh will be non-null.
910 if (TheISA::HasUnalignedMemAcc && sreqLow) {
911 storeQueue[store_idx].isSplit = true;
912 }
913
914 memcpy(storeQueue[store_idx].data, data, size);
915
916 // This function only writes the data to the store queue, so no fault
917 // can happen here.
918 return NoFault;
919}
920
921#endif // __CPU_O3_LSQ_UNIT_HH__