lsq_unit.hh (9046:a1104cc13db2 → 9152:86c0e6ca5e7c)
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 * Korey Sewell
30 */
31
32#ifndef __CPU_O3_LSQ_UNIT_HH__
33#define __CPU_O3_LSQ_UNIT_HH__
34
35#include <algorithm>
36#include <cstring>
37#include <map>
38#include <queue>
39
40#include "arch/generic/debugfaults.hh"
41#include "arch/isa_traits.hh"
42#include "arch/locked_mem.hh"
43#include "arch/mmapped_ipr.hh"
44#include "base/hashmap.hh"
45#include "config/the_isa.hh"
46#include "cpu/inst_seq.hh"
47#include "cpu/timebuf.hh"
48#include "debug/LSQUnit.hh"
49#include "mem/packet.hh"
50#include "mem/port.hh"
51#include "sim/fault_fwd.hh"
52
53struct DerivO3CPUParams;
54
55/**
56 * Class that implements the actual LQ and SQ for each specific
57 * thread. Both are circular queues; load entries are freed upon
58 * committing, while store entries are freed once they writeback. The
59 * LSQUnit tracks if there are memory ordering violations, and also
60 * detects partial load-to-store forwarding cases (a store only has
61 * part of a load's data) that require the load to wait until the
62 * store writes back. In the former case it holds onto the instruction
63 * until the dependence unit looks at it, and in the latter it stalls
64 * the LSQ until the store writes back. At that point the load is
65 * replayed.
66 */
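// Illustrative sketch (an assumption, not text from this header): the
// circular-queue behavior described above relies on wrap-around index
// arithmetic; the incr/decr helpers declared later are implemented along
// these lines in the companion lsq_unit_impl.hh:
//
//     template <class Impl>
//     inline void
//     LSQUnit<Impl>::incrStIdx(int &store_idx)
//     {
//         if (++store_idx >= SQEntries)
//             store_idx = 0;
//     }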
67template <class Impl>
68class LSQUnit {
69 public:
70 typedef typename Impl::O3CPU O3CPU;
71 typedef typename Impl::DynInstPtr DynInstPtr;
72 typedef typename Impl::CPUPol::IEW IEW;
73 typedef typename Impl::CPUPol::LSQ LSQ;
74 typedef typename Impl::CPUPol::IssueStruct IssueStruct;
75
76 public:
77 /** Constructs an LSQ unit. init() must be called prior to use. */
78 LSQUnit();
79
80 /** Initializes the LSQ unit with the specified number of entries. */
81 void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
82 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
83 unsigned id);
84
85 /** Returns the name of the LSQ unit. */
86 std::string name() const;
87
88 /** Registers statistics. */
89 void regStats();
90
91 /** Sets the pointer to the dcache port. */
92 void setDcachePort(MasterPort *dcache_port);
93
94 /** Switches out LSQ unit. */
95 void switchOut();
96
97 /** Takes over from another CPU's thread. */
98 void takeOverFrom();
99
100 /** Returns if the LSQ is switched out. */
101 bool isSwitchedOut() { return switchedOut; }
102
103 /** Ticks the LSQ unit, which in this case only resets the number of
104 * used cache ports.
105 * @todo: Move the number of used ports up to the LSQ level so it can
106 * be shared by all LSQ units.
107 */
108 void tick() { usedPorts = 0; }
109
110 /** Inserts an instruction. */
111 void insert(DynInstPtr &inst);
112 /** Inserts a load instruction. */
113 void insertLoad(DynInstPtr &load_inst);
114 /** Inserts a store instruction. */
115 void insertStore(DynInstPtr &store_inst);
116
117 /** Check for ordering violations in the LSQ. For a store, squash if we
118 * ever find a conflicting load. For a load, only squash if an
119 * external snoop invalidate has been seen for that load's address.
120 * @param load_idx index to start checking at
121 * @param inst the instruction to check
122 */
123 Fault checkViolations(int load_idx, DynInstPtr &inst);
124
125 /** Check if an incoming invalidate hits in the LSQ on a load
126 * that might have issued out of order with respect to another
127 * load because of the intervening invalidate.
128 */
129 void checkSnoop(PacketPtr pkt);
130
131 /** Executes a load instruction. */
132 Fault executeLoad(DynInstPtr &inst);
133
134 Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
135 /** Executes a store instruction. */
136 Fault executeStore(DynInstPtr &inst);
137
138 /** Commits the head load. */
139 void commitLoad();
140 /** Commits loads older than a specific sequence number. */
141 void commitLoads(InstSeqNum &youngest_inst);
142
143 /** Commits stores older than a specific sequence number. */
144 void commitStores(InstSeqNum &youngest_inst);
145
146 /** Writes back stores. */
147 void writebackStores();
148
149 /** Completes the data access that has been returned from the
150 * memory system. */
151 void completeDataAccess(PacketPtr pkt);
152
153 /** Clears all the entries in the LQ. */
154 void clearLQ();
155
156 /** Clears all the entries in the SQ. */
157 void clearSQ();
158
159 /** Resizes the LQ to a given size. */
160 void resizeLQ(unsigned size);
161
162 /** Resizes the SQ to a given size. */
163 void resizeSQ(unsigned size);
164
165 /** Squashes all instructions younger than a specific sequence number. */
166 void squash(const InstSeqNum &squashed_num);
167
168 /** Returns if there is a memory ordering violation. Value is reset upon
169 * call to getMemDepViolator().
170 */
171 bool violation() { return memDepViolator; }
172
173 /** Returns the memory ordering violator. */
174 DynInstPtr getMemDepViolator();
175
176 /** Returns if a load became blocked due to the memory system. */
177 bool loadBlocked()
178 { return isLoadBlocked; }
179
180 /** Clears the signal that a load became blocked. */
181 void clearLoadBlocked()
182 { isLoadBlocked = false; }
183
184 /** Returns if the blocked load was handled. */
185 bool isLoadBlockedHandled()
186 { return loadBlockedHandled; }
187
188 /** Records the blocked load as being handled. */
189 void setLoadBlockedHandled()
190 { loadBlockedHandled = true; }
191
192 /** Returns the number of free entries (min of free LQ and SQ entries). */
193 unsigned numFreeEntries();
194
195 /** Returns the number of loads ready to execute. */
196 int numLoadsReady();
197
198 /** Returns the number of loads in the LQ. */
199 int numLoads() { return loads; }
200
201 /** Returns the number of stores in the SQ. */
202 int numStores() { return stores; }
203
204 /** Returns if either the LQ or SQ is full. */
205 bool isFull() { return lqFull() || sqFull(); }
206
207 /** Returns if the LQ is full. */
208 bool lqFull() { return loads >= (LQEntries - 1); }
209
210 /** Returns if the SQ is full. */
211 bool sqFull() { return stores >= (SQEntries - 1); }
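    // Example (assuming init() adds the sentinel slot): a unit configured
    // with 32 LQ entries sizes LQEntries to 33, so lqFull() above reports
    // full once loads reaches 32; the extra slot lets the circular queue
    // distinguish full from empty.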
212
213 /** Returns the number of instructions in the LSQ. */
214 unsigned getCount() { return loads + stores; }
215
216 /** Returns if there are any stores to writeback. */
217 bool hasStoresToWB() { return storesToWB; }
218
219 /** Returns the number of stores to writeback. */
220 int numStoresToWB() { return storesToWB; }
221
222 /** Returns if the LSQ unit will writeback on this cycle. */
223 bool willWB() { return storeQueue[storeWBIdx].canWB &&
224 !storeQueue[storeWBIdx].completed &&
225 !isStoreBlocked; }
226
227 /** Handles doing the retry. */
228 void recvRetry();
229
230 private:
231 /** Writes back the instruction, sending it to IEW. */
232 void writeback(DynInstPtr &inst, PacketPtr pkt);
233
234 /** Writes back a store that couldn't be completed the previous cycle. */
235 void writebackPendingStore();
236
237 /** Handles completing the send of a store to memory. */
238 void storePostSend(PacketPtr pkt);
239
240 /** Completes the store at the specified index. */
241 void completeStore(int store_idx);
242
243 /** Attempts to send a store to the cache. */
244 bool sendStore(PacketPtr data_pkt);
245
246 /** Increments the given store index (circular queue). */
247 inline void incrStIdx(int &store_idx);
248 /** Decrements the given store index (circular queue). */
249 inline void decrStIdx(int &store_idx);
250 /** Increments the given load index (circular queue). */
251 inline void incrLdIdx(int &load_idx);
252 /** Decrements the given load index (circular queue). */
253 inline void decrLdIdx(int &load_idx);
254
255 public:
256 /** Debugging function to dump instructions in the LSQ. */
257 void dumpInsts();
258
259 private:
260 /** Pointer to the CPU. */
261 O3CPU *cpu;
262
263 /** Pointer to the IEW stage. */
264 IEW *iewStage;
265
266 /** Pointer to the LSQ. */
267 LSQ *lsq;
268
269 /** Pointer to the dcache port. Used only for sending. */
270 MasterPort *dcachePort;
271
272 /** Derived class to hold any sender state the LSQ needs. */
273 class LSQSenderState : public Packet::SenderState
274 {
275 public:
276 /** Default constructor. */
277 LSQSenderState()
278 : mainPkt(NULL), pendingPacket(NULL), outstanding(1),
279 noWB(false), isSplit(false), pktToSend(false)
280 { }
281
282 /** Instruction that initiated the access to memory. */
283 DynInstPtr inst;
284 /** The main packet from a split load, used during writeback. */
285 PacketPtr mainPkt;
286 /** A second packet from a split store that needs sending. */
287 PacketPtr pendingPacket;
288 /** The LQ/SQ index of the instruction. */
289 uint8_t idx;
290 /** Number of outstanding packets to complete. */
291 uint8_t outstanding;
292 /** Whether or not it is a load. */
293 bool isLoad;
294 /** Whether or not the instruction will need to writeback. */
295 bool noWB;
296 /** Whether or not this access is split in two. */
297 bool isSplit;
298 /** Whether or not there is a packet that needs sending. */
299 bool pktToSend;
300
301 /** Completes a packet and returns whether the access is finished. */
302 inline bool complete() { return --outstanding == 0; }
303 };
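    // Usage sketch mirroring how read() below wires up a split load: both
    // packets share one LSQSenderState, and writeback happens only when
    // the second response arrives.
    //
    //     LSQSenderState *state = new LSQSenderState;
    //     state->isLoad = true;
    //     state->isSplit = true;
    //     state->outstanding = 2;        // one per split packet
    //     state->mainPkt = data_pkt;
    //     ...
    //     if (state->complete())         // true once both halves returned
    //         writeback(state->inst, state->mainPkt);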
304
305 /** Writeback event, specifically for when stores forward data to loads. */
306 class WritebackEvent : public Event {
307 public:
308 /** Constructs a writeback event. */
309 WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
310
311 /** Processes the writeback event. */
312 void process();
313
314 /** Returns the description of this event. */
315 const char *description() const;
316
317 private:
318 /** Instruction whose results are being written back. */
319 DynInstPtr inst;
320
321 /** The packet that would have been sent to memory. */
322 PacketPtr pkt;
323
324 /** The pointer to the LSQ unit that issued the store. */
325 LSQUnit<Impl> *lsqPtr;
326 };
327
328 public:
329 struct SQEntry {
330 /** Constructs an empty store queue entry. */
331 SQEntry()
332 : inst(NULL), req(NULL), size(0),
333 canWB(0), committed(0), completed(0)
334 {
335 std::memset(data, 0, sizeof(data));
336 }
337
338 ~SQEntry()
339 {
340 inst = NULL;
341 }
342
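    // Clearing inst in the destructor above releases the SQEntry's
    // reference-counted pointer to the dynamic instruction as soon as the
    // entry dies; this destructor is the addition between the two
    // revisions named in the header.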
343 /** Constructs a store queue entry for a given instruction. */
344 SQEntry(DynInstPtr &_inst)
345 : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
346 isSplit(0), canWB(0), committed(0), completed(0)
347 {
348 std::memset(data, 0, sizeof(data));
349 }
350 /** The store data. */
351 char data[16];
352 /** The store instruction. */
353 DynInstPtr inst;
354 /** The request for the store. */
355 RequestPtr req;
356 /** The split requests for the store. */
357 RequestPtr sreqLow;
358 RequestPtr sreqHigh;
359 /** The size of the store. */
360 uint8_t size;
361 /** Whether or not the store is split into two requests. */
362 bool isSplit;
363 /** Whether or not the store can writeback. */
364 bool canWB;
365 /** Whether or not the store is committed. */
366 bool committed;
367 /** Whether or not the store is completed. */
368 bool completed;
369 };
370
371 private:
372 /** The LSQUnit thread id. */
373 ThreadID lsqID;
374
375 /** The store queue. */
376 std::vector<SQEntry> storeQueue;
377
378 /** The load queue. */
379 std::vector<DynInstPtr> loadQueue;
380
381 /** The number of LQ entries, plus a sentinel entry (circular queue).
382 * @todo: Consider having var that records the true number of LQ entries.
383 */
384 unsigned LQEntries;
385 /** The number of SQ entries, plus a sentinel entry (circular queue).
386 * @todo: Consider having var that records the true number of SQ entries.
387 */
388 unsigned SQEntries;
389
390 /** The number of places to shift addresses in the LSQ before checking
391 * for dependency violations
392 */
393 unsigned depCheckShift;
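    // Example: checkViolations() compares addresses at a granularity of
    // 2^depCheckShift bytes, so with depCheckShift == 4 two accesses are
    // treated as conflicting when
    // (addrA >> depCheckShift) == (addrB >> depCheckShift),
    // i.e. when they fall in the same 16-byte chunk.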
394
395 /** Should loads be checked for dependency issues */
396 bool checkLoads;
397
398 /** The number of load instructions in the LQ. */
399 int loads;
400 /** The number of store instructions in the SQ. */
401 int stores;
402 /** The number of store instructions in the SQ waiting to writeback. */
403 int storesToWB;
404
405 /** The index of the head instruction in the LQ. */
406 int loadHead;
407 /** The index of the tail instruction in the LQ. */
408 int loadTail;
409
410 /** The index of the head instruction in the SQ. */
411 int storeHead;
412 /** The index of the first instruction that may be ready to be
413 * written back, and has not yet been written back.
414 */
415 int storeWBIdx;
416 /** The index of the tail instruction in the SQ. */
417 int storeTail;
418
419 /// @todo Consider moving to a more advanced model with write vs read ports
420 /** The number of cache ports available each cycle. */
421 int cachePorts;
422
423 /** The number of used cache ports in this cycle. */
424 int usedPorts;
425
426 /** Is the LSQ switched out. */
427 bool switchedOut;
428
429 //list<InstSeqNum> mshrSeqNums;
430
431 /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
432 Addr cacheBlockMask;
433
434 /** Wire to read information from the issue stage time queue. */
435 typename TimeBuffer<IssueStruct>::wire fromIssue;
436
437 /** Whether or not the LSQ is stalled. */
438 bool stalled;
439 /** The store that causes the stall due to partial store to load
440 * forwarding.
441 */
442 InstSeqNum stallingStoreIsn;
443 /** The index of the above store. */
444 int stallingLoadIdx;
445
446 /** The packet that needs to be retried. */
447 PacketPtr retryPkt;
448
449 /** Whether or not a store is blocked due to the memory system. */
450 bool isStoreBlocked;
451
452 /** Whether or not a load is blocked due to the memory system. */
453 bool isLoadBlocked;
454
455 /** Has the blocked load been handled. */
456 bool loadBlockedHandled;
457
458 /** Whether or not a store is in flight. */
459 bool storeInFlight;
460
461 /** The sequence number of the blocked load. */
462 InstSeqNum blockedLoadSeqNum;
463
464 /** The oldest load that caused a memory ordering violation. */
465 DynInstPtr memDepViolator;
466
467 /** Whether or not there is a packet that couldn't be sent because of
468 * a lack of cache ports. */
469 bool hasPendingPkt;
470
471 /** The packet that is pending free cache ports. */
472 PacketPtr pendingPkt;
473
474 /** Flag for memory model. */
475 bool needsTSO;
476
477 // Will also need how many read/write ports the Dcache has. Or keep track
478 // of that in the stage that is one level up, and only call
479 // executeLoad/Store the appropriate number of times.
480 /** Total number of loads forwarded from LSQ stores. */
481 Stats::Scalar lsqForwLoads;
482
483 /** Total number of loads ignored due to invalid addresses. */
484 Stats::Scalar invAddrLoads;
485
486 /** Total number of squashed loads. */
487 Stats::Scalar lsqSquashedLoads;
488
489 /** Total number of responses from the memory system that are
490 * ignored due to the instruction already being squashed. */
491 Stats::Scalar lsqIgnoredResponses;
492
493 /** Total number of memory ordering violations. */
494 Stats::Scalar lsqMemOrderViolation;
495
496 /** Total number of squashed stores. */
497 Stats::Scalar lsqSquashedStores;
498
499 /** Total number of software prefetches ignored due to invalid addresses. */
500 Stats::Scalar invAddrSwpfs;
501
502 /** Ready loads blocked due to partial store-forwarding. */
503 Stats::Scalar lsqBlockedLoads;
504
505 /** Number of loads that were rescheduled. */
506 Stats::Scalar lsqRescheduledLoads;
507
508 /** Number of times the LSQ is blocked due to the cache. */
509 Stats::Scalar lsqCacheBlocked;
510
511 public:
512 /** Executes the load at the given index. */
513 Fault read(Request *req, Request *sreqLow, Request *sreqHigh,
514 uint8_t *data, int load_idx);
515
516 /** Executes the store at the given index. */
517 Fault write(Request *req, Request *sreqLow, Request *sreqHigh,
518 uint8_t *data, int store_idx);
519
520 /** Returns the index of the head load instruction. */
521 int getLoadHead() { return loadHead; }
522 /** Returns the sequence number of the head load instruction. */
523 InstSeqNum getLoadHeadSeqNum()
524 {
525 if (loadQueue[loadHead]) {
526 return loadQueue[loadHead]->seqNum;
527 } else {
528 return 0;
529 }
530
531 }
532
533 /** Returns the index of the head store instruction. */
534 int getStoreHead() { return storeHead; }
535 /** Returns the sequence number of the head store instruction. */
536 InstSeqNum getStoreHeadSeqNum()
537 {
538 if (storeQueue[storeHead].inst) {
539 return storeQueue[storeHead].inst->seqNum;
540 } else {
541 return 0;
542 }
543
544 }
545
546 /** Returns whether or not the LSQ unit is stalled. */
547 bool isStalled() { return stalled; }
548};
549
550template <class Impl>
551Fault
552LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
553 uint8_t *data, int load_idx)
554{
555 DynInstPtr load_inst = loadQueue[load_idx];
556
557 assert(load_inst);
558
559 assert(!load_inst->isExecuted());
560
561 // Uncacheable accesses may only proceed when the load is at the
562 // head of the LSQ and is ready to commit (at the head of the ROB
563 // too); checking that here is admittedly a bit of a hackish way
564 // to enforce it.
565 if (req->isUncacheable() &&
566 (load_idx != loadHead || !load_inst->isAtCommit())) {
567 iewStage->rescheduleMemInst(load_inst);
568 ++lsqRescheduledLoads;
569 DPRINTF(LSQUnit, "Uncacheable load [sn:%lli] PC %s\n",
570 load_inst->seqNum, load_inst->pcState());
571
572 // Must delete request now that it wasn't handed off to
573 // memory. This is quite ugly. @todo: Figure out the proper
574 // place to really handle request deletes.
575 delete req;
576 if (TheISA::HasUnalignedMemAcc && sreqLow) {
577 delete sreqLow;
578 delete sreqHigh;
579 }
580 return new GenericISA::M5PanicFault(
581 "Uncacheable load [sn:%llx] PC %s\n",
582 load_inst->seqNum, load_inst->pcState());
583 }
584
585 // Check the SQ for any previous stores that might lead to forwarding
586 int store_idx = load_inst->sqIdx;
587
588 int store_size = 0;
589
590 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
591 "storeHead: %i addr: %#x%s\n",
592 load_idx, store_idx, storeHead, req->getPaddr(),
593 sreqLow ? " split" : "");
594
595 if (req->isLLSC()) {
596 assert(!sreqLow);
597 // Disable recording the result temporarily. Writing to misc
598 // regs normally updates the result, but this is not the
599 // desired behavior when handling store conditionals.
600 load_inst->recordResult(false);
601 TheISA::handleLockedRead(load_inst.get(), req);
602 load_inst->recordResult(true);
603 }
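    // (handleLockedRead is the per-ISA hook from arch/locked_mem.hh,
    // included above; it records the locked address in the thread context
    // so that a later store-conditional can be checked against it.)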
604
605 if (req->isMmappedIpr()) {
606 assert(!load_inst->memData);
607 load_inst->memData = new uint8_t[64];
608
609 ThreadContext *thread = cpu->tcBase(lsqID);
610 Tick delay;
611 PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
612
613 if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
614 data_pkt->dataStatic(load_inst->memData);
615 delay = TheISA::handleIprRead(thread, data_pkt);
616 } else {
617 assert(sreqLow->isMmappedIpr() && sreqHigh->isMmappedIpr());
618 PacketPtr fst_data_pkt = new Packet(sreqLow, MemCmd::ReadReq);
619 PacketPtr snd_data_pkt = new Packet(sreqHigh, MemCmd::ReadReq);
620
621 fst_data_pkt->dataStatic(load_inst->memData);
622 snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
623
624 delay = TheISA::handleIprRead(thread, fst_data_pkt);
625 unsigned delay2 = TheISA::handleIprRead(thread, snd_data_pkt);
626 if (delay2 > delay)
627 delay = delay2;
628
629 delete sreqLow;
630 delete sreqHigh;
631 delete fst_data_pkt;
632 delete snd_data_pkt;
633 }
634 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
635 cpu->schedule(wb, curTick() + delay);
636 return NoFault;
637 }
638
639 while (store_idx != -1) {
640 // Stop once we reach the writeback boundary; older stores have already been sent to memory
641 if (store_idx == storeWBIdx) {
642 break;
643 }
644
645 // Move the index to the next older store
646 if (--store_idx < 0)
647 store_idx += SQEntries;
648
649 assert(storeQueue[store_idx].inst);
650
651 store_size = storeQueue[store_idx].size;
652
653 if (store_size == 0)
654 continue;
655 else if (storeQueue[store_idx].inst->uncacheable())
656 continue;
657
658 assert(storeQueue[store_idx].inst->effAddrValid());
659
660 // Check if the store data is within the lower and upper bounds of
661 // addresses that the request needs.
662 bool store_has_lower_limit =
663 req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
664 bool store_has_upper_limit =
665 (req->getVaddr() + req->getSize()) <=
666 (storeQueue[store_idx].inst->effAddr + store_size);
667 bool lower_load_has_store_part =
668 req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
669 store_size);
670 bool upper_load_has_store_part =
671 (req->getVaddr() + req->getSize()) >
672 storeQueue[store_idx].inst->effAddr;
673
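        // Worked example of the four predicates above: for a store
        // covering [0x100, 0x108) (store_size == 8):
        //  - a 4-byte load at 0x104 satisfies store_has_lower_limit
        //    (0x104 >= 0x100) and store_has_upper_limit (0x108 <= 0x108),
        //    so it takes the full-forwarding branch below;
        //  - an 8-byte load at 0x106 satisfies store_has_lower_limit and
        //    lower_load_has_store_part (0x106 < 0x108) but fails the
        //    upper limit (0x10e > 0x108), so it takes the
        //    partial-forwarding stall branch.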
674 // If the store's data has all of the data needed, we can forward.
675 if ((store_has_lower_limit && store_has_upper_limit)) {
676 // Get shift amount for offset into the store's data.
677 int shift_amt = req->getVaddr() - storeQueue[store_idx].inst->effAddr;
678
679 memcpy(data, storeQueue[store_idx].data + shift_amt,
680 req->getSize());
681
682 assert(!load_inst->memData);
683 load_inst->memData = new uint8_t[64];
684
685 memcpy(load_inst->memData,
686 storeQueue[store_idx].data + shift_amt, req->getSize());
687
688 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
689 "addr %#x, data %#x\n",
690 store_idx, req->getVaddr(), data);
691
692 PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
693 data_pkt->dataStatic(load_inst->memData);
694
695 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
696
697 // We'll say this has a 1 cycle load-store forwarding latency
698 // for now.
699 // @todo: Need to make this a parameter.
700 cpu->schedule(wb, curTick());
701
702 // Don't need to do anything special for split loads.
703 if (TheISA::HasUnalignedMemAcc && sreqLow) {
704 delete sreqLow;
705 delete sreqHigh;
706 }
707
708 ++lsqForwLoads;
709 return NoFault;
710 } else if ((store_has_lower_limit && lower_load_has_store_part) ||
711 (store_has_upper_limit && upper_load_has_store_part) ||
712 (lower_load_has_store_part && upper_load_has_store_part)) {
713 // This is the partial store-load forwarding case where a store
714 // has only part of the load's data.
715
716 // If it's already been written back, then don't worry about
717 // stalling on it.
718 if (storeQueue[store_idx].completed) {
719 panic("Should not check one of these");
720 continue;
721 }
722
723 // Must stall load and force it to retry, so long as it's the oldest
724 // load that needs to do so.
725 if (!stalled ||
726 (stalled &&
727 load_inst->seqNum <
728 loadQueue[stallingLoadIdx]->seqNum)) {
729 stalled = true;
730 stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
731 stallingLoadIdx = load_idx;
732 }
733
734 // Tell IQ/mem dep unit that this instruction will need to be
735 // rescheduled eventually
736 iewStage->rescheduleMemInst(load_inst);
737 iewStage->decrWb(load_inst->seqNum);
738 load_inst->clearIssued();
739 ++lsqRescheduledLoads;
740
741 // Do not generate a writeback event as this instruction is not
742 // complete.
743 DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
744 "Store idx %i to load addr %#x\n",
745 store_idx, req->getVaddr());
746
747 // Must delete request now that it wasn't handed off to
748 // memory. This is quite ugly. @todo: Figure out the
749 // proper place to really handle request deletes.
750 delete req;
751 if (TheISA::HasUnalignedMemAcc && sreqLow) {
752 delete sreqLow;
753 delete sreqHigh;
754 }
755
756 return NoFault;
757 }
758 }
759
760 // If there's no forwarding case, then go access memory
761 DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
762 load_inst->seqNum, load_inst->pcState());
763
764 assert(!load_inst->memData);
765 load_inst->memData = new uint8_t[64];
766
767 ++usedPorts;
768
769 // If the cache is not blocked, do the cache access
770 bool completedFirst = false;
771 if (!lsq->cacheBlocked()) {
772 MemCmd command =
773 req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
774 PacketPtr data_pkt = new Packet(req, command);
775 PacketPtr fst_data_pkt = NULL;
776 PacketPtr snd_data_pkt = NULL;
777
778 data_pkt->dataStatic(load_inst->memData);
779
780 LSQSenderState *state = new LSQSenderState;
781 state->isLoad = true;
782 state->idx = load_idx;
783 state->inst = load_inst;
784 data_pkt->senderState = state;
785
786 if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
787
788 // Point the first packet at the main data packet.
789 fst_data_pkt = data_pkt;
790 } else {
791
792 // Create the split packets.
793 fst_data_pkt = new Packet(sreqLow, command);
794 snd_data_pkt = new Packet(sreqHigh, command);
795
796 fst_data_pkt->dataStatic(load_inst->memData);
797 snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
798
799 fst_data_pkt->senderState = state;
800 snd_data_pkt->senderState = state;
801
802 state->isSplit = true;
803 state->outstanding = 2;
804 state->mainPkt = data_pkt;
805 }
806
807 if (!dcachePort->sendTimingReq(fst_data_pkt)) {
808 // Delete state and data packet because a load retry
809 // initiates a pipeline restart; it does not retry.
810 delete state;
811 delete data_pkt->req;
812 delete data_pkt;
813 if (TheISA::HasUnalignedMemAcc && sreqLow) {
814 delete fst_data_pkt->req;
815 delete fst_data_pkt;
816 delete snd_data_pkt->req;
817 delete snd_data_pkt;
818 sreqLow = NULL;
819 sreqHigh = NULL;
820 }
821
822 req = NULL;
823
824 // If the access didn't succeed, tell the LSQ by setting
825 // the retry thread id.
826 lsq->setRetryTid(lsqID);
827 } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
828 completedFirst = true;
829
830 // The first packet was sent without problems, so send this one
831 // too. If there is a problem with this packet then the whole
832 // load will be squashed, so indicate this to the state object.
833 // The first packet will return in completeDataAccess and be
834 // handled there.
835 ++usedPorts;
836 if (!dcachePort->sendTimingReq(snd_data_pkt)) {
837
838 // The main packet will be deleted in completeDataAccess.
839 delete snd_data_pkt->req;
840 delete snd_data_pkt;
841
842 state->complete();
843
844 req = NULL;
845 sreqHigh = NULL;
846
847 lsq->setRetryTid(lsqID);
848 }
849 }
850 }
851
852 // If the cache was blocked, or has become blocked due to the access,
853 // handle it.
854 if (lsq->cacheBlocked()) {
855 if (req)
856 delete req;
857 if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
858 delete sreqLow;
859 delete sreqHigh;
860 }
861
862 ++lsqCacheBlocked;
863
864 // If the first part of a split access succeeds, then let the LSQ
865 // handle the decrWb when completeDataAccess is called upon return
866 // of the requested first part of data
867 if (!completedFirst)
868 iewStage->decrWb(load_inst->seqNum);
869
870 // There's an older load that's already going to squash.
871 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
872 return NoFault;
873
874 // Record that the load was blocked due to memory. This
875 // load will squash all instructions after it, be
876 // refetched, and re-executed.
877 isLoadBlocked = true;
878 loadBlockedHandled = false;
879 blockedLoadSeqNum = load_inst->seqNum;
880 // No fault occurred, even though the interface is blocked.
881 return NoFault;
882 }
883
884 return NoFault;
885}
886
887template <class Impl>
888Fault
889LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh,
890 uint8_t *data, int store_idx)
891{
892 assert(storeQueue[store_idx].inst);
893
894 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
895 " | storeHead:%i [sn:%i]\n",
896 store_idx, req->getPaddr(), data, storeHead,
897 storeQueue[store_idx].inst->seqNum);
898
899 storeQueue[store_idx].req = req;
900 storeQueue[store_idx].sreqLow = sreqLow;
901 storeQueue[store_idx].sreqHigh = sreqHigh;
902 unsigned size = req->getSize();
903 storeQueue[store_idx].size = size;
904 assert(size <= sizeof(storeQueue[store_idx].data));
905
906 // Split stores can only occur in ISAs with unaligned memory accesses. If
907 // a store request has been split, sreqLow and sreqHigh will be non-null.
908 if (TheISA::HasUnalignedMemAcc && sreqLow) {
909 storeQueue[store_idx].isSplit = true;
910 }
911
912 memcpy(storeQueue[store_idx].data, data, size);
913
914 // This function only writes the data to the store queue, so no fault
915 // can happen here.
916 return NoFault;
917}
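// Call-path sketch (assumed from the surrounding O3 design): loads and
// stores reach read()/write() above through the LSQ wrapper when a memory
// instruction executes; write() only buffers the data in its SQEntry, and
// writebackStores() later turns that entry into the packet actually sent
// to the cache.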
918
919#endif // __CPU_O3_LSQ_UNIT_HH__