dram_ctrl.hh (11678:8c6991a00515)
/*
 * Copyright (c) 2012-2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2013 Amin Farmahini-Farahani
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andreas Hansson
 *          Ani Udipi
 *          Neha Agarwal
 *          Omar Naji
 *          Matthias Jung
 *          Wendy Elsasser
 */

/**
 * @file
 * DRAMCtrl declaration
 */

#ifndef __MEM_DRAM_CTRL_HH__
#define __MEM_DRAM_CTRL_HH__

#include <deque>
#include <string>
#include <unordered_set>

#include "base/callback.hh"
#include "base/statistics.hh"
#include "enums/AddrMap.hh"
#include "enums/MemSched.hh"
#include "enums/PageManage.hh"
#include "mem/abstract_mem.hh"
#include "mem/drampower.hh"
#include "mem/qport.hh"
#include "params/DRAMCtrl.hh"
#include "sim/eventq.hh"

/**
 * The DRAM controller is a single-channel memory controller capturing
 * the most important timing constraints associated with a
 * contemporary DRAM. For multi-channel memory systems, the controller
 * is combined with a crossbar model, with the channel address
 * interleaving taking part in the crossbar.
 *
 * As a basic design principle, this controller
 * model is not cycle callable, but instead uses events to: 1) decide
 * when new decisions can be made, 2) when resources become available,
 * 3) when things are to be considered done, and 4) when to send
 * things back. Through these simple principles, the model delivers
 * high performance, and lots of flexibility, allowing users to
 * evaluate the system impact of a wide range of memory technologies,
 * such as DDR3/4, LPDDR2/3/4, WideIO1/2, HBM and HMC.
 *
 * For more details, please see Hansson et al, "Simulating DRAM
 * controllers for future system architecture exploration",
 * Proc. ISPASS, 2014. If you use this model as part of your research
 * please cite the paper.
 *
 * The low-power functionality implements a staggered powerdown
 * similar to that described in "Optimized Active and Power-Down Mode
 * Refresh Control in 3D-DRAMs" by Jung et al, VLSI-SoC, 2014.
 */
class DRAMCtrl : public AbstractMemory
{

  private:

    // For now, make use of a queued slave port to avoid dealing with
    // flow control for the responses being sent back
    class MemoryPort : public QueuedSlavePort
    {

        RespPacketQueue queue;
        DRAMCtrl& memory;

      public:

        MemoryPort(const std::string& name, DRAMCtrl& _memory);

      protected:

        Tick recvAtomic(PacketPtr pkt);

        void recvFunctional(PacketPtr pkt);

        bool recvTimingReq(PacketPtr);

        virtual AddrRangeList getAddrRanges() const;

    };

    /**
     * Our incoming port, for a multi-ported controller add a crossbar
     * in front of it
     */
    MemoryPort port;

    /**
     * Remember if the memory system is in timing mode
     */
    bool isTimingMode;

    /**
     * Remember if we have to retry a request when available.
     */
    bool retryRdReq;
    bool retryWrReq;

    /**
     * Bus state used to control the read/write switching and drive
     * the scheduling of the next request.
     */
    enum BusState {
        READ = 0,
        WRITE,
    };

    BusState busState;

    /* bus state for next request event triggered */
    BusState busStateNext;

    /**
     * Simple structure to hold the values needed to keep track of
     * commands for DRAMPower
     */
    struct Command {
        Data::MemCommand::cmds type;
        uint8_t bank;
        Tick timeStamp;

        constexpr Command(Data::MemCommand::cmds _type, uint8_t _bank,
                          Tick time_stamp)
            : type(_type), bank(_bank), timeStamp(time_stamp)
        { }
    };

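    // Hedged example (illustrative; not lifted from dram_ctrl.cc): a
    // precharge of bank 3 issued at the current tick would be recorded
    // for later hand-off to DRAMPower as
    //
    //   cmdList.push_back(Command(Data::MemCommand::PRE, 3, curTick()));
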
    /**
     * A basic class to track the bank state, i.e. what row is
     * currently open (if any), when is the bank free to accept a new
     * column (read/write) command, when can it be precharged, and
     * when can it be activated.
     *
     * The bank also keeps track of how many bytes have been accessed
     * in the open row since it was opened.
     */
    class Bank
    {

      public:

        static const uint32_t NO_ROW = -1;

        uint32_t openRow;
        uint8_t bank;
        uint8_t bankgr;

        Tick colAllowedAt;
        Tick preAllowedAt;
        Tick actAllowedAt;

        uint32_t rowAccesses;
        uint32_t bytesAccessed;

        Bank() :
            openRow(NO_ROW), bank(0), bankgr(0),
            colAllowedAt(0), preAllowedAt(0), actAllowedAt(0),
            rowAccesses(0), bytesAccessed(0)
        { }
    };
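
    // For example (illustrative), a scheduler can detect a row hit with
    //   bank.openRow == target_row   // target_row is a hypothetical name
    // and a closed bank with
    //   bank.openRow == Bank::NO_ROW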


    /**
     * The power state captures the different operational states of
     * the DRAM and interacts with the bus read/write state machine,
     * and the refresh state machine.
     *
     * PWR_IDLE : The idle state in which all banks are closed
     *            From here can transition to: PWR_REF, PWR_ACT,
     *            PWR_PRE_PDN
     *
     * PWR_REF : Auto-refresh state. Will transition when refresh is
     *           complete based on power state prior to PWR_REF
     *           From here can transition to: PWR_IDLE, PWR_PRE_PDN,
     *           PWR_SREF
     *
     * PWR_SREF : Self-refresh state. Entered after refresh if
     *            previous state was PWR_PRE_PDN
     *            From here can transition to: PWR_IDLE
     *
     * PWR_PRE_PDN : Precharge power down state
     *               From here can transition to: PWR_REF, PWR_IDLE
     *
     * PWR_ACT : Activate state in which one or more banks are open
     *           From here can transition to: PWR_IDLE, PWR_ACT_PDN
     *
     * PWR_ACT_PDN : Activate power down state
     *               From here can transition to: PWR_ACT
     */
    enum PowerState {
        PWR_IDLE = 0,
        PWR_REF,
        PWR_SREF,
        PWR_PRE_PDN,
        PWR_ACT,
        PWR_ACT_PDN
    };
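
    // Example walk (derived from the transitions documented above): a rank
    // that powers down with open banks, wakes up, idles, powers down again
    // and is refreshed into self-refresh steps through
    //   PWR_ACT -> PWR_ACT_PDN -> PWR_ACT -> PWR_IDLE -> PWR_PRE_PDN
    //   -> PWR_REF -> PWR_SREF -> PWR_IDLE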

    /**
     * The refresh state is used to control the progress of the
     * refresh scheduling. When normal operation is in progress the
     * refresh state is idle. Once tREFI has elapsed, a refresh event
     * is triggered to start the following state machine transitions,
     * which are used to issue a refresh and return back to normal
     * operation.
     *
     * REF_IDLE : IDLE state used during normal operation
     *            From here can transition to: REF_DRAIN
     *
     * REF_SREF_EXIT : Exiting a self-refresh; refresh event scheduled
     *                 after self-refresh exit completes
     *                 From here can transition to: REF_DRAIN
     *
     * REF_DRAIN : Drain state in which ongoing accesses complete.
     *             From here can transition to: REF_PD_EXIT
     *
     * REF_PD_EXIT : Evaluate pwrState and issue wakeup if needed
     *               Next state dependent on whether banks are open
     *               From here can transition to: REF_PRE, REF_START
     *
     * REF_PRE : Close (precharge) all open banks
     *           From here can transition to: REF_START
     *
     * REF_START : Issue refresh command and update DRAMPower stats
     *             From here can transition to: REF_RUN
     *
     * REF_RUN : Refresh running, waiting for tRFC to expire
     *           From here can transition to: REF_IDLE, REF_SREF_EXIT
     */
    enum RefreshState {
        REF_IDLE = 0,
        REF_DRAIN,
        REF_PD_EXIT,
        REF_SREF_EXIT,
        REF_PRE,
        REF_START,
        REF_RUN
    };
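
    // Example walk (derived from the transitions documented above): a
    // normal refresh round is
    //   REF_IDLE -> REF_DRAIN -> REF_PD_EXIT -> REF_PRE -> REF_START
    //   -> REF_RUN -> REF_IDLE
    // while a rank that drops into self-refresh after REF_RUN later resumes
    // via REF_SREF_EXIT -> REF_DRAIN.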

    /**
     * Rank class includes a vector of banks. Refresh and Power state
     * machines are defined per rank. Events required to change the
     * state of the refresh and power state machine are scheduled per
     * rank. This class allows the implementation of rank-wise refresh
     * and rank-wise power-down.
     */
    class Rank : public EventManager
    {

      private:

        /**
         * A reference to the parent DRAMCtrl instance
         */
        DRAMCtrl& memory;

        /**
         * Since we are taking decisions out of order, we need to keep
         * track of what power transition is happening at what time
         */
        PowerState pwrStateTrans;

        /**
         * Previous low-power state, which will be re-entered after refresh.
         */
        PowerState pwrStatePostRefresh;

        /**
         * Track when we transitioned to the current power state
         */
        Tick pwrStateTick;

        /**
         * Keep track of when a refresh is due.
         */
        Tick refreshDueAt;

        /*
         * Command energies
         */
        Stats::Scalar actEnergy;
        Stats::Scalar preEnergy;
        Stats::Scalar readEnergy;
        Stats::Scalar writeEnergy;
        Stats::Scalar refreshEnergy;

        /*
         * Active Background Energy
         */
        Stats::Scalar actBackEnergy;

        /*
         * Precharge Background Energy
         */
        Stats::Scalar preBackEnergy;

        /*
         * Active Power-Down Energy
         */
        Stats::Scalar actPowerDownEnergy;

        /*
         * Precharge Power-Down Energy
         */
        Stats::Scalar prePowerDownEnergy;

        /*
         * Self-Refresh Energy
         */
        Stats::Scalar selfRefreshEnergy;

        Stats::Scalar totalEnergy;
        Stats::Scalar averagePower;

        /**
         * Stat to track total DRAM idle time
         */
        Stats::Scalar totalIdleTime;

        /**
         * Track time spent in each power state.
         */
        Stats::Vector pwrStateTime;

        /**
         * Function to update Power Stats
         */
        void updatePowerStats();

        /**
         * Schedule a power state transition in the future, and
         * potentially override an already scheduled transition.
         *
         * @param pwr_state Power state to transition to
         * @param tick Tick when transition should take place
         */
        void schedulePowerEvent(PowerState pwr_state, Tick tick);

      public:

        /**
         * Current power state.
         */
        PowerState pwrState;

        /**
         * Current refresh state.
         */
        RefreshState refreshState;

        /**
         * Rank is in or transitioning to power-down or self-refresh.
         */
        bool inLowPowerState;

        /**
         * Current Rank index
         */
        uint8_t rank;

        /**
         * Track number of packets in read queue going to this rank
         */
        uint32_t readEntries;

        /**
         * Track number of packets in write queue going to this rank
         */
        uint32_t writeEntries;

        /**
         * Number of ACT, RD, and WR events currently scheduled
         * Incremented when a refresh event is started as well
         * Used to determine when a low-power state can be entered
         */
        uint8_t outstandingEvents;

        /**
         * Delay power-down and self-refresh exit until this requirement
         * is met.
         */
        Tick wakeUpAllowedAt;

        /**
         * One DRAMPower instance per rank
         */
        DRAMPower power;

        /**
         * List of commands issued, to be sent to DRAMPower at refresh
         * and stats dump. Keep commands here since commands to different
         * banks are added out of order. Will only pass commands up to
         * curTick() to DRAMPower after sorting.
         */
        std::vector<Command> cmdList;

        /**
         * Vector of Banks. Each rank is made of several devices which in
         * turn are made from several banks.
         */
        std::vector<Bank> banks;

        /**
         * To track number of banks which are currently active for
         * this rank.
         */
        unsigned int numBanksActive;

        /** List to keep track of activate ticks */
        std::deque<Tick> actTicks;

        Rank(DRAMCtrl& _memory, const DRAMCtrlParams* _p);

        const std::string name() const
        {
            return csprintf("%s_%d", memory.name(), rank);
        }

        /**
         * Kick off accounting for power and refresh states and
         * schedule initial refresh.
         *
         * @param ref_tick Tick for first refresh
         */
        void startup(Tick ref_tick);

        /**
         * Stop the refresh events.
         */
        void suspend();

        /**
         * Check if the current rank is available for scheduling.
         * Rank will be unavailable if refresh is ongoing.
         * This includes refresh events explicitly scheduled from the
         * controller or memory-initiated events which will occur during
         * self-refresh mode.
         *
         * @return true if the rank is idle from a refresh point of view
         */
        bool isAvailable() const { return refreshState == REF_IDLE; }

        /**
         * Check if the current rank has all banks closed and is not
         * in a low power state
         *
         * @return true if the rank is idle from a bank
         *         and power point of view
         */
        bool inPwrIdleState() const { return pwrState == PWR_IDLE; }

        /**
         * Trigger a self-refresh exit if there are entries enqueued.
         * Exit if there are any read entries regardless of the bus state.
         * If we are currently issuing write commands, exit if we have any
         * write commands enqueued as well.
         * Could expand this in the future to analyze state of entire queue
         * if needed.
         *
         * @return boolean indicating self-refresh exit should be scheduled
         */
        bool forceSelfRefreshExit() const {
            return (readEntries != 0) ||
                   ((memory.busStateNext == WRITE) && (writeEntries != 0));
        }

        /**
         * Check if the current rank is idle and should enter a low-power
         * state.
         *
         * @return true if there are no read commands in the queue
         *         and there are no outstanding events
         */
        bool lowPowerEntryReady() const;

        /**
         * Let the rank check if it was waiting for requests to drain
         * to allow it to transition states.
         */
        void checkDrainDone();

        /**
         * Push commands out of the cmdList queue that are scheduled at
         * or before curTick() to the DRAMPower library.
         * All commands before curTick() are guaranteed to be complete
         * and can safely be flushed.
         */
        void flushCmdList();

        /*
         * Function to register Stats
         */
        void regStats();

        /**
         * Computes stats just prior to dump event
         */
        void computeStats();

        /**
         * Schedule a transition to power-down (sleep)
         *
         * @param pwr_state Power state to transition to
         * @param tick Absolute tick when transition should take place
         */
        void powerDownSleep(PowerState pwr_state, Tick tick);

        /**
         * Schedule an event to wake up from power-down or self-refresh
         * and update bank timing parameters.
         *
         * @param exit_delay Relative tick defining the delay required between
         *                   low-power exit and the next command
         */
        void scheduleWakeUpEvent(Tick exit_delay);

        void processWriteDoneEvent();
        EventWrapper<Rank, &Rank::processWriteDoneEvent>
        writeDoneEvent;

        void processActivateEvent();
        EventWrapper<Rank, &Rank::processActivateEvent>
        activateEvent;

        void processPrechargeEvent();
        EventWrapper<Rank, &Rank::processPrechargeEvent>
        prechargeEvent;

        void processRefreshEvent();
        EventWrapper<Rank, &Rank::processRefreshEvent>
        refreshEvent;

        void processPowerEvent();
        EventWrapper<Rank, &Rank::processPowerEvent>
        powerEvent;

        void processWakeUpEvent();
        EventWrapper<Rank, &Rank::processWakeUpEvent>
        wakeUpEvent;

    };

    // define the process to compute stats on simulation exit
    // defined per rank as the per rank stats are based on state
    // transition and periodically updated, requiring re-sync at
    // exit.
    class RankDumpCallback : public Callback
    {
        Rank *ranks;
      public:
        RankDumpCallback(Rank *r) : ranks(r) {}
        virtual void process() { ranks->computeStats(); };
    };
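
    // A hedged usage sketch (the call site is an assumption, not lifted
    // from dram_ctrl.cc): registering the callback with the stats framework,
    //
    //   Stats::registerDumpCallback(new RankDumpCallback(&rank));
    //
    // re-syncs the rank's state-based stats on every stats dump.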
595
596 /**
597 * A burst helper helps organize and manage a packet that is larger than
598 * the DRAM burst size. A system packet that is larger than the burst size
599 * is split into multiple DRAM packets and all those DRAM packets point to
600 * a single burst helper such that we know when the whole packet is served.
601 */
602 class BurstHelper {
603
604 public:
605
606 /** Number of DRAM bursts requred for a system packet **/
607 const unsigned int burstCount;
608
609 /** Number of DRAM bursts serviced so far for a system packet **/
610 unsigned int burstsServiced;
611
612 BurstHelper(unsigned int _burstCount)
613 : burstCount(_burstCount), burstsServiced(0)
614 { }
615 };

    /**
     * A DRAM packet stores packets along with the timestamp of when
     * the packet entered the queue, and also the decoded address.
     */
    class DRAMPacket {

      public:

        /** When did request enter the controller */
        const Tick entryTime;

        /** When will request leave the controller */
        Tick readyTime;

        /** This comes from the outside world */
        const PacketPtr pkt;

        const bool isRead;

        /** Will be populated by address decoder */
        const uint8_t rank;
        const uint8_t bank;
        const uint32_t row;

        /**
         * Bank id is calculated considering banks in all the ranks
         * e.g.: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0
         * and bankId = 8 --> rank1, bank0
         */
        const uint16_t bankId;
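
        // Illustrative (follows from the example above): bankId is
        // effectively rank * banksPerRank + bank.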

        /**
         * The starting address of the DRAM packet.
         * This address could be unaligned to burst size boundaries. The
         * reason is to keep the address offset so we can accurately check
         * incoming read packets with packets in the write queue.
         */
        Addr addr;

        /**
         * The size of this dram packet in bytes
         * It is always equal to or smaller than the DRAM burst size
         */
        unsigned int size;

        /**
         * A pointer to the BurstHelper if this DRAMPacket is a split packet
         * If not a split packet (common case), this is set to NULL
         */
        BurstHelper* burstHelper;
        Bank& bankRef;
        Rank& rankRef;

        DRAMPacket(PacketPtr _pkt, bool is_read, uint8_t _rank, uint8_t _bank,
                   uint32_t _row, uint16_t bank_id, Addr _addr,
                   unsigned int _size, Bank& bank_ref, Rank& rank_ref)
            : entryTime(curTick()), readyTime(curTick()),
              pkt(_pkt), isRead(is_read), rank(_rank), bank(_bank), row(_row),
              bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL),
              bankRef(bank_ref), rankRef(rank_ref)
        { }

    };

    /**
     * Bunch of things required to set up "events" in gem5.
     * When event "respondEvent" occurs for example, the method
     * processRespondEvent is called; no parameters are allowed
     * in these methods
     */
    void processNextReqEvent();
    EventWrapper<DRAMCtrl, &DRAMCtrl::processNextReqEvent> nextReqEvent;

    void processRespondEvent();
    EventWrapper<DRAMCtrl, &DRAMCtrl::processRespondEvent> respondEvent;

    /**
     * Check if the read queue has room for more entries
     *
     * @param pktCount The number of entries needed in the read queue
     * @return true if read queue is full, false otherwise
     */
    bool readQueueFull(unsigned int pktCount) const;

    /**
     * Check if the write queue has room for more entries
     *
     * @param pktCount The number of entries needed in the write queue
     * @return true if write queue is full, false otherwise
     */
    bool writeQueueFull(unsigned int pktCount) const;

    /**
     * When a new read comes in, first check if the write queue has a
     * pending request to the same address. If not, decode the
     * address to populate rank/bank/row, create one or multiple
     * "dram_pkt", and push them to the back of the read queue.
     * If this is the only read request in the system, schedule an
     * event to start servicing it.
     *
     * @param pkt The request packet from the outside world
     * @param pktCount The number of DRAM bursts the pkt
     *        translates to. If pkt size is larger than one full burst,
     *        then pktCount is greater than one.
     */
    void addToReadQueue(PacketPtr pkt, unsigned int pktCount);

    /**
     * Decode the incoming pkt, create a dram_pkt and push to the
     * back of the write queue. If the write queue length is more than
     * the threshold specified by the user, i.e. the queue is beginning
     * to get full, stop reads, and start draining writes.
     *
     * @param pkt The request packet from the outside world
     * @param pktCount The number of DRAM bursts the pkt
     *        translates to. If pkt size is larger than one full burst,
     *        then pktCount is greater than one.
     */
    void addToWriteQueue(PacketPtr pkt, unsigned int pktCount);

    /**
     * Actually do the DRAM access - figure out the latency it
     * will take to service the req based on bank state, channel state etc.,
     * and then update those states to account for this request. Based
     * on this, update the packet's "readyTime" and move it to the
     * response queue from where it will eventually go back to the outside
     * world.
     *
     * @param dram_pkt The DRAM packet created from the outside world pkt
     */
    void doDRAMAccess(DRAMPacket* dram_pkt);

    /**
     * When a packet reaches its "readyTime" in the response queue,
     * use the "access()" method in AbstractMemory to actually
     * create the response packet, and send it back to the outside
     * world requestor.
     *
     * @param pkt The packet from the outside world
     * @param static_latency Static latency to add before sending the packet
     */
    void accessAndRespond(PacketPtr pkt, Tick static_latency);

    /**
     * Address decoder to figure out physical mapping onto ranks,
     * banks, and rows. This function is called multiple times on the same
     * system packet if the packet is larger than a burst of the memory. The
     * dramPktAddr is used for the offset within the packet.
     *
     * @param pkt The packet from the outside world
     * @param dramPktAddr The starting address of the DRAM packet
     * @param size The size of the DRAM packet in bytes
     * @param isRead Is the request for a read or a write to DRAM
     * @return A DRAMPacket pointer with the decoded information
     */
    DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size,
                           bool isRead);

    /**
     * The memory scheduler/arbiter - picks which request needs to
     * go next, based on the specified policy such as FCFS or FR-FCFS
     * and moves it to the head of the queue.
     * Prioritizes accesses to the same rank as previous burst unless
     * controller is switching command type.
     *
     * @param queue Queued requests to consider
     * @param extra_col_delay Any extra delay due to a read/write switch
     * @return true if a packet is scheduled to a rank which is available,
     *         else false
     */
    bool chooseNext(std::deque<DRAMPacket*>& queue, Tick extra_col_delay);

    /**
     * For FR-FCFS policy, reorder the read/write queue depending on row
     * buffer hits and earliest bursts available in DRAM
     *
     * @param queue Queued requests to consider
     * @param extra_col_delay Any extra delay due to a read/write switch
     * @return true if a packet is scheduled to a rank which is available,
     *         else false
     */
    bool reorderQueue(std::deque<DRAMPacket*>& queue, Tick extra_col_delay);

    /**
     * Find which are the earliest banks ready to issue an activate
     * for the enqueued requests. Assumes a maximum of 64 banks per DIMM.
     * Also checks if the bank is already prepped.
     *
     * @param queue Queued requests to consider
     * @param min_col_at Time of seamless burst command
     * @return One-hot encoded mask of bank indices
     * @return boolean indicating burst can issue seamlessly, with no gaps
     */
    std::pair<uint64_t, bool> minBankPrep(const std::deque<DRAMPacket*>& queue,
                                          Tick min_col_at) const;

    /**
     * Keep track of when row activations happen, in order to enforce
     * the maximum number of activations in the activation window. The
     * method updates the time that the banks become available based
     * on the current limits.
     *
     * @param rank_ref Reference to the rank
     * @param bank_ref Reference to the bank
     * @param act_tick Time when the activation takes place
     * @param row Index of the row
     */
    void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick,
                      uint32_t row);

    /**
     * Precharge a given bank and also update when the precharge is
     * done. This will also deal with any stats related to the
     * accesses to the open page.
     *
     * @param rank_ref The rank to precharge
     * @param bank_ref The bank to precharge
     * @param pre_at Time when the precharge takes place
     * @param trace If this is an auto precharge, do not add it to the trace
     */
    void prechargeBank(Rank& rank_ref, Bank& bank_ref,
                       Tick pre_at, bool trace = true);

    /**
     * Used for debugging to observe the contents of the queues.
     */
    void printQs() const;

    /**
     * Burst-align an address.
     *
     * @param addr The potentially unaligned address
     *
     * @return An address aligned to a DRAM burst
     */
    Addr burstAlign(Addr addr) const { return (addr & ~(Addr(burstSize - 1))); }
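
    // Worked example (illustrative): with burstSize = 64, burstAlign(0x1234)
    // clears the low six address bits and returns 0x1200.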

    /**
     * The controller's main read and write queues
     */
    std::deque<DRAMPacket*> readQueue;
    std::deque<DRAMPacket*> writeQueue;

    /**
     * To avoid iterating over the write queue to check for
     * overlapping transactions, maintain a set of burst addresses
     * that are currently queued. Since we merge writes to the same
     * location we never have more than one address to the same burst
     * address.
     */
    std::unordered_set<Addr> isInWriteQueue;
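
    // Hedged sketch (assumed from the comment above, not the verbatim
    // implementation): an incoming read can be checked against queued
    // writes with
    //   isInWriteQueue.find(burstAlign(addr)) != isInWriteQueue.end()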

    /**
     * Response queue where read packets wait after we're done working
     * with them, but it's not time to send the response yet. The
     * responses are stored separately mostly to keep the code clean
     * and help with event scheduling. For all logical purposes such
     * as sizing the read queue, this and the main read queue need to
     * be added together.
     */
    std::deque<DRAMPacket*> respQueue;

    /**
     * Vector of ranks
     */
    std::vector<Rank*> ranks;

    /**
     * The following are basic design parameters of the memory
     * controller, and are initialized based on parameter values.
     * The rowsPerBank is determined based on the capacity, number of
     * ranks and banks, the burst size, and the row buffer size.
     */
    const uint32_t deviceSize;
    const uint32_t deviceBusWidth;
    const uint32_t burstLength;
    const uint32_t deviceRowBufferSize;
    const uint32_t devicesPerRank;
    const uint32_t burstSize;
    const uint32_t rowBufferSize;
    const uint32_t columnsPerRowBuffer;
    const uint32_t columnsPerStripe;
    const uint32_t ranksPerChannel;
    const uint32_t bankGroupsPerRank;
    const bool bankGroupArch;
    const uint32_t banksPerRank;
    const uint32_t channels;
    uint32_t rowsPerBank;
    const uint32_t readBufferSize;
    const uint32_t writeBufferSize;
    const uint32_t writeHighThreshold;
    const uint32_t writeLowThreshold;
    const uint32_t minWritesPerSwitch;
    uint32_t writesThisTime;
    uint32_t readsThisTime;

    /**
     * Basic memory timing parameters initialized based on parameter
     * values.
     */
    const Tick M5_CLASS_VAR_USED tCK;
    const Tick tWTR;
    const Tick tRTW;
    const Tick tCS;
    const Tick tBURST;
    const Tick tCCD_L;
    const Tick tRCD;
    const Tick tCL;
    const Tick tRP;
    const Tick tRAS;
    const Tick tWR;
    const Tick tRTP;
    const Tick tRFC;
    const Tick tREFI;
    const Tick tRRD;
    const Tick tRRD_L;
    const Tick tXAW;
    const Tick tXP;
    const Tick tXS;
    const uint32_t activationLimit;

    /**
     * Memory controller configuration initialized based on parameter
     * values.
     */
    Enums::MemSched memSchedPolicy;
    Enums::AddrMap addrMapping;
    Enums::PageManage pageMgmt;

    /**
     * Max column accesses (read and write) per row, before forcefully
     * closing it.
     */
    const uint32_t maxAccessesPerRow;

    /**
     * Pipeline latency of the controller frontend. The frontend
     * contribution is added to writes (that complete when they are in
     * the write buffer) and reads that are serviced by the write buffer.
     */
    const Tick frontendLatency;

    /**
     * Pipeline latency of the backend and PHY. Along with the
     * frontend contribution, this latency is added to reads serviced
     * by the DRAM.
     */
    const Tick backendLatency;

    /**
     * Till when has the main data bus been spoken for already?
     */
    Tick busBusyUntil;

    Tick prevArrival;

    /**
     * The soonest you have to start thinking about the next request
     * is the longest access time that can occur before
     * busBusyUntil. Assuming you need to precharge, open a new row,
     * and access, it is tRP + tRCD + tCL.
     */
    Tick nextReqTime;

    // All statistics that the model needs to capture
    Stats::Scalar readReqs;
    Stats::Scalar writeReqs;
    Stats::Scalar readBursts;
    Stats::Scalar writeBursts;
    Stats::Scalar bytesReadDRAM;
    Stats::Scalar bytesReadWrQ;
    Stats::Scalar bytesWritten;
    Stats::Scalar bytesReadSys;
    Stats::Scalar bytesWrittenSys;
    Stats::Scalar servicedByWrQ;
    Stats::Scalar mergedWrBursts;
    Stats::Scalar neitherReadNorWrite;
    Stats::Vector perBankRdBursts;
    Stats::Vector perBankWrBursts;
    Stats::Scalar numRdRetry;
    Stats::Scalar numWrRetry;
    Stats::Scalar totGap;
    Stats::Vector readPktSize;
    Stats::Vector writePktSize;
    Stats::Vector rdQLenPdf;
    Stats::Vector wrQLenPdf;
    Stats::Histogram bytesPerActivate;
    Stats::Histogram rdPerTurnAround;
    Stats::Histogram wrPerTurnAround;

    // Latencies summed over all requests
    Stats::Scalar totQLat;
    Stats::Scalar totMemAccLat;
    Stats::Scalar totBusLat;

    // Average latencies per request
    Stats::Formula avgQLat;
    Stats::Formula avgBusLat;
    Stats::Formula avgMemAccLat;

    // Average bandwidth
    Stats::Formula avgRdBW;
    Stats::Formula avgWrBW;
    Stats::Formula avgRdBWSys;
    Stats::Formula avgWrBWSys;
    Stats::Formula peakBW;
    Stats::Formula busUtil;
    Stats::Formula busUtilRead;
    Stats::Formula busUtilWrite;

    // Average queue lengths
    Stats::Average avgRdQLen;
    Stats::Average avgWrQLen;

    // Row hit count and rate
    Stats::Scalar readRowHits;
    Stats::Scalar writeRowHits;
    Stats::Formula readRowHitRate;
    Stats::Formula writeRowHitRate;
    Stats::Formula avgGap;

    // DRAM Power Calculation
    Stats::Formula pageHitRate;

    // Holds the value of the rank of burst issued
    uint8_t activeRank;

    // timestamp offset
    uint64_t timeStampOffset;

    /**
     * Upstream caches need this packet until true is returned, so
     * hold it for deletion until a subsequent call
     */
    std::unique_ptr<Packet> pendingDelete;

    /**
     * This function increments the energy when called. If stats are
     * dumped periodically, note accumulated energy values will
     * appear in the stats (even if the stats are reset). This is a
     * result of the energy values coming from DRAMPower, and there
     * is currently no support for resetting the state.
     *
     * @param rank_ref Current rank
     */
    void updatePowerStats(Rank& rank_ref);

    /**
     * Function for sorting Command structures based on timeStamp
     *
     * @param cmd Memory Command
     * @param cmd_next Memory Command
     * @return true if timeStamp of cmd is less than timeStamp of cmd_next
     */
    static bool sortTime(const Command& cmd, const Command& cmd_next) {
        return cmd.timeStamp < cmd_next.timeStamp;
    };
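
    // A minimal sketch (an assumption, not the verbatim flushCmdList()
    // implementation) of how sortTime is applied:
    //
    //   std::sort(cmdList.begin(), cmdList.end(), DRAMCtrl::sortTime);
    //   // forward every Command with timeStamp <= curTick() to DRAMPower,
    //   // then erase the forwarded prefix from cmdList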

  public:

    void regStats() override;

    DRAMCtrl(const DRAMCtrlParams* p);

    DrainState drain() override;

    virtual BaseSlavePort& getSlavePort(const std::string& if_name,
                                        PortID idx = InvalidPortID) override;

    virtual void init() override;
    virtual void startup() override;
    virtual void drainResume() override;

    /**
     * Return true once refresh is complete for all ranks and there are no
     * additional commands enqueued. (only evaluated when draining)
     * This will ensure that all banks are closed, power state is IDLE, and
     * power stats have been updated
     *
     * @return true if all ranks have refreshed, with no commands enqueued
     */
    bool allRanksDrained() const;

  protected:

    Tick recvAtomic(PacketPtr pkt);
    void recvFunctional(PacketPtr pkt);
    bool recvTimingReq(PacketPtr pkt);

};

#endif //__MEM_DRAM_CTRL_HH__