1/* 2 * Copyright (c) 2012-2016 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2013 Amin Farmahini-Farahani 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Andreas Hansson 41 * Ani Udipi 42 * Neha Agarwal 43 * Omar Naji 44 * Matthias Jung
| 1/* 2 * Copyright (c) 2012-2016 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2013 Amin Farmahini-Farahani 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Andreas Hansson 41 * Ani Udipi 42 * Neha Agarwal 43 * Omar Naji 44 * Matthias Jung
|
| 45 * Wendy Elsasser
|
45 */ 46 47/** 48 * @file 49 * DRAMCtrl declaration 50 */ 51 52#ifndef __MEM_DRAM_CTRL_HH__ 53#define __MEM_DRAM_CTRL_HH__ 54 55#include <deque> 56#include <string> 57#include <unordered_set> 58 59#include "base/callback.hh" 60#include "base/statistics.hh" 61#include "enums/AddrMap.hh" 62#include "enums/MemSched.hh" 63#include "enums/PageManage.hh" 64#include "mem/abstract_mem.hh" 65#include "mem/qport.hh" 66#include "params/DRAMCtrl.hh" 67#include "sim/eventq.hh" 68#include "mem/drampower.hh" 69 70/** 71 * The DRAM controller is a single-channel memory controller capturing 72 * the most important timing constraints associated with a 73 * contemporary DRAM. For multi-channel memory systems, the controller 74 * is combined with a crossbar model, with the channel address 75 * interleaving taking part in the crossbar. 76 * 77 * As a basic design principle, this controller 78 * model is not cycle callable, but instead uses events to: 1) decide 79 * when new decisions can be made, 2) when resources become available, 80 * 3) when things are to be considered done, and 4) when to send 81 * things back. Through these simple principles, the model delivers 82 * high performance, and lots of flexibility, allowing users to 83 * evaluate the system impact of a wide range of memory technologies, 84 * such as DDR3/4, LPDDR2/3/4, WideIO1/2, HBM and HMC. 85 * 86 * For more details, please see Hansson et al, "Simulating DRAM 87 * controllers for future system architecture exploration", 88 * Proc. ISPASS, 2014. If you use this model as part of your research 89 * please cite the paper.
| 46 */ 47 48/** 49 * @file 50 * DRAMCtrl declaration 51 */ 52 53#ifndef __MEM_DRAM_CTRL_HH__ 54#define __MEM_DRAM_CTRL_HH__ 55 56#include <deque> 57#include <string> 58#include <unordered_set> 59 60#include "base/callback.hh" 61#include "base/statistics.hh" 62#include "enums/AddrMap.hh" 63#include "enums/MemSched.hh" 64#include "enums/PageManage.hh" 65#include "mem/abstract_mem.hh" 66#include "mem/qport.hh" 67#include "params/DRAMCtrl.hh" 68#include "sim/eventq.hh" 69#include "mem/drampower.hh" 70 71/** 72 * The DRAM controller is a single-channel memory controller capturing 73 * the most important timing constraints associated with a 74 * contemporary DRAM. For multi-channel memory systems, the controller 75 * is combined with a crossbar model, with the channel address 76 * interleaving taking part in the crossbar. 77 * 78 * As a basic design principle, this controller 79 * model is not cycle callable, but instead uses events to: 1) decide 80 * when new decisions can be made, 2) when resources become available, 81 * 3) when things are to be considered done, and 4) when to send 82 * things back. Through these simple principles, the model delivers 83 * high performance, and lots of flexibility, allowing users to 84 * evaluate the system impact of a wide range of memory technologies, 85 * such as DDR3/4, LPDDR2/3/4, WideIO1/2, HBM and HMC. 86 * 87 * For more details, please see Hansson et al, "Simulating DRAM 88 * controllers for future system architecture exploration", 89 * Proc. ISPASS, 2014. If you use this model as part of your research 90 * please cite the paper.
|
| 91 * 92 * The low-power functionality implements a staggered powerdown 93 * similar to that described in "Optimized Active and Power-Down Mode 94 * Refresh Control in 3D-DRAMs" by Jung et al, VLSI-SoC, 2014.
|
90 */ 91class DRAMCtrl : public AbstractMemory 92{ 93 94 private: 95 96 // For now, make use of a queued slave port to avoid dealing with 97 // flow control for the responses being sent back 98 class MemoryPort : public QueuedSlavePort 99 { 100 101 RespPacketQueue queue; 102 DRAMCtrl& memory; 103 104 public: 105 106 MemoryPort(const std::string& name, DRAMCtrl& _memory); 107 108 protected: 109 110 Tick recvAtomic(PacketPtr pkt); 111 112 void recvFunctional(PacketPtr pkt); 113 114 bool recvTimingReq(PacketPtr); 115 116 virtual AddrRangeList getAddrRanges() const; 117 118 }; 119 120 /** 121 * Our incoming port, for a multi-ported controller add a crossbar 122 * in front of it 123 */ 124 MemoryPort port; 125 126 /** 127 * Remeber if the memory system is in timing mode 128 */ 129 bool isTimingMode; 130 131 /** 132 * Remember if we have to retry a request when available. 133 */ 134 bool retryRdReq; 135 bool retryWrReq; 136 137 /** 138 * Bus state used to control the read/write switching and drive 139 * the scheduling of the next request. 140 */ 141 enum BusState { 142 READ = 0,
| 95 */ 96class DRAMCtrl : public AbstractMemory 97{ 98 99 private: 100 101 // For now, make use of a queued slave port to avoid dealing with 102 // flow control for the responses being sent back 103 class MemoryPort : public QueuedSlavePort 104 { 105 106 RespPacketQueue queue; 107 DRAMCtrl& memory; 108 109 public: 110 111 MemoryPort(const std::string& name, DRAMCtrl& _memory); 112 113 protected: 114 115 Tick recvAtomic(PacketPtr pkt); 116 117 void recvFunctional(PacketPtr pkt); 118 119 bool recvTimingReq(PacketPtr); 120 121 virtual AddrRangeList getAddrRanges() const; 122 123 }; 124 125 /** 126 * Our incoming port, for a multi-ported controller add a crossbar 127 * in front of it 128 */ 129 MemoryPort port; 130 131 /** 132 * Remeber if the memory system is in timing mode 133 */ 134 bool isTimingMode; 135 136 /** 137 * Remember if we have to retry a request when available. 138 */ 139 bool retryRdReq; 140 bool retryWrReq; 141 142 /** 143 * Bus state used to control the read/write switching and drive 144 * the scheduling of the next request. 145 */ 146 enum BusState { 147 READ = 0,
|
143 READ_TO_WRITE,
| |
144 WRITE,
| 148 WRITE,
|
145 WRITE_TO_READ
| |
146 }; 147 148 BusState busState; 149
| 149 }; 150 151 BusState busState; 152
|
| 153 /* bus state for next request event triggered */ 154 BusState busStateNext; 155
|
150 /** 151 * Simple structure to hold the values needed to keep track of 152 * commands for DRAMPower 153 */ 154 struct Command { 155 Data::MemCommand::cmds type; 156 uint8_t bank; 157 Tick timeStamp; 158 159 constexpr Command(Data::MemCommand::cmds _type, uint8_t _bank, 160 Tick time_stamp) 161 : type(_type), bank(_bank), timeStamp(time_stamp) 162 { } 163 }; 164 165 /** 166 * A basic class to track the bank state, i.e. what row is 167 * currently open (if any), when is the bank free to accept a new 168 * column (read/write) command, when can it be precharged, and 169 * when can it be activated. 170 * 171 * The bank also keeps track of how many bytes have been accessed 172 * in the open row since it was opened. 173 */ 174 class Bank 175 { 176 177 public: 178 179 static const uint32_t NO_ROW = -1; 180 181 uint32_t openRow; 182 uint8_t bank; 183 uint8_t bankgr; 184 185 Tick colAllowedAt; 186 Tick preAllowedAt; 187 Tick actAllowedAt; 188 189 uint32_t rowAccesses; 190 uint32_t bytesAccessed; 191 192 Bank() : 193 openRow(NO_ROW), bank(0), bankgr(0), 194 colAllowedAt(0), preAllowedAt(0), actAllowedAt(0), 195 rowAccesses(0), bytesAccessed(0) 196 { } 197 }; 198 199 200 /**
| 156 /** 157 * Simple structure to hold the values needed to keep track of 158 * commands for DRAMPower 159 */ 160 struct Command { 161 Data::MemCommand::cmds type; 162 uint8_t bank; 163 Tick timeStamp; 164 165 constexpr Command(Data::MemCommand::cmds _type, uint8_t _bank, 166 Tick time_stamp) 167 : type(_type), bank(_bank), timeStamp(time_stamp) 168 { } 169 }; 170 171 /** 172 * A basic class to track the bank state, i.e. what row is 173 * currently open (if any), when is the bank free to accept a new 174 * column (read/write) command, when can it be precharged, and 175 * when can it be activated. 176 * 177 * The bank also keeps track of how many bytes have been accessed 178 * in the open row since it was opened. 179 */ 180 class Bank 181 { 182 183 public: 184 185 static const uint32_t NO_ROW = -1; 186 187 uint32_t openRow; 188 uint8_t bank; 189 uint8_t bankgr; 190 191 Tick colAllowedAt; 192 Tick preAllowedAt; 193 Tick actAllowedAt; 194 195 uint32_t rowAccesses; 196 uint32_t bytesAccessed; 197 198 Bank() : 199 openRow(NO_ROW), bank(0), bankgr(0), 200 colAllowedAt(0), preAllowedAt(0), actAllowedAt(0), 201 rowAccesses(0), bytesAccessed(0) 202 { } 203 }; 204 205 206 /**
|
| 207 * The power state captures the different operational states of 208 * the DRAM and interacts with the bus read/write state machine, 209 * and the refresh state machine. 210 * 211 * PWR_IDLE : The idle state in which all banks are closed 212 * From here can transition to: PWR_REF, PWR_ACT, 213 * PWR_PRE_PDN 214 * 215 * PWR_REF : Auto-refresh state. Will transition when refresh is 216 * complete based on power state prior to PWR_REF 217 * From here can transition to: PWR_IDLE, PWR_PRE_PDN, 218 * PWR_SREF 219 * 220 * PWR_SREF : Self-refresh state. Entered after refresh if 221 * previous state was PWR_PRE_PDN 222 * From here can transition to: PWR_IDLE 223 * 224 * PWR_PRE_PDN : Precharge power down state 225 * From here can transition to: PWR_REF, PWR_IDLE 226 * 227 * PWR_ACT : Activate state in which one or more banks are open 228 * From here can transition to: PWR_IDLE, PWR_ACT_PDN 229 * 230 * PWR_ACT_PDN : Activate power down state 231 * From here can transition to: PWR_ACT 232 */ 233 enum PowerState { 234 PWR_IDLE = 0, 235 PWR_REF, 236 PWR_SREF, 237 PWR_PRE_PDN, 238 PWR_ACT, 239 PWR_ACT_PDN 240 }; 241 242 /** 243 * The refresh state is used to control the progress of the 244 * refresh scheduling. When normal operation is in progress the 245 * refresh state is idle. Once tREFI has elapsed, a refresh event 246 * is triggered to start the following STM transitions which are 247 * used to issue a refresh and return back to normal operation 248 * 249 * REF_IDLE : IDLE state used during normal operation 250 * From here can transition to: REF_DRAIN 251 * 252 * REF_SREF_EXIT : Exiting a self-refresh; refresh event scheduled 253 * after self-refresh exit completes 254 * From here can transition to: REF_DRAIN 255 * 256 * REF_DRAIN : Drain state in which ongoing accesses complete. 
257 * From here can transition to: REF_PD_EXIT 258 * 259 * REF_PD_EXIT : Evaluate pwrState and issue wakeup if needed 260 * Next state dependent on whether banks are open 261 * From here can transition to: REF_PRE, REF_START 262 * 263 * REF_PRE : Close (precharge) all open banks 264 * From here can transition to: REF_START 265 * 266 * REF_START : Issue refresh command and update DRAMPower stats 267 * From here can transition to: REF_RUN 268 * 269 * REF_RUN : Refresh running, waiting for tRFC to expire 270 * From here can transition to: REF_IDLE, REF_SREF_EXIT 271 */ 272 enum RefreshState { 273 REF_IDLE = 0, 274 REF_DRAIN, 275 REF_PD_EXIT, 276 REF_SREF_EXIT, 277 REF_PRE, 278 REF_START, 279 REF_RUN 280 }; 281 282 /**
|
201 * Rank class includes a vector of banks. Refresh and Power state 202 * machines are defined per rank. Events required to change the 203 * state of the refresh and power state machine are scheduled per 204 * rank. This class allows the implementation of rank-wise refresh 205 * and rank-wise power-down. 206 */ 207 class Rank : public EventManager 208 { 209 210 private: 211 212 /**
| 283 * Rank class includes a vector of banks. Refresh and Power state 284 * machines are defined per rank. Events required to change the 285 * state of the refresh and power state machine are scheduled per 286 * rank. This class allows the implementation of rank-wise refresh 287 * and rank-wise power-down. 288 */ 289 class Rank : public EventManager 290 { 291 292 private: 293 294 /**
|
213 * The power state captures the different operational states of 214 * the DRAM and interacts with the bus read/write state machine, 215 * and the refresh state machine. In the idle state all banks are 216 * precharged. From there we either go to an auto refresh (as 217 * determined by the refresh state machine), or to a precharge 218 * power down mode. From idle the memory can also go to the active 219 * state (with one or more banks active), and in turn from there 220 * to active power down. At the moment we do not capture the deep 221 * power down and self-refresh state. 222 */ 223 enum PowerState { 224 PWR_IDLE = 0, 225 PWR_REF, 226 PWR_PRE_PDN, 227 PWR_ACT, 228 PWR_ACT_PDN 229 }; 230 231 /** 232 * The refresh state is used to control the progress of the 233 * refresh scheduling. When normal operation is in progress the 234 * refresh state is idle. From there, it progresses to the refresh 235 * drain state once tREFI has passed. The refresh drain state 236 * captures the DRAM row active state, as it will stay there until 237 * all ongoing accesses complete. Thereafter all banks are 238 * precharged, and lastly, the DRAM is refreshed. 239 */ 240 enum RefreshState { 241 REF_IDLE = 0, 242 REF_DRAIN, 243 REF_PRE, 244 REF_RUN 245 }; 246 247 /**
| |
248 * A reference to the parent DRAMCtrl instance 249 */ 250 DRAMCtrl& memory; 251 252 /** 253 * Since we are taking decisions out of order, we need to keep
| 295 * A reference to the parent DRAMCtrl instance 296 */ 297 DRAMCtrl& memory; 298 299 /** 300 * Since we are taking decisions out of order, we need to keep
|
254 * track of what power transition is happening at what time, such 255 * that we can go back in time and change history. For example, if 256 * we precharge all banks and schedule going to the idle state, we 257 * might at a later point decide to activate a bank before the 258 * transition to idle would have taken place.
| 301 * track of what power transition is happening at what time
|
259 */ 260 PowerState pwrStateTrans; 261 262 /**
| 302 */ 303 PowerState pwrStateTrans; 304 305 /**
|
263 * Current power state.
| 306 * Previous low-power state, which will be re-entered after refresh.
|
264 */
| 307 */
|
265 PowerState pwrState;
| 308 PowerState pwrStatePostRefresh;
|
266 267 /** 268 * Track when we transitioned to the current power state 269 */ 270 Tick pwrStateTick; 271 272 /**
| 309 310 /** 311 * Track when we transitioned to the current power state 312 */ 313 Tick pwrStateTick; 314 315 /**
|
273 * current refresh state 274 */ 275 RefreshState refreshState; 276 277 /**
| |
278 * Keep track of when a refresh is due. 279 */ 280 Tick refreshDueAt; 281 282 /* 283 * Command energies 284 */ 285 Stats::Scalar actEnergy; 286 Stats::Scalar preEnergy; 287 Stats::Scalar readEnergy; 288 Stats::Scalar writeEnergy; 289 Stats::Scalar refreshEnergy; 290 291 /* 292 * Active Background Energy 293 */ 294 Stats::Scalar actBackEnergy; 295 296 /* 297 * Precharge Background Energy 298 */ 299 Stats::Scalar preBackEnergy; 300
| 316 * Keep track of when a refresh is due. 317 */ 318 Tick refreshDueAt; 319 320 /* 321 * Command energies 322 */ 323 Stats::Scalar actEnergy; 324 Stats::Scalar preEnergy; 325 Stats::Scalar readEnergy; 326 Stats::Scalar writeEnergy; 327 Stats::Scalar refreshEnergy; 328 329 /* 330 * Active Background Energy 331 */ 332 Stats::Scalar actBackEnergy; 333 334 /* 335 * Precharge Background Energy 336 */ 337 Stats::Scalar preBackEnergy; 338
|
| 339 /* 340 * Active Power-Down Energy 341 */ 342 Stats::Scalar actPowerDownEnergy; 343 344 /* 345 * Precharge Power-Down Energy 346 */ 347 Stats::Scalar prePowerDownEnergy; 348 349 /* 350 * self Refresh Energy 351 */ 352 Stats::Scalar selfRefreshEnergy; 353
|
301 Stats::Scalar totalEnergy; 302 Stats::Scalar averagePower; 303 304 /**
| 354 Stats::Scalar totalEnergy; 355 Stats::Scalar averagePower; 356 357 /**
|
| 358 * Stat to track total DRAM idle time 359 * 360 */ 361 Stats::Scalar totalIdleTime; 362 363 /**
|
305 * Track time spent in each power state. 306 */ 307 Stats::Vector pwrStateTime; 308 309 /** 310 * Function to update Power Stats 311 */ 312 void updatePowerStats(); 313 314 /** 315 * Schedule a power state transition in the future, and 316 * potentially override an already scheduled transition. 317 * 318 * @param pwr_state Power state to transition to 319 * @param tick Tick when transition should take place 320 */ 321 void schedulePowerEvent(PowerState pwr_state, Tick tick); 322 323 public: 324 325 /**
| 364 * Track time spent in each power state. 365 */ 366 Stats::Vector pwrStateTime; 367 368 /** 369 * Function to update Power Stats 370 */ 371 void updatePowerStats(); 372 373 /** 374 * Schedule a power state transition in the future, and 375 * potentially override an already scheduled transition. 376 * 377 * @param pwr_state Power state to transition to 378 * @param tick Tick when transition should take place 379 */ 380 void schedulePowerEvent(PowerState pwr_state, Tick tick); 381 382 public: 383 384 /**
|
| 385 * Current power state. 386 */ 387 PowerState pwrState; 388 389 /** 390 * current refresh state 391 */ 392 RefreshState refreshState; 393 394 /** 395 * rank is in or transitioning to power-down or self-refresh 396 */ 397 bool inLowPowerState; 398 399 /**
|
326 * Current Rank index 327 */ 328 uint8_t rank; 329
| 400 * Current Rank index 401 */ 402 uint8_t rank; 403
|
| 404 /** 405 * Track number of packets in read queue going to this rank 406 */ 407 uint32_t readEntries; 408 409 /** 410 * Track number of packets in write queue going to this rank 411 */ 412 uint32_t writeEntries; 413
|
330 /**
| 414 /**
|
| 415 * Number of ACT, RD, and WR events currently scheduled 416 * Incremented when a refresh event is started as well 417 * Used to determine when a low-power state can be entered 418 */ 419 uint8_t outstandingEvents; 420 421 /** 422 * delay power-down and self-refresh exit until this requirement is met 423 */ 424 Tick wakeUpAllowedAt; 425 426 /**
|
331 * One DRAMPower instance per rank 332 */ 333 DRAMPower power; 334 335 /** 336 * List of comamnds issued, to be sent to DRAMPpower at refresh 337 * and stats dump. Keep commands here since commands to different 338 * banks are added out of order. Will only pass commands up to 339 * curTick() to DRAMPower after sorting. 340 */ 341 std::vector<Command> cmdList; 342 343 /** 344 * Vector of Banks. Each rank is made of several devices which in 345 * term are made from several banks. 346 */ 347 std::vector<Bank> banks; 348 349 /** 350 * To track number of banks which are currently active for 351 * this rank. 352 */ 353 unsigned int numBanksActive; 354 355 /** List to keep track of activate ticks */ 356 std::deque<Tick> actTicks; 357 358 Rank(DRAMCtrl& _memory, const DRAMCtrlParams* _p); 359 360 const std::string name() const 361 { 362 return csprintf("%s_%d", memory.name(), rank); 363 } 364 365 /** 366 * Kick off accounting for power and refresh states and 367 * schedule initial refresh. 368 * 369 * @param ref_tick Tick for first refresh 370 */ 371 void startup(Tick ref_tick); 372 373 /** 374 * Stop the refresh events. 375 */ 376 void suspend(); 377 378 /** 379 * Check if the current rank is available for scheduling.
| 427 * One DRAMPower instance per rank 428 */ 429 DRAMPower power; 430 431 /** 432 * List of comamnds issued, to be sent to DRAMPpower at refresh 433 * and stats dump. Keep commands here since commands to different 434 * banks are added out of order. Will only pass commands up to 435 * curTick() to DRAMPower after sorting. 436 */ 437 std::vector<Command> cmdList; 438 439 /** 440 * Vector of Banks. Each rank is made of several devices which in 441 * term are made from several banks. 442 */ 443 std::vector<Bank> banks; 444 445 /** 446 * To track number of banks which are currently active for 447 * this rank. 448 */ 449 unsigned int numBanksActive; 450 451 /** List to keep track of activate ticks */ 452 std::deque<Tick> actTicks; 453 454 Rank(DRAMCtrl& _memory, const DRAMCtrlParams* _p); 455 456 const std::string name() const 457 { 458 return csprintf("%s_%d", memory.name(), rank); 459 } 460 461 /** 462 * Kick off accounting for power and refresh states and 463 * schedule initial refresh. 464 * 465 * @param ref_tick Tick for first refresh 466 */ 467 void startup(Tick ref_tick); 468 469 /** 470 * Stop the refresh events. 471 */ 472 void suspend(); 473 474 /** 475 * Check if the current rank is available for scheduling.
|
| 476 * Rank will be unavailable if refresh is ongoing. 477 * This includes refresh events explicitly scheduled from the 478 * controller or memory initiated events which will occur during 479 * self-refresh mode.
|
380 * 381 * @param Return true if the rank is idle from a refresh point of view 382 */ 383 bool isAvailable() const { return refreshState == REF_IDLE; } 384 385 /** 386 * Check if the current rank has all banks closed and is not 387 * in a low power state 388 * 389 * @param Return true if the rank is idle from a bank 390 * and power point of view 391 */ 392 bool inPwrIdleState() const { return pwrState == PWR_IDLE; } 393 394 /**
| 480 * 481 * @param Return true if the rank is idle from a refresh point of view 482 */ 483 bool isAvailable() const { return refreshState == REF_IDLE; } 484 485 /** 486 * Check if the current rank has all banks closed and is not 487 * in a low power state 488 * 489 * @param Return true if the rank is idle from a bank 490 * and power point of view 491 */ 492 bool inPwrIdleState() const { return pwrState == PWR_IDLE; } 493 494 /**
|
| 495 * Trigger a self-refresh exit if there are entries enqueued 496 * Exit if there are any read entries regardless of the bus state. 497 * If we are currently issuing write commands, exit if we have any 498 * write commands enqueued as well. 499 * Could expand this in the future to analyze state of entire queue 500 * if needed. 501 * 502 * @return boolean indicating self-refresh exit should be scheduled 503 */ 504 bool forceSelfRefreshExit() const { 505 return (readEntries != 0) || 506 ((memory.busStateNext == WRITE) && (writeEntries != 0)); 507 } 508 509 /** 510 * Check if the current rank is idle and should enter a low-power state 511 * 512 * @return true if there are no read commands in Q 513 * and there are no outstanding events 514 */ 515 bool lowPowerEntryReady() const; 516 517 /**
|
395 * Let the rank check if it was waiting for requests to drain 396 * to allow it to transition states. 397 */ 398 void checkDrainDone(); 399 400 /** 401 * Push command out of cmdList queue that are scheduled at 402 * or before curTick() to DRAMPower library 403 * All commands before curTick are guaranteed to be complete 404 * and can safely be flushed. 405 */ 406 void flushCmdList(); 407 408 /* 409 * Function to register Stats 410 */ 411 void regStats(); 412 413 /** 414 * Computes stats just prior to dump event 415 */ 416 void computeStats(); 417
| 518 * Let the rank check if it was waiting for requests to drain 519 * to allow it to transition states. 520 */ 521 void checkDrainDone(); 522 523 /** 524 * Push command out of cmdList queue that are scheduled at 525 * or before curTick() to DRAMPower library 526 * All commands before curTick are guaranteed to be complete 527 * and can safely be flushed. 528 */ 529 void flushCmdList(); 530 531 /* 532 * Function to register Stats 533 */ 534 void regStats(); 535 536 /** 537 * Computes stats just prior to dump event 538 */ 539 void computeStats(); 540
|
| 541 /** 542 * Schedule a transition to power-down (sleep) 543 * 544 * @param pwr_state Power state to transition to 545 * @param tick Absolute tick when transition should take place 546 */ 547 void powerDownSleep(PowerState pwr_state, Tick tick); 548 549 /** 550 * Schedule an event to wake up from power-down or self-refresh 551 * and update bank timing parameters 552 * 553 * @param exit_delay Relative tick defining the delay required between 554 * low-power exit and the next command 555 */ 556 void scheduleWakeUpEvent(Tick exit_delay); 557 558 void processWriteDoneEvent(); 559 EventWrapper<Rank, &Rank::processWriteDoneEvent> 560 writeDoneEvent; 561
|
418 void processActivateEvent(); 419 EventWrapper<Rank, &Rank::processActivateEvent> 420 activateEvent; 421 422 void processPrechargeEvent(); 423 EventWrapper<Rank, &Rank::processPrechargeEvent> 424 prechargeEvent; 425 426 void processRefreshEvent(); 427 EventWrapper<Rank, &Rank::processRefreshEvent> 428 refreshEvent; 429 430 void processPowerEvent(); 431 EventWrapper<Rank, &Rank::processPowerEvent> 432 powerEvent; 433
| 562 void processActivateEvent(); 563 EventWrapper<Rank, &Rank::processActivateEvent> 564 activateEvent; 565 566 void processPrechargeEvent(); 567 EventWrapper<Rank, &Rank::processPrechargeEvent> 568 prechargeEvent; 569 570 void processRefreshEvent(); 571 EventWrapper<Rank, &Rank::processRefreshEvent> 572 refreshEvent; 573 574 void processPowerEvent(); 575 EventWrapper<Rank, &Rank::processPowerEvent> 576 powerEvent; 577
|
| 578 void processWakeUpEvent(); 579 EventWrapper<Rank, &Rank::processWakeUpEvent> 580 wakeUpEvent; 581
|
434 }; 435 436 // define the process to compute stats on simulation exit 437 // defined per rank as the per rank stats are based on state 438 // transition and periodically updated, requiring re-sync at 439 // exit. 440 class RankDumpCallback : public Callback 441 { 442 Rank *ranks; 443 public: 444 RankDumpCallback(Rank *r) : ranks(r) {} 445 virtual void process() { ranks->computeStats(); }; 446 }; 447 448 /** 449 * A burst helper helps organize and manage a packet that is larger than 450 * the DRAM burst size. A system packet that is larger than the burst size 451 * is split into multiple DRAM packets and all those DRAM packets point to 452 * a single burst helper such that we know when the whole packet is served. 453 */ 454 class BurstHelper { 455 456 public: 457 458 /** Number of DRAM bursts requred for a system packet **/ 459 const unsigned int burstCount; 460 461 /** Number of DRAM bursts serviced so far for a system packet **/ 462 unsigned int burstsServiced; 463 464 BurstHelper(unsigned int _burstCount) 465 : burstCount(_burstCount), burstsServiced(0) 466 { } 467 }; 468 469 /** 470 * A DRAM packet stores packets along with the timestamp of when 471 * the packet entered the queue, and also the decoded address. 472 */ 473 class DRAMPacket { 474 475 public: 476 477 /** When did request enter the controller */ 478 const Tick entryTime; 479 480 /** When will request leave the controller */ 481 Tick readyTime; 482 483 /** This comes from the outside world */ 484 const PacketPtr pkt; 485 486 const bool isRead; 487 488 /** Will be populated by address decoder */ 489 const uint8_t rank; 490 const uint8_t bank; 491 const uint32_t row; 492 493 /** 494 * Bank id is calculated considering banks in all the ranks 495 * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and 496 * bankId = 8 --> rank1, bank0 497 */ 498 const uint16_t bankId; 499 500 /** 501 * The starting address of the DRAM packet. 502 * This address could be unaligned to burst size boundaries. 
The 503 * reason is to keep the address offset so we can accurately check 504 * incoming read packets with packets in the write queue. 505 */ 506 Addr addr; 507 508 /** 509 * The size of this dram packet in bytes 510 * It is always equal or smaller than DRAM burst size 511 */ 512 unsigned int size; 513 514 /** 515 * A pointer to the BurstHelper if this DRAMPacket is a split packet 516 * If not a split packet (common case), this is set to NULL 517 */ 518 BurstHelper* burstHelper; 519 Bank& bankRef; 520 Rank& rankRef; 521 522 DRAMPacket(PacketPtr _pkt, bool is_read, uint8_t _rank, uint8_t _bank, 523 uint32_t _row, uint16_t bank_id, Addr _addr, 524 unsigned int _size, Bank& bank_ref, Rank& rank_ref) 525 : entryTime(curTick()), readyTime(curTick()), 526 pkt(_pkt), isRead(is_read), rank(_rank), bank(_bank), row(_row), 527 bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), 528 bankRef(bank_ref), rankRef(rank_ref) 529 { } 530 531 }; 532 533 /** 534 * Bunch of things requires to setup "events" in gem5 535 * When event "respondEvent" occurs for example, the method 536 * processRespondEvent is called; no parameters are allowed 537 * in these methods 538 */ 539 void processNextReqEvent(); 540 EventWrapper<DRAMCtrl,&DRAMCtrl::processNextReqEvent> nextReqEvent; 541 542 void processRespondEvent(); 543 EventWrapper<DRAMCtrl, &DRAMCtrl::processRespondEvent> respondEvent; 544 545 /** 546 * Check if the read queue has room for more entries 547 * 548 * @param pktCount The number of entries needed in the read queue 549 * @return true if read queue is full, false otherwise 550 */ 551 bool readQueueFull(unsigned int pktCount) const; 552 553 /** 554 * Check if the write queue has room for more entries 555 * 556 * @param pktCount The number of entries needed in the write queue 557 * @return true if write queue is full, false otherwise 558 */ 559 bool writeQueueFull(unsigned int pktCount) const; 560 561 /** 562 * When a new read comes in, first check if the write q has a 563 * 
pending request to the same address.\ If not, decode the 564 * address to populate rank/bank/row, create one or mutliple 565 * "dram_pkt", and push them to the back of the read queue.\ 566 * If this is the only 567 * read request in the system, schedule an event to start 568 * servicing it. 569 * 570 * @param pkt The request packet from the outside world 571 * @param pktCount The number of DRAM bursts the pkt 572 * translate to. If pkt size is larger then one full burst, 573 * then pktCount is greater than one. 574 */ 575 void addToReadQueue(PacketPtr pkt, unsigned int pktCount); 576 577 /** 578 * Decode the incoming pkt, create a dram_pkt and push to the 579 * back of the write queue. \If the write q length is more than 580 * the threshold specified by the user, ie the queue is beginning 581 * to get full, stop reads, and start draining writes. 582 * 583 * @param pkt The request packet from the outside world 584 * @param pktCount The number of DRAM bursts the pkt 585 * translate to. If pkt size is larger then one full burst, 586 * then pktCount is greater than one. 587 */ 588 void addToWriteQueue(PacketPtr pkt, unsigned int pktCount); 589 590 /** 591 * Actually do the DRAM access - figure out the latency it 592 * will take to service the req based on bank state, channel state etc 593 * and then update those states to account for this request.\ Based 594 * on this, update the packet's "readyTime" and move it to the 595 * response q from where it will eventually go back to the outside 596 * world. 597 * 598 * @param pkt The DRAM packet created from the outside world pkt 599 */ 600 void doDRAMAccess(DRAMPacket* dram_pkt); 601 602 /** 603 * When a packet reaches its "readyTime" in the response Q, 604 * use the "access()" method in AbstractMemory to actually 605 * create the response packet, and send it back to the outside 606 * world requestor. 
607 * 608 * @param pkt The packet from the outside world 609 * @param static_latency Static latency to add before sending the packet 610 */ 611 void accessAndRespond(PacketPtr pkt, Tick static_latency); 612 613 /** 614 * Address decoder to figure out physical mapping onto ranks, 615 * banks, and rows. This function is called multiple times on the same 616 * system packet if the pakcet is larger than burst of the memory. The 617 * dramPktAddr is used for the offset within the packet. 618 * 619 * @param pkt The packet from the outside world 620 * @param dramPktAddr The starting address of the DRAM packet 621 * @param size The size of the DRAM packet in bytes 622 * @param isRead Is the request for a read or a write to DRAM 623 * @return A DRAMPacket pointer with the decoded information 624 */ 625 DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size, 626 bool isRead); 627 628 /** 629 * The memory schduler/arbiter - picks which request needs to 630 * go next, based on the specified policy such as FCFS or FR-FCFS 631 * and moves it to the head of the queue. 632 * Prioritizes accesses to the same rank as previous burst unless 633 * controller is switching command type. 
634 * 635 * @param queue Queued requests to consider 636 * @param extra_col_delay Any extra delay due to a read/write switch 637 * @return true if a packet is scheduled to a rank which is available else 638 * false 639 */ 640 bool chooseNext(std::deque<DRAMPacket*>& queue, Tick extra_col_delay); 641 642 /** 643 * For FR-FCFS policy reorder the read/write queue depending on row buffer 644 * hits and earliest bursts available in DRAM 645 * 646 * @param queue Queued requests to consider 647 * @param extra_col_delay Any extra delay due to a read/write switch 648 * @return true if a packet is scheduled to a rank which is available else 649 * false 650 */ 651 bool reorderQueue(std::deque<DRAMPacket*>& queue, Tick extra_col_delay); 652 653 /** 654 * Find which are the earliest banks ready to issue an activate 655 * for the enqueued requests. Assumes maximum of 64 banks per DIMM 656 * Also checks if the bank is already prepped. 657 * 658 * @param queue Queued requests to consider 659 * @param time of seamless burst command 660 * @return One-hot encoded mask of bank indices 661 * @return boolean indicating burst can issue seamlessly, with no gaps 662 */ 663 std::pair<uint64_t, bool> minBankPrep(const std::deque<DRAMPacket*>& queue, 664 Tick min_col_at) const; 665 666 /** 667 * Keep track of when row activations happen, in order to enforce 668 * the maximum number of activations in the activation window. The 669 * method updates the time that the banks become available based 670 * on the current limits. 671 * 672 * @param rank_ref Reference to the rank 673 * @param bank_ref Reference to the bank 674 * @param act_tick Time when the activation takes place 675 * @param row Index of the row 676 */ 677 void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick, 678 uint32_t row); 679 680 /** 681 * Precharge a given bank and also update when the precharge is 682 * done. This will also deal with any stats related to the 683 * accesses to the open page. 
684 * 685 * @param rank_ref The rank to precharge 686 * @param bank_ref The bank to precharge 687 * @param pre_at Time when the precharge takes place 688 * @param trace Is this an auto precharge then do not add to trace 689 */ 690 void prechargeBank(Rank& rank_ref, Bank& bank_ref, 691 Tick pre_at, bool trace = true); 692 693 /** 694 * Used for debugging to observe the contents of the queues. 695 */ 696 void printQs() const; 697 698 /** 699 * Burst-align an address. 700 * 701 * @param addr The potentially unaligned address 702 * 703 * @return An address aligned to a DRAM burst 704 */ 705 Addr burstAlign(Addr addr) const { return (addr & ~(Addr(burstSize - 1))); } 706 707 /** 708 * The controller's main read and write queues 709 */ 710 std::deque<DRAMPacket*> readQueue; 711 std::deque<DRAMPacket*> writeQueue; 712 713 /** 714 * To avoid iterating over the write queue to check for 715 * overlapping transactions, maintain a set of burst addresses 716 * that are currently queued. Since we merge writes to the same 717 * location we never have more than one address to the same burst 718 * address. 719 */ 720 std::unordered_set<Addr> isInWriteQueue; 721 722 /** 723 * Response queue where read packets wait after we're done working 724 * with them, but it's not time to send the response yet. The 725 * responses are stored seperately mostly to keep the code clean 726 * and help with events scheduling. For all logical purposes such 727 * as sizing the read queue, this and the main read queue need to 728 * be added together. 729 */ 730 std::deque<DRAMPacket*> respQueue; 731 732 /** 733 * Vector of ranks 734 */ 735 std::vector<Rank*> ranks; 736 737 /** 738 * The following are basic design parameters of the memory 739 * controller, and are initialized based on parameter values. 740 * The rowsPerBank is determined based on the capacity, number of 741 * ranks and banks, the burst size, and the row buffer size. 
742 */ 743 const uint32_t deviceSize; 744 const uint32_t deviceBusWidth; 745 const uint32_t burstLength; 746 const uint32_t deviceRowBufferSize; 747 const uint32_t devicesPerRank; 748 const uint32_t burstSize; 749 const uint32_t rowBufferSize; 750 const uint32_t columnsPerRowBuffer; 751 const uint32_t columnsPerStripe; 752 const uint32_t ranksPerChannel; 753 const uint32_t bankGroupsPerRank; 754 const bool bankGroupArch; 755 const uint32_t banksPerRank; 756 const uint32_t channels; 757 uint32_t rowsPerBank; 758 const uint32_t readBufferSize; 759 const uint32_t writeBufferSize; 760 const uint32_t writeHighThreshold; 761 const uint32_t writeLowThreshold; 762 const uint32_t minWritesPerSwitch; 763 uint32_t writesThisTime; 764 uint32_t readsThisTime; 765 766 /** 767 * Basic memory timing parameters initialized based on parameter 768 * values. 769 */ 770 const Tick M5_CLASS_VAR_USED tCK; 771 const Tick tWTR; 772 const Tick tRTW; 773 const Tick tCS; 774 const Tick tBURST; 775 const Tick tCCD_L; 776 const Tick tRCD; 777 const Tick tCL; 778 const Tick tRP; 779 const Tick tRAS; 780 const Tick tWR; 781 const Tick tRTP; 782 const Tick tRFC; 783 const Tick tREFI; 784 const Tick tRRD; 785 const Tick tRRD_L; 786 const Tick tXAW; 787 const Tick tXP; 788 const Tick tXS; 789 const uint32_t activationLimit; 790 791 /** 792 * Memory controller configuration initialized based on parameter 793 * values. 794 */ 795 Enums::MemSched memSchedPolicy; 796 Enums::AddrMap addrMapping; 797 Enums::PageManage pageMgmt; 798 799 /** 800 * Max column accesses (read and write) per row, before forefully 801 * closing it. 802 */ 803 const uint32_t maxAccessesPerRow; 804 805 /** 806 * Pipeline latency of the controller frontend. The frontend 807 * contribution is added to writes (that complete when they are in 808 * the write buffer) and reads that are serviced the write buffer. 809 */ 810 const Tick frontendLatency; 811 812 /** 813 * Pipeline latency of the backend and PHY. 
Along with the 814 * frontend contribution, this latency is added to reads serviced 815 * by the DRAM. 816 */ 817 const Tick backendLatency; 818 819 /** 820 * Till when has the main data bus been spoken for already? 821 */ 822 Tick busBusyUntil; 823 824 Tick prevArrival; 825 826 /** 827 * The soonest you have to start thinking about the next request 828 * is the longest access time that can occur before 829 * busBusyUntil. Assuming you need to precharge, open a new row, 830 * and access, it is tRP + tRCD + tCL. 831 */ 832 Tick nextReqTime; 833 834 // All statistics that the model needs to capture 835 Stats::Scalar readReqs; 836 Stats::Scalar writeReqs; 837 Stats::Scalar readBursts; 838 Stats::Scalar writeBursts; 839 Stats::Scalar bytesReadDRAM; 840 Stats::Scalar bytesReadWrQ; 841 Stats::Scalar bytesWritten; 842 Stats::Scalar bytesReadSys; 843 Stats::Scalar bytesWrittenSys; 844 Stats::Scalar servicedByWrQ; 845 Stats::Scalar mergedWrBursts; 846 Stats::Scalar neitherReadNorWrite; 847 Stats::Vector perBankRdBursts; 848 Stats::Vector perBankWrBursts; 849 Stats::Scalar numRdRetry; 850 Stats::Scalar numWrRetry; 851 Stats::Scalar totGap; 852 Stats::Vector readPktSize; 853 Stats::Vector writePktSize; 854 Stats::Vector rdQLenPdf; 855 Stats::Vector wrQLenPdf; 856 Stats::Histogram bytesPerActivate; 857 Stats::Histogram rdPerTurnAround; 858 Stats::Histogram wrPerTurnAround; 859 860 // Latencies summed over all requests 861 Stats::Scalar totQLat; 862 Stats::Scalar totMemAccLat; 863 Stats::Scalar totBusLat; 864 865 // Average latencies per request 866 Stats::Formula avgQLat; 867 Stats::Formula avgBusLat; 868 Stats::Formula avgMemAccLat; 869 870 // Average bandwidth 871 Stats::Formula avgRdBW; 872 Stats::Formula avgWrBW; 873 Stats::Formula avgRdBWSys; 874 Stats::Formula avgWrBWSys; 875 Stats::Formula peakBW; 876 Stats::Formula busUtil; 877 Stats::Formula busUtilRead; 878 Stats::Formula busUtilWrite; 879 880 // Average queue lengths 881 Stats::Average avgRdQLen; 882 Stats::Average 
avgWrQLen; 883 884 // Row hit count and rate 885 Stats::Scalar readRowHits; 886 Stats::Scalar writeRowHits; 887 Stats::Formula readRowHitRate; 888 Stats::Formula writeRowHitRate; 889 Stats::Formula avgGap; 890 891 // DRAM Power Calculation 892 Stats::Formula pageHitRate; 893 894 // Holds the value of the rank of burst issued 895 uint8_t activeRank; 896 897 // timestamp offset 898 uint64_t timeStampOffset; 899 900 /** 901 * Upstream caches need this packet until true is returned, so 902 * hold it for deletion until a subsequent call 903 */ 904 std::unique_ptr<Packet> pendingDelete; 905 906 /** 907 * This function increments the energy when called. If stats are 908 * dumped periodically, note accumulated energy values will 909 * appear in the stats (even if the stats are reset). This is a 910 * result of the energy values coming from DRAMPower, and there 911 * is currently no support for resetting the state. 912 * 913 * @param rank Currrent rank 914 */ 915 void updatePowerStats(Rank& rank_ref); 916 917 /** 918 * Function for sorting Command structures based on timeStamp 919 * 920 * @param a Memory Command 921 * @param next Memory Command 922 * @return true if timeStamp of Command 1 < timeStamp of Command 2 923 */ 924 static bool sortTime(const Command& cmd, const Command& cmd_next) { 925 return cmd.timeStamp < cmd_next.timeStamp; 926 }; 927 928 public: 929 930 void regStats() override; 931 932 DRAMCtrl(const DRAMCtrlParams* p); 933 934 DrainState drain() override; 935 936 virtual BaseSlavePort& getSlavePort(const std::string& if_name, 937 PortID idx = InvalidPortID) override; 938 939 virtual void init() override; 940 virtual void startup() override; 941 virtual void drainResume() override; 942 943 /** 944 * Return true once refresh is complete for all ranks and there are no 945 * additional commands enqueued. 
(only evaluated when draining) 946 * This will ensure that all banks are closed, power state is IDLE, and 947 * power stats have been updated 948 * 949 * @return true if all ranks have refreshed, with no commands enqueued 950 * 951 */ 952 bool allRanksDrained() const; 953 954 protected: 955 956 Tick recvAtomic(PacketPtr pkt); 957 void recvFunctional(PacketPtr pkt); 958 bool recvTimingReq(PacketPtr pkt); 959 960}; 961 962#endif //__MEM_DRAM_CTRL_HH__
| 582 }; 583 584 // define the process to compute stats on simulation exit 585 // defined per rank as the per rank stats are based on state 586 // transition and periodically updated, requiring re-sync at 587 // exit. 588 class RankDumpCallback : public Callback 589 { 590 Rank *ranks; 591 public: 592 RankDumpCallback(Rank *r) : ranks(r) {} 593 virtual void process() { ranks->computeStats(); }; 594 }; 595 596 /** 597 * A burst helper helps organize and manage a packet that is larger than 598 * the DRAM burst size. A system packet that is larger than the burst size 599 * is split into multiple DRAM packets and all those DRAM packets point to 600 * a single burst helper such that we know when the whole packet is served. 601 */ 602 class BurstHelper { 603 604 public: 605 606 /** Number of DRAM bursts requred for a system packet **/ 607 const unsigned int burstCount; 608 609 /** Number of DRAM bursts serviced so far for a system packet **/ 610 unsigned int burstsServiced; 611 612 BurstHelper(unsigned int _burstCount) 613 : burstCount(_burstCount), burstsServiced(0) 614 { } 615 }; 616 617 /** 618 * A DRAM packet stores packets along with the timestamp of when 619 * the packet entered the queue, and also the decoded address. 620 */ 621 class DRAMPacket { 622 623 public: 624 625 /** When did request enter the controller */ 626 const Tick entryTime; 627 628 /** When will request leave the controller */ 629 Tick readyTime; 630 631 /** This comes from the outside world */ 632 const PacketPtr pkt; 633 634 const bool isRead; 635 636 /** Will be populated by address decoder */ 637 const uint8_t rank; 638 const uint8_t bank; 639 const uint32_t row; 640 641 /** 642 * Bank id is calculated considering banks in all the ranks 643 * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and 644 * bankId = 8 --> rank1, bank0 645 */ 646 const uint16_t bankId; 647 648 /** 649 * The starting address of the DRAM packet. 
650 * This address could be unaligned to burst size boundaries. The 651 * reason is to keep the address offset so we can accurately check 652 * incoming read packets with packets in the write queue. 653 */ 654 Addr addr; 655 656 /** 657 * The size of this dram packet in bytes 658 * It is always equal or smaller than DRAM burst size 659 */ 660 unsigned int size; 661 662 /** 663 * A pointer to the BurstHelper if this DRAMPacket is a split packet 664 * If not a split packet (common case), this is set to NULL 665 */ 666 BurstHelper* burstHelper; 667 Bank& bankRef; 668 Rank& rankRef; 669 670 DRAMPacket(PacketPtr _pkt, bool is_read, uint8_t _rank, uint8_t _bank, 671 uint32_t _row, uint16_t bank_id, Addr _addr, 672 unsigned int _size, Bank& bank_ref, Rank& rank_ref) 673 : entryTime(curTick()), readyTime(curTick()), 674 pkt(_pkt), isRead(is_read), rank(_rank), bank(_bank), row(_row), 675 bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), 676 bankRef(bank_ref), rankRef(rank_ref) 677 { } 678 679 }; 680 681 /** 682 * Bunch of things requires to setup "events" in gem5 683 * When event "respondEvent" occurs for example, the method 684 * processRespondEvent is called; no parameters are allowed 685 * in these methods 686 */ 687 void processNextReqEvent(); 688 EventWrapper<DRAMCtrl,&DRAMCtrl::processNextReqEvent> nextReqEvent; 689 690 void processRespondEvent(); 691 EventWrapper<DRAMCtrl, &DRAMCtrl::processRespondEvent> respondEvent; 692 693 /** 694 * Check if the read queue has room for more entries 695 * 696 * @param pktCount The number of entries needed in the read queue 697 * @return true if read queue is full, false otherwise 698 */ 699 bool readQueueFull(unsigned int pktCount) const; 700 701 /** 702 * Check if the write queue has room for more entries 703 * 704 * @param pktCount The number of entries needed in the write queue 705 * @return true if write queue is full, false otherwise 706 */ 707 bool writeQueueFull(unsigned int pktCount) const; 708 709 /** 710 * 
When a new read comes in, first check if the write q has a 711 * pending request to the same address.\ If not, decode the 712 * address to populate rank/bank/row, create one or mutliple 713 * "dram_pkt", and push them to the back of the read queue.\ 714 * If this is the only 715 * read request in the system, schedule an event to start 716 * servicing it. 717 * 718 * @param pkt The request packet from the outside world 719 * @param pktCount The number of DRAM bursts the pkt 720 * translate to. If pkt size is larger then one full burst, 721 * then pktCount is greater than one. 722 */ 723 void addToReadQueue(PacketPtr pkt, unsigned int pktCount); 724 725 /** 726 * Decode the incoming pkt, create a dram_pkt and push to the 727 * back of the write queue. \If the write q length is more than 728 * the threshold specified by the user, ie the queue is beginning 729 * to get full, stop reads, and start draining writes. 730 * 731 * @param pkt The request packet from the outside world 732 * @param pktCount The number of DRAM bursts the pkt 733 * translate to. If pkt size is larger then one full burst, 734 * then pktCount is greater than one. 735 */ 736 void addToWriteQueue(PacketPtr pkt, unsigned int pktCount); 737 738 /** 739 * Actually do the DRAM access - figure out the latency it 740 * will take to service the req based on bank state, channel state etc 741 * and then update those states to account for this request.\ Based 742 * on this, update the packet's "readyTime" and move it to the 743 * response q from where it will eventually go back to the outside 744 * world. 745 * 746 * @param pkt The DRAM packet created from the outside world pkt 747 */ 748 void doDRAMAccess(DRAMPacket* dram_pkt); 749 750 /** 751 * When a packet reaches its "readyTime" in the response Q, 752 * use the "access()" method in AbstractMemory to actually 753 * create the response packet, and send it back to the outside 754 * world requestor. 
755 * 756 * @param pkt The packet from the outside world 757 * @param static_latency Static latency to add before sending the packet 758 */ 759 void accessAndRespond(PacketPtr pkt, Tick static_latency); 760 761 /** 762 * Address decoder to figure out physical mapping onto ranks, 763 * banks, and rows. This function is called multiple times on the same 764 * system packet if the pakcet is larger than burst of the memory. The 765 * dramPktAddr is used for the offset within the packet. 766 * 767 * @param pkt The packet from the outside world 768 * @param dramPktAddr The starting address of the DRAM packet 769 * @param size The size of the DRAM packet in bytes 770 * @param isRead Is the request for a read or a write to DRAM 771 * @return A DRAMPacket pointer with the decoded information 772 */ 773 DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size, 774 bool isRead); 775 776 /** 777 * The memory schduler/arbiter - picks which request needs to 778 * go next, based on the specified policy such as FCFS or FR-FCFS 779 * and moves it to the head of the queue. 780 * Prioritizes accesses to the same rank as previous burst unless 781 * controller is switching command type. 
782 * 783 * @param queue Queued requests to consider 784 * @param extra_col_delay Any extra delay due to a read/write switch 785 * @return true if a packet is scheduled to a rank which is available else 786 * false 787 */ 788 bool chooseNext(std::deque<DRAMPacket*>& queue, Tick extra_col_delay); 789 790 /** 791 * For FR-FCFS policy reorder the read/write queue depending on row buffer 792 * hits and earliest bursts available in DRAM 793 * 794 * @param queue Queued requests to consider 795 * @param extra_col_delay Any extra delay due to a read/write switch 796 * @return true if a packet is scheduled to a rank which is available else 797 * false 798 */ 799 bool reorderQueue(std::deque<DRAMPacket*>& queue, Tick extra_col_delay); 800 801 /** 802 * Find which are the earliest banks ready to issue an activate 803 * for the enqueued requests. Assumes maximum of 64 banks per DIMM 804 * Also checks if the bank is already prepped. 805 * 806 * @param queue Queued requests to consider 807 * @param time of seamless burst command 808 * @return One-hot encoded mask of bank indices 809 * @return boolean indicating burst can issue seamlessly, with no gaps 810 */ 811 std::pair<uint64_t, bool> minBankPrep(const std::deque<DRAMPacket*>& queue, 812 Tick min_col_at) const; 813 814 /** 815 * Keep track of when row activations happen, in order to enforce 816 * the maximum number of activations in the activation window. The 817 * method updates the time that the banks become available based 818 * on the current limits. 819 * 820 * @param rank_ref Reference to the rank 821 * @param bank_ref Reference to the bank 822 * @param act_tick Time when the activation takes place 823 * @param row Index of the row 824 */ 825 void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick, 826 uint32_t row); 827 828 /** 829 * Precharge a given bank and also update when the precharge is 830 * done. This will also deal with any stats related to the 831 * accesses to the open page. 
832 * 833 * @param rank_ref The rank to precharge 834 * @param bank_ref The bank to precharge 835 * @param pre_at Time when the precharge takes place 836 * @param trace Is this an auto precharge then do not add to trace 837 */ 838 void prechargeBank(Rank& rank_ref, Bank& bank_ref, 839 Tick pre_at, bool trace = true); 840 841 /** 842 * Used for debugging to observe the contents of the queues. 843 */ 844 void printQs() const; 845 846 /** 847 * Burst-align an address. 848 * 849 * @param addr The potentially unaligned address 850 * 851 * @return An address aligned to a DRAM burst 852 */ 853 Addr burstAlign(Addr addr) const { return (addr & ~(Addr(burstSize - 1))); } 854 855 /** 856 * The controller's main read and write queues 857 */ 858 std::deque<DRAMPacket*> readQueue; 859 std::deque<DRAMPacket*> writeQueue; 860 861 /** 862 * To avoid iterating over the write queue to check for 863 * overlapping transactions, maintain a set of burst addresses 864 * that are currently queued. Since we merge writes to the same 865 * location we never have more than one address to the same burst 866 * address. 867 */ 868 std::unordered_set<Addr> isInWriteQueue; 869 870 /** 871 * Response queue where read packets wait after we're done working 872 * with them, but it's not time to send the response yet. The 873 * responses are stored seperately mostly to keep the code clean 874 * and help with events scheduling. For all logical purposes such 875 * as sizing the read queue, this and the main read queue need to 876 * be added together. 877 */ 878 std::deque<DRAMPacket*> respQueue; 879 880 /** 881 * Vector of ranks 882 */ 883 std::vector<Rank*> ranks; 884 885 /** 886 * The following are basic design parameters of the memory 887 * controller, and are initialized based on parameter values. 888 * The rowsPerBank is determined based on the capacity, number of 889 * ranks and banks, the burst size, and the row buffer size. 
890 */ 891 const uint32_t deviceSize; 892 const uint32_t deviceBusWidth; 893 const uint32_t burstLength; 894 const uint32_t deviceRowBufferSize; 895 const uint32_t devicesPerRank; 896 const uint32_t burstSize; 897 const uint32_t rowBufferSize; 898 const uint32_t columnsPerRowBuffer; 899 const uint32_t columnsPerStripe; 900 const uint32_t ranksPerChannel; 901 const uint32_t bankGroupsPerRank; 902 const bool bankGroupArch; 903 const uint32_t banksPerRank; 904 const uint32_t channels; 905 uint32_t rowsPerBank; 906 const uint32_t readBufferSize; 907 const uint32_t writeBufferSize; 908 const uint32_t writeHighThreshold; 909 const uint32_t writeLowThreshold; 910 const uint32_t minWritesPerSwitch; 911 uint32_t writesThisTime; 912 uint32_t readsThisTime; 913 914 /** 915 * Basic memory timing parameters initialized based on parameter 916 * values. 917 */ 918 const Tick M5_CLASS_VAR_USED tCK; 919 const Tick tWTR; 920 const Tick tRTW; 921 const Tick tCS; 922 const Tick tBURST; 923 const Tick tCCD_L; 924 const Tick tRCD; 925 const Tick tCL; 926 const Tick tRP; 927 const Tick tRAS; 928 const Tick tWR; 929 const Tick tRTP; 930 const Tick tRFC; 931 const Tick tREFI; 932 const Tick tRRD; 933 const Tick tRRD_L; 934 const Tick tXAW; 935 const Tick tXP; 936 const Tick tXS; 937 const uint32_t activationLimit; 938 939 /** 940 * Memory controller configuration initialized based on parameter 941 * values. 942 */ 943 Enums::MemSched memSchedPolicy; 944 Enums::AddrMap addrMapping; 945 Enums::PageManage pageMgmt; 946 947 /** 948 * Max column accesses (read and write) per row, before forefully 949 * closing it. 950 */ 951 const uint32_t maxAccessesPerRow; 952 953 /** 954 * Pipeline latency of the controller frontend. The frontend 955 * contribution is added to writes (that complete when they are in 956 * the write buffer) and reads that are serviced the write buffer. 957 */ 958 const Tick frontendLatency; 959 960 /** 961 * Pipeline latency of the backend and PHY. 
Along with the 962 * frontend contribution, this latency is added to reads serviced 963 * by the DRAM. 964 */ 965 const Tick backendLatency; 966 967 /** 968 * Till when has the main data bus been spoken for already? 969 */ 970 Tick busBusyUntil; 971 972 Tick prevArrival; 973 974 /** 975 * The soonest you have to start thinking about the next request 976 * is the longest access time that can occur before 977 * busBusyUntil. Assuming you need to precharge, open a new row, 978 * and access, it is tRP + tRCD + tCL. 979 */ 980 Tick nextReqTime; 981 982 // All statistics that the model needs to capture 983 Stats::Scalar readReqs; 984 Stats::Scalar writeReqs; 985 Stats::Scalar readBursts; 986 Stats::Scalar writeBursts; 987 Stats::Scalar bytesReadDRAM; 988 Stats::Scalar bytesReadWrQ; 989 Stats::Scalar bytesWritten; 990 Stats::Scalar bytesReadSys; 991 Stats::Scalar bytesWrittenSys; 992 Stats::Scalar servicedByWrQ; 993 Stats::Scalar mergedWrBursts; 994 Stats::Scalar neitherReadNorWrite; 995 Stats::Vector perBankRdBursts; 996 Stats::Vector perBankWrBursts; 997 Stats::Scalar numRdRetry; 998 Stats::Scalar numWrRetry; 999 Stats::Scalar totGap; 1000 Stats::Vector readPktSize; 1001 Stats::Vector writePktSize; 1002 Stats::Vector rdQLenPdf; 1003 Stats::Vector wrQLenPdf; 1004 Stats::Histogram bytesPerActivate; 1005 Stats::Histogram rdPerTurnAround; 1006 Stats::Histogram wrPerTurnAround; 1007 1008 // Latencies summed over all requests 1009 Stats::Scalar totQLat; 1010 Stats::Scalar totMemAccLat; 1011 Stats::Scalar totBusLat; 1012 1013 // Average latencies per request 1014 Stats::Formula avgQLat; 1015 Stats::Formula avgBusLat; 1016 Stats::Formula avgMemAccLat; 1017 1018 // Average bandwidth 1019 Stats::Formula avgRdBW; 1020 Stats::Formula avgWrBW; 1021 Stats::Formula avgRdBWSys; 1022 Stats::Formula avgWrBWSys; 1023 Stats::Formula peakBW; 1024 Stats::Formula busUtil; 1025 Stats::Formula busUtilRead; 1026 Stats::Formula busUtilWrite; 1027 1028 // Average queue lengths 1029 Stats::Average 
avgRdQLen; 1030 Stats::Average avgWrQLen; 1031 1032 // Row hit count and rate 1033 Stats::Scalar readRowHits; 1034 Stats::Scalar writeRowHits; 1035 Stats::Formula readRowHitRate; 1036 Stats::Formula writeRowHitRate; 1037 Stats::Formula avgGap; 1038 1039 // DRAM Power Calculation 1040 Stats::Formula pageHitRate; 1041 1042 // Holds the value of the rank of burst issued 1043 uint8_t activeRank; 1044 1045 // timestamp offset 1046 uint64_t timeStampOffset; 1047 1048 /** 1049 * Upstream caches need this packet until true is returned, so 1050 * hold it for deletion until a subsequent call 1051 */ 1052 std::unique_ptr<Packet> pendingDelete; 1053 1054 /** 1055 * This function increments the energy when called. If stats are 1056 * dumped periodically, note accumulated energy values will 1057 * appear in the stats (even if the stats are reset). This is a 1058 * result of the energy values coming from DRAMPower, and there 1059 * is currently no support for resetting the state. 1060 * 1061 * @param rank Currrent rank 1062 */ 1063 void updatePowerStats(Rank& rank_ref); 1064 1065 /** 1066 * Function for sorting Command structures based on timeStamp 1067 * 1068 * @param a Memory Command 1069 * @param next Memory Command 1070 * @return true if timeStamp of Command 1 < timeStamp of Command 2 1071 */ 1072 static bool sortTime(const Command& cmd, const Command& cmd_next) { 1073 return cmd.timeStamp < cmd_next.timeStamp; 1074 }; 1075 1076 public: 1077 1078 void regStats() override; 1079 1080 DRAMCtrl(const DRAMCtrlParams* p); 1081 1082 DrainState drain() override; 1083 1084 virtual BaseSlavePort& getSlavePort(const std::string& if_name, 1085 PortID idx = InvalidPortID) override; 1086 1087 virtual void init() override; 1088 virtual void startup() override; 1089 virtual void drainResume() override; 1090 1091 /** 1092 * Return true once refresh is complete for all ranks and there are no 1093 * additional commands enqueued. 
(only evaluated when draining) 1094 * This will ensure that all banks are closed, power state is IDLE, and 1095 * power stats have been updated 1096 * 1097 * @return true if all ranks have refreshed, with no commands enqueued 1098 * 1099 */ 1100 bool allRanksDrained() const; 1101 1102 protected: 1103 1104 Tick recvAtomic(PacketPtr pkt); 1105 void recvFunctional(PacketPtr pkt); 1106 bool recvTimingReq(PacketPtr pkt); 1107 1108}; 1109 1110#endif //__MEM_DRAM_CTRL_HH__
|