// dram_ctrl.hh revision 10394
1/* 2 * Copyright (c) 2012-2014 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2013 Amin Farmahini-Farahani 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Andreas Hansson 41 * Ani Udipi 42 * Neha Agarwal 43 */ 44 45/** 46 * @file 47 * DRAMCtrl declaration 48 */ 49 50#ifndef __MEM_DRAM_CTRL_HH__ 51#define __MEM_DRAM_CTRL_HH__ 52 53#include <deque> 54 55#include "base/statistics.hh" 56#include "enums/AddrMap.hh" 57#include "enums/MemSched.hh" 58#include "enums/PageManage.hh" 59#include "mem/abstract_mem.hh" 60#include "mem/qport.hh" 61#include "params/DRAMCtrl.hh" 62#include "sim/eventq.hh" 63 64/** 65 * The DRAM controller is a single-channel memory controller capturing 66 * the most important timing constraints associated with a 67 * contemporary DRAM. For multi-channel memory systems, the controller 68 * is combined with a crossbar model, with the channel address 69 * interleaving taking part in the crossbar. 70 * 71 * As a basic design principle, this controller 72 * model is not cycle callable, but instead uses events to: 1) decide 73 * when new decisions can be made, 2) when resources become available, 74 * 3) when things are to be considered done, and 4) when to send 75 * things back. Through these simple principles, the model delivers 76 * high performance, and lots of flexibility, allowing users to 77 * evaluate the system impact of a wide range of memory technologies, 78 * such as DDR3/4, LPDDR2/3/4, WideIO1/2, HBM and HMC. 
79 * 80 * For more details, please see Hansson et al, "Simulating DRAM 81 * controllers for future system architecture exploration", 82 * Proc. ISPASS, 2014. If you use this model as part of your research 83 * please cite the paper. 84 */ 85class DRAMCtrl : public AbstractMemory 86{ 87 88 private: 89 90 // For now, make use of a queued slave port to avoid dealing with 91 // flow control for the responses being sent back 92 class MemoryPort : public QueuedSlavePort 93 { 94 95 SlavePacketQueue queue; 96 DRAMCtrl& memory; 97 98 public: 99 100 MemoryPort(const std::string& name, DRAMCtrl& _memory); 101 102 protected: 103 104 Tick recvAtomic(PacketPtr pkt); 105 106 void recvFunctional(PacketPtr pkt); 107 108 bool recvTimingReq(PacketPtr); 109 110 virtual AddrRangeList getAddrRanges() const; 111 112 }; 113 114 /** 115 * Our incoming port, for a multi-ported controller add a crossbar 116 * in front of it 117 */ 118 MemoryPort port; 119 120 /** 121 * Remember if we have to retry a request when available. 122 */ 123 bool retryRdReq; 124 bool retryWrReq; 125 126 /** 127 * Bus state used to control the read/write switching and drive 128 * the scheduling of the next request. 129 */ 130 enum BusState { 131 READ = 0, 132 READ_TO_WRITE, 133 WRITE, 134 WRITE_TO_READ 135 }; 136 137 BusState busState; 138 139 /** List to keep track of activate ticks */ 140 std::vector<std::deque<Tick>> actTicks; 141 142 /** 143 * A basic class to track the bank state, i.e. what row is 144 * currently open (if any), when is the bank free to accept a new 145 * column (read/write) command, when can it be precharged, and 146 * when can it be activated. 147 * 148 * The bank also keeps track of how many bytes have been accessed 149 * in the open row since it was opened. 
150 */ 151 class Bank 152 { 153 154 public: 155 156 static const uint32_t NO_ROW = -1; 157 158 uint32_t openRow; 159 uint8_t rank; 160 uint8_t bank; 161 uint8_t bankgr; 162 163 Tick colAllowedAt; 164 Tick preAllowedAt; 165 Tick actAllowedAt; 166 167 uint32_t rowAccesses; 168 uint32_t bytesAccessed; 169 170 Bank() : 171 openRow(NO_ROW), rank(0), bank(0), bankgr(0), 172 colAllowedAt(0), preAllowedAt(0), actAllowedAt(0), 173 rowAccesses(0), bytesAccessed(0) 174 { } 175 }; 176 177 /** 178 * A burst helper helps organize and manage a packet that is larger than 179 * the DRAM burst size. A system packet that is larger than the burst size 180 * is split into multiple DRAM packets and all those DRAM packets point to 181 * a single burst helper such that we know when the whole packet is served. 182 */ 183 class BurstHelper { 184 185 public: 186 187 /** Number of DRAM bursts requred for a system packet **/ 188 const unsigned int burstCount; 189 190 /** Number of DRAM bursts serviced so far for a system packet **/ 191 unsigned int burstsServiced; 192 193 BurstHelper(unsigned int _burstCount) 194 : burstCount(_burstCount), burstsServiced(0) 195 { } 196 }; 197 198 /** 199 * A DRAM packet stores packets along with the timestamp of when 200 * the packet entered the queue, and also the decoded address. 
201 */ 202 class DRAMPacket { 203 204 public: 205 206 /** When did request enter the controller */ 207 const Tick entryTime; 208 209 /** When will request leave the controller */ 210 Tick readyTime; 211 212 /** This comes from the outside world */ 213 const PacketPtr pkt; 214 215 const bool isRead; 216 217 /** Will be populated by address decoder */ 218 const uint8_t rank; 219 const uint8_t bank; 220 const uint32_t row; 221 222 /** 223 * Bank id is calculated considering banks in all the ranks 224 * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and 225 * bankId = 8 --> rank1, bank0 226 */ 227 const uint16_t bankId; 228 229 /** 230 * The starting address of the DRAM packet. 231 * This address could be unaligned to burst size boundaries. The 232 * reason is to keep the address offset so we can accurately check 233 * incoming read packets with packets in the write queue. 234 */ 235 Addr addr; 236 237 /** 238 * The size of this dram packet in bytes 239 * It is always equal or smaller than DRAM burst size 240 */ 241 unsigned int size; 242 243 /** 244 * A pointer to the BurstHelper if this DRAMPacket is a split packet 245 * If not a split packet (common case), this is set to NULL 246 */ 247 BurstHelper* burstHelper; 248 Bank& bankRef; 249 250 DRAMPacket(PacketPtr _pkt, bool is_read, uint8_t _rank, uint8_t _bank, 251 uint32_t _row, uint16_t bank_id, Addr _addr, 252 unsigned int _size, Bank& bank_ref) 253 : entryTime(curTick()), readyTime(curTick()), 254 pkt(_pkt), isRead(is_read), rank(_rank), bank(_bank), row(_row), 255 bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), 256 bankRef(bank_ref) 257 { } 258 259 }; 260 261 /** 262 * Bunch of things requires to setup "events" in gem5 263 * When event "respondEvent" occurs for example, the method 264 * processRespondEvent is called; no parameters are allowed 265 * in these methods 266 */ 267 void processNextReqEvent(); 268 EventWrapper<DRAMCtrl,&DRAMCtrl::processNextReqEvent> nextReqEvent; 269 270 
void processRespondEvent(); 271 EventWrapper<DRAMCtrl, &DRAMCtrl::processRespondEvent> respondEvent; 272 273 void processActivateEvent(); 274 EventWrapper<DRAMCtrl, &DRAMCtrl::processActivateEvent> activateEvent; 275 276 void processPrechargeEvent(); 277 EventWrapper<DRAMCtrl, &DRAMCtrl::processPrechargeEvent> prechargeEvent; 278 279 void processRefreshEvent(); 280 EventWrapper<DRAMCtrl, &DRAMCtrl::processRefreshEvent> refreshEvent; 281 282 void processPowerEvent(); 283 EventWrapper<DRAMCtrl,&DRAMCtrl::processPowerEvent> powerEvent; 284 285 /** 286 * Check if the read queue has room for more entries 287 * 288 * @param pktCount The number of entries needed in the read queue 289 * @return true if read queue is full, false otherwise 290 */ 291 bool readQueueFull(unsigned int pktCount) const; 292 293 /** 294 * Check if the write queue has room for more entries 295 * 296 * @param pktCount The number of entries needed in the write queue 297 * @return true if write queue is full, false otherwise 298 */ 299 bool writeQueueFull(unsigned int pktCount) const; 300 301 /** 302 * When a new read comes in, first check if the write q has a 303 * pending request to the same address.\ If not, decode the 304 * address to populate rank/bank/row, create one or mutliple 305 * "dram_pkt", and push them to the back of the read queue.\ 306 * If this is the only 307 * read request in the system, schedule an event to start 308 * servicing it. 309 * 310 * @param pkt The request packet from the outside world 311 * @param pktCount The number of DRAM bursts the pkt 312 * translate to. If pkt size is larger then one full burst, 313 * then pktCount is greater than one. 314 */ 315 void addToReadQueue(PacketPtr pkt, unsigned int pktCount); 316 317 /** 318 * Decode the incoming pkt, create a dram_pkt and push to the 319 * back of the write queue. 
\If the write q length is more than 320 * the threshold specified by the user, ie the queue is beginning 321 * to get full, stop reads, and start draining writes. 322 * 323 * @param pkt The request packet from the outside world 324 * @param pktCount The number of DRAM bursts the pkt 325 * translate to. If pkt size is larger then one full burst, 326 * then pktCount is greater than one. 327 */ 328 void addToWriteQueue(PacketPtr pkt, unsigned int pktCount); 329 330 /** 331 * Actually do the DRAM access - figure out the latency it 332 * will take to service the req based on bank state, channel state etc 333 * and then update those states to account for this request.\ Based 334 * on this, update the packet's "readyTime" and move it to the 335 * response q from where it will eventually go back to the outside 336 * world. 337 * 338 * @param pkt The DRAM packet created from the outside world pkt 339 */ 340 void doDRAMAccess(DRAMPacket* dram_pkt); 341 342 /** 343 * When a packet reaches its "readyTime" in the response Q, 344 * use the "access()" method in AbstractMemory to actually 345 * create the response packet, and send it back to the outside 346 * world requestor. 347 * 348 * @param pkt The packet from the outside world 349 * @param static_latency Static latency to add before sending the packet 350 */ 351 void accessAndRespond(PacketPtr pkt, Tick static_latency); 352 353 /** 354 * Address decoder to figure out physical mapping onto ranks, 355 * banks, and rows. This function is called multiple times on the same 356 * system packet if the pakcet is larger than burst of the memory. The 357 * dramPktAddr is used for the offset within the packet. 
358 * 359 * @param pkt The packet from the outside world 360 * @param dramPktAddr The starting address of the DRAM packet 361 * @param size The size of the DRAM packet in bytes 362 * @param isRead Is the request for a read or a write to DRAM 363 * @return A DRAMPacket pointer with the decoded information 364 */ 365 DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size, 366 bool isRead); 367 368 /** 369 * The memory schduler/arbiter - picks which request needs to 370 * go next, based on the specified policy such as FCFS or FR-FCFS 371 * and moves it to the head of the queue. 372 * Prioritizes accesses to the same rank as previous burst unless 373 * controller is switching command type. 374 * 375 * @param queue Queued requests to consider 376 * @param switched_cmd_type Command type is changing 377 */ 378 void chooseNext(std::deque<DRAMPacket*>& queue, bool switched_cmd_type); 379 380 /** 381 * For FR-FCFS policy reorder the read/write queue depending on row buffer 382 * hits and earliest banks available in DRAM 383 * Prioritizes accesses to the same rank as previous burst unless 384 * controller is switching command type. 385 * 386 * @param queue Queued requests to consider 387 * @param switched_cmd_type Command type is changing 388 */ 389 void reorderQueue(std::deque<DRAMPacket*>& queue, bool switched_cmd_type); 390 391 /** 392 * Find which are the earliest banks ready to issue an activate 393 * for the enqueued requests. Assumes maximum of 64 banks per DIMM 394 * Also checks if the bank is already prepped. 395 * 396 * @param queue Queued requests to consider 397 * @param switched_cmd_type Command type is changing 398 * @return One-hot encoded mask of bank indices 399 */ 400 uint64_t minBankPrep(const std::deque<DRAMPacket*>& queue, 401 bool switched_cmd_type) const; 402 403 /** 404 * Keep track of when row activations happen, in order to enforce 405 * the maximum number of activations in the activation window. 
The 406 * method updates the time that the banks become available based 407 * on the current limits. 408 * 409 * @param bank Reference to the bank 410 * @param act_tick Time when the activation takes place 411 * @param row Index of the row 412 */ 413 void activateBank(Bank& bank, Tick act_tick, uint32_t row); 414 415 /** 416 * Precharge a given bank and also update when the precharge is 417 * done. This will also deal with any stats related to the 418 * accesses to the open page. 419 * 420 * @param bank_ref The bank to precharge 421 * @param pre_at Time when the precharge takes place 422 * @param trace Is this an auto precharge then do not add to trace 423 */ 424 void prechargeBank(Bank& bank_ref, Tick pre_at, bool trace = true); 425 426 /** 427 * Used for debugging to observe the contents of the queues. 428 */ 429 void printQs() const; 430 431 /** 432 * The controller's main read and write queues 433 */ 434 std::deque<DRAMPacket*> readQueue; 435 std::deque<DRAMPacket*> writeQueue; 436 437 /** 438 * Response queue where read packets wait after we're done working 439 * with them, but it's not time to send the response yet. The 440 * responses are stored seperately mostly to keep the code clean 441 * and help with events scheduling. For all logical purposes such 442 * as sizing the read queue, this and the main read queue need to 443 * be added together. 444 */ 445 std::deque<DRAMPacket*> respQueue; 446 447 /** 448 * If we need to drain, keep the drain manager around until we're 449 * done here. 450 */ 451 DrainManager *drainManager; 452 453 /** 454 * Multi-dimensional vector of banks, first dimension is ranks, 455 * second is bank 456 */ 457 std::vector<std::vector<Bank> > banks; 458 459 /** 460 * The following are basic design parameters of the memory 461 * controller, and are initialized based on parameter values. 462 * The rowsPerBank is determined based on the capacity, number of 463 * ranks and banks, the burst size, and the row buffer size. 
464 */ 465 const uint32_t deviceBusWidth; 466 const uint32_t burstLength; 467 const uint32_t deviceRowBufferSize; 468 const uint32_t devicesPerRank; 469 const uint32_t burstSize; 470 const uint32_t rowBufferSize; 471 const uint32_t columnsPerRowBuffer; 472 const uint32_t columnsPerStripe; 473 const uint32_t ranksPerChannel; 474 const uint32_t bankGroupsPerRank; 475 const bool bankGroupArch; 476 const uint32_t banksPerRank; 477 const uint32_t channels; 478 uint32_t rowsPerBank; 479 const uint32_t readBufferSize; 480 const uint32_t writeBufferSize; 481 const uint32_t writeHighThreshold; 482 const uint32_t writeLowThreshold; 483 const uint32_t minWritesPerSwitch; 484 uint32_t writesThisTime; 485 uint32_t readsThisTime; 486 487 /** 488 * Basic memory timing parameters initialized based on parameter 489 * values. 490 */ 491 const Tick M5_CLASS_VAR_USED tCK; 492 const Tick tWTR; 493 const Tick tRTW; 494 const Tick tCS; 495 const Tick tBURST; 496 const Tick tCCD_L; 497 const Tick tRCD; 498 const Tick tCL; 499 const Tick tRP; 500 const Tick tRAS; 501 const Tick tWR; 502 const Tick tRTP; 503 const Tick tRFC; 504 const Tick tREFI; 505 const Tick tRRD; 506 const Tick tRRD_L; 507 const Tick tXAW; 508 const uint32_t activationLimit; 509 510 /** 511 * Memory controller configuration initialized based on parameter 512 * values. 513 */ 514 Enums::MemSched memSchedPolicy; 515 Enums::AddrMap addrMapping; 516 Enums::PageManage pageMgmt; 517 518 /** 519 * Max column accesses (read and write) per row, before forefully 520 * closing it. 521 */ 522 const uint32_t maxAccessesPerRow; 523 524 /** 525 * Pipeline latency of the controller frontend. The frontend 526 * contribution is added to writes (that complete when they are in 527 * the write buffer) and reads that are serviced the write buffer. 528 */ 529 const Tick frontendLatency; 530 531 /** 532 * Pipeline latency of the backend and PHY. 
Along with the 533 * frontend contribution, this latency is added to reads serviced 534 * by the DRAM. 535 */ 536 const Tick backendLatency; 537 538 /** 539 * Till when has the main data bus been spoken for already? 540 */ 541 Tick busBusyUntil; 542 543 /** 544 * Keep track of when a refresh is due. 545 */ 546 Tick refreshDueAt; 547 548 /** 549 * The refresh state is used to control the progress of the 550 * refresh scheduling. When normal operation is in progress the 551 * refresh state is idle. From there, it progresses to the refresh 552 * drain state once tREFI has passed. The refresh drain state 553 * captures the DRAM row active state, as it will stay there until 554 * all ongoing accesses complete. Thereafter all banks are 555 * precharged, and lastly, the DRAM is refreshed. 556 */ 557 enum RefreshState { 558 REF_IDLE = 0, 559 REF_DRAIN, 560 REF_PRE, 561 REF_RUN 562 }; 563 564 RefreshState refreshState; 565 566 /** 567 * The power state captures the different operational states of 568 * the DRAM and interacts with the bus read/write state machine, 569 * and the refresh state machine. In the idle state all banks are 570 * precharged. From there we either go to an auto refresh (as 571 * determined by the refresh state machine), or to a precharge 572 * power down mode. From idle the memory can also go to the active 573 * state (with one or more banks active), and in turn from there 574 * to active power down. At the moment we do not capture the deep 575 * power down and self-refresh state. 576 */ 577 enum PowerState { 578 PWR_IDLE = 0, 579 PWR_REF, 580 PWR_PRE_PDN, 581 PWR_ACT, 582 PWR_ACT_PDN 583 }; 584 585 /** 586 * Since we are taking decisions out of order, we need to keep 587 * track of what power transition is happening at what time, such 588 * that we can go back in time and change history. 
For example, if 589 * we precharge all banks and schedule going to the idle state, we 590 * might at a later point decide to activate a bank before the 591 * transition to idle would have taken place. 592 */ 593 PowerState pwrStateTrans; 594 595 /** 596 * Current power state. 597 */ 598 PowerState pwrState; 599 600 /** 601 * Schedule a power state transition in the future, and 602 * potentially override an already scheduled transition. 603 * 604 * @param pwr_state Power state to transition to 605 * @param tick Tick when transition should take place 606 */ 607 void schedulePowerEvent(PowerState pwr_state, Tick tick); 608 609 Tick prevArrival; 610 611 /** 612 * The soonest you have to start thinking about the next request 613 * is the longest access time that can occur before 614 * busBusyUntil. Assuming you need to precharge, open a new row, 615 * and access, it is tRP + tRCD + tCL. 616 */ 617 Tick nextReqTime; 618 619 // All statistics that the model needs to capture 620 Stats::Scalar readReqs; 621 Stats::Scalar writeReqs; 622 Stats::Scalar readBursts; 623 Stats::Scalar writeBursts; 624 Stats::Scalar bytesReadDRAM; 625 Stats::Scalar bytesReadWrQ; 626 Stats::Scalar bytesWritten; 627 Stats::Scalar bytesReadSys; 628 Stats::Scalar bytesWrittenSys; 629 Stats::Scalar servicedByWrQ; 630 Stats::Scalar mergedWrBursts; 631 Stats::Scalar neitherReadNorWrite; 632 Stats::Vector perBankRdBursts; 633 Stats::Vector perBankWrBursts; 634 Stats::Scalar numRdRetry; 635 Stats::Scalar numWrRetry; 636 Stats::Scalar totGap; 637 Stats::Vector readPktSize; 638 Stats::Vector writePktSize; 639 Stats::Vector rdQLenPdf; 640 Stats::Vector wrQLenPdf; 641 Stats::Histogram bytesPerActivate; 642 Stats::Histogram rdPerTurnAround; 643 Stats::Histogram wrPerTurnAround; 644 645 // Latencies summed over all requests 646 Stats::Scalar totQLat; 647 Stats::Scalar totMemAccLat; 648 Stats::Scalar totBusLat; 649 650 // Average latencies per request 651 Stats::Formula avgQLat; 652 Stats::Formula avgBusLat; 653 
Stats::Formula avgMemAccLat; 654 655 // Average bandwidth 656 Stats::Formula avgRdBW; 657 Stats::Formula avgWrBW; 658 Stats::Formula avgRdBWSys; 659 Stats::Formula avgWrBWSys; 660 Stats::Formula peakBW; 661 Stats::Formula busUtil; 662 Stats::Formula busUtilRead; 663 Stats::Formula busUtilWrite; 664 665 // Average queue lengths 666 Stats::Average avgRdQLen; 667 Stats::Average avgWrQLen; 668 669 // Row hit count and rate 670 Stats::Scalar readRowHits; 671 Stats::Scalar writeRowHits; 672 Stats::Formula readRowHitRate; 673 Stats::Formula writeRowHitRate; 674 Stats::Formula avgGap; 675 676 // DRAM Power Calculation 677 Stats::Formula pageHitRate; 678 Stats::Vector pwrStateTime; 679 680 // Track when we transitioned to the current power state 681 Tick pwrStateTick; 682 683 // To track number of banks which are currently active 684 unsigned int numBanksActive; 685 686 // Holds the value of the rank of burst issued 687 uint8_t activeRank; 688 689 /** @todo this is a temporary workaround until the 4-phase code is 690 * committed. upstream caches needs this packet until true is returned, so 691 * hold onto it for deletion until a subsequent call 692 */ 693 std::vector<PacketPtr> pendingDelete; 694 695 public: 696 697 void regStats(); 698 699 DRAMCtrl(const DRAMCtrlParams* p); 700 701 unsigned int drain(DrainManager* dm); 702 703 virtual BaseSlavePort& getSlavePort(const std::string& if_name, 704 PortID idx = InvalidPortID); 705 706 virtual void init(); 707 virtual void startup(); 708 709 protected: 710 711 Tick recvAtomic(PacketPtr pkt); 712 void recvFunctional(PacketPtr pkt); 713 bool recvTimingReq(PacketPtr pkt); 714 715}; 716 717#endif //__MEM_DRAM_CTRL_HH__ 718