// dram_ctrl.hh revision 10210:793e5ff26e0b
1/* 2 * Copyright (c) 2012-2014 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2013 Amin Farmahini-Farahani 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Andreas Hansson 41 * Ani Udipi 42 * Neha Agarwal 43 */ 44 45/** 46 * @file 47 * DRAMCtrl declaration 48 */ 49 50#ifndef __MEM_DRAM_CTRL_HH__ 51#define __MEM_DRAM_CTRL_HH__ 52 53#include <deque> 54 55#include "base/statistics.hh" 56#include "enums/AddrMap.hh" 57#include "enums/MemSched.hh" 58#include "enums/PageManage.hh" 59#include "mem/abstract_mem.hh" 60#include "mem/qport.hh" 61#include "params/DRAMCtrl.hh" 62#include "sim/eventq.hh" 63 64/** 65 * The DRAM controller is a basic single-channel memory controller 66 * aiming to mimic a high-level DRAM controller and the most important 67 * timing constraints associated with the DRAM. The focus is really on 68 * modelling the impact on the system rather than the DRAM itself, 69 * hence the focus is on the controller model and not on the 70 * memory. By adhering to the correct timing constraints, ultimately 71 * there is no need for a memory model in addition to the controller 72 * model. 73 * 74 * As a basic design principle, this controller is not cycle callable, 75 * but instead uses events to decide when new decisions can be made, 76 * when resources become available, when things are to be considered 77 * done, and when to send things back. 
Through these simple 78 * principles, we achieve a performant model that is not 79 * cycle-accurate, but enables us to evaluate the system impact of a 80 * wide range of memory technologies, and also collect statistics 81 * about the use of the memory. 82 */ 83class DRAMCtrl : public AbstractMemory 84{ 85 86 private: 87 88 // For now, make use of a queued slave port to avoid dealing with 89 // flow control for the responses being sent back 90 class MemoryPort : public QueuedSlavePort 91 { 92 93 SlavePacketQueue queue; 94 DRAMCtrl& memory; 95 96 public: 97 98 MemoryPort(const std::string& name, DRAMCtrl& _memory); 99 100 protected: 101 102 Tick recvAtomic(PacketPtr pkt); 103 104 void recvFunctional(PacketPtr pkt); 105 106 bool recvTimingReq(PacketPtr); 107 108 virtual AddrRangeList getAddrRanges() const; 109 110 }; 111 112 /** 113 * Our incoming port, for a multi-ported controller add a crossbar 114 * in front of it 115 */ 116 MemoryPort port; 117 118 /** 119 * Remember if we have to retry a request when available. 120 */ 121 bool retryRdReq; 122 bool retryWrReq; 123 124 /** 125 * Remember that a row buffer hit occured 126 */ 127 bool rowHitFlag; 128 129 /** 130 * Bus state used to control the read/write switching and drive 131 * the scheduling of the next request. 132 */ 133 enum BusState { 134 READ = 0, 135 READ_TO_WRITE, 136 WRITE, 137 WRITE_TO_READ 138 }; 139 140 BusState busState; 141 142 /** List to keep track of activate ticks */ 143 std::vector<std::deque<Tick>> actTicks; 144 145 /** 146 * A basic class to track the bank state, i.e. what row is 147 * currently open (if any), when is the bank free to accept a new 148 * command, when can it be precharged, and when can it be 149 * activated. 150 * 151 * The bank also keeps track of how many bytes have been accessed 152 * in the open row since it was opened. 
153 */ 154 class Bank 155 { 156 157 public: 158 159 static const uint32_t NO_ROW = -1; 160 161 uint32_t openRow; 162 163 Tick freeAt; 164 Tick preAllowedAt; 165 Tick actAllowedAt; 166 167 uint32_t rowAccesses; 168 uint32_t bytesAccessed; 169 170 Bank() : 171 openRow(NO_ROW), freeAt(0), preAllowedAt(0), actAllowedAt(0), 172 rowAccesses(0), bytesAccessed(0) 173 { } 174 }; 175 176 /** 177 * A burst helper helps organize and manage a packet that is larger than 178 * the DRAM burst size. A system packet that is larger than the burst size 179 * is split into multiple DRAM packets and all those DRAM packets point to 180 * a single burst helper such that we know when the whole packet is served. 181 */ 182 class BurstHelper { 183 184 public: 185 186 /** Number of DRAM bursts requred for a system packet **/ 187 const unsigned int burstCount; 188 189 /** Number of DRAM bursts serviced so far for a system packet **/ 190 unsigned int burstsServiced; 191 192 BurstHelper(unsigned int _burstCount) 193 : burstCount(_burstCount), burstsServiced(0) 194 { } 195 }; 196 197 /** 198 * A DRAM packet stores packets along with the timestamp of when 199 * the packet entered the queue, and also the decoded address. 200 */ 201 class DRAMPacket { 202 203 public: 204 205 /** When did request enter the controller */ 206 const Tick entryTime; 207 208 /** When will request leave the controller */ 209 Tick readyTime; 210 211 /** This comes from the outside world */ 212 const PacketPtr pkt; 213 214 const bool isRead; 215 216 /** Will be populated by address decoder */ 217 const uint8_t rank; 218 const uint8_t bank; 219 const uint16_t row; 220 221 /** 222 * Bank id is calculated considering banks in all the ranks 223 * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and 224 * bankId = 8 --> rank1, bank0 225 */ 226 const uint16_t bankId; 227 228 /** 229 * The starting address of the DRAM packet. 230 * This address could be unaligned to burst size boundaries. 
The 231 * reason is to keep the address offset so we can accurately check 232 * incoming read packets with packets in the write queue. 233 */ 234 Addr addr; 235 236 /** 237 * The size of this dram packet in bytes 238 * It is always equal or smaller than DRAM burst size 239 */ 240 unsigned int size; 241 242 /** 243 * A pointer to the BurstHelper if this DRAMPacket is a split packet 244 * If not a split packet (common case), this is set to NULL 245 */ 246 BurstHelper* burstHelper; 247 Bank& bankRef; 248 249 DRAMPacket(PacketPtr _pkt, bool is_read, uint8_t _rank, uint8_t _bank, 250 uint16_t _row, uint16_t bank_id, Addr _addr, 251 unsigned int _size, Bank& bank_ref) 252 : entryTime(curTick()), readyTime(curTick()), 253 pkt(_pkt), isRead(is_read), rank(_rank), bank(_bank), row(_row), 254 bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), 255 bankRef(bank_ref) 256 { } 257 258 }; 259 260 /** 261 * Bunch of things requires to setup "events" in gem5 262 * When event "respondEvent" occurs for example, the method 263 * processRespondEvent is called; no parameters are allowed 264 * in these methods 265 */ 266 void processNextReqEvent(); 267 EventWrapper<DRAMCtrl,&DRAMCtrl::processNextReqEvent> nextReqEvent; 268 269 void processRespondEvent(); 270 EventWrapper<DRAMCtrl, &DRAMCtrl::processRespondEvent> respondEvent; 271 272 void processActivateEvent(); 273 EventWrapper<DRAMCtrl, &DRAMCtrl::processActivateEvent> activateEvent; 274 275 void processPrechargeEvent(); 276 EventWrapper<DRAMCtrl, &DRAMCtrl::processPrechargeEvent> prechargeEvent; 277 278 void processRefreshEvent(); 279 EventWrapper<DRAMCtrl, &DRAMCtrl::processRefreshEvent> refreshEvent; 280 281 void processPowerEvent(); 282 EventWrapper<DRAMCtrl,&DRAMCtrl::processPowerEvent> powerEvent; 283 284 /** 285 * Check if the read queue has room for more entries 286 * 287 * @param pktCount The number of entries needed in the read queue 288 * @return true if read queue is full, false otherwise 289 */ 290 bool 
readQueueFull(unsigned int pktCount) const; 291 292 /** 293 * Check if the write queue has room for more entries 294 * 295 * @param pktCount The number of entries needed in the write queue 296 * @return true if write queue is full, false otherwise 297 */ 298 bool writeQueueFull(unsigned int pktCount) const; 299 300 /** 301 * When a new read comes in, first check if the write q has a 302 * pending request to the same address.\ If not, decode the 303 * address to populate rank/bank/row, create one or mutliple 304 * "dram_pkt", and push them to the back of the read queue.\ 305 * If this is the only 306 * read request in the system, schedule an event to start 307 * servicing it. 308 * 309 * @param pkt The request packet from the outside world 310 * @param pktCount The number of DRAM bursts the pkt 311 * translate to. If pkt size is larger then one full burst, 312 * then pktCount is greater than one. 313 */ 314 void addToReadQueue(PacketPtr pkt, unsigned int pktCount); 315 316 /** 317 * Decode the incoming pkt, create a dram_pkt and push to the 318 * back of the write queue. \If the write q length is more than 319 * the threshold specified by the user, ie the queue is beginning 320 * to get full, stop reads, and start draining writes. 321 * 322 * @param pkt The request packet from the outside world 323 * @param pktCount The number of DRAM bursts the pkt 324 * translate to. If pkt size is larger then one full burst, 325 * then pktCount is greater than one. 326 */ 327 void addToWriteQueue(PacketPtr pkt, unsigned int pktCount); 328 329 /** 330 * Actually do the DRAM access - figure out the latency it 331 * will take to service the req based on bank state, channel state etc 332 * and then update those states to account for this request.\ Based 333 * on this, update the packet's "readyTime" and move it to the 334 * response q from where it will eventually go back to the outside 335 * world. 
336 * 337 * @param pkt The DRAM packet created from the outside world pkt 338 */ 339 void doDRAMAccess(DRAMPacket* dram_pkt); 340 341 /** 342 * When a packet reaches its "readyTime" in the response Q, 343 * use the "access()" method in AbstractMemory to actually 344 * create the response packet, and send it back to the outside 345 * world requestor. 346 * 347 * @param pkt The packet from the outside world 348 * @param static_latency Static latency to add before sending the packet 349 */ 350 void accessAndRespond(PacketPtr pkt, Tick static_latency); 351 352 /** 353 * Address decoder to figure out physical mapping onto ranks, 354 * banks, and rows. This function is called multiple times on the same 355 * system packet if the pakcet is larger than burst of the memory. The 356 * dramPktAddr is used for the offset within the packet. 357 * 358 * @param pkt The packet from the outside world 359 * @param dramPktAddr The starting address of the DRAM packet 360 * @param size The size of the DRAM packet in bytes 361 * @param isRead Is the request for a read or a write to DRAM 362 * @return A DRAMPacket pointer with the decoded information 363 */ 364 DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size, 365 bool isRead); 366 367 /** 368 * The memory schduler/arbiter - picks which request needs to 369 * go next, based on the specified policy such as FCFS or FR-FCFS 370 * and moves it to the head of the queue. 371 */ 372 void chooseNext(std::deque<DRAMPacket*>& queue); 373 374 /** 375 *Looks at the state of the banks, channels, row buffer hits etc 376 * to estimate how long a request will take to complete. 
377 * 378 * @param dram_pkt The request for which we want to estimate latency 379 * @param inTime The tick at which you want to probe the memory 380 * 381 * @return A pair of ticks, one indicating how many ticks *after* 382 * inTime the request require, and the other indicating how 383 * much of that was just the bank access time, ignoring the 384 * ticks spent simply waiting for resources to become free 385 */ 386 std::pair<Tick, Tick> estimateLatency(DRAMPacket* dram_pkt, Tick inTime); 387 388 /** 389 * Move the request at the head of the read queue to the response 390 * queue, sorting by readyTime.\ If it is the only packet in the 391 * response queue, schedule a respond event to send it back to the 392 * outside world 393 */ 394 void moveToRespQ(); 395 396 /** 397 * For FR-FCFS policy reorder the read/write queue depending on row buffer 398 * hits and earliest banks available in DRAM 399 */ 400 void reorderQueue(std::deque<DRAMPacket*>& queue); 401 402 /** 403 * Find which are the earliest available banks for the enqueued 404 * requests. Assumes maximum of 64 banks per DIMM 405 * 406 * @param Queued requests to consider 407 * @return One-hot encoded mask of bank indices 408 */ 409 uint64_t minBankFreeAt(const std::deque<DRAMPacket*>& queue) const; 410 411 /** 412 * Keep track of when row activations happen, in order to enforce 413 * the maximum number of activations in the activation window. The 414 * method updates the time that the banks become available based 415 * on the current limits. 416 * 417 * @param act_tick Time when the activation takes place 418 * @param rank Index of the rank 419 * @param bank Index of the bank 420 * @param row Index of the row 421 * @param bank_ref Reference to the bank 422 */ 423 void activateBank(Tick act_tick, uint8_t rank, uint8_t bank, 424 uint16_t row, Bank& bank_ref); 425 426 /** 427 * Precharge a given bank and also update when the precharge is 428 * done. 
This will also deal with any stats related to the 429 * accesses to the open page. 430 * 431 * @param bank The bank to precharge 432 * @param pre_done_at Time when the precharge is done 433 */ 434 void prechargeBank(Bank& bank, Tick pre_done_at); 435 436 void printParams() const; 437 438 /** 439 * Used for debugging to observe the contents of the queues. 440 */ 441 void printQs() const; 442 443 /** 444 * The controller's main read and write queues 445 */ 446 std::deque<DRAMPacket*> readQueue; 447 std::deque<DRAMPacket*> writeQueue; 448 449 /** 450 * Response queue where read packets wait after we're done working 451 * with them, but it's not time to send the response yet. The 452 * responses are stored seperately mostly to keep the code clean 453 * and help with events scheduling. For all logical purposes such 454 * as sizing the read queue, this and the main read queue need to 455 * be added together. 456 */ 457 std::deque<DRAMPacket*> respQueue; 458 459 /** 460 * If we need to drain, keep the drain manager around until we're 461 * done here. 462 */ 463 DrainManager *drainManager; 464 465 /** 466 * Multi-dimensional vector of banks, first dimension is ranks, 467 * second is bank 468 */ 469 std::vector<std::vector<Bank> > banks; 470 471 /** 472 * The following are basic design parameters of the memory 473 * controller, and are initialized based on parameter values. 474 * The rowsPerBank is determined based on the capacity, number of 475 * ranks and banks, the burst size, and the row buffer size. 
476 */ 477 const uint32_t deviceBusWidth; 478 const uint32_t burstLength; 479 const uint32_t deviceRowBufferSize; 480 const uint32_t devicesPerRank; 481 const uint32_t burstSize; 482 const uint32_t rowBufferSize; 483 const uint32_t columnsPerRowBuffer; 484 const uint32_t ranksPerChannel; 485 const uint32_t banksPerRank; 486 const uint32_t channels; 487 uint32_t rowsPerBank; 488 const uint32_t readBufferSize; 489 const uint32_t writeBufferSize; 490 const uint32_t writeHighThreshold; 491 const uint32_t writeLowThreshold; 492 const uint32_t minWritesPerSwitch; 493 uint32_t writesThisTime; 494 uint32_t readsThisTime; 495 496 /** 497 * Basic memory timing parameters initialized based on parameter 498 * values. 499 */ 500 const Tick tWTR; 501 const Tick tRTW; 502 const Tick tBURST; 503 const Tick tRCD; 504 const Tick tCL; 505 const Tick tRP; 506 const Tick tRAS; 507 const Tick tWR; 508 const Tick tRFC; 509 const Tick tREFI; 510 const Tick tRRD; 511 const Tick tXAW; 512 const uint32_t activationLimit; 513 514 /** 515 * Memory controller configuration initialized based on parameter 516 * values. 517 */ 518 Enums::MemSched memSchedPolicy; 519 Enums::AddrMap addrMapping; 520 Enums::PageManage pageMgmt; 521 522 /** 523 * Max column accesses (read and write) per row, before forefully 524 * closing it. 525 */ 526 const uint32_t maxAccessesPerRow; 527 528 /** 529 * Pipeline latency of the controller frontend. The frontend 530 * contribution is added to writes (that complete when they are in 531 * the write buffer) and reads that are serviced the write buffer. 532 */ 533 const Tick frontendLatency; 534 535 /** 536 * Pipeline latency of the backend and PHY. Along with the 537 * frontend contribution, this latency is added to reads serviced 538 * by the DRAM. 539 */ 540 const Tick backendLatency; 541 542 /** 543 * Till when has the main data bus been spoken for already? 544 */ 545 Tick busBusyUntil; 546 547 /** 548 * Keep track of when a refresh is due. 
549 */ 550 Tick refreshDueAt; 551 552 /** 553 * The refresh state is used to control the progress of the 554 * refresh scheduling. When normal operation is in progress the 555 * refresh state is idle. From there, it progresses to the refresh 556 * drain state once tREFI has passed. The refresh drain state 557 * captures the DRAM row active state, as it will stay there until 558 * all ongoing accesses complete. Thereafter all banks are 559 * precharged, and lastly, the DRAM is refreshed. 560 */ 561 enum RefreshState { 562 REF_IDLE = 0, 563 REF_DRAIN, 564 REF_PRE, 565 REF_RUN 566 }; 567 568 RefreshState refreshState; 569 570 /** 571 * The power state captures the different operational states of 572 * the DRAM and interacts with the bus read/write state machine, 573 * and the refresh state machine. In the idle state all banks are 574 * precharged. From there we either go to an auto refresh (as 575 * determined by the refresh state machine), or to a precharge 576 * power down mode. From idle the memory can also go to the active 577 * state (with one or more banks active), and in turn from there 578 * to active power down. At the moment we do not capture the deep 579 * power down and self-refresh state. 580 */ 581 enum PowerState { 582 PWR_IDLE = 0, 583 PWR_REF, 584 PWR_PRE_PDN, 585 PWR_ACT, 586 PWR_ACT_PDN 587 }; 588 589 /** 590 * Since we are taking decisions out of order, we need to keep 591 * track of what power transition is happening at what time, such 592 * that we can go back in time and change history. For example, if 593 * we precharge all banks and schedule going to the idle state, we 594 * might at a later point decide to activate a bank before the 595 * transition to idle would have taken place. 596 */ 597 PowerState pwrStateTrans; 598 599 /** 600 * Current power state. 601 */ 602 PowerState pwrState; 603 604 /** 605 * Schedule a power state transition in the future, and 606 * potentially override an already scheduled transition. 
607 * 608 * @param pwr_state Power state to transition to 609 * @param tick Tick when transition should take place 610 */ 611 void schedulePowerEvent(PowerState pwr_state, Tick tick); 612 613 Tick prevArrival; 614 615 /** 616 * The soonest you have to start thinking about the next request 617 * is the longest access time that can occur before 618 * busBusyUntil. Assuming you need to precharge, open a new row, 619 * and access, it is tRP + tRCD + tCL. 620 */ 621 Tick nextReqTime; 622 623 // All statistics that the model needs to capture 624 Stats::Scalar readReqs; 625 Stats::Scalar writeReqs; 626 Stats::Scalar readBursts; 627 Stats::Scalar writeBursts; 628 Stats::Scalar bytesReadDRAM; 629 Stats::Scalar bytesReadWrQ; 630 Stats::Scalar bytesWritten; 631 Stats::Scalar bytesReadSys; 632 Stats::Scalar bytesWrittenSys; 633 Stats::Scalar servicedByWrQ; 634 Stats::Scalar mergedWrBursts; 635 Stats::Scalar neitherReadNorWrite; 636 Stats::Vector perBankRdBursts; 637 Stats::Vector perBankWrBursts; 638 Stats::Scalar numRdRetry; 639 Stats::Scalar numWrRetry; 640 Stats::Scalar totGap; 641 Stats::Vector readPktSize; 642 Stats::Vector writePktSize; 643 Stats::Vector rdQLenPdf; 644 Stats::Vector wrQLenPdf; 645 Stats::Histogram bytesPerActivate; 646 Stats::Histogram rdPerTurnAround; 647 Stats::Histogram wrPerTurnAround; 648 649 // Latencies summed over all requests 650 Stats::Scalar totQLat; 651 Stats::Scalar totMemAccLat; 652 Stats::Scalar totBusLat; 653 Stats::Scalar totBankLat; 654 655 // Average latencies per request 656 Stats::Formula avgQLat; 657 Stats::Formula avgBankLat; 658 Stats::Formula avgBusLat; 659 Stats::Formula avgMemAccLat; 660 661 // Average bandwidth 662 Stats::Formula avgRdBW; 663 Stats::Formula avgWrBW; 664 Stats::Formula avgRdBWSys; 665 Stats::Formula avgWrBWSys; 666 Stats::Formula peakBW; 667 Stats::Formula busUtil; 668 Stats::Formula busUtilRead; 669 Stats::Formula busUtilWrite; 670 671 // Average queue lengths 672 Stats::Average avgRdQLen; 673 Stats::Average 
avgWrQLen; 674 675 // Row hit count and rate 676 Stats::Scalar readRowHits; 677 Stats::Scalar writeRowHits; 678 Stats::Formula readRowHitRate; 679 Stats::Formula writeRowHitRate; 680 Stats::Formula avgGap; 681 682 // DRAM Power Calculation 683 Stats::Formula pageHitRate; 684 Stats::Vector pwrStateTime; 685 686 // Track when we transitioned to the current power state 687 Tick pwrStateTick; 688 689 // To track number of banks which are currently active 690 unsigned int numBanksActive; 691 692 /** @todo this is a temporary workaround until the 4-phase code is 693 * committed. upstream caches needs this packet until true is returned, so 694 * hold onto it for deletion until a subsequent call 695 */ 696 std::vector<PacketPtr> pendingDelete; 697 698 public: 699 700 void regStats(); 701 702 DRAMCtrl(const DRAMCtrlParams* p); 703 704 unsigned int drain(DrainManager* dm); 705 706 virtual BaseSlavePort& getSlavePort(const std::string& if_name, 707 PortID idx = InvalidPortID); 708 709 virtual void init(); 710 virtual void startup(); 711 712 protected: 713 714 Tick recvAtomic(PacketPtr pkt); 715 void recvFunctional(PacketPtr pkt); 716 bool recvTimingReq(PacketPtr pkt); 717 718}; 719 720#endif //__MEM_DRAM_CTRL_HH__ 721