// dram_ctrl.hh revision 9974
1/* 2 * Copyright (c) 2012 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2013 Amin Farmahini-Farahani 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Andreas Hansson 41 * Ani Udipi 42 * Neha Agarwal 43 */ 44 45/** 46 * @file 47 * SimpleDRAM declaration 48 */ 49 50#ifndef __MEM_SIMPLE_DRAM_HH__ 51#define __MEM_SIMPLE_DRAM_HH__ 52 53#include <deque> 54 55#include "base/statistics.hh" 56#include "enums/AddrMap.hh" 57#include "enums/MemSched.hh" 58#include "enums/PageManage.hh" 59#include "mem/abstract_mem.hh" 60#include "mem/qport.hh" 61#include "params/SimpleDRAM.hh" 62#include "sim/eventq.hh" 63 64/** 65 * The simple DRAM is a basic single-channel memory controller aiming 66 * to mimic a high-level DRAM controller and the most important timing 67 * constraints associated with the DRAM. The focus is really on 68 * modelling the impact on the system rather than the DRAM itself, 69 * hence the focus is on the controller model and not on the 70 * memory. By adhering to the correct timing constraints, ultimately 71 * there is no need for a memory model in addition to the controller 72 * model. 73 * 74 * As a basic design principle, this controller is not cycle callable, 75 * but instead uses events to decide when new decisions can be made, 76 * when resources become available, when things are to be considered 77 * done, and when to send things back. 
Through these simple 78 * principles, we achieve a performant model that is not 79 * cycle-accurate, but enables us to evaluate the system impact of a 80 * wide range of memory technologies, and also collect statistics 81 * about the use of the memory. 82 */ 83class SimpleDRAM : public AbstractMemory 84{ 85 86 private: 87 88 // For now, make use of a queued slave port to avoid dealing with 89 // flow control for the responses being sent back 90 class MemoryPort : public QueuedSlavePort 91 { 92 93 SlavePacketQueue queue; 94 SimpleDRAM& memory; 95 96 public: 97 98 MemoryPort(const std::string& name, SimpleDRAM& _memory); 99 100 protected: 101 102 Tick recvAtomic(PacketPtr pkt); 103 104 void recvFunctional(PacketPtr pkt); 105 106 bool recvTimingReq(PacketPtr); 107 108 virtual AddrRangeList getAddrRanges() const; 109 110 }; 111 112 /** 113 * Our incoming port, for a multi-ported controller add a crossbar 114 * in front of it 115 */ 116 MemoryPort port; 117 118 /** 119 * Remember if we have to retry a request when available. 120 */ 121 bool retryRdReq; 122 bool retryWrReq; 123 124 /** 125 * Remember that a row buffer hit occured 126 */ 127 bool rowHitFlag; 128 129 /** 130 * Use this flag to shutoff reads, i.e. do not schedule any reads 131 * beyond those already done so that we can turn the bus around 132 * and do a few writes, or refresh, or whatever 133 */ 134 bool stopReads; 135 136 /** List to keep track of activate ticks */ 137 std::vector<std::deque<Tick>> actTicks; 138 139 /** 140 * A basic class to track the bank state indirectly via times 141 * "freeAt" and "tRASDoneAt" and what page is currently open. The 142 * bank also keeps track of how many bytes have been accessed in 143 * the open row since it was opened. 
144 */ 145 class Bank 146 { 147 148 public: 149 150 static const uint32_t INVALID_ROW = -1; 151 152 uint32_t openRow; 153 154 Tick freeAt; 155 Tick tRASDoneAt; 156 Tick actAllowedAt; 157 158 uint32_t bytesAccessed; 159 160 Bank() : 161 openRow(INVALID_ROW), freeAt(0), tRASDoneAt(0), actAllowedAt(0), 162 bytesAccessed(0) 163 { } 164 }; 165 166 /** 167 * A burst helper helps organize and manage a packet that is larger than 168 * the DRAM burst size. A system packet that is larger than the burst size 169 * is split into multiple DRAM packets and all those DRAM packets point to 170 * a single burst helper such that we know when the whole packet is served. 171 */ 172 class BurstHelper { 173 174 public: 175 176 /** Number of DRAM bursts requred for a system packet **/ 177 const unsigned int burstCount; 178 179 /** Number of DRAM bursts serviced so far for a system packet **/ 180 unsigned int burstsServiced; 181 182 BurstHelper(unsigned int _burstCount) 183 : burstCount(_burstCount), burstsServiced(0) 184 { } 185 }; 186 187 /** 188 * A DRAM packet stores packets along with the timestamp of when 189 * the packet entered the queue, and also the decoded address. 190 */ 191 class DRAMPacket { 192 193 public: 194 195 /** When did request enter the controller */ 196 const Tick entryTime; 197 198 /** When will request leave the controller */ 199 Tick readyTime; 200 201 /** This comes from the outside world */ 202 const PacketPtr pkt; 203 204 const bool isRead; 205 206 /** Will be populated by address decoder */ 207 const uint8_t rank; 208 const uint8_t bank; 209 const uint16_t row; 210 211 /** 212 * Bank id is calculated considering banks in all the ranks 213 * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and 214 * bankId = 8 --> rank1, bank0 215 */ 216 const uint16_t bankId; 217 218 /** 219 * The starting address of the DRAM packet. 220 * This address could be unaligned to burst size boundaries. 
The 221 * reason is to keep the address offset so we can accurately check 222 * incoming read packets with packets in the write queue. 223 */ 224 Addr addr; 225 226 /** 227 * The size of this dram packet in bytes 228 * It is always equal or smaller than DRAM burst size 229 */ 230 unsigned int size; 231 232 /** 233 * A pointer to the BurstHelper if this DRAMPacket is a split packet 234 * If not a split packet (common case), this is set to NULL 235 */ 236 BurstHelper* burstHelper; 237 Bank& bankRef; 238 239 DRAMPacket(PacketPtr _pkt, bool is_read, uint8_t _rank, uint8_t _bank, 240 uint16_t _row, uint16_t bank_id, Addr _addr, 241 unsigned int _size, Bank& bank_ref) 242 : entryTime(curTick()), readyTime(curTick()), 243 pkt(_pkt), isRead(is_read), rank(_rank), bank(_bank), row(_row), 244 bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), 245 bankRef(bank_ref) 246 { } 247 248 }; 249 250 /** 251 * Bunch of things requires to setup "events" in gem5 252 * When event "writeEvent" occurs for example, the method 253 * processWriteEvent is called; no parameters are allowed 254 * in these methods 255 */ 256 void processWriteEvent(); 257 EventWrapper<SimpleDRAM, &SimpleDRAM::processWriteEvent> writeEvent; 258 259 void processRespondEvent(); 260 EventWrapper<SimpleDRAM, &SimpleDRAM::processRespondEvent> respondEvent; 261 262 void processRefreshEvent(); 263 EventWrapper<SimpleDRAM, &SimpleDRAM::processRefreshEvent> refreshEvent; 264 265 void processNextReqEvent(); 266 EventWrapper<SimpleDRAM,&SimpleDRAM::processNextReqEvent> nextReqEvent; 267 268 269 /** 270 * Check if the read queue has room for more entries 271 * 272 * @param pktCount The number of entries needed in the read queue 273 * @return true if read queue is full, false otherwise 274 */ 275 bool readQueueFull(unsigned int pktCount) const; 276 277 /** 278 * Check if the write queue has room for more entries 279 * 280 * @param pktCount The number of entries needed in the write queue 281 * @return true if write 
queue is full, false otherwise 282 */ 283 bool writeQueueFull(unsigned int pktCount) const; 284 285 /** 286 * When a new read comes in, first check if the write q has a 287 * pending request to the same address.\ If not, decode the 288 * address to populate rank/bank/row, create one or mutliple 289 * "dram_pkt", and push them to the back of the read queue.\ 290 * If this is the only 291 * read request in the system, schedule an event to start 292 * servicing it. 293 * 294 * @param pkt The request packet from the outside world 295 * @param pktCount The number of DRAM bursts the pkt 296 * translate to. If pkt size is larger then one full burst, 297 * then pktCount is greater than one. 298 */ 299 void addToReadQueue(PacketPtr pkt, unsigned int pktCount); 300 301 /** 302 * Decode the incoming pkt, create a dram_pkt and push to the 303 * back of the write queue. \If the write q length is more than 304 * the threshold specified by the user, ie the queue is beginning 305 * to get full, stop reads, and start draining writes. 306 * 307 * @param pkt The request packet from the outside world 308 * @param pktCount The number of DRAM bursts the pkt 309 * translate to. If pkt size is larger then one full burst, 310 * then pktCount is greater than one. 311 */ 312 void addToWriteQueue(PacketPtr pkt, unsigned int pktCount); 313 314 /** 315 * Actually do the DRAM access - figure out the latency it 316 * will take to service the req based on bank state, channel state etc 317 * and then update those states to account for this request.\ Based 318 * on this, update the packet's "readyTime" and move it to the 319 * response q from where it will eventually go back to the outside 320 * world. 321 * 322 * @param pkt The DRAM packet created from the outside world pkt 323 */ 324 void doDRAMAccess(DRAMPacket* dram_pkt); 325 326 /** 327 * Check when the channel is free to turnaround, add turnaround 328 * delay and schedule a whole bunch of writes. 
329 */ 330 void triggerWrites(); 331 332 /** 333 * When a packet reaches its "readyTime" in the response Q, 334 * use the "access()" method in AbstractMemory to actually 335 * create the response packet, and send it back to the outside 336 * world requestor. 337 * 338 * @param pkt The packet from the outside world 339 * @param static_latency Static latency to add before sending the packet 340 */ 341 void accessAndRespond(PacketPtr pkt, Tick static_latency); 342 343 /** 344 * Address decoder to figure out physical mapping onto ranks, 345 * banks, and rows. This function is called multiple times on the same 346 * system packet if the pakcet is larger than burst of the memory. The 347 * dramPktAddr is used for the offset within the packet. 348 * 349 * @param pkt The packet from the outside world 350 * @param dramPktAddr The starting address of the DRAM packet 351 * @param size The size of the DRAM packet in bytes 352 * @param isRead Is the request for a read or a write to DRAM 353 * @return A DRAMPacket pointer with the decoded information 354 */ 355 DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size, bool isRead); 356 357 /** 358 * The memory schduler/arbiter - picks which read request needs to 359 * go next, based on the specified policy such as FCFS or FR-FCFS 360 * and moves it to the head of the read queue. 361 * 362 * @return True if a request was chosen and false if queue is empty 363 */ 364 bool chooseNextRead(); 365 366 /** 367 * Calls chooseNextReq() to pick the right request, then calls 368 * doDRAMAccess on that request in order to actually service 369 * that request 370 */ 371 void scheduleNextReq(); 372 373 /** 374 *Looks at the state of the banks, channels, row buffer hits etc 375 * to estimate how long a request will take to complete. 
376 * 377 * @param dram_pkt The request for which we want to estimate latency 378 * @param inTime The tick at which you want to probe the memory 379 * 380 * @return A pair of ticks, one indicating how many ticks *after* 381 * inTime the request require, and the other indicating how 382 * much of that was just the bank access time, ignoring the 383 * ticks spent simply waiting for resources to become free 384 */ 385 std::pair<Tick, Tick> estimateLatency(DRAMPacket* dram_pkt, Tick inTime); 386 387 /** 388 * Move the request at the head of the read queue to the response 389 * queue, sorting by readyTime.\ If it is the only packet in the 390 * response queue, schedule a respond event to send it back to the 391 * outside world 392 */ 393 void moveToRespQ(); 394 395 /** 396 * Scheduling policy within the write queue 397 */ 398 void chooseNextWrite(); 399 400 /** 401 * For FR-FCFS policy reorder the read/write queue depending on row buffer 402 * hits and earliest banks available in DRAM 403 */ 404 void reorderQueue(std::deque<DRAMPacket*>& queue); 405 406 /** 407 * Looking at all banks, determine the moment in time when they 408 * are all free. 409 * 410 * @return The tick when all banks are free 411 */ 412 Tick maxBankFreeAt() const; 413 414 /** 415 * Find which are the earliest available banks for the enqueued 416 * requests. Assumes maximum of 64 banks per DIMM 417 * 418 * @param Queued requests to consider 419 * @return One-hot encoded mask of bank indices 420 */ 421 uint64_t minBankFreeAt(const std::deque<DRAMPacket*>& queue) const; 422 423 /** 424 * Keep track of when row activations happen, in order to enforce 425 * the maximum number of activations in the activation window. The 426 * method updates the time that the banks become available based 427 * on the current limits. 
428 */ 429 void recordActivate(Tick act_tick, uint8_t rank, uint8_t bank); 430 431 void printParams() const; 432 void printQs() const; 433 434 /** 435 * The controller's main read and write queues 436 */ 437 std::deque<DRAMPacket*> readQueue; 438 std::deque<DRAMPacket*> writeQueue; 439 440 /** 441 * Response queue where read packets wait after we're done working 442 * with them, but it's not time to send the response yet. The 443 * responses are stored seperately mostly to keep the code clean 444 * and help with events scheduling. For all logical purposes such 445 * as sizing the read queue, this and the main read queue need to 446 * be added together. 447 */ 448 std::deque<DRAMPacket*> respQueue; 449 450 /** 451 * If we need to drain, keep the drain manager around until we're 452 * done here. 453 */ 454 DrainManager *drainManager; 455 456 /** 457 * Multi-dimensional vector of banks, first dimension is ranks, 458 * second is bank 459 */ 460 std::vector<std::vector<Bank> > banks; 461 462 /** 463 * The following are basic design parameters of the memory 464 * controller, and are initialized based on parameter values. 465 * The rowsPerBank is determined based on the capacity, number of 466 * ranks and banks, the burst size, and the row buffer size. 467 */ 468 const uint32_t deviceBusWidth; 469 const uint32_t burstLength; 470 const uint32_t deviceRowBufferSize; 471 const uint32_t devicesPerRank; 472 const uint32_t burstSize; 473 const uint32_t rowBufferSize; 474 const uint32_t ranksPerChannel; 475 const uint32_t banksPerRank; 476 const uint32_t channels; 477 uint32_t rowsPerBank; 478 uint32_t columnsPerRowBuffer; 479 const uint32_t readBufferSize; 480 const uint32_t writeBufferSize; 481 const double writeHighThresholdPerc; 482 uint32_t writeHighThreshold; 483 const double writeLowThresholdPerc; 484 uint32_t writeLowThreshold; 485 486 /** 487 * Basic memory timing parameters initialized based on parameter 488 * values. 
489 */ 490 const Tick tWTR; 491 const Tick tBURST; 492 const Tick tRCD; 493 const Tick tCL; 494 const Tick tRP; 495 const Tick tRAS; 496 const Tick tRFC; 497 const Tick tREFI; 498 const Tick tRRD; 499 const Tick tXAW; 500 const uint32_t activationLimit; 501 502 /** 503 * Memory controller configuration initialized based on parameter 504 * values. 505 */ 506 Enums::MemSched memSchedPolicy; 507 Enums::AddrMap addrMapping; 508 Enums::PageManage pageMgmt; 509 510 /** 511 * Pipeline latency of the controller frontend. The frontend 512 * contribution is added to writes (that complete when they are in 513 * the write buffer) and reads that are serviced the write buffer. 514 */ 515 const Tick frontendLatency; 516 517 /** 518 * Pipeline latency of the backend and PHY. Along with the 519 * frontend contribution, this latency is added to reads serviced 520 * by the DRAM. 521 */ 522 const Tick backendLatency; 523 524 /** 525 * Till when has the main data bus been spoken for already? 526 */ 527 Tick busBusyUntil; 528 529 Tick writeStartTime; 530 Tick prevArrival; 531 int numReqs; 532 533 // Tracks number of writes done to meet the write threshold 534 uint32_t numWritesThisTime; 535 536 // The absolute soonest you have to start thinking about the 537 // next request is the longest access time that can occur before 538 // busBusyUntil. 
Assuming you need to precharge, 539 // open a new row, and access, it is tRP + tRCD + tCL 540 Tick newTime; 541 542 // All statistics that the model needs to capture 543 Stats::Scalar readReqs; 544 Stats::Scalar writeReqs; 545 Stats::Scalar readBursts; 546 Stats::Scalar writeBursts; 547 Stats::Scalar bytesRead; 548 Stats::Scalar bytesWritten; 549 Stats::Scalar bytesConsumedRd; 550 Stats::Scalar bytesConsumedWr; 551 Stats::Scalar servicedByWrQ; 552 Stats::Scalar neitherReadNorWrite; 553 Stats::Vector perBankRdReqs; 554 Stats::Vector perBankWrReqs; 555 Stats::Scalar numRdRetry; 556 Stats::Scalar numWrRetry; 557 Stats::Scalar totGap; 558 Stats::Vector readPktSize; 559 Stats::Vector writePktSize; 560 Stats::Vector rdQLenPdf; 561 Stats::Vector wrQLenPdf; 562 Stats::Histogram bytesPerActivate; 563 564 // Latencies summed over all requests 565 Stats::Scalar totQLat; 566 Stats::Scalar totMemAccLat; 567 Stats::Scalar totBusLat; 568 Stats::Scalar totBankLat; 569 570 // Average latencies per request 571 Stats::Formula avgQLat; 572 Stats::Formula avgBankLat; 573 Stats::Formula avgBusLat; 574 Stats::Formula avgMemAccLat; 575 576 // Average bandwidth 577 Stats::Formula avgRdBW; 578 Stats::Formula avgWrBW; 579 Stats::Formula avgConsumedRdBW; 580 Stats::Formula avgConsumedWrBW; 581 Stats::Formula peakBW; 582 Stats::Formula busUtil; 583 584 // Average queue lengths 585 Stats::Average avgRdQLen; 586 Stats::Average avgWrQLen; 587 588 // Row hit count and rate 589 Stats::Scalar readRowHits; 590 Stats::Scalar writeRowHits; 591 Stats::Formula readRowHitRate; 592 Stats::Formula writeRowHitRate; 593 Stats::Formula avgGap; 594 595 /** @todo this is a temporary workaround until the 4-phase code is 596 * committed. 
upstream caches needs this packet until true is returned, so 597 * hold onto it for deletion until a subsequent call 598 */ 599 std::vector<PacketPtr> pendingDelete; 600 601 public: 602 603 void regStats(); 604 605 SimpleDRAM(const SimpleDRAMParams* p); 606 607 unsigned int drain(DrainManager* dm); 608 609 virtual BaseSlavePort& getSlavePort(const std::string& if_name, 610 PortID idx = InvalidPortID); 611 612 virtual void init(); 613 virtual void startup(); 614 615 protected: 616 617 Tick recvAtomic(PacketPtr pkt); 618 void recvFunctional(PacketPtr pkt); 619 bool recvTimingReq(PacketPtr pkt); 620 621}; 622 623#endif //__MEM_SIMPLE_DRAM_HH__ 624