dram_ctrl.hh revision 9966
/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2013 Amin Farmahini-Farahani
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andreas Hansson
 *          Ani Udipi
 */

/**
 * @file
 * SimpleDRAM declaration
 */

#ifndef __MEM_SIMPLE_DRAM_HH__
#define __MEM_SIMPLE_DRAM_HH__

#include <deque>

#include "base/statistics.hh"
#include "enums/AddrMap.hh"
#include "enums/MemSched.hh"
#include "enums/PageManage.hh"
#include "mem/abstract_mem.hh"
#include "mem/qport.hh"
#include "params/SimpleDRAM.hh"
#include "sim/eventq.hh"

/**
6411696Santhony.gutierrez@amd.com * The simple DRAM is a basic single-channel memory controller aiming 6511696Santhony.gutierrez@amd.com * to mimic a high-level DRAM controller and the most important timing 6611714Santhony.gutierrez@amd.com * constraints associated with the DRAM. The focus is really on 6711714Santhony.gutierrez@amd.com * modelling the impact on the system rather than the DRAM itself, 6811696Santhony.gutierrez@amd.com * hence the focus is on the controller model and not on the 69 * memory. By adhering to the correct timing constraints, ultimately 70 * there is no need for a memory model in addition to the controller 71 * model. 72 * 73 * As a basic design principle, this controller is not cycle callable, 74 * but instead uses events to decide when new decisions can be made, 75 * when resources become available, when things are to be considered 76 * done, and when to send things back. Through these simple 77 * principles, we achieve a performant model that is not 78 * cycle-accurate, but enables us to evaluate the system impact of a 79 * wide range of memory technologies, and also collect statistics 80 * about the use of the memory. 81 */ 82class SimpleDRAM : public AbstractMemory 83{ 84 85 private: 86 87 // For now, make use of a queued slave port to avoid dealing with 88 // flow control for the responses being sent back 89 class MemoryPort : public QueuedSlavePort 90 { 91 92 SlavePacketQueue queue; 93 SimpleDRAM& memory; 94 95 public: 96 97 MemoryPort(const std::string& name, SimpleDRAM& _memory); 98 99 protected: 100 101 Tick recvAtomic(PacketPtr pkt); 102 103 void recvFunctional(PacketPtr pkt); 104 105 bool recvTimingReq(PacketPtr); 106 107 virtual AddrRangeList getAddrRanges() const; 108 109 }; 110 111 /** 112 * Our incoming port, for a multi-ported controller add a crossbar 113 * in front of it 114 */ 115 MemoryPort port; 116 117 /** 118 * Remember if we have to retry a request when available. 
119 */ 120 bool retryRdReq; 121 bool retryWrReq; 122 123 /** 124 * Remember that a row buffer hit occured 125 */ 126 bool rowHitFlag; 127 128 /** 129 * Use this flag to shutoff reads, i.e. do not schedule any reads 130 * beyond those already done so that we can turn the bus around 131 * and do a few writes, or refresh, or whatever 132 */ 133 bool stopReads; 134 135 /** List to keep track of activate ticks */ 136 std::deque<Tick> actTicks; 137 138 /** 139 * A basic class to track the bank state indirectly via times 140 * "freeAt" and "tRASDoneAt" and what page is currently open. The 141 * bank also keeps track of how many bytes have been accessed in 142 * the open row since it was opened. 143 */ 144 class Bank 145 { 146 147 public: 148 149 static const uint32_t INVALID_ROW = -1; 150 151 uint32_t openRow; 152 153 Tick freeAt; 154 Tick tRASDoneAt; 155 156 uint32_t bytesAccessed; 157 158 Bank() : 159 openRow(INVALID_ROW), freeAt(0), tRASDoneAt(0), bytesAccessed(0) 160 { } 161 }; 162 163 /** 164 * A burst helper helps organize and manage a packet that is larger than 165 * the DRAM burst size. A system packet that is larger than the burst size 166 * is split into multiple DRAM packets and all those DRAM packets point to 167 * a single burst helper such that we know when the whole packet is served. 168 */ 169 class BurstHelper { 170 171 public: 172 173 /** Number of DRAM bursts requred for a system packet **/ 174 const unsigned int burstCount; 175 176 /** Number of DRAM bursts serviced so far for a system packet **/ 177 unsigned int burstsServiced; 178 179 BurstHelper(unsigned int _burstCount) 180 : burstCount(_burstCount), burstsServiced(0) 181 { } 182 }; 183 184 /** 185 * A DRAM packet stores packets along with the timestamp of when 186 * the packet entered the queue, and also the decoded address. 
187 */ 188 class DRAMPacket { 189 190 public: 191 192 /** When did request enter the controller */ 193 const Tick entryTime; 194 195 /** When will request leave the controller */ 196 Tick readyTime; 197 198 /** This comes from the outside world */ 199 const PacketPtr pkt; 200 201 const bool isRead; 202 203 /** Will be populated by address decoder */ 204 const uint8_t rank; 205 const uint16_t bank; 206 const uint16_t row; 207 208 /** 209 * The starting address of the DRAM packet. 210 * This address could be unaligned to burst size boundaries. The 211 * reason is to keep the address offset so we can accurately check 212 * incoming read packets with packets in the write queue. 213 */ 214 Addr addr; 215 216 /** 217 * The size of this dram packet in bytes 218 * It is always equal or smaller than DRAM burst size 219 */ 220 unsigned int size; 221 222 /** 223 * A pointer to the BurstHelper if this DRAMPacket is a split packet 224 * If not a split packet (common case), this is set to NULL 225 */ 226 BurstHelper* burstHelper; 227 Bank& bank_ref; 228 229 DRAMPacket(PacketPtr _pkt, bool _isRead, uint8_t _rank, uint16_t _bank, 230 uint16_t _row, Addr _addr, unsigned int _size, 231 Bank& _bank_ref) 232 : entryTime(curTick()), readyTime(curTick()), 233 pkt(_pkt), isRead(_isRead), rank(_rank), bank(_bank), row(_row), 234 addr(_addr), size(_size), burstHelper(NULL), bank_ref(_bank_ref) 235 { } 236 237 }; 238 239 /** 240 * Bunch of things requires to setup "events" in gem5 241 * When event "writeEvent" occurs for example, the method 242 * processWriteEvent is called; no parameters are allowed 243 * in these methods 244 */ 245 void processWriteEvent(); 246 EventWrapper<SimpleDRAM, &SimpleDRAM::processWriteEvent> writeEvent; 247 248 void processRespondEvent(); 249 EventWrapper<SimpleDRAM, &SimpleDRAM::processRespondEvent> respondEvent; 250 251 void processRefreshEvent(); 252 EventWrapper<SimpleDRAM, &SimpleDRAM::processRefreshEvent> refreshEvent; 253 254 void processNextReqEvent(); 
255 EventWrapper<SimpleDRAM,&SimpleDRAM::processNextReqEvent> nextReqEvent; 256 257 258 /** 259 * Check if the read queue has room for more entries 260 * 261 * @param pktCount The number of entries needed in the read queue 262 * @return true if read queue is full, false otherwise 263 */ 264 bool readQueueFull(unsigned int pktCount) const; 265 266 /** 267 * Check if the write queue has room for more entries 268 * 269 * @param pktCount The number of entries needed in the write queue 270 * @return true if write queue is full, false otherwise 271 */ 272 bool writeQueueFull(unsigned int pktCount) const; 273 274 /** 275 * When a new read comes in, first check if the write q has a 276 * pending request to the same address.\ If not, decode the 277 * address to populate rank/bank/row, create one or mutliple 278 * "dram_pkt", and push them to the back of the read queue.\ 279 * If this is the only 280 * read request in the system, schedule an event to start 281 * servicing it. 282 * 283 * @param pkt The request packet from the outside world 284 * @param pktCount The number of DRAM bursts the pkt 285 * translate to. If pkt size is larger then one full burst, 286 * then pktCount is greater than one. 287 */ 288 void addToReadQueue(PacketPtr pkt, unsigned int pktCount); 289 290 /** 291 * Decode the incoming pkt, create a dram_pkt and push to the 292 * back of the write queue. \If the write q length is more than 293 * the threshold specified by the user, ie the queue is beginning 294 * to get full, stop reads, and start draining writes. 295 * 296 * @param pkt The request packet from the outside world 297 * @param pktCount The number of DRAM bursts the pkt 298 * translate to. If pkt size is larger then one full burst, 299 * then pktCount is greater than one. 
300 */ 301 void addToWriteQueue(PacketPtr pkt, unsigned int pktCount); 302 303 /** 304 * Actually do the DRAM access - figure out the latency it 305 * will take to service the req based on bank state, channel state etc 306 * and then update those states to account for this request.\ Based 307 * on this, update the packet's "readyTime" and move it to the 308 * response q from where it will eventually go back to the outside 309 * world. 310 * 311 * @param pkt The DRAM packet created from the outside world pkt 312 */ 313 void doDRAMAccess(DRAMPacket* dram_pkt); 314 315 /** 316 * Check when the channel is free to turnaround, add turnaround 317 * delay and schedule a whole bunch of writes. 318 */ 319 void triggerWrites(); 320 321 /** 322 * When a packet reaches its "readyTime" in the response Q, 323 * use the "access()" method in AbstractMemory to actually 324 * create the response packet, and send it back to the outside 325 * world requestor. 326 * 327 * @param pkt The packet from the outside world 328 * @param static_latency Static latency to add before sending the packet 329 */ 330 void accessAndRespond(PacketPtr pkt, Tick static_latency); 331 332 /** 333 * Address decoder to figure out physical mapping onto ranks, 334 * banks, and rows. This function is called multiple times on the same 335 * system packet if the pakcet is larger than burst of the memory. The 336 * dramPktAddr is used for the offset within the packet. 
337 * 338 * @param pkt The packet from the outside world 339 * @param dramPktAddr The starting address of the DRAM packet 340 * @param size The size of the DRAM packet in bytes 341 * @param isRead Is the request for a read or a write to DRAM 342 * @return A DRAMPacket pointer with the decoded information 343 */ 344 DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size, bool isRead); 345 346 /** 347 * The memory schduler/arbiter - picks which read request needs to 348 * go next, based on the specified policy such as FCFS or FR-FCFS 349 * and moves it to the head of the read queue. 350 * 351 * @return True if a request was chosen and false if queue is empty 352 */ 353 bool chooseNextRead(); 354 355 /** 356 * Calls chooseNextReq() to pick the right request, then calls 357 * doDRAMAccess on that request in order to actually service 358 * that request 359 */ 360 void scheduleNextReq(); 361 362 /** 363 *Looks at the state of the banks, channels, row buffer hits etc 364 * to estimate how long a request will take to complete. 
365 * 366 * @param dram_pkt The request for which we want to estimate latency 367 * @param inTime The tick at which you want to probe the memory 368 * 369 * @return A pair of ticks, one indicating how many ticks *after* 370 * inTime the request require, and the other indicating how 371 * much of that was just the bank access time, ignoring the 372 * ticks spent simply waiting for resources to become free 373 */ 374 std::pair<Tick, Tick> estimateLatency(DRAMPacket* dram_pkt, Tick inTime); 375 376 /** 377 * Move the request at the head of the read queue to the response 378 * queue, sorting by readyTime.\ If it is the only packet in the 379 * response queue, schedule a respond event to send it back to the 380 * outside world 381 */ 382 void moveToRespQ(); 383 384 /** 385 * Scheduling policy within the write queue 386 */ 387 void chooseNextWrite(); 388 389 /** 390 * Looking at all banks, determine the moment in time when they 391 * are all free. 392 * 393 * @return The tick when all banks are free 394 */ 395 Tick maxBankFreeAt() const; 396 397 398 /** 399 * Keep track of when row activations happen, in order to enforce 400 * the maximum number of activations in the activation window. The 401 * method updates the time that the banks become available based 402 * on the current limits. 403 */ 404 void recordActivate(Tick act_tick); 405 406 void printParams() const; 407 void printQs() const; 408 409 /** 410 * The controller's main read and write queues 411 */ 412 std::deque<DRAMPacket*> readQueue; 413 std::deque<DRAMPacket*> writeQueue; 414 415 /** 416 * Response queue where read packets wait after we're done working 417 * with them, but it's not time to send the response yet. The 418 * responses are stored seperately mostly to keep the code clean 419 * and help with events scheduling. For all logical purposes such 420 * as sizing the read queue, this and the main read queue need to 421 * be added together. 
422 */ 423 std::deque<DRAMPacket*> respQueue; 424 425 /** 426 * If we need to drain, keep the drain manager around until we're 427 * done here. 428 */ 429 DrainManager *drainManager; 430 431 /** 432 * Multi-dimensional vector of banks, first dimension is ranks, 433 * second is bank 434 */ 435 std::vector<std::vector<Bank> > banks; 436 437 /** 438 * The following are basic design parameters of the memory 439 * controller, and are initialized based on parameter values. 440 * The rowsPerBank is determined based on the capacity, number of 441 * ranks and banks, the burst size, and the row buffer size. 442 */ 443 const uint32_t deviceBusWidth; 444 const uint32_t burstLength; 445 const uint32_t deviceRowBufferSize; 446 const uint32_t devicesPerRank; 447 const uint32_t burstSize; 448 const uint32_t rowBufferSize; 449 const uint32_t ranksPerChannel; 450 const uint32_t banksPerRank; 451 const uint32_t channels; 452 uint32_t rowsPerBank; 453 uint32_t columnsPerRowBuffer; 454 const uint32_t readBufferSize; 455 const uint32_t writeBufferSize; 456 const double writeThresholdPerc; 457 uint32_t writeThreshold; 458 459 /** 460 * Basic memory timing parameters initialized based on parameter 461 * values. 462 */ 463 const Tick tWTR; 464 const Tick tBURST; 465 const Tick tRCD; 466 const Tick tCL; 467 const Tick tRP; 468 const Tick tRAS; 469 const Tick tRFC; 470 const Tick tREFI; 471 const Tick tXAW; 472 const uint32_t activationLimit; 473 474 /** 475 * Memory controller configuration initialized based on parameter 476 * values. 477 */ 478 Enums::MemSched memSchedPolicy; 479 Enums::AddrMap addrMapping; 480 Enums::PageManage pageMgmt; 481 482 /** 483 * Pipeline latency of the controller frontend. The frontend 484 * contribution is added to writes (that complete when they are in 485 * the write buffer) and reads that are serviced the write buffer. 486 */ 487 const Tick frontendLatency; 488 489 /** 490 * Pipeline latency of the backend and PHY. 
Along with the 491 * frontend contribution, this latency is added to reads serviced 492 * by the DRAM. 493 */ 494 const Tick backendLatency; 495 496 /** 497 * Till when has the main data bus been spoken for already? 498 */ 499 Tick busBusyUntil; 500 501 Tick writeStartTime; 502 Tick prevArrival; 503 int numReqs; 504 505 // All statistics that the model needs to capture 506 Stats::Scalar readReqs; 507 Stats::Scalar writeReqs; 508 Stats::Scalar readBursts; 509 Stats::Scalar writeBursts; 510 Stats::Scalar bytesRead; 511 Stats::Scalar bytesWritten; 512 Stats::Scalar bytesConsumedRd; 513 Stats::Scalar bytesConsumedWr; 514 Stats::Scalar servicedByWrQ; 515 Stats::Scalar neitherReadNorWrite; 516 Stats::Vector perBankRdReqs; 517 Stats::Vector perBankWrReqs; 518 Stats::Scalar numRdRetry; 519 Stats::Scalar numWrRetry; 520 Stats::Scalar totGap; 521 Stats::Vector readPktSize; 522 Stats::Vector writePktSize; 523 Stats::Vector rdQLenPdf; 524 Stats::Vector wrQLenPdf; 525 Stats::Histogram bytesPerActivate; 526 527 // Latencies summed over all requests 528 Stats::Scalar totQLat; 529 Stats::Scalar totMemAccLat; 530 Stats::Scalar totBusLat; 531 Stats::Scalar totBankLat; 532 533 // Average latencies per request 534 Stats::Formula avgQLat; 535 Stats::Formula avgBankLat; 536 Stats::Formula avgBusLat; 537 Stats::Formula avgMemAccLat; 538 539 // Average bandwidth 540 Stats::Formula avgRdBW; 541 Stats::Formula avgWrBW; 542 Stats::Formula avgConsumedRdBW; 543 Stats::Formula avgConsumedWrBW; 544 Stats::Formula peakBW; 545 Stats::Formula busUtil; 546 547 // Average queue lengths 548 Stats::Average avgRdQLen; 549 Stats::Average avgWrQLen; 550 551 // Row hit count and rate 552 Stats::Scalar readRowHits; 553 Stats::Scalar writeRowHits; 554 Stats::Formula readRowHitRate; 555 Stats::Formula writeRowHitRate; 556 Stats::Formula avgGap; 557 558 /** @todo this is a temporary workaround until the 4-phase code is 559 * committed. 
upstream caches needs this packet until true is returned, so 560 * hold onto it for deletion until a subsequent call 561 */ 562 std::vector<PacketPtr> pendingDelete; 563 564 public: 565 566 void regStats(); 567 568 SimpleDRAM(const SimpleDRAMParams* p); 569 570 unsigned int drain(DrainManager* dm); 571 572 virtual BaseSlavePort& getSlavePort(const std::string& if_name, 573 PortID idx = InvalidPortID); 574 575 virtual void init(); 576 virtual void startup(); 577 578 protected: 579 580 Tick recvAtomic(PacketPtr pkt); 581 void recvFunctional(PacketPtr pkt); 582 bool recvTimingReq(PacketPtr pkt); 583 584}; 585 586#endif //__MEM_SIMPLE_DRAM_HH__ 587