dram_ctrl.cc revision 9726
1/*
2 * Copyright (c) 2010-2012 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andreas Hansson
38 *          Ani Udipi
39 */
40
41#include "base/trace.hh"
42#include "debug/Drain.hh"
43#include "debug/DRAM.hh"
44#include "debug/DRAMWR.hh"
45#include "mem/simple_dram.hh"
46
47using namespace std;
48
49SimpleDRAM::SimpleDRAM(const SimpleDRAMParams* p) :
50    AbstractMemory(p),
51    port(name() + ".port", *this),
52    retryRdReq(false), retryWrReq(false),
53    rowHitFlag(false), stopReads(false), actTicks(p->activation_limit, 0),
54    writeEvent(this), respondEvent(this),
55    refreshEvent(this), nextReqEvent(this), drainManager(NULL),
56    bytesPerCacheLine(0),
57    linesPerRowBuffer(p->lines_per_rowbuffer),
58    ranksPerChannel(p->ranks_per_channel),
59    banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
60    readBufferSize(p->read_buffer_size),
61    writeBufferSize(p->write_buffer_size),
62    writeThresholdPerc(p->write_thresh_perc),
63    tWTR(p->tWTR), tBURST(p->tBURST),
64    tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP),
65    tRFC(p->tRFC), tREFI(p->tREFI),
66    tXAW(p->tXAW), activationLimit(p->activation_limit),
67    memSchedPolicy(p->mem_sched_policy), addrMapping(p->addr_mapping),
68    pageMgmt(p->page_policy),
69    frontendLatency(p->static_frontend_latency),
70    backendLatency(p->static_backend_latency),
71    busBusyUntil(0), writeStartTime(0),
72    prevArrival(0), numReqs(0)
73{
74    // create the bank states based on the dimensions of the ranks and
75    // banks
76    banks.resize(ranksPerChannel);
77    for (size_t c = 0; c < ranksPerChannel; ++c) {
78        banks[c].resize(banksPerRank);
79    }
80
81    // round the write threshold percent to a whole number of entries
82    // in the buffer
83    writeThreshold = writeBufferSize * writeThresholdPerc / 100.0;
84}
85
86void
87SimpleDRAM::init()
88{
89    if (!port.isConnected()) {
90        fatal("SimpleDRAM %s is unconnected!\n", name());
91    } else {
92        port.sendRangeChange();
93    }
94
95    // get the burst size from the connected port as it is currently
96    // assumed to be equal to the cache line size
97    bytesPerCacheLine = port.peerBlockSize();
98
99    // we could deal with plenty options here, but for now do a quick
100    // sanity check
101    if (bytesPerCacheLine != 64 && bytesPerCacheLine != 32)
102        panic("Unexpected burst size %d", bytesPerCacheLine);
103
104    // determine the rows per bank by looking at the total capacity
105    uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
106
107    DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
108            AbstractMemory::size());
109    rowsPerBank = capacity / (bytesPerCacheLine * linesPerRowBuffer *
110                              banksPerRank * ranksPerChannel);
111
112    if (range.interleaved()) {
113        if (channels != range.stripes())
114            panic("%s has %d interleaved address stripes but %d channel(s)\n",
115                  name(), range.stripes(), channels);
116
117        if (addrMapping == Enums::RaBaChCo) {
118            if (bytesPerCacheLine * linesPerRowBuffer !=
119                range.granularity()) {
120                panic("Interleaving of %s doesn't match RaBaChCo address map\n",
121                      name());
122            }
123        } else if (addrMapping == Enums::RaBaCoCh) {
124            if (bytesPerCacheLine != range.granularity()) {
125                panic("Interleaving of %s doesn't match RaBaCoCh address map\n",
126                      name());
127            }
128        } else if (addrMapping == Enums::CoRaBaCh) {
129            if (bytesPerCacheLine != range.granularity())
130                panic("Interleaving of %s doesn't match CoRaBaCh address map\n",
131                      name());
132        }
133    }
134}
135
136void
137SimpleDRAM::startup()
138{
139    // print the configuration of the controller
140    printParams();
141
142    // kick off the refresh
143    schedule(refreshEvent, curTick() + tREFI);
144}
145
146Tick
147SimpleDRAM::recvAtomic(PacketPtr pkt)
148{
149    DPRINTF(DRAM, "recvAtomic: %s 0x%x\n", pkt->cmdString(), pkt->getAddr());
150
151    // do the actual memory access and turn the packet into a response
152    access(pkt);
153
154    Tick latency = 0;
155    if (!pkt->memInhibitAsserted() && pkt->hasData()) {
156        // this value is not supposed to be accurate, just enough to
157        // keep things going, mimic a closed page
158        latency = tRP + tRCD + tCL;
159    }
160    return latency;
161}
162
163bool
164SimpleDRAM::readQueueFull() const
165{
166    DPRINTF(DRAM, "Read queue limit %d current size %d\n",
167            readBufferSize, readQueue.size() + respQueue.size());
168
169    return (readQueue.size() + respQueue.size()) == readBufferSize;
170}
171
172bool
173SimpleDRAM::writeQueueFull() const
174{
175    DPRINTF(DRAM, "Write queue limit %d current size %d\n",
176            writeBufferSize, writeQueue.size());
177    return writeQueue.size() == writeBufferSize;
178}
179
180SimpleDRAM::DRAMPacket*
181SimpleDRAM::decodeAddr(PacketPtr pkt)
182{
183    // decode the address based on the address mapping scheme, with
184    // Ra, Co, Ba and Ch denoting rank, column, bank and channel,
185    // respectively
186    uint8_t rank;
187    uint16_t bank;
188    uint16_t row;
189
190    Addr addr = pkt->getAddr();
191
192    // truncate the address to the access granularity
193    addr = addr / bytesPerCacheLine;
194
195    // we have removed the lowest order address bits that denote the
196    // position within the cache line
197    if (addrMapping == Enums::RaBaChCo) {
198        // the lowest order bits denote the column to ensure that
199        // sequential cache lines occupy the same row
200        addr = addr / linesPerRowBuffer;
201
202        // take out the channel part of the address
203        addr = addr / channels;
204
205        // after the channel bits, get the bank bits to interleave
206        // over the banks
207        bank = addr % banksPerRank;
208        addr = addr / banksPerRank;
209
210        // after the bank, we get the rank bits which thus interleaves
211        // over the ranks
212        rank = addr % ranksPerChannel;
213        addr = addr / ranksPerChannel;
214
215        // lastly, get the row bits
216        row = addr % rowsPerBank;
217        addr = addr / rowsPerBank;
218    } else if (addrMapping == Enums::RaBaCoCh) {
219        // take out the channel part of the address
220        addr = addr / channels;
221
222        // next, the column
223        addr = addr / linesPerRowBuffer;
224
225        // after the column bits, we get the bank bits to interleave
226        // over the banks
227        bank = addr % banksPerRank;
228        addr = addr / banksPerRank;
229
230        // after the bank, we get the rank bits which thus interleaves
231        // over the ranks
232        rank = addr % ranksPerChannel;
233        addr = addr / ranksPerChannel;
234
235        // lastly, get the row bits
236        row = addr % rowsPerBank;
237        addr = addr / rowsPerBank;
238    } else if (addrMapping == Enums::CoRaBaCh) {
239        // optimise for closed page mode and utilise maximum
240        // parallelism of the DRAM (at the cost of power)
241
242        // take out the channel part of the address, not that this has
243        // to match with how accesses are interleaved between the
244        // controllers in the address mapping
245        addr = addr / channels;
246
247        // start with the bank bits, as this provides the maximum
248        // opportunity for parallelism between requests
249        bank = addr % banksPerRank;
250        addr = addr / banksPerRank;
251
252        // next get the rank bits
253        rank = addr % ranksPerChannel;
254        addr = addr / ranksPerChannel;
255
256        // next the column bits which we do not need to keep track of
257        // and simply skip past
258        addr = addr / linesPerRowBuffer;
259
260        // lastly, get the row bits
261        row = addr % rowsPerBank;
262        addr = addr / rowsPerBank;
263    } else
264        panic("Unknown address mapping policy chosen!");
265
266    assert(rank < ranksPerChannel);
267    assert(bank < banksPerRank);
268    assert(row < rowsPerBank);
269
270    DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
271            pkt->getAddr(), rank, bank, row);
272
273    // create the corresponding DRAM packet with the entry time and
274    // ready time set to the current tick, the latter will be updated
275    // later
276    return new DRAMPacket(pkt, rank, bank, row, pkt->getAddr(),
277                          banks[rank][bank]);
278}
279
280void
281SimpleDRAM::addToReadQueue(PacketPtr pkt)
282{
283    // only add to the read queue here. whenever the request is
284    // eventually done, set the readyTime, and call schedule()
285    assert(!pkt->isWrite());
286
287    // First check write buffer to see if the data is already at
288    // the controller
289    list<DRAMPacket*>::const_iterator i;
290    Addr addr = pkt->getAddr();
291
292    // @todo: add size check
293    for (i = writeQueue.begin(); i != writeQueue.end(); ++i) {
294        if ((*i)->addr == addr){
295            servicedByWrQ++;
296            DPRINTF(DRAM, "Read to %lld serviced by write queue\n", addr);
297            bytesRead += bytesPerCacheLine;
298            bytesConsumedRd += pkt->getSize();
299            accessAndRespond(pkt, frontendLatency);
300            return;
301        }
302    }
303
304    DRAMPacket* dram_pkt = decodeAddr(pkt);
305
306    assert(readQueue.size() + respQueue.size() < readBufferSize);
307    rdQLenPdf[readQueue.size() + respQueue.size()]++;
308
309    DPRINTF(DRAM, "Adding to read queue\n");
310
311    readQueue.push_back(dram_pkt);
312
313    // Update stats
314    uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
315    assert(bank_id < ranksPerChannel * banksPerRank);
316    perBankRdReqs[bank_id]++;
317
318    avgRdQLen = readQueue.size() + respQueue.size();
319
320    // If we are not already scheduled to get the read request out of
321    // the queue, do so now
322    if (!nextReqEvent.scheduled() && !stopReads) {
323        DPRINTF(DRAM, "Request scheduled immediately\n");
324        schedule(nextReqEvent, curTick());
325    }
326}
327
328void
329SimpleDRAM::processWriteEvent()
330{
331    assert(!writeQueue.empty());
332    uint32_t numWritesThisTime = 0;
333
334    DPRINTF(DRAMWR, "Beginning DRAM Writes\n");
335    Tick temp1 M5_VAR_USED = std::max(curTick(), busBusyUntil);
336    Tick temp2 M5_VAR_USED = std::max(curTick(), maxBankFreeAt());
337
338    // @todo: are there any dangers with the untimed while loop?
339    while (!writeQueue.empty()) {
340        if (numWritesThisTime > writeThreshold) {
341            DPRINTF(DRAMWR, "Hit write threshold %d\n", writeThreshold);
342            break;
343        }
344
345        chooseNextWrite();
346        DRAMPacket* dram_pkt = writeQueue.front();
347        // What's the earliest the request can be put on the bus
348        Tick schedTime = std::max(curTick(), busBusyUntil);
349
350        DPRINTF(DRAMWR, "Asking for latency estimate at %lld\n",
351                schedTime + tBURST);
352
353        pair<Tick, Tick> lat = estimateLatency(dram_pkt, schedTime + tBURST);
354        Tick accessLat = lat.second;
355
356        // look at the rowHitFlag set by estimateLatency
357        if (rowHitFlag)
358            writeRowHits++;
359
360        Bank& bank = dram_pkt->bank_ref;
361
362        if (pageMgmt == Enums::open) {
363            bank.openRow = dram_pkt->row;
364            bank.freeAt = schedTime + tBURST + std::max(accessLat, tCL);
365            busBusyUntil = bank.freeAt - tCL;
366
367            if (!rowHitFlag) {
368                bank.tRASDoneAt = bank.freeAt + tRP;
369                recordActivate(bank.freeAt - tCL - tRCD);
370                busBusyUntil = bank.freeAt - tCL - tRCD;
371            }
372        } else if (pageMgmt == Enums::close) {
373            bank.freeAt = schedTime + tBURST + accessLat + tRP + tRP;
374            // Work backwards from bank.freeAt to determine activate time
375            recordActivate(bank.freeAt - tRP - tRP - tCL - tRCD);
376            busBusyUntil = bank.freeAt - tRP - tRP - tCL - tRCD;
377            DPRINTF(DRAMWR, "processWriteEvent::bank.freeAt for "
378                    "banks_id %d is %lld\n",
379                    dram_pkt->rank * banksPerRank + dram_pkt->bank,
380                    bank.freeAt);
381        } else
382            panic("Unknown page management policy chosen\n");
383
384        DPRINTF(DRAMWR, "Done writing to address %lld\n", dram_pkt->addr);
385
386        DPRINTF(DRAMWR, "schedtime is %lld, tBURST is %lld, "
387                "busbusyuntil is %lld\n",
388                schedTime, tBURST, busBusyUntil);
389
390        writeQueue.pop_front();
391        delete dram_pkt;
392
393        numWritesThisTime++;
394    }
395
396    DPRINTF(DRAMWR, "Completed %d writes, bus busy for %lld ticks,"\
397            "banks busy for %lld ticks\n", numWritesThisTime,
398            busBusyUntil - temp1, maxBankFreeAt() - temp2);
399
400    // Update stats
401    avgWrQLen = writeQueue.size();
402
403    // turn the bus back around for reads again
404    busBusyUntil += tWTR;
405    stopReads = false;
406
407    if (retryWrReq) {
408        retryWrReq = false;
409        port.sendRetry();
410    }
411
412    // if there is nothing left in any queue, signal a drain
413    if (writeQueue.empty() && readQueue.empty() &&
414        respQueue.empty () && drainManager) {
415        drainManager->signalDrainDone();
416        drainManager = NULL;
417    }
418
419    // Once you're done emptying the write queue, check if there's
420    // anything in the read queue, and call schedule if required. The
421    // retry above could already have caused it to be scheduled, so
422    // first check
423    if (!nextReqEvent.scheduled())
424        schedule(nextReqEvent, busBusyUntil);
425}
426
427void
428SimpleDRAM::triggerWrites()
429{
430    DPRINTF(DRAM, "Writes triggered at %lld\n", curTick());
431    // Flag variable to stop any more read scheduling
432    stopReads = true;
433
434    writeStartTime = std::max(busBusyUntil, curTick()) + tWTR;
435
436    DPRINTF(DRAM, "Writes scheduled at %lld\n", writeStartTime);
437
438    assert(writeStartTime >= curTick());
439    assert(!writeEvent.scheduled());
440    schedule(writeEvent, writeStartTime);
441}
442
443void
444SimpleDRAM::addToWriteQueue(PacketPtr pkt)
445{
446    // only add to the write queue here. whenever the request is
447    // eventually done, set the readyTime, and call schedule()
448    assert(pkt->isWrite());
449
450    DRAMPacket* dram_pkt = decodeAddr(pkt);
451
452    assert(writeQueue.size() < writeBufferSize);
453    wrQLenPdf[writeQueue.size()]++;
454
455    DPRINTF(DRAM, "Adding to write queue\n");
456
457    writeQueue.push_back(dram_pkt);
458
459    // Update stats
460    uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
461    assert(bank_id < ranksPerChannel * banksPerRank);
462    perBankWrReqs[bank_id]++;
463
464    avgWrQLen = writeQueue.size();
465
466    // we do not wait for the writes to be send to the actual memory,
467    // but instead take responsibility for the consistency here and
468    // snoop the write queue for any upcoming reads
469
470    bytesConsumedWr += pkt->getSize();
471    bytesWritten += bytesPerCacheLine;
472    accessAndRespond(pkt, frontendLatency);
473
474    // If your write buffer is starting to fill up, drain it!
475    if (writeQueue.size() > writeThreshold && !stopReads){
476        triggerWrites();
477    }
478}
479
480void
481SimpleDRAM::printParams() const
482{
483    // Sanity check print of important parameters
484    DPRINTF(DRAM,
485            "Memory controller %s physical organization\n"      \
486            "Bytes per cacheline  %d\n"                         \
487            "Lines per row buffer %d\n"                         \
488            "Rows  per bank       %d\n"                         \
489            "Banks per rank       %d\n"                         \
490            "Ranks per channel    %d\n"                         \
491            "Total mem capacity   %u\n",
492            name(), bytesPerCacheLine, linesPerRowBuffer, rowsPerBank,
493            banksPerRank, ranksPerChannel, bytesPerCacheLine *
494            linesPerRowBuffer * rowsPerBank * banksPerRank * ranksPerChannel);
495
496    string scheduler =  memSchedPolicy == Enums::fcfs ? "FCFS" : "FR-FCFS";
497    string address_mapping = addrMapping == Enums::RaBaChCo ? "RaBaChCo" :
498        (addrMapping == Enums::RaBaCoCh ? "RaBaCoCh" : "CoRaBaCh");
499    string page_policy = pageMgmt == Enums::open ? "OPEN" : "CLOSE";
500
501    DPRINTF(DRAM,
502            "Memory controller %s characteristics\n"    \
503            "Read buffer size     %d\n"                 \
504            "Write buffer size    %d\n"                 \
505            "Write buffer thresh  %d\n"                 \
506            "Scheduler            %s\n"                 \
507            "Address mapping      %s\n"                 \
508            "Page policy          %s\n",
509            name(), readBufferSize, writeBufferSize, writeThreshold,
510            scheduler, address_mapping, page_policy);
511
512    DPRINTF(DRAM, "Memory controller %s timing specs\n" \
513            "tRCD      %d ticks\n"                        \
514            "tCL       %d ticks\n"                        \
515            "tRP       %d ticks\n"                        \
516            "tBURST    %d ticks\n"                        \
517            "tRFC      %d ticks\n"                        \
518            "tREFI     %d ticks\n"                        \
519            "tWTR      %d ticks\n"                        \
520            "tXAW (%d) %d ticks\n",
521            name(), tRCD, tCL, tRP, tBURST, tRFC, tREFI, tWTR,
522            activationLimit, tXAW);
523}
524
525void
526SimpleDRAM::printQs() const {
527
528    list<DRAMPacket*>::const_iterator i;
529
530    DPRINTF(DRAM, "===READ QUEUE===\n\n");
531    for (i = readQueue.begin() ;  i != readQueue.end() ; ++i) {
532        DPRINTF(DRAM, "Read %lu\n", (*i)->addr);
533    }
534    DPRINTF(DRAM, "\n===RESP QUEUE===\n\n");
535    for (i = respQueue.begin() ;  i != respQueue.end() ; ++i) {
536        DPRINTF(DRAM, "Response %lu\n", (*i)->addr);
537    }
538    DPRINTF(DRAM, "\n===WRITE QUEUE===\n\n");
539    for (i = writeQueue.begin() ;  i != writeQueue.end() ; ++i) {
540        DPRINTF(DRAM, "Write %lu\n", (*i)->addr);
541    }
542}
543
544bool
545SimpleDRAM::recvTimingReq(PacketPtr pkt)
546{
547    /// @todo temporary hack to deal with memory corruption issues until
548    /// 4-phase transactions are complete
549    for (int x = 0; x < pendingDelete.size(); x++)
550        delete pendingDelete[x];
551    pendingDelete.clear();
552
553    // This is where we enter from the outside world
554    DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n",
555            pkt->cmdString(),pkt->getAddr(), pkt->getSize());
556
557    // simply drop inhibited packets for now
558    if (pkt->memInhibitAsserted()) {
559        DPRINTF(DRAM,"Inhibited packet -- Dropping it now\n");
560        pendingDelete.push_back(pkt);
561        return true;
562    }
563
564   if (pkt->getSize() == bytesPerCacheLine)
565       cpuReqs++;
566
567   // Every million accesses, print the state of the queues
568   if (numReqs % 1000000 == 0)
569       printQs();
570
571    // Calc avg gap between requests
572    if (prevArrival != 0) {
573        totGap += curTick() - prevArrival;
574    }
575    prevArrival = curTick();
576
577    unsigned size = pkt->getSize();
578    if (size > bytesPerCacheLine)
579        panic("Request size %d is greater than burst size %d",
580              size, bytesPerCacheLine);
581
582    // check local buffers and do not accept if full
583    if (pkt->isRead()) {
584        assert(size != 0);
585        if (readQueueFull()) {
586            DPRINTF(DRAM, "Read queue full, not accepting\n");
587            // remember that we have to retry this port
588            retryRdReq = true;
589            numRdRetry++;
590            return false;
591        } else {
592            readPktSize[ceilLog2(size)]++;
593            addToReadQueue(pkt);
594            readReqs++;
595            numReqs++;
596        }
597    } else if (pkt->isWrite()) {
598        assert(size != 0);
599        if (writeQueueFull()) {
600            DPRINTF(DRAM, "Write queue full, not accepting\n");
601            // remember that we have to retry this port
602            retryWrReq = true;
603            numWrRetry++;
604            return false;
605        } else {
606            writePktSize[ceilLog2(size)]++;
607            addToWriteQueue(pkt);
608            writeReqs++;
609            numReqs++;
610        }
611    } else {
612        DPRINTF(DRAM,"Neither read nor write, ignore timing\n");
613        neitherReadNorWrite++;
614        accessAndRespond(pkt, 1);
615    }
616
617    retryRdReq = false;
618    retryWrReq = false;
619    return true;
620}
621
622void
623SimpleDRAM::processRespondEvent()
624{
625    DPRINTF(DRAM,
626            "processRespondEvent(): Some req has reached its readyTime\n");
627
628     PacketPtr pkt = respQueue.front()->pkt;
629
630     // Actually responds to the requestor
631     bytesConsumedRd += pkt->getSize();
632     bytesRead += bytesPerCacheLine;
633     accessAndRespond(pkt, frontendLatency + backendLatency);
634
635     delete respQueue.front();
636     respQueue.pop_front();
637
638     // Update stats
639     avgRdQLen = readQueue.size() + respQueue.size();
640
641     if (!respQueue.empty()) {
642         assert(respQueue.front()->readyTime >= curTick());
643         assert(!respondEvent.scheduled());
644         schedule(respondEvent, respQueue.front()->readyTime);
645     } else {
646         // if there is nothing left in any queue, signal a drain
647         if (writeQueue.empty() && readQueue.empty() &&
648             drainManager) {
649             drainManager->signalDrainDone();
650             drainManager = NULL;
651         }
652     }
653
654     // We have made a location in the queue available at this point,
655     // so if there is a read that was forced to wait, retry now
656     if (retryRdReq) {
657         retryRdReq = false;
658         port.sendRetry();
659     }
660}
661
662void
663SimpleDRAM::chooseNextWrite()
664{
665    // This method does the arbitration between write requests. The
666    // chosen packet is simply moved to the head of the write
667    // queue. The other methods know that this is the place to
668    // look. For example, with FCFS, this method does nothing
669    assert(!writeQueue.empty());
670
671    if (writeQueue.size() == 1) {
672        DPRINTF(DRAMWR, "Single write request, nothing to do\n");
673        return;
674    }
675
676    if (memSchedPolicy == Enums::fcfs) {
677        // Do nothing, since the correct request is already head
678    } else if (memSchedPolicy == Enums::frfcfs) {
679        list<DRAMPacket*>::iterator i = writeQueue.begin();
680        bool foundRowHit = false;
681        while (!foundRowHit && i != writeQueue.end()) {
682            DRAMPacket* dram_pkt = *i;
683            const Bank& bank = dram_pkt->bank_ref;
684            if (bank.openRow == dram_pkt->row) { //FR part
685                DPRINTF(DRAMWR, "Write row buffer hit\n");
686                writeQueue.erase(i);
687                writeQueue.push_front(dram_pkt);
688                foundRowHit = true;
689            } else { //FCFS part
690                ;
691            }
692            ++i;
693        }
694    } else
695        panic("No scheduling policy chosen\n");
696
697    DPRINTF(DRAMWR, "Selected next write request\n");
698}
699
700bool
701SimpleDRAM::chooseNextRead()
702{
703    // This method does the arbitration between read requests. The
704    // chosen packet is simply moved to the head of the queue. The
705    // other methods know that this is the place to look. For example,
706    // with FCFS, this method does nothing
707    if (readQueue.empty()) {
708        DPRINTF(DRAM, "No read request to select\n");
709        return false;
710    }
711
712    // If there is only one request then there is nothing left to do
713    if (readQueue.size() == 1)
714        return true;
715
716    if (memSchedPolicy == Enums::fcfs) {
717        // Do nothing, since the request to serve is already the first
718        // one in the read queue
719    } else if (memSchedPolicy == Enums::frfcfs) {
720        for (list<DRAMPacket*>::iterator i = readQueue.begin();
721             i != readQueue.end() ; ++i) {
722            DRAMPacket* dram_pkt = *i;
723            const Bank& bank = dram_pkt->bank_ref;
724            // Check if it is a row hit
725            if (bank.openRow == dram_pkt->row) { //FR part
726                DPRINTF(DRAM, "Row buffer hit\n");
727                readQueue.erase(i);
728                readQueue.push_front(dram_pkt);
729                break;
730            } else { //FCFS part
731                ;
732            }
733        }
734    } else
735        panic("No scheduling policy chosen!\n");
736
737    DPRINTF(DRAM, "Selected next read request\n");
738    return true;
739}
740
741void
742SimpleDRAM::accessAndRespond(PacketPtr pkt, Tick static_latency)
743{
744    DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());
745
746    bool needsResponse = pkt->needsResponse();
747    // do the actual memory access which also turns the packet into a
748    // response
749    access(pkt);
750
751    // turn packet around to go back to requester if response expected
752    if (needsResponse) {
753        // access already turned the packet into a response
754        assert(pkt->isResponse());
755
756        // @todo someone should pay for this
757        pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
758
759        // queue the packet in the response queue to be sent out after
760        // the static latency has passed
761        port.schedTimingResp(pkt, curTick() + static_latency);
762    } else {
763        // @todo the packet is going to be deleted, and the DRAMPacket
764        // is still having a pointer to it
765        pendingDelete.push_back(pkt);
766    }
767
768    DPRINTF(DRAM, "Done\n");
769
770    return;
771}
772
773pair<Tick, Tick>
774SimpleDRAM::estimateLatency(DRAMPacket* dram_pkt, Tick inTime)
775{
776    // If a request reaches a bank at tick 'inTime', how much time
777    // *after* that does it take to finish the request, depending
778    // on bank status and page open policy. Note that this method
779    // considers only the time taken for the actual read or write
780    // to complete, NOT any additional time thereafter for tRAS or
781    // tRP.
782    Tick accLat = 0;
783    Tick bankLat = 0;
784    rowHitFlag = false;
785
786    const Bank& bank = dram_pkt->bank_ref;
787    if (pageMgmt == Enums::open) { // open-page policy
788        if (bank.openRow == dram_pkt->row) {
789            // When we have a row-buffer hit,
790            // we don't care about tRAS having expired or not,
791            // but do care about bank being free for access
792            rowHitFlag = true;
793
794            if (bank.freeAt < inTime) {
795               // CAS latency only
796               accLat += tCL;
797               bankLat += tCL;
798            } else {
799                accLat += 0;
800                bankLat += 0;
801            }
802
803        } else {
804            // Row-buffer miss, need to close existing row
805            // once tRAS has expired, then open the new one,
806            // then add cas latency.
807            Tick freeTime = std::max(bank.tRASDoneAt, bank.freeAt);
808
809            if (freeTime > inTime)
810               accLat += freeTime - inTime;
811
812            accLat += tRP + tRCD + tCL;
813            bankLat += tRP + tRCD + tCL;
814        }
815    } else if (pageMgmt == Enums::close) {
816        // With a close page policy, no notion of
817        // bank.tRASDoneAt
818        if (bank.freeAt > inTime)
819            accLat += bank.freeAt - inTime;
820
821        // page already closed, simply open the row, and
822        // add cas latency
823        accLat += tRCD + tCL;
824        bankLat += tRCD + tCL;
825    } else
826        panic("No page management policy chosen\n");
827
828    DPRINTF(DRAM, "Returning < %lld, %lld > from estimateLatency()\n",
829            bankLat, accLat);
830
831    return make_pair(bankLat, accLat);
832}
833
834void
835SimpleDRAM::processNextReqEvent()
836{
837    scheduleNextReq();
838}
839
840void
841SimpleDRAM::recordActivate(Tick act_tick)
842{
843    assert(actTicks.size() == activationLimit);
844
845    DPRINTF(DRAM, "Activate at tick %d\n", act_tick);
846
847    // sanity check
848    if (actTicks.back() && (act_tick - actTicks.back()) < tXAW) {
849        panic("Got %d activates in window %d (%d - %d) which is smaller "
850              "than %d\n", activationLimit, act_tick - actTicks.back(),
851              act_tick, actTicks.back(), tXAW);
852    }
853
854    // shift the times used for the book keeping, the last element
855    // (highest index) is the oldest one and hence the lowest value
856    actTicks.pop_back();
857
858    // record an new activation (in the future)
859    actTicks.push_front(act_tick);
860
861    // cannot activate more than X times in time window tXAW, push the
862    // next one (the X + 1'st activate) to be tXAW away from the
863    // oldest in our window of X
864    if (actTicks.back() && (act_tick - actTicks.back()) < tXAW) {
865        DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate no earlier "
866                "than %d\n", activationLimit, actTicks.back() + tXAW);
867        for(int i = 0; i < ranksPerChannel; i++)
868            for(int j = 0; j < banksPerRank; j++)
869                // next activate must not happen before end of window
870                banks[i][j].freeAt = std::max(banks[i][j].freeAt,
871                                              actTicks.back() + tXAW);
872    }
873}
874
875void
876SimpleDRAM::doDRAMAccess(DRAMPacket* dram_pkt)
877{
878
879    DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
880            dram_pkt->addr, dram_pkt->rank, dram_pkt->bank, dram_pkt->row);
881
882    // estimate the bank and access latency
883    pair<Tick, Tick> lat = estimateLatency(dram_pkt, curTick());
884    Tick bankLat = lat.first;
885    Tick accessLat = lat.second;
886
887    // This request was woken up at this time based on a prior call
888    // to estimateLatency(). However, between then and now, both the
889    // accessLatency and/or busBusyUntil may have changed. We need
890    // to correct for that.
891
892    Tick addDelay = (curTick() + accessLat < busBusyUntil) ?
893        busBusyUntil - (curTick() + accessLat) : 0;
894
895    Bank& bank = dram_pkt->bank_ref;
896
897    // Update bank state
898    if (pageMgmt == Enums::open) {
899        bank.openRow = dram_pkt->row;
900        bank.freeAt = curTick() + addDelay + accessLat;
901        // If you activated a new row do to this access, the next access
902        // will have to respect tRAS for this bank. Assume tRAS ~= 3 * tRP.
903        // Also need to account for t_XAW
904        if (!rowHitFlag) {
905            bank.tRASDoneAt = bank.freeAt + tRP;
906            recordActivate(bank.freeAt - tCL - tRCD); //since this is open page,
907                                                      //no tRP by default
908        }
909    } else if (pageMgmt == Enums::close) { // accounting for tRAS also
910        // assuming that tRAS ~= 3 * tRP, and tRC ~= 4 * tRP, as is common
911        // (refer Jacob/Ng/Wang and Micron datasheets)
912        bank.freeAt = curTick() + addDelay + accessLat + tRP + tRP;
913        recordActivate(bank.freeAt - tRP - tRP - tCL - tRCD); //essentially (freeAt - tRC)
914        DPRINTF(DRAM,"doDRAMAccess::bank.freeAt is %lld\n",bank.freeAt);
915    } else
916        panic("No page management policy chosen\n");
917
918    // Update request parameters
919    dram_pkt->readyTime = curTick() + addDelay + accessLat + tBURST;
920
921
922    DPRINTF(DRAM, "Req %lld: curtick is %lld accessLat is %d " \
923                  "readytime is %lld busbusyuntil is %lld. " \
924                  "Scheduling at readyTime\n", dram_pkt->addr,
925                   curTick(), accessLat, dram_pkt->readyTime, busBusyUntil);
926
927    // Make sure requests are not overlapping on the databus
928    assert (dram_pkt->readyTime - busBusyUntil >= tBURST);
929
930    // Update bus state
931    busBusyUntil = dram_pkt->readyTime;
932
933    DPRINTF(DRAM,"Access time is %lld\n",
934            dram_pkt->readyTime - dram_pkt->entryTime);
935
936    // Update stats
937    totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
938    totBankLat += bankLat;
939    totBusLat += tBURST;
940    totQLat += dram_pkt->readyTime - dram_pkt->entryTime - bankLat - tBURST;
941
942    if (rowHitFlag)
943        readRowHits++;
944
945    // At this point we're done dealing with the request
946    // It will be moved to a separate response queue with a
947    // correct readyTime, and eventually be sent back at that
948    //time
949    moveToRespQ();
950
951    // The absolute soonest you have to start thinking about the
952    // next request is the longest access time that can occur before
953    // busBusyUntil. Assuming you need to meet tRAS, then precharge,
954    // open a new row, and access, it is ~4*tRCD.
955
956
957    Tick newTime = (busBusyUntil > 4 * tRCD) ?
958                   std::max(busBusyUntil - 4 * tRCD, curTick()) :
959                   curTick();
960
961    if (!nextReqEvent.scheduled() && !stopReads){
962        schedule(nextReqEvent, newTime);
963    } else {
964        if (newTime < nextReqEvent.when())
965            reschedule(nextReqEvent, newTime);
966    }
967
968
969}
970
971void
972SimpleDRAM::moveToRespQ()
973{
974    // Remove from read queue
975    DRAMPacket* dram_pkt = readQueue.front();
976    readQueue.pop_front();
977
978    // Insert into response queue sorted by readyTime
979    // It will be sent back to the requestor at its
980    // readyTime
981    if (respQueue.empty()) {
982        respQueue.push_front(dram_pkt);
983        assert(!respondEvent.scheduled());
984        assert(dram_pkt->readyTime >= curTick());
985        schedule(respondEvent, dram_pkt->readyTime);
986    } else {
987        bool done = false;
988        list<DRAMPacket*>::iterator i = respQueue.begin();
989        while (!done && i != respQueue.end()) {
990            if ((*i)->readyTime > dram_pkt->readyTime) {
991                respQueue.insert(i, dram_pkt);
992                done = true;
993            }
994            ++i;
995        }
996
997        if (!done)
998            respQueue.push_back(dram_pkt);
999
1000        assert(respondEvent.scheduled());
1001
1002        if (respQueue.front()->readyTime < respondEvent.when()) {
1003            assert(respQueue.front()->readyTime >= curTick());
1004            reschedule(respondEvent, respQueue.front()->readyTime);
1005        }
1006    }
1007}
1008
1009void
1010SimpleDRAM::scheduleNextReq()
1011{
1012    DPRINTF(DRAM, "Reached scheduleNextReq()\n");
1013
1014    // Figure out which read request goes next, and move it to the
1015    // front of the read queue
1016    if (!chooseNextRead()) {
1017        // In the case there is no read request to go next, see if we
1018        // are asked to drain, and if so trigger writes, this also
1019        // ensures that if we hit the write limit we will do this
1020        // multiple times until we are completely drained
1021        if (drainManager && !writeQueue.empty() && !writeEvent.scheduled())
1022            triggerWrites();
1023    } else {
1024        doDRAMAccess(readQueue.front());
1025    }
1026}
1027
1028Tick
1029SimpleDRAM::maxBankFreeAt() const
1030{
1031    Tick banksFree = 0;
1032
1033    for(int i = 0; i < ranksPerChannel; i++)
1034        for(int j = 0; j < banksPerRank; j++)
1035            banksFree = std::max(banks[i][j].freeAt, banksFree);
1036
1037    return banksFree;
1038}
1039
1040void
1041SimpleDRAM::processRefreshEvent()
1042{
1043    DPRINTF(DRAM, "Refreshing at tick %ld\n", curTick());
1044
1045    Tick banksFree = std::max(curTick(), maxBankFreeAt()) + tRFC;
1046
1047    for(int i = 0; i < ranksPerChannel; i++)
1048        for(int j = 0; j < banksPerRank; j++)
1049            banks[i][j].freeAt = banksFree;
1050
1051    schedule(refreshEvent, curTick() + tREFI);
1052}
1053
1054void
1055SimpleDRAM::regStats()
1056{
1057    using namespace Stats;
1058
1059    AbstractMemory::regStats();
1060
1061    readReqs
1062        .name(name() + ".readReqs")
1063        .desc("Total number of read requests seen");
1064
1065    writeReqs
1066        .name(name() + ".writeReqs")
1067        .desc("Total number of write requests seen");
1068
1069    servicedByWrQ
1070        .name(name() + ".servicedByWrQ")
1071        .desc("Number of read reqs serviced by write Q");
1072
1073    cpuReqs
1074        .name(name() + ".cpureqs")
1075        .desc("Reqs generatd by CPU via cache - shady");
1076
1077    neitherReadNorWrite
1078        .name(name() + ".neitherReadNorWrite")
1079        .desc("Reqs where no action is needed");
1080
1081    perBankRdReqs
1082        .init(banksPerRank * ranksPerChannel)
1083        .name(name() + ".perBankRdReqs")
1084        .desc("Track reads on a per bank basis");
1085
1086    perBankWrReqs
1087        .init(banksPerRank * ranksPerChannel)
1088        .name(name() + ".perBankWrReqs")
1089        .desc("Track writes on a per bank basis");
1090
1091    avgRdQLen
1092        .name(name() + ".avgRdQLen")
1093        .desc("Average read queue length over time")
1094        .precision(2);
1095
1096    avgWrQLen
1097        .name(name() + ".avgWrQLen")
1098        .desc("Average write queue length over time")
1099        .precision(2);
1100
1101    totQLat
1102        .name(name() + ".totQLat")
1103        .desc("Total cycles spent in queuing delays");
1104
1105    totBankLat
1106        .name(name() + ".totBankLat")
1107        .desc("Total cycles spent in bank access");
1108
1109    totBusLat
1110        .name(name() + ".totBusLat")
1111        .desc("Total cycles spent in databus access");
1112
1113    totMemAccLat
1114        .name(name() + ".totMemAccLat")
1115        .desc("Sum of mem lat for all requests");
1116
1117    avgQLat
1118        .name(name() + ".avgQLat")
1119        .desc("Average queueing delay per request")
1120        .precision(2);
1121
1122    avgQLat = totQLat / (readReqs - servicedByWrQ);
1123
1124    avgBankLat
1125        .name(name() + ".avgBankLat")
1126        .desc("Average bank access latency per request")
1127        .precision(2);
1128
1129    avgBankLat = totBankLat / (readReqs - servicedByWrQ);
1130
1131    avgBusLat
1132        .name(name() + ".avgBusLat")
1133        .desc("Average bus latency per request")
1134        .precision(2);
1135
1136    avgBusLat = totBusLat / (readReqs - servicedByWrQ);
1137
1138    avgMemAccLat
1139        .name(name() + ".avgMemAccLat")
1140        .desc("Average memory access latency")
1141        .precision(2);
1142
1143    avgMemAccLat = totMemAccLat / (readReqs - servicedByWrQ);
1144
1145    numRdRetry
1146        .name(name() + ".numRdRetry")
1147        .desc("Number of times rd buffer was full causing retry");
1148
1149    numWrRetry
1150        .name(name() + ".numWrRetry")
1151        .desc("Number of times wr buffer was full causing retry");
1152
1153    readRowHits
1154        .name(name() + ".readRowHits")
1155        .desc("Number of row buffer hits during reads");
1156
1157    writeRowHits
1158        .name(name() + ".writeRowHits")
1159        .desc("Number of row buffer hits during writes");
1160
1161    readRowHitRate
1162        .name(name() + ".readRowHitRate")
1163        .desc("Row buffer hit rate for reads")
1164        .precision(2);
1165
1166    readRowHitRate = (readRowHits / (readReqs - servicedByWrQ)) * 100;
1167
1168    writeRowHitRate
1169        .name(name() + ".writeRowHitRate")
1170        .desc("Row buffer hit rate for writes")
1171        .precision(2);
1172
1173    writeRowHitRate = (writeRowHits / writeReqs) * 100;
1174
1175    readPktSize
1176        .init(ceilLog2(bytesPerCacheLine) + 1)
1177        .name(name() + ".readPktSize")
1178        .desc("Categorize read packet sizes");
1179
1180     writePktSize
1181        .init(ceilLog2(bytesPerCacheLine) + 1)
1182        .name(name() + ".writePktSize")
1183        .desc("Categorize write packet sizes");
1184
1185     rdQLenPdf
1186        .init(readBufferSize)
1187        .name(name() + ".rdQLenPdf")
1188        .desc("What read queue length does an incoming req see");
1189
1190     wrQLenPdf
1191        .init(writeBufferSize)
1192        .name(name() + ".wrQLenPdf")
1193        .desc("What write queue length does an incoming req see");
1194
1195
1196    bytesRead
1197        .name(name() + ".bytesRead")
1198        .desc("Total number of bytes read from memory");
1199
1200    bytesWritten
1201        .name(name() + ".bytesWritten")
1202        .desc("Total number of bytes written to memory");
1203
1204    bytesConsumedRd
1205        .name(name() + ".bytesConsumedRd")
1206        .desc("bytesRead derated as per pkt->getSize()");
1207
1208    bytesConsumedWr
1209        .name(name() + ".bytesConsumedWr")
1210        .desc("bytesWritten derated as per pkt->getSize()");
1211
1212    avgRdBW
1213        .name(name() + ".avgRdBW")
1214        .desc("Average achieved read bandwidth in MB/s")
1215        .precision(2);
1216
1217    avgRdBW = (bytesRead / 1000000) / simSeconds;
1218
1219    avgWrBW
1220        .name(name() + ".avgWrBW")
1221        .desc("Average achieved write bandwidth in MB/s")
1222        .precision(2);
1223
1224    avgWrBW = (bytesWritten / 1000000) / simSeconds;
1225
1226    avgConsumedRdBW
1227        .name(name() + ".avgConsumedRdBW")
1228        .desc("Average consumed read bandwidth in MB/s")
1229        .precision(2);
1230
1231    avgConsumedRdBW = (bytesConsumedRd / 1000000) / simSeconds;
1232
1233    avgConsumedWrBW
1234        .name(name() + ".avgConsumedWrBW")
1235        .desc("Average consumed write bandwidth in MB/s")
1236        .precision(2);
1237
1238    avgConsumedWrBW = (bytesConsumedWr / 1000000) / simSeconds;
1239
1240    peakBW
1241        .name(name() + ".peakBW")
1242        .desc("Theoretical peak bandwidth in MB/s")
1243        .precision(2);
1244
1245    peakBW = (SimClock::Frequency / tBURST) * bytesPerCacheLine / 1000000;
1246
1247    busUtil
1248        .name(name() + ".busUtil")
1249        .desc("Data bus utilization in percentage")
1250        .precision(2);
1251
1252    busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
1253
1254    totGap
1255        .name(name() + ".totGap")
1256        .desc("Total gap between requests");
1257
1258    avgGap
1259        .name(name() + ".avgGap")
1260        .desc("Average gap between requests")
1261        .precision(2);
1262
1263    avgGap = totGap / (readReqs + writeReqs);
1264}
1265
1266void
1267SimpleDRAM::recvFunctional(PacketPtr pkt)
1268{
1269    // rely on the abstract memory
1270    functionalAccess(pkt);
1271}
1272
1273BaseSlavePort&
1274SimpleDRAM::getSlavePort(const string &if_name, PortID idx)
1275{
1276    if (if_name != "port") {
1277        return MemObject::getSlavePort(if_name, idx);
1278    } else {
1279        return port;
1280    }
1281}
1282
1283unsigned int
1284SimpleDRAM::drain(DrainManager *dm)
1285{
1286    unsigned int count = port.drain(dm);
1287
1288    // if there is anything in any of our internal queues, keep track
1289    // of that as well
1290    if (!(writeQueue.empty() && readQueue.empty() &&
1291          respQueue.empty())) {
1292        DPRINTF(Drain, "DRAM controller not drained, write: %d, read: %d,"
1293                " resp: %d\n", writeQueue.size(), readQueue.size(),
1294                respQueue.size());
1295        ++count;
1296        drainManager = dm;
1297        // the only part that is not drained automatically over time
1298        // is the write queue, thus trigger writes if there are any
1299        // waiting and no reads waiting, otherwise wait until the
1300        // reads are done
1301        if (readQueue.empty() && !writeQueue.empty() &&
1302            !writeEvent.scheduled())
1303            triggerWrites();
1304    }
1305
1306    if (count)
1307        setDrainState(Drainable::Draining);
1308    else
1309        setDrainState(Drainable::Drained);
1310    return count;
1311}
1312
1313SimpleDRAM::MemoryPort::MemoryPort(const std::string& name, SimpleDRAM& _memory)
1314    : QueuedSlavePort(name, &_memory, queue), queue(_memory, *this),
1315      memory(_memory)
1316{ }
1317
1318AddrRangeList
1319SimpleDRAM::MemoryPort::getAddrRanges() const
1320{
1321    AddrRangeList ranges;
1322    ranges.push_back(memory.getAddrRange());
1323    return ranges;
1324}
1325
1326void
1327SimpleDRAM::MemoryPort::recvFunctional(PacketPtr pkt)
1328{
1329    pkt->pushLabel(memory.name());
1330
1331    if (!queue.checkFunctional(pkt)) {
1332        // Default implementation of SimpleTimingPort::recvFunctional()
1333        // calls recvAtomic() and throws away the latency; we can save a
1334        // little here by just not calculating the latency.
1335        memory.recvFunctional(pkt);
1336    }
1337
1338    pkt->popLabel();
1339}
1340
1341Tick
1342SimpleDRAM::MemoryPort::recvAtomic(PacketPtr pkt)
1343{
1344    return memory.recvAtomic(pkt);
1345}
1346
1347bool
1348SimpleDRAM::MemoryPort::recvTimingReq(PacketPtr pkt)
1349{
1350    // pass it to the memory controller
1351    return memory.recvTimingReq(pkt);
1352}
1353
1354SimpleDRAM*
1355SimpleDRAMParams::create()
1356{
1357    return new SimpleDRAM(this);
1358}
1359