dram_ctrl.cc (10509:d5554f97c451)
1/*
2 * Copyright (c) 2010-2014 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2013 Amin Farmahini-Farahani
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Andreas Hansson
41 * Ani Udipi
42 * Neha Agarwal
43 */
44
45#include "base/bitfield.hh"
46#include "base/trace.hh"
47#include "debug/DRAM.hh"
48#include "debug/DRAMPower.hh"
49#include "debug/DRAMState.hh"
50#include "debug/Drain.hh"
51#include "mem/dram_ctrl.hh"
52#include "sim/system.hh"
53
54using namespace std;
55using namespace Data;
56
57DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
58 AbstractMemory(p),
59 port(name() + ".port", *this),
60 retryRdReq(false), retryWrReq(false),
61 busState(READ),
62 nextReqEvent(this), respondEvent(this), activateEvent(this),
63 prechargeEvent(this), refreshEvent(this), powerEvent(this),
64 drainManager(NULL),
65 deviceSize(p->device_size),
66 deviceBusWidth(p->device_bus_width), burstLength(p->burst_length),
67 deviceRowBufferSize(p->device_rowbuffer_size),
68 devicesPerRank(p->devices_per_rank),
69 burstSize((devicesPerRank * burstLength * deviceBusWidth) / 8),
70 rowBufferSize(devicesPerRank * deviceRowBufferSize),
71 columnsPerRowBuffer(rowBufferSize / burstSize),
72 columnsPerStripe(range.granularity() / burstSize),
73 ranksPerChannel(p->ranks_per_channel),
74 bankGroupsPerRank(p->bank_groups_per_rank),
75 bankGroupArch(p->bank_groups_per_rank > 0),
76 banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
77 readBufferSize(p->read_buffer_size),
78 writeBufferSize(p->write_buffer_size),
79 writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0),
80 writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
81 minWritesPerSwitch(p->min_writes_per_switch),
82 writesThisTime(0), readsThisTime(0),
83 tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST),
84 tCCD_L(p->tCCD_L), tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS),
85 tWR(p->tWR), tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
86 tRRD_L(p->tRRD_L), tXAW(p->tXAW), activationLimit(p->activation_limit),
87 memSchedPolicy(p->mem_sched_policy), addrMapping(p->addr_mapping),
88 pageMgmt(p->page_policy),
89 maxAccessesPerRow(p->max_accesses_per_row),
90 frontendLatency(p->static_frontend_latency),
91 backendLatency(p->static_backend_latency),
92 busBusyUntil(0), refreshDueAt(0), refreshState(REF_IDLE),
93 pwrStateTrans(PWR_IDLE), pwrState(PWR_IDLE), prevArrival(0),
94 nextReqTime(0), pwrStateTick(0), numBanksActive(0),
95 activeRank(0), timeStampOffset(0)
96{
97 // create the bank states based on the dimensions of the ranks and
98 // banks
99 banks.resize(ranksPerChannel);
100
101 // create a list of DRAMPower objects, one instance per rank
102 for (int i = 0; i < ranksPerChannel; i++) {
103 DRAMPower drampower = DRAMPower(p, false);
104 rankPower.emplace_back(drampower);
105 }
106
107 actTicks.resize(ranksPerChannel);
108 for (size_t c = 0; c < ranksPerChannel; ++c) {
109 banks[c].resize(banksPerRank);
110 actTicks[c].resize(activationLimit, 0);
111 }
112
113 // set the bank indices
114 for (int r = 0; r < ranksPerChannel; r++) {
115 for (int b = 0; b < banksPerRank; b++) {
116 banks[r][b].rank = r;
117 banks[r][b].bank = b;
118 if (bankGroupArch) {
119 // Simply assign lower bits to bank group in order to
120 // rotate across bank groups as banks are incremented
121 // e.g. with 4 banks per bank group and 16 banks total:
122 // banks 0,4,8,12 are in bank group 0
123 // banks 1,5,9,13 are in bank group 1
124 // banks 2,6,10,14 are in bank group 2
125 // banks 3,7,11,15 are in bank group 3
126 banks[r][b].bankgr = b % bankGroupsPerRank;
127 } else {
128 // No bank groups; simply assign to bank number
129 banks[r][b].bankgr = b;
130 }
131 }
132 }
133
134 // perform a basic check of the write thresholds
135 if (p->write_low_thresh_perc >= p->write_high_thresh_perc)
136 fatal("Write buffer low threshold %d must be smaller than the "
137 "high threshold %d\n", p->write_low_thresh_perc,
138 p->write_high_thresh_perc);
139
140 // determine the rows per bank by looking at the total capacity
141 uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
142
143 // determine the dram actual capacity from the DRAM config in Mbytes
144 uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
145 ranksPerChannel;
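// as a hypothetical illustration: with a 64 MByte device, 8 devices
// per rank and 2 ranks per channel, deviceCapacity is
// 64 * 8 * 2 = 1024 MBytes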
146
147 // warn if the actual DRAM size does not match the memory capacity assigned to the system
148 if (deviceCapacity != capacity / (1024 * 1024))
149 warn("DRAM device capacity (%d Mbytes) does not match the "
150 "address range assigned (%d Mbytes)\n", deviceCapacity,
151 capacity / (1024 * 1024));
152
153 DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
154 AbstractMemory::size());
155
156 DPRINTF(DRAM, "Row buffer size %d bytes with %d columns per row buffer\n",
157 rowBufferSize, columnsPerRowBuffer);
158
159 rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
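// e.g. (hypothetical values) a 1 GByte channel with a 2 kByte row
// buffer, 8 banks per rank and 2 ranks gives
// 2^30 / (2048 * 8 * 2) = 32768 rows per bank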
160
161 // a few sanity checks on the interleaving
162 if (range.interleaved()) {
163 if (channels != range.stripes())
164 fatal("%s has %d interleaved address stripes but %d channel(s)\n",
165 name(), range.stripes(), channels);
166
167 if (addrMapping == Enums::RoRaBaChCo) {
168 if (rowBufferSize != range.granularity()) {
169 fatal("Channel interleaving of %s doesn't match RoRaBaChCo "
170 "address map\n", name());
171 }
172 } else if (addrMapping == Enums::RoRaBaCoCh ||
173 addrMapping == Enums::RoCoRaBaCh) {
174 // for the interleavings with channel bits in the bottom,
175 // if the system uses a channel striping granularity that
176 // is larger than the DRAM burst size, then map the
177 // sequential accesses within a stripe to a number of
178 // columns in the DRAM, effectively placing some of the
179 // lower-order column bits as the least-significant bits
180 // of the address (above the ones denoting the burst size)
181 assert(columnsPerStripe >= 1);
182
183 // channel striping has to be done at a granularity that
184 // is equal or larger to a cache line
185 if (system()->cacheLineSize() > range.granularity()) {
186 fatal("Channel interleaving of %s must be at least as large "
187 "as the cache line size\n", name());
188 }
189
190 // ...and equal or smaller than the row-buffer size
191 if (rowBufferSize < range.granularity()) {
192 fatal("Channel interleaving of %s must be at most as large "
193 "as the row-buffer size\n", name());
194 }
195 // this is essentially the check above, so just to be sure
196 assert(columnsPerStripe <= columnsPerRowBuffer);
197 }
198 }
199
200 // some basic sanity checks
201 if (tREFI <= tRP || tREFI <= tRFC) {
202 fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
203 tREFI, tRP, tRFC);
204 }
205
206 // basic bank group architecture checks ->
207 if (bankGroupArch) {
208 // must have at least one bank per bank group
209 if (bankGroupsPerRank > banksPerRank) {
210 fatal("banks per rank (%d) must be equal to or larger than "
211 "bank groups per rank (%d)\n",
212 banksPerRank, bankGroupsPerRank);
213 }
214 // must have same number of banks in each bank group
215 if ((banksPerRank % bankGroupsPerRank) != 0) {
216 fatal("Banks per rank (%d) must be evenly divisible by bank groups "
217 "per rank (%d) for equal banks per bank group\n",
218 banksPerRank, bankGroupsPerRank);
219 }
220 // tCCD_L should be greater than the minimal, back-to-back burst delay (tBURST)
221 if (tCCD_L <= tBURST) {
222 fatal("tCCD_L (%d) should be larger than tBURST (%d) when "
223 "bank groups per rank (%d) is greater than 1\n",
224 tCCD_L, tBURST, bankGroupsPerRank);
225 }
226 // tRRD_L, the same bank group ACT-to-ACT delay, should be greater than the minimal tRRD
227 if (tRRD_L <= tRRD) {
228 fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
229 "bank groups per rank (%d) is greater than 1\n",
230 tRRD_L, tRRD, bankGroupsPerRank);
231 }
232 }
233
234}
235
236void
237DRAMCtrl::init()
238{
239 AbstractMemory::init();
240
241 if (!port.isConnected()) {
242 fatal("DRAMCtrl %s is unconnected!\n", name());
243 } else {
244 port.sendRangeChange();
245 }
246}
247
248void
249DRAMCtrl::startup()
250{
251 // timestamp offset should be in clock cycles for DRAMPower
252 timeStampOffset = divCeil(curTick(), tCK);
253 // update the start tick for the precharge accounting to the
254 // current tick
255 pwrStateTick = curTick();
256
257 // shift the bus busy time sufficiently far ahead that we never
258 // have to worry about negative values when computing the time for
259 // the next request, this will add an insignificant bubble at the
260 // start of simulation
261 busBusyUntil = curTick() + tRP + tRCD + tCL;
262
263 // kick off the refresh, and give ourselves enough time to
264 // precharge
265 schedule(refreshEvent, curTick() + tREFI - tRP);
266}
267
268Tick
269DRAMCtrl::recvAtomic(PacketPtr pkt)
270{
271 DPRINTF(DRAM, "recvAtomic: %s 0x%x\n", pkt->cmdString(), pkt->getAddr());
272
273 // do the actual memory access and turn the packet into a response
274 access(pkt);
275
276 Tick latency = 0;
277 if (!pkt->memInhibitAsserted() && pkt->hasData()) {
278 // this value is not supposed to be accurate, just enough to
279 // keep things going, mimic a closed page
280 latency = tRP + tRCD + tCL;
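// e.g. roughly 45 ns with hypothetical timings of
// tRP = tRCD = tCL = 15 ns each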
281 }
282 return latency;
283}
284
285bool
286DRAMCtrl::readQueueFull(unsigned int neededEntries) const
287{
288 DPRINTF(DRAM, "Read queue limit %d, current size %d, entries needed %d\n",
289 readBufferSize, readQueue.size() + respQueue.size(),
290 neededEntries);
291
292 return
293 (readQueue.size() + respQueue.size() + neededEntries) > readBufferSize;
294}
295
296bool
297DRAMCtrl::writeQueueFull(unsigned int neededEntries) const
298{
299 DPRINTF(DRAM, "Write queue limit %d, current size %d, entries needed %d\n",
300 writeBufferSize, writeQueue.size(), neededEntries);
301 return (writeQueue.size() + neededEntries) > writeBufferSize;
302}
303
304DRAMCtrl::DRAMPacket*
305DRAMCtrl::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size,
306 bool isRead)
307{
308 // decode the address based on the address mapping scheme, with
309 // Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and
310 // channel, respectively
311 uint8_t rank;
312 uint8_t bank;
313 // use a 64-bit unsigned during the computations as the row is
314 // always the top bits, and check before creating the DRAMPacket
315 uint64_t row;
316
317 // truncate the address to a DRAM burst, which makes it unique to
318 // a specific column, row, bank, rank and channel
319 Addr addr = dramPktAddr / burstSize;
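// e.g. (hypothetical values) with a 64 byte burst size, byte address
// 0x1040 becomes burst address 0x41; all bytes within the same
// 64 byte window decode to the same channel, rank, bank, row and column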
320
321 // we have removed the lowest order address bits that denote the
322 // position within the column
323 if (addrMapping == Enums::RoRaBaChCo) {
324 // the lowest order bits denote the column to ensure that
325 // sequential cache lines occupy the same row
326 addr = addr / columnsPerRowBuffer;
327
328 // take out the channel part of the address
329 addr = addr / channels;
330
331 // after the channel bits, get the bank bits to interleave
332 // over the banks
333 bank = addr % banksPerRank;
334 addr = addr / banksPerRank;
335
336 // after the bank, we get the rank bits which thus interleaves
337 // over the ranks
338 rank = addr % ranksPerChannel;
339 addr = addr / ranksPerChannel;
340
341 // lastly, get the row bits
342 row = addr % rowsPerBank;
343 addr = addr / rowsPerBank;
344 } else if (addrMapping == Enums::RoRaBaCoCh) {
345 // take out the lower-order column bits
346 addr = addr / columnsPerStripe;
347
348 // take out the channel part of the address
349 addr = addr / channels;
350
351 // next, the higher-order column bits
352 addr = addr / (columnsPerRowBuffer / columnsPerStripe);
353
354 // after the column bits, we get the bank bits to interleave
355 // over the banks
356 bank = addr % banksPerRank;
357 addr = addr / banksPerRank;
358
359 // after the bank, we get the rank bits which thus interleaves
360 // over the ranks
361 rank = addr % ranksPerChannel;
362 addr = addr / ranksPerChannel;
363
364 // lastly, get the row bits
365 row = addr % rowsPerBank;
366 addr = addr / rowsPerBank;
367 } else if (addrMapping == Enums::RoCoRaBaCh) {
368 // optimise for closed page mode and utilise maximum
369 // parallelism of the DRAM (at the cost of power)
370
371 // take out the lower-order column bits
372 addr = addr / columnsPerStripe;
373
374 // take out the channel part of the address; note that this has
375 // to match with how accesses are interleaved between the
376 // controllers in the address mapping
377 addr = addr / channels;
378
379 // start with the bank bits, as this provides the maximum
380 // opportunity for parallelism between requests
381 bank = addr % banksPerRank;
382 addr = addr / banksPerRank;
383
384 // next get the rank bits
385 rank = addr % ranksPerChannel;
386 addr = addr / ranksPerChannel;
387
388 // next, the higher-order column bits
389 addr = addr / (columnsPerRowBuffer / columnsPerStripe);
390
391 // lastly, get the row bits
392 row = addr % rowsPerBank;
393 addr = addr / rowsPerBank;
394 } else
395 panic("Unknown address mapping policy chosen!");
396
397 assert(rank < ranksPerChannel);
398 assert(bank < banksPerRank);
399 assert(row < rowsPerBank);
400 assert(row < Bank::NO_ROW);
401
402 DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
403 dramPktAddr, rank, bank, row);
404
405 // create the corresponding DRAM packet with the entry time and
406 // ready time set to the current tick, the latter will be updated
407 // later
408 uint16_t bank_id = banksPerRank * rank + bank;
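// e.g. with 8 banks per rank, rank 1 bank 3 gets the flat
// bank_id 8 * 1 + 3 = 11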
409 return new DRAMPacket(pkt, isRead, rank, bank, row, bank_id, dramPktAddr,
410 size, banks[rank][bank]);
411}
412
413void
414DRAMCtrl::addToReadQueue(PacketPtr pkt, unsigned int pktCount)
415{
416 // only add to the read queue here. whenever the request is
417 // eventually done, set the readyTime, and call schedule()
418 assert(!pkt->isWrite());
419
420 assert(pktCount != 0);
421
422 // if the request size is larger than burst size, the pkt is split into
423 // multiple DRAM packets
424 // Note that if the pkt starting address is not aligned to burst size, the
425 // address of the first DRAM packet is kept unaligned. Subsequent DRAM packets
426 // are aligned to burst size boundaries. This is to ensure we accurately
427 // check read packets against packets in write queue.
428 Addr addr = pkt->getAddr();
429 unsigned pktsServicedByWrQ = 0;
430 BurstHelper* burst_helper = NULL;
431 for (int cnt = 0; cnt < pktCount; ++cnt) {
432 unsigned size = std::min((addr | (burstSize - 1)) + 1,
433 pkt->getAddr() + pkt->getSize()) - addr;
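// e.g. (hypothetical values) with a 64 byte burst, a 32 byte packet
// starting at addr 0x70 yields min(0x80, 0x90) - 0x70 = 16 bytes for
// this first burst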
434 readPktSize[ceilLog2(size)]++;
435 readBursts++;
436
437 // First check write buffer to see if the data is already at
438 // the controller
439 bool foundInWrQ = false;
440 for (auto i = writeQueue.begin(); i != writeQueue.end(); ++i) {
441 // check if the read is subsumed in the write entry we are
442 // looking at
443 if ((*i)->addr <= addr &&
444 (addr + size) <= ((*i)->addr + (*i)->size)) {
445 foundInWrQ = true;
446 servicedByWrQ++;
447 pktsServicedByWrQ++;
448 DPRINTF(DRAM, "Read to addr %lld with size %d serviced by "
449 "write queue\n", addr, size);
450 bytesReadWrQ += burstSize;
451 break;
452 }
453 }
454
455 // If not found in the write q, make a DRAM packet and
456 // push it onto the read queue
457 if (!foundInWrQ) {
458
459 // Make the burst helper for split packets
460 if (pktCount > 1 && burst_helper == NULL) {
461 DPRINTF(DRAM, "Read to addr %lld translates to %d "
462 "dram requests\n", pkt->getAddr(), pktCount);
463 burst_helper = new BurstHelper(pktCount);
464 }
465
466 DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size, true);
467 dram_pkt->burstHelper = burst_helper;
468
469 assert(!readQueueFull(1));
470 rdQLenPdf[readQueue.size() + respQueue.size()]++;
471
472 DPRINTF(DRAM, "Adding to read queue\n");
473
474 readQueue.push_back(dram_pkt);
475
476 // Update stats
477 avgRdQLen = readQueue.size() + respQueue.size();
478 }
479
480 // Starting address of next dram pkt (aligned to burstSize boundary)
481 addr = (addr | (burstSize - 1)) + 1;
482 }
483
484 // If all packets are serviced by the write queue, we send the response back
485 if (pktsServicedByWrQ == pktCount) {
486 accessAndRespond(pkt, frontendLatency);
487 return;
488 }
489
490 // Update how many split packets are serviced by write queue
491 if (burst_helper != NULL)
492 burst_helper->burstsServiced = pktsServicedByWrQ;
493
494 // If we are not already scheduled to get a request out of the
495 // queue, do so now
496 if (!nextReqEvent.scheduled()) {
497 DPRINTF(DRAM, "Request scheduled immediately\n");
498 schedule(nextReqEvent, curTick());
499 }
500}
501
502void
503DRAMCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pktCount)
504{
505 // only add to the write queue here. whenever the request is
506 // eventually done, set the readyTime, and call schedule()
507 assert(pkt->isWrite());
508
509 // if the request size is larger than burst size, the pkt is split into
510 // multiple DRAM packets
511 Addr addr = pkt->getAddr();
512 for (int cnt = 0; cnt < pktCount; ++cnt) {
513 unsigned size = std::min((addr | (burstSize - 1)) + 1,
514 pkt->getAddr() + pkt->getSize()) - addr;
515 writePktSize[ceilLog2(size)]++;
516 writeBursts++;
517
518 // see if we can merge with an existing item in the write
519 // queue and keep track of whether we have merged or not so we
520 // can stop at that point and also avoid enqueueing a new
521 // request
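// e.g. (hypothetical values) with a 64 byte burst size, an existing
// entry covering [0x40, 0x60) and a new write covering [0x50, 0x80)
// merge into a single entry covering [0x40, 0x80)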
522 bool merged = false;
523 auto w = writeQueue.begin();
524
525 while(!merged && w != writeQueue.end()) {
526 // either of the two could be first, if they are the same
527 // it does not matter which way we go
528 if ((*w)->addr >= addr) {
529 // the existing one starts after the new one, figure
530 // out where the new one ends with respect to the
531 // existing one
532 if ((addr + size) >= ((*w)->addr + (*w)->size)) {
533 // check if the existing one is completely
534 // subsumed in the new one
535 DPRINTF(DRAM, "Merging write covering existing burst\n");
536 merged = true;
537 // update both the address and the size
538 (*w)->addr = addr;
539 (*w)->size = size;
540 } else if ((addr + size) >= (*w)->addr &&
541 ((*w)->addr + (*w)->size - addr) <= burstSize) {
542 // the new one is just before or partially
543 // overlapping with the existing one, and together
544 // they fit within a burst
545 DPRINTF(DRAM, "Merging write before existing burst\n");
546 merged = true;
547 // the existing queue item needs to be adjusted with
548 // respect to both address and size
549 (*w)->size = (*w)->addr + (*w)->size - addr;
550 (*w)->addr = addr;
551 }
552 } else {
553 // the new one starts after the current one, figure
554 // out where the existing one ends with respect to the
555 // new one
556 if (((*w)->addr + (*w)->size) >= (addr + size)) {
557 // check if the new one is completely subsumed in the
558 // existing one
559 DPRINTF(DRAM, "Merging write into existing burst\n");
560 merged = true;
561 // no adjustments necessary
562 } else if (((*w)->addr + (*w)->size) >= addr &&
563 (addr + size - (*w)->addr) <= burstSize) {
564 // the existing one is just before or partially
565 // overlapping with the new one, and together
566 // they fit within a burst
567 DPRINTF(DRAM, "Merging write after existing burst\n");
568 merged = true;
569 // the address is right, and only the size has
570 // to be adjusted
571 (*w)->size = addr + size - (*w)->addr;
572 }
573 }
574 ++w;
575 }
576
577 // if the item was not merged we need to create a new write
578 // and enqueue it
579 if (!merged) {
580 DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size, false);
581
582 assert(writeQueue.size() < writeBufferSize);
583 wrQLenPdf[writeQueue.size()]++;
584
585 DPRINTF(DRAM, "Adding to write queue\n");
586
587 writeQueue.push_back(dram_pkt);
588
589 // Update stats
590 avgWrQLen = writeQueue.size();
591 } else {
592 // keep track of the fact that this burst effectively
593 // disappeared as it was merged with an existing one
594 mergedWrBursts++;
595 }
596
597 // Starting address of next dram pkt (aligned to burstSize boundary)
598 addr = (addr | (burstSize - 1)) + 1;
599 }
600
601 // we do not wait for the writes to be sent to the actual memory,
602 // but instead take responsibility for the consistency here and
603 // snoop the write queue for any upcoming reads
604 // @todo, if a pkt size is larger than burst size, we might need a
605 // different front end latency
606 accessAndRespond(pkt, frontendLatency);
607
608 // If we are not already scheduled to get a request out of the
609 // queue, do so now
610 if (!nextReqEvent.scheduled()) {
611 DPRINTF(DRAM, "Request scheduled immediately\n");
612 schedule(nextReqEvent, curTick());
613 }
614}
615
616void
617DRAMCtrl::printQs() const {
618 DPRINTF(DRAM, "===READ QUEUE===\n\n");
619 for (auto i = readQueue.begin() ; i != readQueue.end() ; ++i) {
620 DPRINTF(DRAM, "Read %lu\n", (*i)->addr);
621 }
622 DPRINTF(DRAM, "\n===RESP QUEUE===\n\n");
623 for (auto i = respQueue.begin() ; i != respQueue.end() ; ++i) {
624 DPRINTF(DRAM, "Response %lu\n", (*i)->addr);
625 }
626 DPRINTF(DRAM, "\n===WRITE QUEUE===\n\n");
627 for (auto i = writeQueue.begin() ; i != writeQueue.end() ; ++i) {
628 DPRINTF(DRAM, "Write %lu\n", (*i)->addr);
629 }
630}
631
632bool
633DRAMCtrl::recvTimingReq(PacketPtr pkt)
634{
635 /// @todo temporary hack to deal with memory corruption issues until
636 /// 4-phase transactions are complete
637 for (int x = 0; x < pendingDelete.size(); x++)
638 delete pendingDelete[x];
639 pendingDelete.clear();
640
641 // This is where we enter from the outside world
642 DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n",
643 pkt->cmdString(), pkt->getAddr(), pkt->getSize());
644
645 // simply drop inhibited packets for now
646 if (pkt->memInhibitAsserted()) {
647 DPRINTF(DRAM, "Inhibited packet -- Dropping it now\n");
648 pendingDelete.push_back(pkt);
649 return true;
650 }
651
652 // Calc avg gap between requests
653 if (prevArrival != 0) {
654 totGap += curTick() - prevArrival;
655 }
656 prevArrival = curTick();
657
658
659 // Find out how many dram packets a pkt translates to
660 // If the burst size is equal or larger than the pkt size, then a pkt
661 // translates to only one dram packet. Otherwise, a pkt translates to
662 // multiple dram packets
663 unsigned size = pkt->getSize();
664 unsigned offset = pkt->getAddr() & (burstSize - 1);
665 unsigned int dram_pkt_count = divCeil(offset + size, burstSize);
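// e.g. (hypothetical values) a 128 byte request starting 16 bytes into
// a 64 byte burst spans divCeil(16 + 128, 64) = 3 bursts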
666
667 // check local buffers and do not accept if full
668 if (pkt->isRead()) {
669 assert(size != 0);
670 if (readQueueFull(dram_pkt_count)) {
671 DPRINTF(DRAM, "Read queue full, not accepting\n");
672 // remember that we have to retry this port
673 retryRdReq = true;
674 numRdRetry++;
675 return false;
676 } else {
677 addToReadQueue(pkt, dram_pkt_count);
678 readReqs++;
679 bytesReadSys += size;
680 }
681 } else if (pkt->isWrite()) {
682 assert(size != 0);
683 if (writeQueueFull(dram_pkt_count)) {
684 DPRINTF(DRAM, "Write queue full, not accepting\n");
685 // remember that we have to retry this port
686 retryWrReq = true;
687 numWrRetry++;
688 return false;
689 } else {
690 addToWriteQueue(pkt, dram_pkt_count);
691 writeReqs++;
692 bytesWrittenSys += size;
693 }
694 } else {
695 DPRINTF(DRAM,"Neither read nor write, ignore timing\n");
696 neitherReadNorWrite++;
697 accessAndRespond(pkt, 1);
698 }
699
700 return true;
701}
702
703void
704DRAMCtrl::processRespondEvent()
705{
706 DPRINTF(DRAM,
707 "processRespondEvent(): Some req has reached its readyTime\n");
708
709 DRAMPacket* dram_pkt = respQueue.front();
710
711 if (dram_pkt->burstHelper) {
712 // it is a split packet
713 dram_pkt->burstHelper->burstsServiced++;
714 if (dram_pkt->burstHelper->burstsServiced ==
715 dram_pkt->burstHelper->burstCount) {
716 // we have now serviced all children packets of a system packet
717 // so we can now respond to the requester
718 // @todo we probably want to have a different front end and back
719 // end latency for split packets
720 accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
721 delete dram_pkt->burstHelper;
722 dram_pkt->burstHelper = NULL;
723 }
724 } else {
725 // it is not a split packet
726 accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
727 }
728
729 delete respQueue.front();
730 respQueue.pop_front();
731
732 if (!respQueue.empty()) {
733 assert(respQueue.front()->readyTime >= curTick());
734 assert(!respondEvent.scheduled());
735 schedule(respondEvent, respQueue.front()->readyTime);
736 } else {
737 // if there is nothing left in any queue, signal a drain
738 if (writeQueue.empty() && readQueue.empty() &&
739 drainManager) {
740 DPRINTF(Drain, "DRAM controller done draining\n");
741 drainManager->signalDrainDone();
742 drainManager = NULL;
743 }
744 }
745
746 // We have made a location in the queue available at this point,
747 // so if there is a read that was forced to wait, retry now
748 if (retryRdReq) {
749 retryRdReq = false;
750 port.sendRetry();
751 }
752}
753
754void
755DRAMCtrl::chooseNext(std::deque<DRAMPacket*>& queue, bool switched_cmd_type)
756{
757 // This method does the arbitration between requests. The chosen
758 // packet is simply moved to the head of the queue. The other
759 // methods know that this is the place to look. For example, with
760 // FCFS, this method does nothing
761 assert(!queue.empty());
762
763 if (queue.size() == 1) {
764 DPRINTF(DRAM, "Single request, nothing to do\n");
765 return;
766 }
767
768 if (memSchedPolicy == Enums::fcfs) {
769 // Do nothing, since the correct request is already head
770 } else if (memSchedPolicy == Enums::frfcfs) {
771 reorderQueue(queue, switched_cmd_type);
772 } else
773 panic("No scheduling policy chosen\n");
774}
775
776void
777DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue, bool switched_cmd_type)
778{
779 // Only determine this when needed
780 uint64_t earliest_banks = 0;
781
782 // Search for row hits first, if no row hit is found then schedule the
783 // packet to one of the earliest banks available
784 bool found_earliest_pkt = false;
785 bool found_prepped_diff_rank_pkt = false;
786 auto selected_pkt_it = queue.begin();
787
788 for (auto i = queue.begin(); i != queue.end() ; ++i) {
789 DRAMPacket* dram_pkt = *i;
790 const Bank& bank = dram_pkt->bankRef;
791 // Check if it is a row hit
792 if (bank.openRow == dram_pkt->row) {
793 if (dram_pkt->rank == activeRank || switched_cmd_type) {
794 // FCFS within the hits, giving priority to commands
795 // that access the same rank as the previous burst
796 // to minimize bus turnaround delays
797 // Only give rank priority when command type is not changing
798 DPRINTF(DRAM, "Row buffer hit\n");
799 selected_pkt_it = i;
800 break;
801 } else if (!found_prepped_diff_rank_pkt) {
802 // found row hit for command on different rank than prev burst
803 selected_pkt_it = i;
804 found_prepped_diff_rank_pkt = true;
805 }
806 } else if (!found_earliest_pkt && !found_prepped_diff_rank_pkt) {
807 // No row hit and
808 // haven't found an entry with a row hit to a new rank
809 if (earliest_banks == 0)
810 // Determine entries with earliest bank prep delay
811 // Function will give priority to commands that access the
812 // same rank as previous burst and can prep the bank seamlessly
813 earliest_banks = minBankPrep(queue, switched_cmd_type);
814
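// earliest_banks is a bit vector with one bit per (rank, bank) pair;
// bits() below extracts the single bit for this packet's bankId to
// test membership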
815 // FCFS - Bank is first available bank
816 if (bits(earliest_banks, dram_pkt->bankId, dram_pkt->bankId)) {
817 // Remember the packet to be scheduled to one of the earliest
818 // banks available, FCFS amongst the earliest banks
819 selected_pkt_it = i;
820 found_earliest_pkt = true;
821 }
822 }
823 }
824
825 DRAMPacket* selected_pkt = *selected_pkt_it;
826 queue.erase(selected_pkt_it);
827 queue.push_front(selected_pkt);
828}
829
830void
831DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
832{
833 DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());
834
835 bool needsResponse = pkt->needsResponse();
836 // do the actual memory access which also turns the packet into a
837 // response
838 access(pkt);
839
840 // turn packet around to go back to requester if response expected
841 if (needsResponse) {
842 // access already turned the packet into a response
843 assert(pkt->isResponse());
844
845 // @todo someone should pay for this
846 pkt->firstWordDelay = pkt->lastWordDelay = 0;
847
848 // queue the packet in the response queue to be sent out after
849 // the static latency has passed
850 port.schedTimingResp(pkt, curTick() + static_latency);
851 } else {
852 // @todo the packet is going to be deleted, and the DRAMPacket
853 // is still having a pointer to it
854 pendingDelete.push_back(pkt);
855 }
856
857 DPRINTF(DRAM, "Done\n");
858
859 return;
860}
861
862void
863DRAMCtrl::activateBank(Bank& bank, Tick act_tick, uint32_t row)
864{
865 // get the rank index from the bank
866 uint8_t rank = bank.rank;
867
868 assert(actTicks[rank].size() == activationLimit);
869
870 DPRINTF(DRAM, "Activate at tick %d\n", act_tick);
871
872 // update the open row
873 assert(bank.openRow == Bank::NO_ROW);
874 bank.openRow = row;
875
876 // start counting anew, this covers both the case when we
877 // auto-precharged, and when this access is forced to
878 // precharge
879 bank.bytesAccessed = 0;
880 bank.rowAccesses = 0;
881
882 ++numBanksActive;
883 assert(numBanksActive <= banksPerRank * ranksPerChannel);
884
885 DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got %d active\n",
886 bank.bank, bank.rank, act_tick, numBanksActive);
887
888 rankPower[bank.rank].powerlib.doCommand(MemCommand::ACT, bank.bank,
889 divCeil(act_tick, tCK) -
890 timeStampOffset);
891
892 DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_tick, tCK) -
893 timeStampOffset, bank.bank, bank.rank);
894
895 // The next access has to respect tRAS for this bank
896 bank.preAllowedAt = act_tick + tRAS;
897
898 // Respect the row-to-column command delay
899 bank.colAllowedAt = std::max(act_tick + tRCD, bank.colAllowedAt);
900
901 // start by enforcing tRRD
902 for(int i = 0; i < banksPerRank; i++) {
903 // next activate to any bank in this rank must not happen
904 // before tRRD
905 if (bankGroupArch && (bank.bankgr == banks[rank][i].bankgr)) {
906 // bank group architecture requires longer delays between
907 // ACT commands within the same bank group. Use tRRD_L
908 // in this case
909 banks[rank][i].actAllowedAt = std::max(act_tick + tRRD_L,
910 banks[rank][i].actAllowedAt);
911 } else {
912 // use shorter tRRD value when either
913 // 1) bank group architecture is not supported
914 // 2) bank is in a different bank group
915 banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
916 banks[rank][i].actAllowedAt);
917 }
918 }
919
920 // next, we deal with tXAW, if the activation limit is disabled
921 // then we directly schedule an activate power event
922 if (!actTicks[rank].empty()) {
923 // sanity check
924 if (actTicks[rank].back() &&
925 (act_tick - actTicks[rank].back()) < tXAW) {
926 panic("Got %d activates in window %d (%llu - %llu) which "
927 "is smaller than %llu\n", activationLimit, act_tick -
928 actTicks[rank].back(), act_tick, actTicks[rank].back(),
929 tXAW);
930 }
931
932 // shift the times used for the book keeping, the last element
933 // (highest index) is the oldest one and hence the lowest value
934 actTicks[rank].pop_back();
935
936 // record a new activation (in the future)
937 actTicks[rank].push_front(act_tick);
938
939 // cannot activate more than X times in time window tXAW, push the
940 // next one (the X + 1'st activate) to be tXAW away from the
941 // oldest in our window of X
942 if (actTicks[rank].back() &&
943 (act_tick - actTicks[rank].back()) < tXAW) {
944 DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate "
945 "no earlier than %llu\n", activationLimit,
946 actTicks[rank].back() + tXAW);
947 for(int j = 0; j < banksPerRank; j++)
948 // next activate must not happen before end of window
949 banks[rank][j].actAllowedAt =
950 std::max(actTicks[rank].back() + tXAW,
951 banks[rank][j].actAllowedAt);
952 }
953 }
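// illustration (hypothetical timing): with activationLimit = 4 and the
// last four ACTs at ticks 10, 20, 30 and 40, a fifth ACT cannot be
// scheduled before tick 10 + tXAW (assuming tick 40 is still within
// tXAW of tick 10)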
954
955 // at the point when this activate takes place, make sure we
956 // transition to the active power state
957 if (!activateEvent.scheduled())
958 schedule(activateEvent, act_tick);
959 else if (activateEvent.when() > act_tick)
960 // move it sooner in time
961 reschedule(activateEvent, act_tick);
962}
963
964void
965DRAMCtrl::processActivateEvent()
966{
967 // we should transition to the active state as soon as any bank is active
968 if (pwrState != PWR_ACT)
969 // note that at this point numBanksActive could be back at
970 // zero again due to a precharge scheduled in the future
971 schedulePowerEvent(PWR_ACT, curTick());
972}
973
974void
975DRAMCtrl::prechargeBank(Bank& bank, Tick pre_at, bool trace)
976{
977 // make sure the bank has an open row
978 assert(bank.openRow != Bank::NO_ROW);
979
980 // sample the bytes per activate here since we are closing
981 // the page
982 bytesPerActivate.sample(bank.bytesAccessed);
983
984 bank.openRow = Bank::NO_ROW;
985
986 // no precharge allowed before this one
987 bank.preAllowedAt = pre_at;
988
989 Tick pre_done_at = pre_at + tRP;
990
991 bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at);
992
993 assert(numBanksActive != 0);
994 --numBanksActive;
995
996 DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got "
997 "%d active\n", bank.bank, bank.rank, pre_at, numBanksActive);
998
999 if (trace) {
1000
1001 rankPower[bank.rank].powerlib.doCommand(MemCommand::PRE, bank.bank,
1002 divCeil(pre_at, tCK) -
1003 timeStampOffset);
1004 DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) -
1005 timeStampOffset, bank.bank, bank.rank);
1006 }
1007 // if we look at the current number of active banks we might be
1008 // tempted to think the DRAM is now idle, however this can be
1009 // undone by an activate that is scheduled to happen before we
1010 // would have reached the idle state, so schedule an event and
1011 // rather check once we actually make it to the point in time when
1012 // the (last) precharge takes place
1013 if (!prechargeEvent.scheduled())
1014 schedule(prechargeEvent, pre_done_at);
1015 else if (prechargeEvent.when() < pre_done_at)
1016 reschedule(prechargeEvent, pre_done_at);
1017}
1018
1019void
1020DRAMCtrl::processPrechargeEvent()
1021{
1022 // if we reached zero, then special conditions apply as we track
1023 // if all banks are precharged for the power models
1024 if (numBanksActive == 0) {
1025 // we should transition to the idle state when the last bank
1026 // is precharged
1027 schedulePowerEvent(PWR_IDLE, curTick());
1028 }
1029}
1030
1031void
1032DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
1033{
1034 DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
1035 dram_pkt->addr, dram_pkt->rank, dram_pkt->bank, dram_pkt->row);
1036
1037 // get the bank
1038 Bank& bank = dram_pkt->bankRef;
1039
1040 // for the state we need to track if it is a row hit or not
1041 bool row_hit = true;
1042
1043 // respect any constraints on the command (e.g. tRCD or tCCD)
1044 Tick cmd_at = std::max(bank.colAllowedAt, curTick());
1045
1046 // Determine the access latency and update the bank state
1047 if (bank.openRow == dram_pkt->row) {
1048 // nothing to do
1049 } else {
1050 row_hit = false;
1051
1052 // If there is a page open, precharge it.
1053 if (bank.openRow != Bank::NO_ROW) {
1054 prechargeBank(bank, std::max(bank.preAllowedAt, curTick()));
1055 }
1056
1057 // next we need to account for the delay in activating the
1058 // page
1059 Tick act_tick = std::max(bank.actAllowedAt, curTick());
1060
1061 // Record the activation and deal with all the global timing
1062 // constraints caused by a new activation (tRRD and tXAW)
1063 activateBank(bank, act_tick, dram_pkt->row);
1064
1065 // issue the command as early as possible
1066 cmd_at = bank.colAllowedAt;
1067 }
1068
1069 // we need to wait until the bus is available before we can issue
1070 // the command
1071 cmd_at = std::max(cmd_at, busBusyUntil - tCL);
1072
1073 // update the packet ready time
1074 dram_pkt->readyTime = cmd_at + tCL + tBURST;
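// e.g. (hypothetical DDR3-style values) with tCL = 13.75 ns and
// tBURST = 5 ns, the data burst completes 18.75 ns after the column
// command issues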
1075
1076 // only one burst can use the bus at any one point in time
1077 assert(dram_pkt->readyTime - busBusyUntil >= tBURST);
1078
1079 // update the time for the next read/write burst for each
1080 // bank (add a max with tCCD/tCCD_L here)
1081 Tick cmd_dly;
1082 for(int j = 0; j < ranksPerChannel; j++) {
1083 for(int i = 0; i < banksPerRank; i++) {
1084 // next burst to same bank group in this rank must not happen
1085 // before tCCD_L. Different bank group timing requirement is
1086 // tBURST; Add tCS for different ranks
1087 if (dram_pkt->rank == j) {
1088 if (bankGroupArch && (bank.bankgr == banks[j][i].bankgr)) {
1089 // bank group architecture requires longer delays between
1090 // RD/WR burst commands to the same bank group.
1091 // Use tCCD_L in this case
1092 cmd_dly = tCCD_L;
1093 } else {
1094 // use tBURST (equivalent to tCCD_S), the shorter
1095 // cas-to-cas delay value, when either:
1096 // 1) bank group architecture is not supported
1097 // 2) bank is in a different bank group
1098 cmd_dly = tBURST;
1099 }
1100 } else {
1101 // different rank is by default in a different bank group
1102 // use tBURST (equivalent to tCCD_S), which is the shorter
1103 // cas-to-cas delay in this case
1104 // Add tCS to account for rank-to-rank bus delay requirements
1105 cmd_dly = tBURST + tCS;
1106 }
1107 banks[j][i].colAllowedAt = std::max(cmd_at + cmd_dly,
1108 banks[j][i].colAllowedAt);
1109 }
1110 }
1111
1112 // Save rank of current access
1113 activeRank = dram_pkt->rank;
1114
1115 // If this is a write, we also need to respect the write recovery
1116 // time before a precharge, in the case of a read, respect the
1117 // read to precharge constraint
1118 bank.preAllowedAt = std::max(bank.preAllowedAt,
1119 dram_pkt->isRead ? cmd_at + tRTP :
1120 dram_pkt->readyTime + tWR);
1121
1122 // increment the bytes accessed and the accesses per row
1123 bank.bytesAccessed += burstSize;
1124 ++bank.rowAccesses;
1125
1126 // if we reached the max, then issue with an auto-precharge
1127 bool auto_precharge = pageMgmt == Enums::close ||
1128 bank.rowAccesses == maxAccessesPerRow;
1129
1130 // if we did not hit the limit, we might still want to
1131 // auto-precharge
1132 if (!auto_precharge &&
1133 (pageMgmt == Enums::open_adaptive ||
1134 pageMgmt == Enums::close_adaptive)) {
1135 // a twist on the open and close page policies:
1136 // 1) open_adaptive page policy does not blindly keep the
1137 // page open, but close it if there are no row hits, and there
1138 // are bank conflicts in the queue
1139 // 2) close_adaptive page policy does not blindly close the
1140 // page, but closes it only if there are no row hits in the queue.
1141 // In this case, only force an auto precharge when there
1142 // are no same page hits in the queue
1143 bool got_more_hits = false;
1144 bool got_bank_conflict = false;
1145
1146 // either look at the read queue or write queue
1147 const deque<DRAMPacket*>& queue = dram_pkt->isRead ? readQueue :
1148 writeQueue;
1149 auto p = queue.begin();
1150 // make sure we are not considering the packet that we are
1151 // currently dealing with (which is the head of the queue)
1152 ++p;
1153
1154 // keep on looking until we have found required condition or
1155 // reached the end
1156 while (!(got_more_hits &&
1157 (got_bank_conflict || pageMgmt == Enums::close_adaptive)) &&
1158 p != queue.end()) {
1159 bool same_rank_bank = (dram_pkt->rank == (*p)->rank) &&
1160 (dram_pkt->bank == (*p)->bank);
1161 bool same_row = dram_pkt->row == (*p)->row;
1162 got_more_hits |= same_rank_bank && same_row;
1163 got_bank_conflict |= same_rank_bank && !same_row;
1164 ++p;
1165 }
1166
1167 // auto pre-charge when either
1168 // 1) open_adaptive policy, we have not got any more hits, and
1169 // have a bank conflict
1170 // 2) close_adaptive policy and we have not got any more hits
1171 auto_precharge = !got_more_hits &&
1172 (got_bank_conflict || pageMgmt == Enums::close_adaptive);
1173 }
1174
1175 // DRAMPower trace command to be written
1176 std::string mem_cmd = dram_pkt->isRead ? "RD" : "WR";
1177
1178 // MemCommand required for DRAMPower library
1179 MemCommand::cmds command = (mem_cmd == "RD") ? MemCommand::RD :
1180 MemCommand::WR;
1181
1182 // if this access should use auto-precharge, then we are
1183 // closing the row
1184 if (auto_precharge) {
1185 // if auto-precharge push a PRE command at the correct tick to the
1186 // list used by DRAMPower library to calculate power
1187 prechargeBank(bank, std::max(curTick(), bank.preAllowedAt));
1188
1189 DPRINTF(DRAM, "Auto-precharged bank: %d\n", dram_pkt->bankId);
1190 }
1191
1192 // Update bus state
1193 busBusyUntil = dram_pkt->readyTime;
1194
1195 DPRINTF(DRAM, "Access to %lld, ready at %lld bus busy until %lld.\n",
1196 dram_pkt->addr, dram_pkt->readyTime, busBusyUntil);
1197
1198 rankPower[dram_pkt->rank].powerlib.doCommand(command, dram_pkt->bank,
1199 divCeil(cmd_at, tCK) -
1200 timeStampOffset);
1201
1202 DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) -
1203 timeStampOffset, mem_cmd, dram_pkt->bank, dram_pkt->rank);
1204
1205 // Update the minimum timing between the requests, this is a
1206 // conservative estimate of when we have to schedule the next
1207 // request to not introduce any unnecessary bubbles. In most cases
1208 // we will wake up sooner than we have to.
1209 nextReqTime = busBusyUntil - (tRP + tRCD + tCL);
1210
1211 // Update the stats and schedule the next request
1212 if (dram_pkt->isRead) {
1213 ++readsThisTime;
1214 if (row_hit)
1215 readRowHits++;
1216 bytesReadDRAM += burstSize;
1217 perBankRdBursts[dram_pkt->bankId]++;
1218
1219 // Update latency stats
1220 totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
1221 totBusLat += tBURST;
1222 totQLat += cmd_at - dram_pkt->entryTime;
1223 } else {
1224 ++writesThisTime;
1225 if (row_hit)
1226 writeRowHits++;
1227 bytesWritten += burstSize;
1228 perBankWrBursts[dram_pkt->bankId]++;
1229 }
1230}
1231
1232void
1233DRAMCtrl::processNextReqEvent()
1234{
1235 // pre-emptively set to false. Overwrite if in READ_TO_WRITE
1236 // or WRITE_TO_READ state
1237 bool switched_cmd_type = false;
1238 if (busState == READ_TO_WRITE) {
1239 DPRINTF(DRAM, "Switching to writes after %d reads with %d reads "
1240 "waiting\n", readsThisTime, readQueue.size());
1241
1242 // sample and reset the read-related stats as we are now
1243 // transitioning to writes, and all reads are done
1244 rdPerTurnAround.sample(readsThisTime);
1245 readsThisTime = 0;
1246
1247 // now proceed to do the actual writes
1248 busState = WRITE;
1249 switched_cmd_type = true;
1250 } else if (busState == WRITE_TO_READ) {
1251 DPRINTF(DRAM, "Switching to reads after %d writes with %d writes "
1252 "waiting\n", writesThisTime, writeQueue.size());
1253
1254 wrPerTurnAround.sample(writesThisTime);
1255 writesThisTime = 0;
1256
1257 busState = READ;
1258 switched_cmd_type = true;
1259 }
1260
1261 if (refreshState != REF_IDLE) {
1262 // if a refresh is waiting for this event loop to finish, then hand
1263 // over now, and do not schedule a new nextReqEvent
1264 if (refreshState == REF_DRAIN) {
1265 DPRINTF(DRAM, "Refresh drain done, now precharging\n");
1266
1267 refreshState = REF_PRE;
1268
1269 // hand control back to the refresh event loop
1270 schedule(refreshEvent, curTick());
1271 }
1272
1273 // let the refresh finish before issuing any further requests
1274 return;
1275 }
1276
1277 // when we get here it is either a read or a write
1278 if (busState == READ) {
1279
1280 // track if we should switch or not
1281 bool switch_to_writes = false;
1282
1283 if (readQueue.empty()) {
1284 // In the case there is no read request to go next,
1285 // trigger writes if we have passed the low threshold (or
1286 // if we are draining)
1287 if (!writeQueue.empty() &&
1288 (drainManager || writeQueue.size() > writeLowThreshold)) {
1289
1290 switch_to_writes = true;
1291 } else {
1292 // check if we are drained
1293 if (respQueue.empty() && drainManager) {
1294 DPRINTF(Drain, "DRAM controller done draining\n");
1295 drainManager->signalDrainDone();
1296 drainManager = NULL;
1297 }
1298
1299 // nothing to do, not even any point in scheduling an
1300 // event for the next request
1301 return;
1302 }
1303 } else {
1304 // Figure out which read request goes next, and move it to the
1305 // front of the read queue
1306 chooseNext(readQueue, switched_cmd_type);
1307
1308 DRAMPacket* dram_pkt = readQueue.front();
1309
1310 // here we get a bit creative and shift the bus busy time by not
1311 // just tWTR, but also a CAS latency, to capture the fact
1312 // that we are allowed to prepare a new bank, but not issue a
1313 // read command until after tWTR; in essence we capture a
1314 // bubble on the data bus that is tWTR + tCL
1315 if (switched_cmd_type && dram_pkt->rank == activeRank) {
1316 busBusyUntil += tWTR + tCL;
1317 }
1318
1319 doDRAMAccess(dram_pkt);
1320
1321 // At this point we're done dealing with the request
1322 readQueue.pop_front();
1323
1324 // sanity check
1325 assert(dram_pkt->size <= burstSize);
1326 assert(dram_pkt->readyTime >= curTick());
1327
1328 // Insert into response queue. It will be sent back to the
1329 // requestor at its readyTime
1330 if (respQueue.empty()) {
1331 assert(!respondEvent.scheduled());
1332 schedule(respondEvent, dram_pkt->readyTime);
1333 } else {
1334 assert(respQueue.back()->readyTime <= dram_pkt->readyTime);
1335 assert(respondEvent.scheduled());
1336 }
1337
1338 respQueue.push_back(dram_pkt);
1339
1340 // we have so many writes that we have to transition
1341 if (writeQueue.size() > writeHighThreshold) {
1342 switch_to_writes = true;
1343 }
1344 }
1345
1346 // switching to writes, either because the read queue is empty
1347 // and the writes have passed the low threshold (or we are
1348 // draining), or because the writes hit the high threshold
1349 if (switch_to_writes) {
1350 // transition to writing
1351 busState = READ_TO_WRITE;
1352 }
1353 } else {
1354 chooseNext(writeQueue, switched_cmd_type);
1355 DRAMPacket* dram_pkt = writeQueue.front();
1356 // sanity check
1357 assert(dram_pkt->size <= burstSize);
1358
1359 // add a bubble to the data bus, as defined by the
1360 // tRTW when access is to the same rank as previous burst
1361 // Different rank timing is handled with tCS, which is
1362 // applied to colAllowedAt
1363 if (switched_cmd_type && dram_pkt->rank == activeRank) {
1364 busBusyUntil += tRTW;
1365 }
1366
1367 doDRAMAccess(dram_pkt);
1368
1369 writeQueue.pop_front();
1370 delete dram_pkt;
1371
1372 // If we emptied the write queue, or got sufficiently below the
1373 // threshold (using the minWritesPerSwitch as the hysteresis) and
1374 // are not draining, or we have reads waiting and have done enough
1375 // writes, then switch to reads.
1376 if (writeQueue.empty() ||
1377 (writeQueue.size() + minWritesPerSwitch < writeLowThreshold &&
1378 !drainManager) ||
1379 (!readQueue.empty() && writesThisTime >= minWritesPerSwitch)) {
1380 // turn the bus back around for reads again
1381 busState = WRITE_TO_READ;
1382
1383 // note that we switch back to reads also in the idle
1384 // case, which eventually will check for any draining and
1385 // also pause any further scheduling if there is really
1386 // nothing to do
1387 }
1388 }
1389
1390 schedule(nextReqEvent, std::max(nextReqTime, curTick()));
1391
1392 // If there is space available and we have writes waiting then let
1393 // them retry. This is done here to ensure that the retry does not
1394 // cause a nextReqEvent to be scheduled before we do so as part of
1395 // the next request processing
1396 if (retryWrReq && writeQueue.size() < writeBufferSize) {
1397 retryWrReq = false;
1398 port.sendRetry();
1399 }
1400}
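// In summary, the bus direction follows a four-state cycle driven by
// this event: READ -> READ_TO_WRITE -> WRITE -> WRITE_TO_READ -> READ.
// The two transitional states last for a single invocation, just long
// enough to sample the per-turnaround stats and flag switched_cmd_type
// so that the tWTR / tRTW turnaround bubble is applied above.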
1401
1402uint64_t
1403DRAMCtrl::minBankPrep(const deque<DRAMPacket*>& queue,
1404 bool switched_cmd_type) const
1405{
1406 uint64_t bank_mask = 0;
1407 Tick min_act_at = MaxTick;
1408
1409 uint64_t bank_mask_same_rank = 0;
1410 Tick min_act_at_same_rank = MaxTick;
1411
1412 // Give precedence to commands that access same rank as previous command
1413 bool same_rank_match = false;
1414
1415 // determine if we have queued transactions targeting the
1416 // bank in question
1417 vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
1418 for (auto p = queue.begin(); p != queue.end(); ++p) {
1419 got_waiting[(*p)->bankId] = true;
1420 }
1421
1422 for (int i = 0; i < ranksPerChannel; i++) {
1423 for (int j = 0; j < banksPerRank; j++) {
1424 uint8_t bank_id = i * banksPerRank + j;
1425
1426 // if we have waiting requests for the bank, and it is
1427 // amongst the first available, update the mask
1428 if (got_waiting[bank_id]) {
1429 // simplistic approximation of when the bank can issue
1430 // an activate, ignoring any rank-to-rank switching
1431 // cost in this calculation
1432 Tick act_at = banks[i][j].openRow == Bank::NO_ROW ?
1433 banks[i][j].actAllowedAt :
1434 std::max(banks[i][j].preAllowedAt, curTick()) + tRP;
1435
1436 // prioritize commands that access the
1437 // same rank as the previous burst.
1438 // Calculate the bank mask separately for this case and
1439 // evaluate it after the loop iterations complete
1440 if (i == activeRank && ranksPerChannel > 1) {
1441 if (act_at <= min_act_at_same_rank) {
1442 // reset same rank bank mask if new minimum is found
1443 // and previous minimum could not immediately send ACT
1444 if (act_at < min_act_at_same_rank &&
1445 min_act_at_same_rank > curTick())
1446 bank_mask_same_rank = 0;
1447
1448 // Set flag indicating that a same rank
1449 // opportunity was found
1450 same_rank_match = true;
1451
1452 // set the bit corresponding to the available bank
1453 replaceBits(bank_mask_same_rank, bank_id, bank_id, 1);
1454 min_act_at_same_rank = act_at;
1455 }
1456 } else {
1457 if (act_at <= min_act_at) {
1458 // reset bank mask if a new minimum is found
1459 // and the previous minimum could not immediately send ACT
1460 if (act_at < min_act_at && min_act_at > curTick())
1461 bank_mask = 0;
1462 // set the bit corresponding to the available bank
1463 replaceBits(bank_mask, bank_id, bank_id, 1);
1464 min_act_at = act_at;
1465 }
1466 }
1467 }
1468 }
1469 }
1470
1471 // Determine the earliest time when the next burst can issue based
1472 // on the current busBusyUntil delay.
1473 // Offset by tRCD to correlate with ACT timing variables
1474 Tick min_cmd_at = busBusyUntil - tCL - tRCD;
1475
1476 // Prioritize same rank accesses that can issue B2B
1477 // Only optimize for same ranks when the command type
1478 // does not change; do not want to unnecessarily incur tWTR
1479 //
1480 // Resulting FCFS prioritization order is:
1481 // 1) Commands that access the same rank as previous burst
1482 // and can prep the bank seamlessly.
1483 // 2) Commands (any rank) with earliest bank prep
1484 if (!switched_cmd_type && same_rank_match &&
1485 min_act_at_same_rank <= min_cmd_at) {
1486 bank_mask = bank_mask_same_rank;
1487 }
1488
1489 return bank_mask;
1490}
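// For illustration: the returned value is a bitmask over flattened
// bank ids (rank * banksPerRank + bank). Assuming 8 banks per rank
// and 2 ranks, a mask of 0x0102 would flag bank 1 of rank 0 and
// bank 0 of rank 1 as the earliest banks that can be prepared.
// reorderQueue() then tests individual bits of this mask with
// bits(earliest_banks, bankId, bankId) and picks a packet FCFS
// amongst those banks.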
1491
1492void
1493DRAMCtrl::processRefreshEvent()
1494{
1495 // when first preparing the refresh, remember when it was due
1496 if (refreshState == REF_IDLE) {
1497 // remember when the refresh is due
1498 refreshDueAt = curTick();
1499
1500 // proceed to drain
1501 refreshState = REF_DRAIN;
1502
1503 DPRINTF(DRAM, "Refresh due\n");
1504 }
1505
1506 // let any scheduled read or write go ahead, after which it will
1507 // hand control back to this event loop
1508 if (refreshState == REF_DRAIN) {
1509 if (nextReqEvent.scheduled()) {
1510 // hand control over to the request loop until it is
1511 // evaluated next
1512 DPRINTF(DRAM, "Refresh awaiting draining\n");
1513
1514 return;
1515 } else {
1516 refreshState = REF_PRE;
1517 }
1518 }
1519
1520 // at this point, ensure that all banks are precharged
1521 if (refreshState == REF_PRE) {
1522 // precharge any active bank if we are not already in the idle
1523 // state
1524 if (pwrState != PWR_IDLE) {
1525 // at the moment, we use a precharge all even if there is
1526 // only a single bank open
1527 DPRINTF(DRAM, "Precharging all\n");
1528
1529 // first determine when we can precharge
1530 Tick pre_at = curTick();
1531 for (int i = 0; i < ranksPerChannel; i++) {
1532 for (int j = 0; j < banksPerRank; j++) {
1533 // respect both causality and any existing bank
1534 // constraints, some banks could already have a
1535 // (auto) precharge scheduled
1536 pre_at = std::max(banks[i][j].preAllowedAt, pre_at);
1537 }
1538 }
1539
1540 // make sure all banks are precharged, and for those that
1541 // already are, update their availability
1542 Tick act_allowed_at = pre_at + tRP;
1543
1544 for (int i = 0; i < ranksPerChannel; i++) {
1545 for (int j = 0; j < banksPerRank; j++) {
1546 if (banks[i][j].openRow != Bank::NO_ROW) {
1547 prechargeBank(banks[i][j], pre_at, false);
1548 } else {
1549 banks[i][j].actAllowedAt =
1550 std::max(banks[i][j].actAllowedAt, act_allowed_at);
1551 banks[i][j].preAllowedAt =
1552 std::max(banks[i][j].preAllowedAt, pre_at);
1553 }
1554 }
1555
1556 // at the moment this affects all ranks
1557 rankPower[i].powerlib.doCommand(MemCommand::PREA, 0,
1558 divCeil(pre_at, tCK) -
1559 timeStampOffset);
1560
1561 DPRINTF(DRAMPower, "%llu,PREA,0,%d\n", divCeil(pre_at, tCK) -
1562 timeStampOffset, i);
1563 }
1564 } else {
1565 DPRINTF(DRAM, "All banks already precharged, starting refresh\n");
1566
1567 // go ahead and kick the power state machine into gear if
1568 // we are already idle
1569 schedulePowerEvent(PWR_REF, curTick());
1570 }
1571
1572 refreshState = REF_RUN;
1573 assert(numBanksActive == 0);
1574
1575 // wait for all banks to be precharged, at which point the
1576 // power state machine will transition to the idle state, and
1577 // automatically move to a refresh, at that point it will also
1578 // call this method to get the refresh event loop going again
1579 return;
1580 }
1581
1582 // last but not least we perform the actual refresh
1583 if (refreshState == REF_RUN) {
1584 // should never get here with any banks active
1585 assert(numBanksActive == 0);
1586 assert(pwrState == PWR_REF);
1587
1588 Tick ref_done_at = curTick() + tRFC;
1589
1590 for (int i = 0; i < ranksPerChannel; i++) {
1591 for (int j = 0; j < banksPerRank; j++) {
1592 banks[i][j].actAllowedAt = ref_done_at;
1593 }
1594
1595 // at the moment this affects all ranks
1596 rankPower[i].powerlib.doCommand(MemCommand::REF, 0,
1597 divCeil(curTick(), tCK) -
1598 timeStampOffset);
1599
1600 // at the moment, sort the list of commands and update the counters
1601 // for the DRAMPower library when doing a refresh
1602 sort(rankPower[i].powerlib.cmdList.begin(),
1603 rankPower[i].powerlib.cmdList.end(), DRAMCtrl::sortTime);
1604
1605 // update the counters for DRAMPower, passing false to
1606 // indicate that this is not the last command in the
1607 // list. DRAMPower requires this information for the
1608 // correct calculation of the background energy at the end
1609 // of the simulation. Ideally we would want to call this
1610 // function with true once at the end of the
1611 // simulation. However, the discarded energy is extremely
1612 // small and does not affect the final results.
1613 rankPower[i].powerlib.updateCounters(false);
1614
1615 // call the energy function
1616 rankPower[i].powerlib.calcEnergy();
1617
1618 // Update the stats
1619 updatePowerStats(i);
1620
1621 DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), tCK) -
1622 timeStampOffset, i);
1623 }
1624
1625 // make sure we did not wait so long that we cannot make up
1626 // for it
1627 if (refreshDueAt + tREFI < ref_done_at) {
1628 fatal("Refresh was delayed so long we cannot catch up\n");
1629 }
1630
1631 // compensate for the delay in actually performing the refresh
1632 // when scheduling the next one
1633 schedule(refreshEvent, refreshDueAt + tREFI - tRP);
1634
1635 assert(!powerEvent.scheduled());
1636
1637 // move to the idle power state once the refresh is done, this
1638 // will also move the refresh state machine to the refresh
1639 // idle state
1640 schedulePowerEvent(PWR_IDLE, ref_done_at);
1641
1642 DPRINTF(DRAMState, "Refresh done at %llu and next refresh at %llu\n",
1643 ref_done_at, refreshDueAt + tREFI);
1644 }
1645}
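// Note on the arithmetic above: the next refresh is scheduled tREFI
// after the tick the current one was due, minus tRP so that the
// precharge-all can complete in time. As a rough example, with a
// typical tREFI of 7.8 us a refresh that starts late can still catch
// up as long as refreshDueAt + tREFI >= ref_done_at; otherwise the
// fatal() above fires.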
1646
1647void
1648DRAMCtrl::schedulePowerEvent(PowerState pwr_state, Tick tick)
1649{
1650 // respect causality
1651 assert(tick >= curTick());
1652
1653 if (!powerEvent.scheduled()) {
1654 DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n",
1655 tick, pwr_state);
1656
1657 // insert the new transition
1658 pwrStateTrans = pwr_state;
1659
1660 schedule(powerEvent, tick);
1661 } else {
1662 panic("Scheduled power event at %llu to state %d, "
1663 "with scheduled event at %llu to %d\n", tick, pwr_state,
1664 powerEvent.when(), pwrStateTrans);
1665 }
1666}
1667
1668void
1669DRAMCtrl::processPowerEvent()
1670{
1671 // remember where we were, and for how long
1672 Tick duration = curTick() - pwrStateTick;
1673 PowerState prev_state = pwrState;
1674
1675 // update the accounting
1676 pwrStateTime[prev_state] += duration;
1677
1678 pwrState = pwrStateTrans;
1679 pwrStateTick = curTick();
1680
1681 if (pwrState == PWR_IDLE) {
1682 DPRINTF(DRAMState, "All banks precharged\n");
1683
1684 // if we were refreshing, make sure we start scheduling requests again
1685 if (prev_state == PWR_REF) {
1686 DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration);
1687 assert(pwrState == PWR_IDLE);
1688
1689 // kick things into action again
1690 refreshState = REF_IDLE;
1691 assert(!nextReqEvent.scheduled());
1692 schedule(nextReqEvent, curTick());
1693 } else {
1694 assert(prev_state == PWR_ACT);
1695
1696 // if we have a pending refresh, and are now moving to
1697 // the idle state, directly transition to a refresh
1698 if (refreshState == REF_RUN) {
1699 // there should be nothing waiting at this point
1700 assert(!powerEvent.scheduled());
1701
1702 // update the state in zero time and proceed below
1703 pwrState = PWR_REF;
1704 }
1705 }
1706 }
1707
1708 // we transition to the refresh state, let the refresh state
1709 // machine know of this state update and let it deal with the
1710 // scheduling of the next power state transition as well as the
1711 // following refresh
1712 if (pwrState == PWR_REF) {
1713 DPRINTF(DRAMState, "Refreshing\n");
1714 // kick the refresh event loop into action again, and that
1715 // in turn will schedule a transition to the idle power
1716 // state once the refresh is done
1717 assert(refreshState == REF_RUN);
1718 processRefreshEvent();
1719 }
1720}
1721
1722void
1723DRAMCtrl::updatePowerStats(uint8_t rank)
1724{
1725 // Get the energy and power from DRAMPower
1726 Data::MemoryPowerModel::Energy energy =
1727 rankPower[rank].powerlib.getEnergy();
1728 Data::MemoryPowerModel::Power power =
1729 rankPower[rank].powerlib.getPower();
1730
1731 actEnergy[rank] = energy.act_energy * devicesPerRank;
1732 preEnergy[rank] = energy.pre_energy * devicesPerRank;
1733 readEnergy[rank] = energy.read_energy * devicesPerRank;
1734 writeEnergy[rank] = energy.write_energy * devicesPerRank;
1735 refreshEnergy[rank] = energy.ref_energy * devicesPerRank;
1736 actBackEnergy[rank] = energy.act_stdby_energy * devicesPerRank;
1737 preBackEnergy[rank] = energy.pre_stdby_energy * devicesPerRank;
1738 totalEnergy[rank] = energy.total_energy * devicesPerRank;
1739 averagePower[rank] = power.average_power * devicesPerRank;
1740}
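// The scaling by devicesPerRank reflects that the DRAMPower figures
// are reported per device; for example, a 64-bit rank built from
// eight x8 devices multiplies every energy and power value by 8.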
1741
1742void
1743DRAMCtrl::regStats()
1744{
1745 using namespace Stats;
1746
1747 AbstractMemory::regStats();
1748
1749 readReqs
1750 .name(name() + ".readReqs")
1751 .desc("Number of read requests accepted");
1752
1753 writeReqs
1754 .name(name() + ".writeReqs")
1755 .desc("Number of write requests accepted");
1756
1757 readBursts
1758 .name(name() + ".readBursts")
1759 .desc("Number of DRAM read bursts, "
1760 "including those serviced by the write queue");
1761
1762 writeBursts
1763 .name(name() + ".writeBursts")
1764 .desc("Number of DRAM write bursts, "
1765 "including those merged in the write queue");
1766
1767 servicedByWrQ
1768 .name(name() + ".servicedByWrQ")
1769 .desc("Number of DRAM read bursts serviced by the write queue");
1770
1771 mergedWrBursts
1772 .name(name() + ".mergedWrBursts")
1773 .desc("Number of DRAM write bursts merged with an existing one");
1774
1775 neitherReadNorWrite
1776 .name(name() + ".neitherReadNorWriteReqs")
1777 .desc("Number of requests that are neither read nor write");
1778
1779 perBankRdBursts
1780 .init(banksPerRank * ranksPerChannel)
1781 .name(name() + ".perBankRdBursts")
1782 .desc("Per bank read bursts");
1783
1784 perBankWrBursts
1785 .init(banksPerRank * ranksPerChannel)
1786 .name(name() + ".perBankWrBursts")
1787 .desc("Per bank write bursts");
1788
1789 avgRdQLen
1790 .name(name() + ".avgRdQLen")
1791 .desc("Average read queue length when enqueuing")
1792 .precision(2);
1793
1794 avgWrQLen
1795 .name(name() + ".avgWrQLen")
1796 .desc("Average write queue length when enqueuing")
1797 .precision(2);
1798
1799 totQLat
1800 .name(name() + ".totQLat")
1801 .desc("Total ticks spent queuing");
1802
1803 totBusLat
1804 .name(name() + ".totBusLat")
1805 .desc("Total ticks spent in databus transfers");
1806
1807 totMemAccLat
1808 .name(name() + ".totMemAccLat")
1809 .desc("Total ticks spent from burst creation until serviced "
1810 "by the DRAM");
1811
1812 avgQLat
1813 .name(name() + ".avgQLat")
1814 .desc("Average queueing delay per DRAM burst")
1815 .precision(2);
1816
1817 avgQLat = totQLat / (readBursts - servicedByWrQ);
1818
1819 avgBusLat
1820 .name(name() + ".avgBusLat")
1821 .desc("Average bus latency per DRAM burst")
1822 .precision(2);
1823
1824 avgBusLat = totBusLat / (readBursts - servicedByWrQ);
1825
1826 avgMemAccLat
1827 .name(name() + ".avgMemAccLat")
1828 .desc("Average memory access latency per DRAM burst")
1829 .precision(2);
1830
1831 avgMemAccLat = totMemAccLat / (readBursts - servicedByWrQ);
1832
1833 numRdRetry
1834 .name(name() + ".numRdRetry")
1835 .desc("Number of times read queue was full causing retry");
1836
1837 numWrRetry
1838 .name(name() + ".numWrRetry")
1839 .desc("Number of times write queue was full causing retry");
1840
1841 readRowHits
1842 .name(name() + ".readRowHits")
1843 .desc("Number of row buffer hits during reads");
1844
1845 writeRowHits
1846 .name(name() + ".writeRowHits")
1847 .desc("Number of row buffer hits during writes");
1848
1849 readRowHitRate
1850 .name(name() + ".readRowHitRate")
1851 .desc("Row buffer hit rate for reads")
1852 .precision(2);
1853
1854 readRowHitRate = (readRowHits / (readBursts - servicedByWrQ)) * 100;
1855
1856 writeRowHitRate
1857 .name(name() + ".writeRowHitRate")
1858 .desc("Row buffer hit rate for writes")
1859 .precision(2);
1860
1861 writeRowHitRate = (writeRowHits / (writeBursts - mergedWrBursts)) * 100;
1862
1863 readPktSize
1864 .init(ceilLog2(burstSize) + 1)
1865 .name(name() + ".readPktSize")
1866 .desc("Read request sizes (log2)");
1867
1868 writePktSize
1869 .init(ceilLog2(burstSize) + 1)
1870 .name(name() + ".writePktSize")
1871 .desc("Write request sizes (log2)");
1872
1873 rdQLenPdf
1874 .init(readBufferSize)
1875 .name(name() + ".rdQLenPdf")
1876 .desc("What read queue length does an incoming req see");
1877
1878 wrQLenPdf
1879 .init(writeBufferSize)
1880 .name(name() + ".wrQLenPdf")
1881 .desc("What write queue length does an incoming req see");
1882
1883 bytesPerActivate
1884 .init(maxAccessesPerRow)
1885 .name(name() + ".bytesPerActivate")
1886 .desc("Bytes accessed per row activation")
1887 .flags(nozero);
1888
1889 rdPerTurnAround
1890 .init(readBufferSize)
1891 .name(name() + ".rdPerTurnAround")
1892 .desc("Reads before turning the bus around for writes")
1893 .flags(nozero);
1894
1895 wrPerTurnAround
1896 .init(writeBufferSize)
1897 .name(name() + ".wrPerTurnAround")
1898 .desc("Writes before turning the bus around for reads")
1899 .flags(nozero);
1900
1901 bytesReadDRAM
1902 .name(name() + ".bytesReadDRAM")
1903 .desc("Total number of bytes read from DRAM");
1904
1905 bytesReadWrQ
1906 .name(name() + ".bytesReadWrQ")
1907 .desc("Total number of bytes read from write queue");
1908
1909 bytesWritten
1910 .name(name() + ".bytesWritten")
1911 .desc("Total number of bytes written to DRAM");
1912
1913 bytesReadSys
1914 .name(name() + ".bytesReadSys")
1915 .desc("Total read bytes from the system interface side");
1916
1917 bytesWrittenSys
1918 .name(name() + ".bytesWrittenSys")
1919 .desc("Total written bytes from the system interface side");
1920
1921 avgRdBW
1922 .name(name() + ".avgRdBW")
1923 .desc("Average DRAM read bandwidth in MiByte/s")
1924 .precision(2);
1925
1926 avgRdBW = (bytesReadDRAM / 1000000) / simSeconds;
1927
1928 avgWrBW
1929 .name(name() + ".avgWrBW")
1930 .desc("Average achieved write bandwidth in MiByte/s")
1931 .precision(2);
1932
1933 avgWrBW = (bytesWritten / 1000000) / simSeconds;
1934
1935 avgRdBWSys
1936 .name(name() + ".avgRdBWSys")
1937 .desc("Average system read bandwidth in MiByte/s")
1938 .precision(2);
1939
1940 avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
1941
1942 avgWrBWSys
1943 .name(name() + ".avgWrBWSys")
1944 .desc("Average system write bandwidth in MiByte/s")
1945 .precision(2);
1946
1947 avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
1948
1949 peakBW
1950 .name(name() + ".peakBW")
1951 .desc("Theoretical peak bandwidth in MiByte/s")
1952 .precision(2);
1953
1954 peakBW = (SimClock::Frequency / tBURST) * burstSize / 1000000;
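// For illustration: SimClock::Frequency / tBURST is the number of
// bursts per second, so assuming a 64-byte burst every 5 ns
// (e.g. DDR3-1600 with a burst length of 8) the peak works out to
// 200e6 bursts/s * 64 B, i.e. 12800 in the units used here.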
1955
1956 busUtil
1957 .name(name() + ".busUtil")
1958 .desc("Data bus utilization in percentage")
1959 .precision(2);
1960
1961 busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
1962
1963 totGap
1964 .name(name() + ".totGap")
1965 .desc("Total gap between requests");
1966
1967 avgGap
1968 .name(name() + ".avgGap")
1969 .desc("Average gap between requests")
1970 .precision(2);
1971
1972 avgGap = totGap / (readReqs + writeReqs);
1973
1974 // Stats for DRAM Power calculation based on Micron datasheet
1975 busUtilRead
1976 .name(name() + ".busUtilRead")
1977 .desc("Data bus utilization in percentage for reads")
1978 .precision(2);
1979
1980 busUtilRead = avgRdBW / peakBW * 100;
1981
1982 busUtilWrite
1983 .name(name() + ".busUtilWrite")
1984 .desc("Data bus utilization in percentage for writes")
1985 .precision(2);
1986
1987 busUtilWrite = avgWrBW / peakBW * 100;
1988
1989 pageHitRate
1990 .name(name() + ".pageHitRate")
1991 .desc("Row buffer hit rate, read and write combined")
1992 .precision(2);
1993
1994 pageHitRate = (writeRowHits + readRowHits) /
1995 (writeBursts - mergedWrBursts + readBursts - servicedByWrQ) * 100;
1996
1997 pwrStateTime
1998 .init(5)
1999 .name(name() + ".memoryStateTime")
2000 .desc("Time in different power states");
2001 pwrStateTime.subname(0, "IDLE");
2002 pwrStateTime.subname(1, "REF");
2003 pwrStateTime.subname(2, "PRE_PDN");
2004 pwrStateTime.subname(3, "ACT");
2005 pwrStateTime.subname(4, "ACT_PDN");
2006
2007 actEnergy
2008 .init(ranksPerChannel)
2009 .name(name() + ".actEnergy")
2010 .desc("Energy for activate commands per rank (pJ)");
2011
2012 preEnergy
2013 .init(ranksPerChannel)
2014 .name(name() + ".preEnergy")
2015 .desc("Energy for precharge commands per rank (pJ)");
2016
2017 readEnergy
2018 .init(ranksPerChannel)
2019 .name(name() + ".readEnergy")
2020 .desc("Energy for read commands per rank (pJ)");
2021
2022 writeEnergy
2023 .init(ranksPerChannel)
2024 .name(name() + ".writeEnergy")
2025 .desc("Energy for write commands per rank (pJ)");
2026
2027 refreshEnergy
2028 .init(ranksPerChannel)
2029 .name(name() + ".refreshEnergy")
2030 .desc("Energy for refresh commands per rank (pJ)");
2031
2032 actBackEnergy
2033 .init(ranksPerChannel)
2034 .name(name() + ".actBackEnergy")
2035 .desc("Energy for active background per rank (pJ)");
2036
2037 preBackEnergy
2038 .init(ranksPerChannel)
2039 .name(name() + ".preBackEnergy")
2040 .desc("Energy for precharge background per rank (pJ)");
2041
2042 totalEnergy
2043 .init(ranksPerChannel)
2044 .name(name() + ".totalEnergy")
2045 .desc("Total energy per rank (pJ)");
2046
2047 averagePower
2048 .init(ranksPerChannel)
2049 .name(name() + ".averagePower")
2050 .desc("Core power per rank (mW)");
2051}
2052
2053void
2054DRAMCtrl::recvFunctional(PacketPtr pkt)
2055{
2056 // rely on the abstract memory
2057 functionalAccess(pkt);
2058}
2059
2060BaseSlavePort&
2061DRAMCtrl::getSlavePort(const string &if_name, PortID idx)
2062{
2063 if (if_name != "port") {
2064 return MemObject::getSlavePort(if_name, idx);
2065 } else {
2066 return port;
2067 }
2068}
2069
2070unsigned int
2071DRAMCtrl::drain(DrainManager *dm)
2072{
2073 unsigned int count = port.drain(dm);
2074
2075 // if there is anything in any of our internal queues, keep track
2076 // of that as well
2077 if (!(writeQueue.empty() && readQueue.empty() &&
2078 respQueue.empty())) {
2079 DPRINTF(Drain, "DRAM controller not drained, write: %d, read: %d,"
2080 " resp: %d\n", writeQueue.size(), readQueue.size(),
2081 respQueue.size());
2082 ++count;
2083 drainManager = dm;
2084
2085 // the only part that is not drained automatically over time
2086 // is the write queue, thus kick things into action if needed
2087 if (!writeQueue.empty() && !nextReqEvent.scheduled()) {
2088 schedule(nextReqEvent, curTick());
2089 }
2090 }
2091
2092 if (count)
2093 setDrainState(Drainable::Draining);
2094 else
2095 setDrainState(Drainable::Drained);
2096 return count;
2097}
2098
2099DRAMCtrl::MemoryPort::MemoryPort(const std::string& name, DRAMCtrl& _memory)
2100 : QueuedSlavePort(name, &_memory, queue), queue(_memory, *this),
2101 memory(_memory)
2102{ }
2103
2104AddrRangeList
2105DRAMCtrl::MemoryPort::getAddrRanges() const
2106{
2107 AddrRangeList ranges;
2108 ranges.push_back(memory.getAddrRange());
2109 return ranges;
2110}
2111
2112void
2113DRAMCtrl::MemoryPort::recvFunctional(PacketPtr pkt)
2114{
2115 pkt->pushLabel(memory.name());
2116
2117 if (!queue.checkFunctional(pkt)) {
2118 // Default implementation of SimpleTimingPort::recvFunctional()
2119 // calls recvAtomic() and throws away the latency; we can save a
2120 // little here by just not calculating the latency.
2121 memory.recvFunctional(pkt);
2122 }
2123
2124 pkt->popLabel();
2125}
2126
2127Tick
2128DRAMCtrl::MemoryPort::recvAtomic(PacketPtr pkt)
2129{
2130 return memory.recvAtomic(pkt);
2131}
2132
2133bool
2134DRAMCtrl::MemoryPort::recvTimingReq(PacketPtr pkt)
2135{
2136 // pass it to the memory controller
2137 return memory.recvTimingReq(pkt);
2138}
2139
2140DRAMCtrl*
2141DRAMCtrlParams::create()
2142{
2143 return new DRAMCtrl(this);
2144}
232 fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
233 "bank groups per rank (%d) is greater than 1\n",
234 tRRD_L, tRRD, bankGroupsPerRank);
235 }
236 }
237
238}
239
240void
241DRAMCtrl::init()
242{
243 AbstractMemory::init();
244
245 if (!port.isConnected()) {
246 fatal("DRAMCtrl %s is unconnected!\n", name());
247 } else {
248 port.sendRangeChange();
249 }
250}
251
252void
253DRAMCtrl::startup()
254{
255 // timestamp offset should be in clock cycles for DRAMPower
256 timeStampOffset = divCeil(curTick(), tCK);
257 // update the start tick for the precharge accounting to the
258 // current tick
259 pwrStateTick = curTick();
260
261 // shift the bus busy time sufficiently far ahead that we never
262 // have to worry about negative values when computing the time for
263 // the next request, this will add an insignificant bubble at the
264 // start of simulation
265 busBusyUntil = curTick() + tRP + tRCD + tCL;
266
267 // kick off the refresh, and give ourselves enough time to
268 // precharge
269 schedule(refreshEvent, curTick() + tREFI - tRP);
270}
271
272Tick
273DRAMCtrl::recvAtomic(PacketPtr pkt)
274{
275 DPRINTF(DRAM, "recvAtomic: %s 0x%x\n", pkt->cmdString(), pkt->getAddr());
276
277 // do the actual memory access and turn the packet into a response
278 access(pkt);
279
280 Tick latency = 0;
281 if (!pkt->memInhibitAsserted() && pkt->hasData()) {
282 // this value is not supposed to be accurate, just enough to
283 // keep things going, mimic a closed page
284 latency = tRP + tRCD + tCL;
285 }
286 return latency;
287}
288
289bool
290DRAMCtrl::readQueueFull(unsigned int neededEntries) const
291{
292 DPRINTF(DRAM, "Read queue limit %d, current size %d, entries needed %d\n",
293 readBufferSize, readQueue.size() + respQueue.size(),
294 neededEntries);
295
296 return
297 (readQueue.size() + respQueue.size() + neededEntries) > readBufferSize;
298}
299
300bool
301DRAMCtrl::writeQueueFull(unsigned int neededEntries) const
302{
303 DPRINTF(DRAM, "Write queue limit %d, current size %d, entries needed %d\n",
304 writeBufferSize, writeQueue.size(), neededEntries);
305 return (writeQueue.size() + neededEntries) > writeBufferSize;
306}
307
308DRAMCtrl::DRAMPacket*
309DRAMCtrl::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size,
310 bool isRead)
311{
312 // decode the address based on the address mapping scheme, with
313 // Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and
314 // channel, respectively
315 uint8_t rank;
316 uint8_t bank;
317 // use a 64-bit unsigned during the computations as the row is
318 // always the top bits, and check before creating the DRAMPacket
319 uint64_t row;
320
321 // truncate the address to a DRAM burst, which makes it unique to
322 // a specific column, row, bank, rank and channel
323 Addr addr = dramPktAddr / burstSize;
324
325 // we have removed the lowest order address bits that denote the
326 // position within the column
327 if (addrMapping == Enums::RoRaBaChCo) {
328 // the lowest order bits denote the column to ensure that
329 // sequential cache lines occupy the same row
330 addr = addr / columnsPerRowBuffer;
331
332 // take out the channel part of the address
333 addr = addr / channels;
334
335 // after the channel bits, get the bank bits to interleave
336 // over the banks
337 bank = addr % banksPerRank;
338 addr = addr / banksPerRank;
339
340 // after the bank, we get the rank bits which thus interleave
341 // over the ranks
342 rank = addr % ranksPerChannel;
343 addr = addr / ranksPerChannel;
344
345 // lastly, get the row bits
346 row = addr % rowsPerBank;
347 addr = addr / rowsPerBank;
348 } else if (addrMapping == Enums::RoRaBaCoCh) {
349 // take out the lower-order column bits
350 addr = addr / columnsPerStripe;
351
352 // take out the channel part of the address
353 addr = addr / channels;
354
355 // next, the higher-order column bits
356 addr = addr / (columnsPerRowBuffer / columnsPerStripe);
357
358 // after the column bits, we get the bank bits to interleave
359 // over the banks
360 bank = addr % banksPerRank;
361 addr = addr / banksPerRank;
362
363 // after the bank, we get the rank bits which thus interleave
364 // over the ranks
365 rank = addr % ranksPerChannel;
366 addr = addr / ranksPerChannel;
367
368 // lastly, get the row bits
369 row = addr % rowsPerBank;
370 addr = addr / rowsPerBank;
371 } else if (addrMapping == Enums::RoCoRaBaCh) {
372 // optimise for closed page mode and utilise maximum
373 // parallelism of the DRAM (at the cost of power)
374
375 // take out the lower-order column bits
376 addr = addr / columnsPerStripe;
377
378 // take out the channel part of the address, note that this has
379 // to match with how accesses are interleaved between the
380 // controllers in the address mapping
381 addr = addr / channels;
382
383 // start with the bank bits, as this provides the maximum
384 // opportunity for parallelism between requests
385 bank = addr % banksPerRank;
386 addr = addr / banksPerRank;
387
388 // next get the rank bits
389 rank = addr % ranksPerChannel;
390 addr = addr / ranksPerChannel;
391
392 // next, the higher-order column bits
393 addr = addr / (columnsPerRowBuffer / columnsPerStripe);
394
395 // lastly, get the row bits
396 row = addr % rowsPerBank;
397 addr = addr / rowsPerBank;
398 } else
399 panic("Unknown address mapping policy chosen!");
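// Worked example (illustrative values): assuming burstSize = 64,
// columnsPerRowBuffer = 128, channels = 1, banksPerRank = 8 and
// ranksPerChannel = 2, the address 0xB6280 decodes under RoRaBaChCo
// as column 10 (dropped after the first division), bank 3, rank 1
// and row 5.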
400
401 assert(rank < ranksPerChannel);
402 assert(bank < banksPerRank);
403 assert(row < rowsPerBank);
404 assert(row < Bank::NO_ROW);
405
406 DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
407 dramPktAddr, rank, bank, row);
408
409 // create the corresponding DRAM packet with the entry time and
410 // ready time set to the current tick, the latter will be updated
411 // later
412 uint16_t bank_id = banksPerRank * rank + bank;
413 return new DRAMPacket(pkt, isRead, rank, bank, row, bank_id, dramPktAddr,
414 size, banks[rank][bank]);
415}
416
417void
418DRAMCtrl::addToReadQueue(PacketPtr pkt, unsigned int pktCount)
419{
420 // only add to the read queue here. whenever the request is
421 // eventually done, set the readyTime, and call schedule()
422 assert(!pkt->isWrite());
423
424 assert(pktCount != 0);
425
426 // if the request size is larger than burst size, the pkt is split into
427 // multiple DRAM packets
428 // Note if the pkt starting address is not aligned to burst size, the
429 // address of the first DRAM packet is kept unaligned. Subsequent DRAM packets
430 // are aligned to burst size boundaries. This is to ensure we accurately
431 // check read packets against packets in write queue.
432 Addr addr = pkt->getAddr();
433 unsigned pktsServicedByWrQ = 0;
434 BurstHelper* burst_helper = NULL;
435 for (int cnt = 0; cnt < pktCount; ++cnt) {
436 unsigned size = std::min((addr | (burstSize - 1)) + 1,
437 pkt->getAddr() + pkt->getSize()) - addr;
438 readPktSize[ceilLog2(size)]++;
439 readBursts++;
440
441 // First check write buffer to see if the data is already at
442 // the controller
443 bool foundInWrQ = false;
444 for (auto i = writeQueue.begin(); i != writeQueue.end(); ++i) {
445 // check if the read is subsumed in the write entry we are
446 // looking at
447 if ((*i)->addr <= addr &&
448 (addr + size) <= ((*i)->addr + (*i)->size)) {
449 foundInWrQ = true;
450 servicedByWrQ++;
451 pktsServicedByWrQ++;
452 DPRINTF(DRAM, "Read to addr %lld with size %d serviced by "
453 "write queue\n", addr, size);
454 bytesReadWrQ += burstSize;
455 break;
456 }
457 }
458
459 // If not found in the write q, make a DRAM packet and
460 // push it onto the read queue
461 if (!foundInWrQ) {
462
463 // Make the burst helper for split packets
464 if (pktCount > 1 && burst_helper == NULL) {
465 DPRINTF(DRAM, "Read to addr %lld translates to %d "
466 "dram requests\n", pkt->getAddr(), pktCount);
467 burst_helper = new BurstHelper(pktCount);
468 }
469
470 DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size, true);
471 dram_pkt->burstHelper = burst_helper;
472
473 assert(!readQueueFull(1));
474 rdQLenPdf[readQueue.size() + respQueue.size()]++;
475
476 DPRINTF(DRAM, "Adding to read queue\n");
477
478 readQueue.push_back(dram_pkt);
479
480 // Update stats
481 avgRdQLen = readQueue.size() + respQueue.size();
482 }
483
484 // Starting address of next dram pkt (aligned to burstSize boundary)
485 addr = (addr | (burstSize - 1)) + 1;
486 }
487
488 // If all packets are serviced by write queue, we send the response back
489 if (pktsServicedByWrQ == pktCount) {
490 accessAndRespond(pkt, frontendLatency);
491 return;
492 }
493
494 // Update how many split packets are serviced by write queue
495 if (burst_helper != NULL)
496 burst_helper->burstsServiced = pktsServicedByWrQ;
497
498 // If we are not already scheduled to get a request out of the
499 // queue, do so now
500 if (!nextReqEvent.scheduled()) {
501 DPRINTF(DRAM, "Request scheduled immediately\n");
502 schedule(nextReqEvent, curTick());
503 }
504}
505
506void
507DRAMCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pktCount)
508{
509 // only add to the write queue here. whenever the request is
510 // eventually done, set the readyTime, and call schedule()
511 assert(pkt->isWrite());
512
513 // if the request size is larger than burst size, the pkt is split into
514 // multiple DRAM packets
515 Addr addr = pkt->getAddr();
516 for (int cnt = 0; cnt < pktCount; ++cnt) {
517 unsigned size = std::min((addr | (burstSize - 1)) + 1,
518 pkt->getAddr() + pkt->getSize()) - addr;
519 writePktSize[ceilLog2(size)]++;
520 writeBursts++;
521
522 // see if we can merge with an existing item in the write
523 // queue and keep track of whether we have merged or not so we
524 // can stop at that point and also avoid enqueueing a new
525 // request
526 bool merged = false;
527 auto w = writeQueue.begin();
528
529 while(!merged && w != writeQueue.end()) {
530 // either of the two could be first, if they are the same
531 // it does not matter which way we go
532 if ((*w)->addr >= addr) {
533 // the existing one starts after the new one, figure
534 // out where the new one ends with respect to the
535 // existing one
536 if ((addr + size) >= ((*w)->addr + (*w)->size)) {
537 // check if the existing one is completely
538 // subsumed in the new one
539 DPRINTF(DRAM, "Merging write covering existing burst\n");
540 merged = true;
541 // update both the address and the size
542 (*w)->addr = addr;
543 (*w)->size = size;
544 } else if ((addr + size) >= (*w)->addr &&
545 ((*w)->addr + (*w)->size - addr) <= burstSize) {
546 // the new one is just before or partially
547 // overlapping with the existing one, and together
548 // they fit within a burst
549 DPRINTF(DRAM, "Merging write before existing burst\n");
550 merged = true;
551 // the existing queue item needs to be adjusted with
552 // respect to both address and size
553 (*w)->size = (*w)->addr + (*w)->size - addr;
554 (*w)->addr = addr;
555 }
556 } else {
557 // the new one starts after the current one, figure
558 // out where the existing one ends with respect to the
559 // new one
560 if (((*w)->addr + (*w)->size) >= (addr + size)) {
561 // check if the new one is completely subsumed in the
562 // existing one
563 DPRINTF(DRAM, "Merging write into existing burst\n");
564 merged = true;
565 // no adjustments necessary
566 } else if (((*w)->addr + (*w)->size) >= addr &&
567 (addr + size - (*w)->addr) <= burstSize) {
568 // the existing one is just before or partially
569 // overlapping with the new one, and together
570 // they fit within a burst
571 DPRINTF(DRAM, "Merging write after existing burst\n");
572 merged = true;
573 // the address is right, and only the size has
574 // to be adjusted
575 (*w)->size = addr + size - (*w)->addr;
576 }
577 }
578 ++w;
579 }
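// Merge example (illustrative): with burstSize = 64, a new write
// covering [0x100, 0x120) arriving while [0x110, 0x140) is queued
// falls into the "merging write before existing burst" case above;
// the queued entry is updated to addr 0x100 and size 0x40, since
// together the two still fit within a single burst.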
580
581 // if the item was not merged we need to create a new write
582 // and enqueue it
583 if (!merged) {
584 DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size, false);
585
586 assert(writeQueue.size() < writeBufferSize);
587 wrQLenPdf[writeQueue.size()]++;
588
589 DPRINTF(DRAM, "Adding to write queue\n");
590
591 writeQueue.push_back(dram_pkt);
592
593 // Update stats
594 avgWrQLen = writeQueue.size();
595 } else {
596 // keep track of the fact that this burst effectively
597 // disappeared as it was merged with an existing one
598 mergedWrBursts++;
599 }
600
601 // Starting address of next dram pkt (aligned to burstSize boundary)
602 addr = (addr | (burstSize - 1)) + 1;
603 }
604
605 // we do not wait for the writes to be sent to the actual memory,
606 // but instead take responsibility for the consistency here and
607 // snoop the write queue for any upcoming reads
608 // @todo, if a pkt size is larger than burst size, we might need a
609 // different front end latency
610 accessAndRespond(pkt, frontendLatency);
611
612 // If we are not already scheduled to get a request out of the
613 // queue, do so now
614 if (!nextReqEvent.scheduled()) {
615 DPRINTF(DRAM, "Request scheduled immediately\n");
616 schedule(nextReqEvent, curTick());
617 }
618}
619
620void
621DRAMCtrl::printQs() const {
622 DPRINTF(DRAM, "===READ QUEUE===\n\n");
623 for (auto i = readQueue.begin() ; i != readQueue.end() ; ++i) {
624 DPRINTF(DRAM, "Read %lu\n", (*i)->addr);
625 }
626 DPRINTF(DRAM, "\n===RESP QUEUE===\n\n");
627 for (auto i = respQueue.begin() ; i != respQueue.end() ; ++i) {
628 DPRINTF(DRAM, "Response %lu\n", (*i)->addr);
629 }
630 DPRINTF(DRAM, "\n===WRITE QUEUE===\n\n");
631 for (auto i = writeQueue.begin() ; i != writeQueue.end() ; ++i) {
632 DPRINTF(DRAM, "Write %lu\n", (*i)->addr);
633 }
634}
635
636bool
637DRAMCtrl::recvTimingReq(PacketPtr pkt)
638{
639 /// @todo temporary hack to deal with memory corruption issues until
640 /// 4-phase transactions are complete
641 for (int x = 0; x < pendingDelete.size(); x++)
642 delete pendingDelete[x];
643 pendingDelete.clear();
644
645 // This is where we enter from the outside world
646 DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n",
647 pkt->cmdString(), pkt->getAddr(), pkt->getSize());
648
649 // simply drop inhibited packets for now
650 if (pkt->memInhibitAsserted()) {
651 DPRINTF(DRAM, "Inhibited packet -- Dropping it now\n");
652 pendingDelete.push_back(pkt);
653 return true;
654 }
655
656 // Calc avg gap between requests
657 if (prevArrival != 0) {
658 totGap += curTick() - prevArrival;
659 }
660 prevArrival = curTick();
661
662
663 // Find out how many dram packets a pkt translates to
664 // If the burst size is equal or larger than the pkt size, then a pkt
665 // translates to only one dram packet. Otherwise, a pkt translates to
666 // multiple dram packets
667 unsigned size = pkt->getSize();
668 unsigned offset = pkt->getAddr() & (burstSize - 1);
669 unsigned int dram_pkt_count = divCeil(offset + size, burstSize);
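// For example, a 128-byte request at address 0x3f8 with a 64-byte
// burst size has offset 0x38, so dram_pkt_count becomes
// divCeil(0x38 + 128, 64) = 3 and the queueing code below splits it
// into bursts of 8, 64 and 56 bytes.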
670
671 // check local buffers and do not accept if full
672 if (pkt->isRead()) {
673 assert(size != 0);
674 if (readQueueFull(dram_pkt_count)) {
675 DPRINTF(DRAM, "Read queue full, not accepting\n");
676 // remember that we have to retry this port
677 retryRdReq = true;
678 numRdRetry++;
679 return false;
680 } else {
681 addToReadQueue(pkt, dram_pkt_count);
682 readReqs++;
683 bytesReadSys += size;
684 }
685 } else if (pkt->isWrite()) {
686 assert(size != 0);
687 if (writeQueueFull(dram_pkt_count)) {
688 DPRINTF(DRAM, "Write queue full, not accepting\n");
689 // remember that we have to retry this port
690 retryWrReq = true;
691 numWrRetry++;
692 return false;
693 } else {
694 addToWriteQueue(pkt, dram_pkt_count);
695 writeReqs++;
696 bytesWrittenSys += size;
697 }
698 } else {
699 DPRINTF(DRAM,"Neither read nor write, ignore timing\n");
700 neitherReadNorWrite++;
701 accessAndRespond(pkt, 1);
702 }
703
704 return true;
705}
706
707void
708DRAMCtrl::processRespondEvent()
709{
710 DPRINTF(DRAM,
711 "processRespondEvent(): Some req has reached its readyTime\n");
712
713 DRAMPacket* dram_pkt = respQueue.front();
714
715 if (dram_pkt->burstHelper) {
716 // it is a split packet
717 dram_pkt->burstHelper->burstsServiced++;
718 if (dram_pkt->burstHelper->burstsServiced ==
719 dram_pkt->burstHelper->burstCount) {
720 // we have now serviced all children packets of a system packet
721 // so we can now respond to the requester
722 // @todo we probably want to have a different front end and back
723 // end latency for split packets
724 accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
725 delete dram_pkt->burstHelper;
726 dram_pkt->burstHelper = NULL;
727 }
728 } else {
729 // it is not a split packet
730 accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
731 }
732
733 delete respQueue.front();
734 respQueue.pop_front();
735
736 if (!respQueue.empty()) {
737 assert(respQueue.front()->readyTime >= curTick());
738 assert(!respondEvent.scheduled());
739 schedule(respondEvent, respQueue.front()->readyTime);
740 } else {
741 // if there is nothing left in any queue, signal a drain
742 if (writeQueue.empty() && readQueue.empty() &&
743 drainManager) {
744 DPRINTF(Drain, "DRAM controller done draining\n");
745 drainManager->signalDrainDone();
746 drainManager = NULL;
747 }
748 }
749
750 // We have made a location in the queue available at this point,
751 // so if there is a read that was forced to wait, retry now
752 if (retryRdReq) {
753 retryRdReq = false;
754 port.sendRetry();
755 }
756}
757
758void
759DRAMCtrl::chooseNext(std::deque<DRAMPacket*>& queue, bool switched_cmd_type)
760{
761 // This method does the arbitration between requests. The chosen
762 // packet is simply moved to the head of the queue. The other
763 // methods know that this is the place to look. For example, with
764 // FCFS, this method does nothing
765 assert(!queue.empty());
766
767 if (queue.size() == 1) {
768 DPRINTF(DRAM, "Single request, nothing to do\n");
769 return;
770 }
771
772 if (memSchedPolicy == Enums::fcfs) {
773 // Do nothing, since the correct request is already head
774 } else if (memSchedPolicy == Enums::frfcfs) {
775 reorderQueue(queue, switched_cmd_type);
776 } else
777 panic("No scheduling policy chosen\n");
778}
779
780void
781DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue, bool switched_cmd_type)
782{
783 // Only determine this when needed
784 uint64_t earliest_banks = 0;
785
786 // Search for row hits first, if no row hit is found then schedule the
787 // packet to one of the earliest banks available
788 bool found_earliest_pkt = false;
789 bool found_prepped_diff_rank_pkt = false;
790 auto selected_pkt_it = queue.begin();
791
792 for (auto i = queue.begin(); i != queue.end() ; ++i) {
793 DRAMPacket* dram_pkt = *i;
794 const Bank& bank = dram_pkt->bankRef;
795 // Check if it is a row hit
796 if (bank.openRow == dram_pkt->row) {
797 if (dram_pkt->rank == activeRank || switched_cmd_type) {
798 // FCFS within the hits, giving priority to commands
799 // that access the same rank as the previous burst
800 // to minimize bus turnaround delays
801 // Only give rank priority when command type is not changing
802 DPRINTF(DRAM, "Row buffer hit\n");
803 selected_pkt_it = i;
804 break;
805 } else if (!found_prepped_diff_rank_pkt) {
806 // found row hit for command on different rank than prev burst
807 selected_pkt_it = i;
808 found_prepped_diff_rank_pkt = true;
809 }
810 } else if (!found_earliest_pkt && !found_prepped_diff_rank_pkt) {
811 // No row hit and
812 // haven't found an entry with a row hit to a new rank
813 if (earliest_banks == 0)
814 // Determine entries with earliest bank prep delay
815 // Function will give priority to commands that access the
816 // same rank as previous burst and can prep the bank seamlessly
817 earliest_banks = minBankPrep(queue, switched_cmd_type);
818
819 // FCFS - Bank is first available bank
820 if (bits(earliest_banks, dram_pkt->bankId, dram_pkt->bankId)) {
821 // Remember the packet to be scheduled to one of the earliest
822 // banks available, FCFS amongst the earliest banks
823 selected_pkt_it = i;
824 found_earliest_pkt = true;
825 }
826 }
827 }
828
829 DRAMPacket* selected_pkt = *selected_pkt_it;
830 queue.erase(selected_pkt_it);
831 queue.push_front(selected_pkt);
832}
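// The resulting FR-FCFS selection order, from highest to lowest
// priority, is: 1) the first row hit to the currently active rank
// (or to any rank right after a bus turnaround), 2) the first row
// hit to a different rank, 3) the first packet targeting one of the
// banks flagged by minBankPrep() as earliest to prepare, and 4)
// failing all of these, the packet already at the head of the queue.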
833
834void
835DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
836{
837 DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());
838
839 bool needsResponse = pkt->needsResponse();
840 // do the actual memory access which also turns the packet into a
841 // response
842 access(pkt);
843
844 // turn packet around to go back to requester if response expected
845 if (needsResponse) {
846 // access already turned the packet into a response
847 assert(pkt->isResponse());
848
849 // @todo someone should pay for this
850 pkt->firstWordDelay = pkt->lastWordDelay = 0;
851
852 // queue the packet in the response queue to be sent out after
853 // the static latency has passed
854 port.schedTimingResp(pkt, curTick() + static_latency);
855 } else {
856 // @todo the packet is going to be deleted, and the DRAMPacket
857 // still has a pointer to it
858 pendingDelete.push_back(pkt);
859 }
860
861 DPRINTF(DRAM, "Done\n");
862
863 return;
864}
865
866void
867DRAMCtrl::activateBank(Bank& bank, Tick act_tick, uint32_t row)
868{
869 // get the rank index from the bank
870 uint8_t rank = bank.rank;
871
872 assert(actTicks[rank].size() == activationLimit);
873
874 DPRINTF(DRAM, "Activate at tick %d\n", act_tick);
875
876 // update the open row
877 assert(bank.openRow == Bank::NO_ROW);
878 bank.openRow = row;
879
880 // start counting anew, this covers both the case when we
881 // auto-precharged, and when this access is forced to
882 // precharge
883 bank.bytesAccessed = 0;
884 bank.rowAccesses = 0;
885
886 ++numBanksActive;
887 assert(numBanksActive <= banksPerRank * ranksPerChannel);
888
889 DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got %d active\n",
890 bank.bank, bank.rank, act_tick, numBanksActive);
891
892 rankPower[bank.rank].powerlib.doCommand(MemCommand::ACT, bank.bank,
893 divCeil(act_tick, tCK) -
894 timeStampOffset);
895
896 DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_tick, tCK) -
897 timeStampOffset, bank.bank, bank.rank);
898
899 // The next access has to respect tRAS for this bank
900 bank.preAllowedAt = act_tick + tRAS;
901
902 // Respect the row-to-column command delay
903 bank.colAllowedAt = std::max(act_tick + tRCD, bank.colAllowedAt);
904
905 // start by enforcing tRRD
906 for(int i = 0; i < banksPerRank; i++) {
907 // next activate to any bank in this rank must not happen
908 // before tRRD
909 if (bankGroupArch && (bank.bankgr == banks[rank][i].bankgr)) {
910 // bank group architecture requires longer delays between
911 // ACT commands within the same bank group. Use tRRD_L
912 // in this case
913 banks[rank][i].actAllowedAt = std::max(act_tick + tRRD_L,
914 banks[rank][i].actAllowedAt);
915 } else {
916 // use shorter tRRD value when either
917 // 1) bank group architecture is not supported
918 // 2) bank is in a different bank group
919 banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
920 banks[rank][i].actAllowedAt);
921 }
922 }
923
924 // next, we deal with tXAW, if the activation limit is disabled
925 // then we directly schedule an activate power event
926 if (!actTicks[rank].empty()) {
927 // sanity check
928 if (actTicks[rank].back() &&
929 (act_tick - actTicks[rank].back()) < tXAW) {
930 panic("Got %d activates in window %d (%llu - %llu) which "
931 "is smaller than %llu\n", activationLimit, act_tick -
932 actTicks[rank].back(), act_tick, actTicks[rank].back(),
933 tXAW);
934 }
935
936 // shift the times used for the book keeping, the last element
937 // (highest index) is the oldest one and hence the lowest value
938 actTicks[rank].pop_back();
939
940 // record a new activation (in the future)
941 actTicks[rank].push_front(act_tick);
942
943 // cannot activate more than X times in time window tXAW, push the
944 // next one (the X + 1'st activate) to be tXAW away from the
945 // oldest in our window of X
946 if (actTicks[rank].back() &&
947 (act_tick - actTicks[rank].back()) < tXAW) {
948 DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate "
949 "no earlier than %llu\n", activationLimit,
950 actTicks[rank].back() + tXAW);
951 for(int j = 0; j < banksPerRank; j++)
952 // next activate must not happen before end of window
953 banks[rank][j].actAllowedAt =
954 std::max(actTicks[rank].back() + tXAW,
955 banks[rank][j].actAllowedAt);
956 }
957 }
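// For illustration: actTicks keeps the timestamps of the last
// activationLimit (X) activates, newest at the front. Assuming
// X = 4 so that tXAW plays the role of tFAW, if the oldest of the
// last four activates happened at tick t0, the fifth activate in
// that window is pushed out to t0 + tXAW for every bank in the rank.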
958
959 // at the point when this activate takes place, make sure we
960 // transition to the active power state
961 if (!activateEvent.scheduled())
962 schedule(activateEvent, act_tick);
963 else if (activateEvent.when() > act_tick)
964 // move it sooner in time
965 reschedule(activateEvent, act_tick);
966}
967
968void
969DRAMCtrl::processActivateEvent()
970{
971 // we should transition to the active state as soon as any bank is active
972 if (pwrState != PWR_ACT)
973 // note that at this point numBanksActive could be back at
974 // zero again due to a precharge scheduled in the future
975 schedulePowerEvent(PWR_ACT, curTick());
976}
977
978void
979DRAMCtrl::prechargeBank(Bank& bank, Tick pre_at, bool trace)
980{
981 // make sure the bank has an open row
982 assert(bank.openRow != Bank::NO_ROW);
983
984 // sample the bytes per activate here since we are closing
985 // the page
986 bytesPerActivate.sample(bank.bytesAccessed);
987
988 bank.openRow = Bank::NO_ROW;
989
990 // no precharge allowed before this one
991 bank.preAllowedAt = pre_at;
992
993 Tick pre_done_at = pre_at + tRP;
994
995 bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at);
996
997 assert(numBanksActive != 0);
998 --numBanksActive;
999
1000 DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got "
1001 "%d active\n", bank.bank, bank.rank, pre_at, numBanksActive);
1002
1003 if (trace) {
1004
1005 rankPower[bank.rank].powerlib.doCommand(MemCommand::PRE, bank.bank,
1006 divCeil(pre_at, tCK) -
1007 timeStampOffset);
1008 DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) -
1009 timeStampOffset, bank.bank, bank.rank);
1010 }
1011 // if we look at the current number of active banks we might be
1012 // tempted to think the DRAM is now idle, however this can be
1013 // undone by an activate that is scheduled to happen before we
1014 // would have reached the idle state, so schedule an event and
1015 // rather check once we actually make it to the point in time when
1016 // the (last) precharge takes place
1017 if (!prechargeEvent.scheduled())
1018 schedule(prechargeEvent, pre_done_at);
1019 else if (prechargeEvent.when() < pre_done_at)
1020 reschedule(prechargeEvent, pre_done_at);
1021}
1022
1023void
1024DRAMCtrl::processPrechargeEvent()
1025{
1026 // if we reached zero, then special conditions apply as we track
1027 // if all banks are precharged for the power models
1028 if (numBanksActive == 0) {
1029 // we should transition to the idle state when the last bank
1030 // is precharged
1031 schedulePowerEvent(PWR_IDLE, curTick());
1032 }
1033}
1034
1035void
1036DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
1037{
1038 DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
1039 dram_pkt->addr, dram_pkt->rank, dram_pkt->bank, dram_pkt->row);
1040
1041 // get the bank
1042 Bank& bank = dram_pkt->bankRef;
1043
1044 // for the state we need to track if it is a row hit or not
1045 bool row_hit = true;
1046
1047 // respect any constraints on the command (e.g. tRCD or tCCD)
1048 Tick cmd_at = std::max(bank.colAllowedAt, curTick());
1049
1050 // Determine the access latency and update the bank state
1051 if (bank.openRow == dram_pkt->row) {
1052 // nothing to do
1053 } else {
1054 row_hit = false;
1055
1056 // If there is a page open, precharge it.
1057 if (bank.openRow != Bank::NO_ROW) {
1058 prechargeBank(bank, std::max(bank.preAllowedAt, curTick()));
1059 }
1060
1061 // next we need to account for the delay in activating the
1062 // page
1063 Tick act_tick = std::max(bank.actAllowedAt, curTick());
1064
1065 // Record the activation and deal with all the global timing
1066 // constraints caused by a new activation (tRRD and tXAW)
1067 activateBank(bank, act_tick, dram_pkt->row);
1068
1069 // issue the command as early as possible
1070 cmd_at = bank.colAllowedAt;
1071 }
1072
1073 // we need to wait until the bus is available before we can issue
1074 // the command
1075 cmd_at = std::max(cmd_at, busBusyUntil - tCL);
1076
1077 // update the packet ready time
1078 dram_pkt->readyTime = cmd_at + tCL + tBURST;
1079
1080 // only one burst can use the bus at any one point in time
1081 assert(dram_pkt->readyTime - busBusyUntil >= tBURST);
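// To sketch the timeline for a row miss: ACT at act_tick, the RD/WR
// command at cmd_at >= act_tick + tRCD, and data on the bus during
// [cmd_at + tCL, cmd_at + tCL + tBURST), which is the readyTime
// computed above. A row hit skips the ACT and issues as soon as the
// bank and the data bus allow.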
1082
1083 // update the time for the next read/write burst for each
1084 // bank (add a max with tCCD/tCCD_L here)
1085 Tick cmd_dly;
1086 for(int j = 0; j < ranksPerChannel; j++) {
1087 for(int i = 0; i < banksPerRank; i++) {
1088 // next burst to same bank group in this rank must not happen
1089 // before tCCD_L. Different bank group timing requirement is
1090 // tBURST; Add tCS for different ranks
1091 if (dram_pkt->rank == j) {
1092 if (bankGroupArch && (bank.bankgr == banks[j][i].bankgr)) {
1093 // bank group architecture requires longer delays between
1094 // RD/WR burst commands to the same bank group.
1095 // Use tCCD_L in this case
1096 cmd_dly = tCCD_L;
1097 } else {
1098 // use tBURST (equivalent to tCCD_S), the shorter
1099 // cas-to-cas delay value, when either:
1100 // 1) bank group architecture is not supported
1101 // 2) bank is in a different bank group
1102 cmd_dly = tBURST;
1103 }
1104 } else {
1105 // different rank is by default in a different bank group
1106 // use tBURST (equivalent to tCCD_S), which is the shorter
1107 // cas-to-cas delay in this case
1108 // Add tCS to account for rank-to-rank bus delay requirements
1109 cmd_dly = tBURST + tCS;
1110 }
1111 banks[j][i].colAllowedAt = std::max(cmd_at + cmd_dly,
1112 banks[j][i].colAllowedAt);
1113 }
1114 }
1115
1116 // Save rank of current access
1117 activeRank = dram_pkt->rank;
1118
1119 // If this is a write, we also need to respect the write recovery
1120 // time before a precharge, in the case of a read, respect the
1121 // read to precharge constraint
1122 bank.preAllowedAt = std::max(bank.preAllowedAt,
1123 dram_pkt->isRead ? cmd_at + tRTP :
1124 dram_pkt->readyTime + tWR);
1125
1126 // increment the bytes accessed and the accesses per row
1127 bank.bytesAccessed += burstSize;
1128 ++bank.rowAccesses;
1129
1130 // if we reached the max, then issue with an auto-precharge
1131 bool auto_precharge = pageMgmt == Enums::close ||
1132 bank.rowAccesses == maxAccessesPerRow;
1133
1134 // if we did not hit the limit, we might still want to
1135 // auto-precharge
1136 if (!auto_precharge &&
1137 (pageMgmt == Enums::open_adaptive ||
1138 pageMgmt == Enums::close_adaptive)) {
1139 // a twist on the open and close page policies:
1140 // 1) open_adaptive page policy does not blindly keep the
1141 // page open, but close it if there are no row hits, and there
1142 // are bank conflicts in the queue
1143 // 2) close_adaptive page policy does not blindly close the
1144 // page, but closes it only if there are no row hits in the queue.
1145 // In this case, only force an auto precharge when there
1146 // are no same page hits in the queue
1147 bool got_more_hits = false;
1148 bool got_bank_conflict = false;
1149
1150 // either look at the read queue or write queue
1151 const deque<DRAMPacket*>& queue = dram_pkt->isRead ? readQueue :
1152 writeQueue;
1153 auto p = queue.begin();
1154 // make sure we are not considering the packet that we are
1155 // currently dealing with (which is the head of the queue)
1156 ++p;
1157
1158     // keep on looking until we have found the required condition or
1159 // reached the end
1160 while (!(got_more_hits &&
1161 (got_bank_conflict || pageMgmt == Enums::close_adaptive)) &&
1162 p != queue.end()) {
1163 bool same_rank_bank = (dram_pkt->rank == (*p)->rank) &&
1164 (dram_pkt->bank == (*p)->bank);
1165 bool same_row = dram_pkt->row == (*p)->row;
1166 got_more_hits |= same_rank_bank && same_row;
1167 got_bank_conflict |= same_rank_bank && !same_row;
1168 ++p;
1169 }
1170
1171 // auto pre-charge when either
1172 // 1) open_adaptive policy, we have not got any more hits, and
1173 // have a bank conflict
1174 // 2) close_adaptive policy and we have not got any more hits
1175 auto_precharge = !got_more_hits &&
1176 (got_bank_conflict || pageMgmt == Enums::close_adaptive);
1177 }
1178
1179 // DRAMPower trace command to be written
1180 std::string mem_cmd = dram_pkt->isRead ? "RD" : "WR";
1181
1182 // MemCommand required for DRAMPower library
1183 MemCommand::cmds command = (mem_cmd == "RD") ? MemCommand::RD :
1184 MemCommand::WR;
1185
1186 // if this access should use auto-precharge, then we are
1187 // closing the row
1188 if (auto_precharge) {
1189 // if auto-precharge push a PRE command at the correct tick to the
1190 // list used by DRAMPower library to calculate power
1191 prechargeBank(bank, std::max(curTick(), bank.preAllowedAt));
1192
1193 DPRINTF(DRAM, "Auto-precharged bank: %d\n", dram_pkt->bankId);
1194 }
1195
1196 // Update bus state
1197 busBusyUntil = dram_pkt->readyTime;
1198
1199 DPRINTF(DRAM, "Access to %lld, ready at %lld bus busy until %lld.\n",
1200 dram_pkt->addr, dram_pkt->readyTime, busBusyUntil);
1201
1202 rankPower[dram_pkt->rank].powerlib.doCommand(command, dram_pkt->bank,
1203 divCeil(cmd_at, tCK) -
1204 timeStampOffset);
1205
1206 DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) -
1207 timeStampOffset, mem_cmd, dram_pkt->bank, dram_pkt->rank);
1208
1209     // Update the minimum timing between the requests; this is a
1210     // conservative estimate of when we have to schedule the next
1211     // request so as not to introduce any unnecessary bubbles. In most cases
1212 // we will wake up sooner than we have to.
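    // A request that still has to precharge (tRP), activate (tRCD) and issue
    // its column command (tCL) at that point can have its data on the bus
    // exactly when the current burst completes, so waking up this early
    // avoids bus idle time even in the worst case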
1213 nextReqTime = busBusyUntil - (tRP + tRCD + tCL);
1214
1215 // Update the stats and schedule the next request
1216 if (dram_pkt->isRead) {
1217 ++readsThisTime;
1218 if (row_hit)
1219 readRowHits++;
1220 bytesReadDRAM += burstSize;
1221 perBankRdBursts[dram_pkt->bankId]++;
1222
1223 // Update latency stats
1224 totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
1225 totBusLat += tBURST;
1226 totQLat += cmd_at - dram_pkt->entryTime;
1227 } else {
1228 ++writesThisTime;
1229 if (row_hit)
1230 writeRowHits++;
1231 bytesWritten += burstSize;
1232 perBankWrBursts[dram_pkt->bankId]++;
1233 }
1234}
1235
1236void
1237DRAMCtrl::processNextReqEvent()
1238{
1239 // pre-emptively set to false. Overwrite if in READ_TO_WRITE
1240 // or WRITE_TO_READ state
1241 bool switched_cmd_type = false;
1242 if (busState == READ_TO_WRITE) {
1243 DPRINTF(DRAM, "Switching to writes after %d reads with %d reads "
1244 "waiting\n", readsThisTime, readQueue.size());
1245
1246 // sample and reset the read-related stats as we are now
1247 // transitioning to writes, and all reads are done
1248 rdPerTurnAround.sample(readsThisTime);
1249 readsThisTime = 0;
1250
1251 // now proceed to do the actual writes
1252 busState = WRITE;
1253 switched_cmd_type = true;
1254 } else if (busState == WRITE_TO_READ) {
1255 DPRINTF(DRAM, "Switching to reads after %d writes with %d writes "
1256 "waiting\n", writesThisTime, writeQueue.size());
1257
1258 wrPerTurnAround.sample(writesThisTime);
1259 writesThisTime = 0;
1260
1261 busState = READ;
1262 switched_cmd_type = true;
1263 }
1264
1265 if (refreshState != REF_IDLE) {
1266         // if a refresh is waiting for this event loop to finish, then hand
1267 // over now, and do not schedule a new nextReqEvent
1268 if (refreshState == REF_DRAIN) {
1269 DPRINTF(DRAM, "Refresh drain done, now precharging\n");
1270
1271 refreshState = REF_PRE;
1272
1273 // hand control back to the refresh event loop
1274 schedule(refreshEvent, curTick());
1275 }
1276
1277 // let the refresh finish before issuing any further requests
1278 return;
1279 }
1280
1281 // when we get here it is either a read or a write
1282 if (busState == READ) {
1283
1284 // track if we should switch or not
1285 bool switch_to_writes = false;
1286
1287 if (readQueue.empty()) {
1288 // In the case there is no read request to go next,
1289 // trigger writes if we have passed the low threshold (or
1290 // if we are draining)
1291 if (!writeQueue.empty() &&
1292 (drainManager || writeQueue.size() > writeLowThreshold)) {
1293
1294 switch_to_writes = true;
1295 } else {
1296 // check if we are drained
1297                 if (respQueue.empty() && drainManager) {
1298 DPRINTF(Drain, "DRAM controller done draining\n");
1299 drainManager->signalDrainDone();
1300 drainManager = NULL;
1301 }
1302
1303 // nothing to do, not even any point in scheduling an
1304 // event for the next request
1305 return;
1306 }
1307 } else {
1308 // Figure out which read request goes next, and move it to the
1309 // front of the read queue
1310 chooseNext(readQueue, switched_cmd_type);
1311
1312 DRAMPacket* dram_pkt = readQueue.front();
1313
1314             // here we get a bit creative and shift the bus busy time by not
1315             // just tWTR, but also a CAS latency, to capture the fact
1316             // that we are allowed to prepare a new bank, but cannot issue a
1317             // read command until after tWTR; in essence we capture a
1318             // bubble on the data bus that is tWTR + tCL long
1319 if (switched_cmd_type && dram_pkt->rank == activeRank) {
1320 busBusyUntil += tWTR + tCL;
1321 }
1322
1323 doDRAMAccess(dram_pkt);
1324
1325 // At this point we're done dealing with the request
1326 readQueue.pop_front();
1327
1328 // sanity check
1329 assert(dram_pkt->size <= burstSize);
1330 assert(dram_pkt->readyTime >= curTick());
1331
1332 // Insert into response queue. It will be sent back to the
1333 // requestor at its readyTime
1334 if (respQueue.empty()) {
1335 assert(!respondEvent.scheduled());
1336 schedule(respondEvent, dram_pkt->readyTime);
1337 } else {
1338 assert(respQueue.back()->readyTime <= dram_pkt->readyTime);
1339 assert(respondEvent.scheduled());
1340 }
1341
1342 respQueue.push_back(dram_pkt);
1343
1344 // we have so many writes that we have to transition
1345 if (writeQueue.size() > writeHighThreshold) {
1346 switch_to_writes = true;
1347 }
1348 }
1349
1350 // switching to writes, either because the read queue is empty
1351 // and the writes have passed the low threshold (or we are
1352         // draining), or because the writes hit the high threshold
1353 if (switch_to_writes) {
1354 // transition to writing
1355 busState = READ_TO_WRITE;
1356 }
1357 } else {
1358 chooseNext(writeQueue, switched_cmd_type);
1359 DRAMPacket* dram_pkt = writeQueue.front();
1360 // sanity check
1361 assert(dram_pkt->size <= burstSize);
1362
1363             // add a bubble to the data bus, as defined by
1364             // tRTW, when the access is to the same rank as the previous burst.
1365 // Different rank timing is handled with tCS, which is
1366 // applied to colAllowedAt
1367 if (switched_cmd_type && dram_pkt->rank == activeRank) {
1368 busBusyUntil += tRTW;
1369 }
1370
1371 doDRAMAccess(dram_pkt);
1372
1373 writeQueue.pop_front();
1374 delete dram_pkt;
1375
1376 // If we emptied the write queue, or got sufficiently below the
1377 // threshold (using the minWritesPerSwitch as the hysteresis) and
1378 // are not draining, or we have reads waiting and have done enough
1379 // writes, then switch to reads.
1380 if (writeQueue.empty() ||
1381 (writeQueue.size() + minWritesPerSwitch < writeLowThreshold &&
1382 !drainManager) ||
1383 (!readQueue.empty() && writesThisTime >= minWritesPerSwitch)) {
1384 // turn the bus back around for reads again
1385 busState = WRITE_TO_READ;
1386
1387             // note that we also switch back to reads in the idle
1388             // case, which will eventually check for any draining and
1389             // pause any further scheduling if there is really
1390             // nothing to do
1391 }
1392 }
1393
1394 schedule(nextReqEvent, std::max(nextReqTime, curTick()));
1395
1396 // If there is space available and we have writes waiting then let
1397 // them retry. This is done here to ensure that the retry does not
1398 // cause a nextReqEvent to be scheduled before we do so as part of
1399 // the next request processing
1400 if (retryWrReq && writeQueue.size() < writeBufferSize) {
1401 retryWrReq = false;
1402 port.sendRetry();
1403 }
1404}
1405
1406uint64_t
1407DRAMCtrl::minBankPrep(const deque<DRAMPacket*>& queue,
1408 bool switched_cmd_type) const
1409{
1410 uint64_t bank_mask = 0;
1411 Tick min_act_at = MaxTick;
1412
1413 uint64_t bank_mask_same_rank = 0;
1414 Tick min_act_at_same_rank = MaxTick;
1415
1416 // Give precedence to commands that access same rank as previous command
1417 bool same_rank_match = false;
1418
1419     // determine if we have queued transactions targeting the
1420 // bank in question
1421 vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
1422 for (auto p = queue.begin(); p != queue.end(); ++p) {
1423 got_waiting[(*p)->bankId] = true;
1424 }
1425
1426 for (int i = 0; i < ranksPerChannel; i++) {
1427 for (int j = 0; j < banksPerRank; j++) {
1428 uint8_t bank_id = i * banksPerRank + j;
1429
1430 // if we have waiting requests for the bank, and it is
1431 // amongst the first available, update the mask
1432 if (got_waiting[bank_id]) {
1433 // simplistic approximation of when the bank can issue
1434 // an activate, ignoring any rank-to-rank switching
1435 // cost in this calculation
1436 Tick act_at = banks[i][j].openRow == Bank::NO_ROW ?
1437 banks[i][j].actAllowedAt :
1438 std::max(banks[i][j].preAllowedAt, curTick()) + tRP;
1439
1440                 // prioritize commands that access the
1441                 // same rank as the previous burst;
1442                 // calculate the bank mask for this case separately and
1443                 // evaluate it once the loop iterations complete
1444 if (i == activeRank && ranksPerChannel > 1) {
1445 if (act_at <= min_act_at_same_rank) {
1446                         // reset the same rank bank mask if a new minimum is found
1447                         // and the previous minimum could not immediately send an ACT
1448 if (act_at < min_act_at_same_rank &&
1449 min_act_at_same_rank > curTick())
1450 bank_mask_same_rank = 0;
1451
1452 // Set flag indicating that a same rank
1453 // opportunity was found
1454 same_rank_match = true;
1455
1456 // set the bit corresponding to the available bank
1457 replaceBits(bank_mask_same_rank, bank_id, bank_id, 1);
1458 min_act_at_same_rank = act_at;
1459 }
1460 } else {
1461 if (act_at <= min_act_at) {
1462                     // reset the bank mask if a new minimum is found
1463                     // and the previous minimum could not immediately send an ACT
1464 if (act_at < min_act_at && min_act_at > curTick())
1465 bank_mask = 0;
1466 // set the bit corresponding to the available bank
1467 replaceBits(bank_mask, bank_id, bank_id, 1);
1468 min_act_at = act_at;
1469 }
1470 }
1471 }
1472 }
1473 }
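    // at this point bank_mask marks, one bit per bankId, the bank(s) with
    // waiting requests whose activate can be issued the soonest, and
    // bank_mask_same_rank holds the corresponding set restricted to the
    // rank of the previous burst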
1474
1475 // Determine the earliest time when the next burst can issue based
1476 // on the current busBusyUntil delay.
1477 // Offset by tRCD to correlate with ACT timing variables
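    // an ACT issued by this time can be followed by its column command tRCD
    // later and put data on the bus tCL after that, i.e. exactly at
    // busBusyUntil, allowing a seamless back-to-back burst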
1478 Tick min_cmd_at = busBusyUntil - tCL - tRCD;
1479
1480 // Prioritize same rank accesses that can issue B2B
1481 // Only optimize for same ranks when the command type
1482 // does not change; do not want to unnecessarily incur tWTR
1483 //
1484 // Resulting FCFS prioritization Order is:
1485 // 1) Commands that access the same rank as previous burst
1486 // and can prep the bank seamlessly.
1487 // 2) Commands (any rank) with earliest bank prep
1488 if (!switched_cmd_type && same_rank_match &&
1489 min_act_at_same_rank <= min_cmd_at) {
1490 bank_mask = bank_mask_same_rank;
1491 }
1492
1493 return bank_mask;
1494}
1495
1496void
1497DRAMCtrl::processRefreshEvent()
1498{
1499 // when first preparing the refresh, remember when it was due
1500 if (refreshState == REF_IDLE) {
1501 // remember when the refresh is due
1502 refreshDueAt = curTick();
1503
1504 // proceed to drain
1505 refreshState = REF_DRAIN;
1506
1507 DPRINTF(DRAM, "Refresh due\n");
1508 }
1509
1510 // let any scheduled read or write go ahead, after which it will
1511 // hand control back to this event loop
1512 if (refreshState == REF_DRAIN) {
1513 if (nextReqEvent.scheduled()) {
1514 // hand control over to the request loop until it is
1515 // evaluated next
1516 DPRINTF(DRAM, "Refresh awaiting draining\n");
1517
1518 return;
1519 } else {
1520 refreshState = REF_PRE;
1521 }
1522 }
1523
1524 // at this point, ensure that all banks are precharged
1525 if (refreshState == REF_PRE) {
1526 // precharge any active bank if we are not already in the idle
1527 // state
1528 if (pwrState != PWR_IDLE) {
1529 // at the moment, we use a precharge all even if there is
1530 // only a single bank open
1531 DPRINTF(DRAM, "Precharging all\n");
1532
1533 // first determine when we can precharge
1534 Tick pre_at = curTick();
1535 for (int i = 0; i < ranksPerChannel; i++) {
1536 for (int j = 0; j < banksPerRank; j++) {
1537 // respect both causality and any existing bank
1538 // constraints, some banks could already have a
1539 // (auto) precharge scheduled
1540 pre_at = std::max(banks[i][j].preAllowedAt, pre_at);
1541 }
1542 }
1543
1544 // make sure all banks are precharged, and for those that
1545 // already are, update their availability
1546 Tick act_allowed_at = pre_at + tRP;
1547
1548 for (int i = 0; i < ranksPerChannel; i++) {
1549 for (int j = 0; j < banksPerRank; j++) {
1550 if (banks[i][j].openRow != Bank::NO_ROW) {
1551 prechargeBank(banks[i][j], pre_at, false);
1552 } else {
1553 banks[i][j].actAllowedAt =
1554 std::max(banks[i][j].actAllowedAt, act_allowed_at);
1555 banks[i][j].preAllowedAt =
1556 std::max(banks[i][j].preAllowedAt, pre_at);
1557 }
1558 }
1559
1560 // at the moment this affects all ranks
1561 rankPower[i].powerlib.doCommand(MemCommand::PREA, 0,
1562 divCeil(pre_at, tCK) -
1563 timeStampOffset);
1564
1565 DPRINTF(DRAMPower, "%llu,PREA,0,%d\n", divCeil(pre_at, tCK) -
1566 timeStampOffset, i);
1567 }
1568 } else {
1569 DPRINTF(DRAM, "All banks already precharged, starting refresh\n");
1570
1571 // go ahead and kick the power state machine into gear if
1572 // we are already idle
1573 schedulePowerEvent(PWR_REF, curTick());
1574 }
1575
1576 refreshState = REF_RUN;
1577 assert(numBanksActive == 0);
1578
1579         // wait for all banks to be precharged, at which point the
1580         // power state machine will transition to the idle state and
1581         // automatically move to a refresh; at that point it will also
1582         // call this method to get the refresh event loop going again
1583 return;
1584 }
1585
1586 // last but not least we perform the actual refresh
1587 if (refreshState == REF_RUN) {
1588 // should never get here with any banks active
1589 assert(numBanksActive == 0);
1590 assert(pwrState == PWR_REF);
1591
1592 Tick ref_done_at = curTick() + tRFC;
1593
1594 for (int i = 0; i < ranksPerChannel; i++) {
1595 for (int j = 0; j < banksPerRank; j++) {
1596 banks[i][j].actAllowedAt = ref_done_at;
1597 }
1598
1599 // at the moment this affects all ranks
1600 rankPower[i].powerlib.doCommand(MemCommand::REF, 0,
1601 divCeil(curTick(), tCK) -
1602 timeStampOffset);
1603
1604             // at the moment, sort the list of commands and update the counters
1605             // for the DRAMPower library when doing a refresh
1606 sort(rankPower[i].powerlib.cmdList.begin(),
1607 rankPower[i].powerlib.cmdList.end(), DRAMCtrl::sortTime);
1608
1609 // update the counters for DRAMPower, passing false to
1610 // indicate that this is not the last command in the
1611 // list. DRAMPower requires this information for the
1612 // correct calculation of the background energy at the end
1613 // of the simulation. Ideally we would want to call this
1614 // function with true once at the end of the
1615             // simulation. However, the discarded energy is extremely
1616             // small and does not affect the final results.
1617 rankPower[i].powerlib.updateCounters(false);
1618
1619 // call the energy function
1620 rankPower[i].powerlib.calcEnergy();
1621
1622 // Update the stats
1623 updatePowerStats(i);
1624
1625 DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), tCK) -
1626 timeStampOffset, i);
1627 }
1628
1629 // make sure we did not wait so long that we cannot make up
1630 // for it
1631 if (refreshDueAt + tREFI < ref_done_at) {
1632 fatal("Refresh was delayed so long we cannot catch up\n");
1633 }
1634
1635 // compensate for the delay in actually performing the refresh
1636 // when scheduling the next one
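        // scheduling the event tRP early leaves room for the precharge-all
        // that precedes the next refresh, so the refresh itself can start
        // around its nominal due time (refreshDueAt + tREFI)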
1637 schedule(refreshEvent, refreshDueAt + tREFI - tRP);
1638
1639 assert(!powerEvent.scheduled());
1640
1641 // move to the idle power state once the refresh is done, this
1642 // will also move the refresh state machine to the refresh
1643 // idle state
1644 schedulePowerEvent(PWR_IDLE, ref_done_at);
1645
1646 DPRINTF(DRAMState, "Refresh done at %llu and next refresh at %llu\n",
1647 ref_done_at, refreshDueAt + tREFI);
1648 }
1649}
1650
1651void
1652DRAMCtrl::schedulePowerEvent(PowerState pwr_state, Tick tick)
1653{
1654 // respect causality
1655 assert(tick >= curTick());
1656
1657 if (!powerEvent.scheduled()) {
1658 DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n",
1659 tick, pwr_state);
1660
1661 // insert the new transition
1662 pwrStateTrans = pwr_state;
1663
1664 schedule(powerEvent, tick);
1665 } else {
1666 panic("Scheduled power event at %llu to state %d, "
1667 "with scheduled event at %llu to %d\n", tick, pwr_state,
1668 powerEvent.when(), pwrStateTrans);
1669 }
1670}
1671
1672void
1673DRAMCtrl::processPowerEvent()
1674{
1675 // remember where we were, and for how long
1676 Tick duration = curTick() - pwrStateTick;
1677 PowerState prev_state = pwrState;
1678
1679 // update the accounting
1680 pwrStateTime[prev_state] += duration;
1681
1682 pwrState = pwrStateTrans;
1683 pwrStateTick = curTick();
1684
1685 if (pwrState == PWR_IDLE) {
1686 DPRINTF(DRAMState, "All banks precharged\n");
1687
1688 // if we were refreshing, make sure we start scheduling requests again
1689 if (prev_state == PWR_REF) {
1690 DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration);
1691 assert(pwrState == PWR_IDLE);
1692
1693 // kick things into action again
1694 refreshState = REF_IDLE;
1695 assert(!nextReqEvent.scheduled());
1696 schedule(nextReqEvent, curTick());
1697 } else {
1698 assert(prev_state == PWR_ACT);
1699
1700 // if we have a pending refresh, and are now moving to
1701             // the idle state, directly transition to a refresh
1702 if (refreshState == REF_RUN) {
1703 // there should be nothing waiting at this point
1704 assert(!powerEvent.scheduled());
1705
1706 // update the state in zero time and proceed below
1707 pwrState = PWR_REF;
1708 }
1709 }
1710 }
1711
1712     // if we transition to the refresh state, let the refresh state
1713     // machine know of this state update and let it deal with the
1714     // scheduling of the next power state transition as well as the
1715     // following refresh
1716 if (pwrState == PWR_REF) {
1717 DPRINTF(DRAMState, "Refreshing\n");
1718 // kick the refresh event loop into action again, and that
1719 // in turn will schedule a transition to the idle power
1720 // state once the refresh is done
1721 assert(refreshState == REF_RUN);
1722 processRefreshEvent();
1723 }
1724}
1725
1726void
1727DRAMCtrl::updatePowerStats(uint8_t rank)
1728{
1729 // Get the energy and power from DRAMPower
1730 Data::MemoryPowerModel::Energy energy =
1731 rankPower[rank].powerlib.getEnergy();
1732 Data::MemoryPowerModel::Power power =
1733 rankPower[rank].powerlib.getPower();
1734
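    // DRAMPower reports energy and power for a single device, so scale by
    // the number of devices per rank to obtain per-rank figures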
1735 actEnergy[rank] = energy.act_energy * devicesPerRank;
1736 preEnergy[rank] = energy.pre_energy * devicesPerRank;
1737 readEnergy[rank] = energy.read_energy * devicesPerRank;
1738 writeEnergy[rank] = energy.write_energy * devicesPerRank;
1739 refreshEnergy[rank] = energy.ref_energy * devicesPerRank;
1740 actBackEnergy[rank] = energy.act_stdby_energy * devicesPerRank;
1741 preBackEnergy[rank] = energy.pre_stdby_energy * devicesPerRank;
1742 totalEnergy[rank] = energy.total_energy * devicesPerRank;
1743 averagePower[rank] = power.average_power * devicesPerRank;
1744}
1745
1746void
1747DRAMCtrl::regStats()
1748{
1749 using namespace Stats;
1750
1751 AbstractMemory::regStats();
1752
1753 readReqs
1754 .name(name() + ".readReqs")
1755 .desc("Number of read requests accepted");
1756
1757 writeReqs
1758 .name(name() + ".writeReqs")
1759 .desc("Number of write requests accepted");
1760
1761 readBursts
1762 .name(name() + ".readBursts")
1763 .desc("Number of DRAM read bursts, "
1764 "including those serviced by the write queue");
1765
1766 writeBursts
1767 .name(name() + ".writeBursts")
1768 .desc("Number of DRAM write bursts, "
1769 "including those merged in the write queue");
1770
1771 servicedByWrQ
1772 .name(name() + ".servicedByWrQ")
1773 .desc("Number of DRAM read bursts serviced by the write queue");
1774
1775 mergedWrBursts
1776 .name(name() + ".mergedWrBursts")
1777 .desc("Number of DRAM write bursts merged with an existing one");
1778
1779 neitherReadNorWrite
1780 .name(name() + ".neitherReadNorWriteReqs")
1781 .desc("Number of requests that are neither read nor write");
1782
1783 perBankRdBursts
1784 .init(banksPerRank * ranksPerChannel)
1785 .name(name() + ".perBankRdBursts")
1786         .desc("Per bank read bursts");
1787
1788 perBankWrBursts
1789 .init(banksPerRank * ranksPerChannel)
1790 .name(name() + ".perBankWrBursts")
1791 .desc("Per bank write bursts");
1792
1793 avgRdQLen
1794 .name(name() + ".avgRdQLen")
1795 .desc("Average read queue length when enqueuing")
1796 .precision(2);
1797
1798 avgWrQLen
1799 .name(name() + ".avgWrQLen")
1800 .desc("Average write queue length when enqueuing")
1801 .precision(2);
1802
1803 totQLat
1804 .name(name() + ".totQLat")
1805 .desc("Total ticks spent queuing");
1806
1807 totBusLat
1808 .name(name() + ".totBusLat")
1809 .desc("Total ticks spent in databus transfers");
1810
1811 totMemAccLat
1812 .name(name() + ".totMemAccLat")
1813 .desc("Total ticks spent from burst creation until serviced "
1814 "by the DRAM");
1815
1816 avgQLat
1817 .name(name() + ".avgQLat")
1818 .desc("Average queueing delay per DRAM burst")
1819 .precision(2);
1820
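    // reads serviced by the write queue never issue a DRAM burst, so they
    // are excluded from the per-burst latency averages below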
1821 avgQLat = totQLat / (readBursts - servicedByWrQ);
1822
1823 avgBusLat
1824 .name(name() + ".avgBusLat")
1825 .desc("Average bus latency per DRAM burst")
1826 .precision(2);
1827
1828 avgBusLat = totBusLat / (readBursts - servicedByWrQ);
1829
1830 avgMemAccLat
1831 .name(name() + ".avgMemAccLat")
1832 .desc("Average memory access latency per DRAM burst")
1833 .precision(2);
1834
1835 avgMemAccLat = totMemAccLat / (readBursts - servicedByWrQ);
1836
1837 numRdRetry
1838 .name(name() + ".numRdRetry")
1839 .desc("Number of times read queue was full causing retry");
1840
1841 numWrRetry
1842 .name(name() + ".numWrRetry")
1843 .desc("Number of times write queue was full causing retry");
1844
1845 readRowHits
1846 .name(name() + ".readRowHits")
1847 .desc("Number of row buffer hits during reads");
1848
1849 writeRowHits
1850 .name(name() + ".writeRowHits")
1851 .desc("Number of row buffer hits during writes");
1852
1853 readRowHitRate
1854 .name(name() + ".readRowHitRate")
1855 .desc("Row buffer hit rate for reads")
1856 .precision(2);
1857
1858 readRowHitRate = (readRowHits / (readBursts - servicedByWrQ)) * 100;
1859
1860 writeRowHitRate
1861 .name(name() + ".writeRowHitRate")
1862 .desc("Row buffer hit rate for writes")
1863 .precision(2);
1864
1865 writeRowHitRate = (writeRowHits / (writeBursts - mergedWrBursts)) * 100;
1866
1867 readPktSize
1868 .init(ceilLog2(burstSize) + 1)
1869 .name(name() + ".readPktSize")
1870 .desc("Read request sizes (log2)");
1871
1872 writePktSize
1873 .init(ceilLog2(burstSize) + 1)
1874 .name(name() + ".writePktSize")
1875 .desc("Write request sizes (log2)");
1876
1877 rdQLenPdf
1878 .init(readBufferSize)
1879 .name(name() + ".rdQLenPdf")
1880 .desc("What read queue length does an incoming req see");
1881
1882 wrQLenPdf
1883 .init(writeBufferSize)
1884 .name(name() + ".wrQLenPdf")
1885 .desc("What write queue length does an incoming req see");
1886
1887 bytesPerActivate
1888 .init(maxAccessesPerRow)
1889 .name(name() + ".bytesPerActivate")
1890 .desc("Bytes accessed per row activation")
1891 .flags(nozero);
1892
1893 rdPerTurnAround
1894 .init(readBufferSize)
1895 .name(name() + ".rdPerTurnAround")
1896 .desc("Reads before turning the bus around for writes")
1897 .flags(nozero);
1898
1899 wrPerTurnAround
1900 .init(writeBufferSize)
1901 .name(name() + ".wrPerTurnAround")
1902 .desc("Writes before turning the bus around for reads")
1903 .flags(nozero);
1904
1905 bytesReadDRAM
1906 .name(name() + ".bytesReadDRAM")
1907 .desc("Total number of bytes read from DRAM");
1908
1909 bytesReadWrQ
1910 .name(name() + ".bytesReadWrQ")
1911 .desc("Total number of bytes read from write queue");
1912
1913 bytesWritten
1914 .name(name() + ".bytesWritten")
1915 .desc("Total number of bytes written to DRAM");
1916
1917 bytesReadSys
1918 .name(name() + ".bytesReadSys")
1919 .desc("Total read bytes from the system interface side");
1920
1921 bytesWrittenSys
1922 .name(name() + ".bytesWrittenSys")
1923 .desc("Total written bytes from the system interface side");
1924
1925 avgRdBW
1926 .name(name() + ".avgRdBW")
1927 .desc("Average DRAM read bandwidth in MiByte/s")
1928 .precision(2);
1929
1930 avgRdBW = (bytesReadDRAM / 1000000) / simSeconds;
1931
1932 avgWrBW
1933 .name(name() + ".avgWrBW")
1934 .desc("Average achieved write bandwidth in MiByte/s")
1935 .precision(2);
1936
1937 avgWrBW = (bytesWritten / 1000000) / simSeconds;
1938
1939 avgRdBWSys
1940 .name(name() + ".avgRdBWSys")
1941 .desc("Average system read bandwidth in MiByte/s")
1942 .precision(2);
1943
1944 avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
1945
1946 avgWrBWSys
1947 .name(name() + ".avgWrBWSys")
1948 .desc("Average system write bandwidth in MiByte/s")
1949 .precision(2);
1950
1951 avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
1952
1953 peakBW
1954 .name(name() + ".peakBW")
1955 .desc("Theoretical peak bandwidth in MiByte/s")
1956 .precision(2);
1957
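    // the number of bursts per second (SimClock::Frequency / tBURST) times
    // the bytes per burst, divided by 1000000 to express it in megabytes
    // per second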
1958 peakBW = (SimClock::Frequency / tBURST) * burstSize / 1000000;
1959
1960 busUtil
1961 .name(name() + ".busUtil")
1962 .desc("Data bus utilization in percentage")
1963 .precision(2);
1964
1965 busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
1966
1967 totGap
1968 .name(name() + ".totGap")
1969 .desc("Total gap between requests");
1970
1971 avgGap
1972 .name(name() + ".avgGap")
1973 .desc("Average gap between requests")
1974 .precision(2);
1975
1976 avgGap = totGap / (readReqs + writeReqs);
1977
1978 // Stats for DRAM Power calculation based on Micron datasheet
1979 busUtilRead
1980 .name(name() + ".busUtilRead")
1981 .desc("Data bus utilization in percentage for reads")
1982 .precision(2);
1983
1984 busUtilRead = avgRdBW / peakBW * 100;
1985
1986 busUtilWrite
1987 .name(name() + ".busUtilWrite")
1988 .desc("Data bus utilization in percentage for writes")
1989 .precision(2);
1990
1991 busUtilWrite = avgWrBW / peakBW * 100;
1992
1993 pageHitRate
1994 .name(name() + ".pageHitRate")
1995 .desc("Row buffer hit rate, read and write combined")
1996 .precision(2);
1997
1998 pageHitRate = (writeRowHits + readRowHits) /
1999 (writeBursts - mergedWrBursts + readBursts - servicedByWrQ) * 100;
2000
2001 pwrStateTime
2002 .init(5)
2003 .name(name() + ".memoryStateTime")
2004 .desc("Time in different power states");
2005 pwrStateTime.subname(0, "IDLE");
2006 pwrStateTime.subname(1, "REF");
2007 pwrStateTime.subname(2, "PRE_PDN");
2008 pwrStateTime.subname(3, "ACT");
2009 pwrStateTime.subname(4, "ACT_PDN");
2010
2011 actEnergy
2012 .init(ranksPerChannel)
2013 .name(name() + ".actEnergy")
2014 .desc("Energy for activate commands per rank (pJ)");
2015
2016 preEnergy
2017 .init(ranksPerChannel)
2018 .name(name() + ".preEnergy")
2019 .desc("Energy for precharge commands per rank (pJ)");
2020
2021 readEnergy
2022 .init(ranksPerChannel)
2023 .name(name() + ".readEnergy")
2024 .desc("Energy for read commands per rank (pJ)");
2025
2026 writeEnergy
2027 .init(ranksPerChannel)
2028 .name(name() + ".writeEnergy")
2029 .desc("Energy for write commands per rank (pJ)");
2030
2031 refreshEnergy
2032 .init(ranksPerChannel)
2033 .name(name() + ".refreshEnergy")
2034 .desc("Energy for refresh commands per rank (pJ)");
2035
2036 actBackEnergy
2037 .init(ranksPerChannel)
2038 .name(name() + ".actBackEnergy")
2039 .desc("Energy for active background per rank (pJ)");
2040
2041 preBackEnergy
2042 .init(ranksPerChannel)
2043 .name(name() + ".preBackEnergy")
2044 .desc("Energy for precharge background per rank (pJ)");
2045
2046 totalEnergy
2047 .init(ranksPerChannel)
2048 .name(name() + ".totalEnergy")
2049 .desc("Total energy per rank (pJ)");
2050
2051 averagePower
2052 .init(ranksPerChannel)
2053 .name(name() + ".averagePower")
2054 .desc("Core power per rank (mW)");
2055}
2056
2057void
2058DRAMCtrl::recvFunctional(PacketPtr pkt)
2059{
2060 // rely on the abstract memory
2061 functionalAccess(pkt);
2062}
2063
2064BaseSlavePort&
2065DRAMCtrl::getSlavePort(const string &if_name, PortID idx)
2066{
2067 if (if_name != "port") {
2068 return MemObject::getSlavePort(if_name, idx);
2069 } else {
2070 return port;
2071 }
2072}
2073
2074unsigned int
2075DRAMCtrl::drain(DrainManager *dm)
2076{
2077 unsigned int count = port.drain(dm);
2078
2079 // if there is anything in any of our internal queues, keep track
2080 // of that as well
2081 if (!(writeQueue.empty() && readQueue.empty() &&
2082 respQueue.empty())) {
2083 DPRINTF(Drain, "DRAM controller not drained, write: %d, read: %d,"
2084 " resp: %d\n", writeQueue.size(), readQueue.size(),
2085 respQueue.size());
2086 ++count;
2087 drainManager = dm;
2088
2089 // the only part that is not drained automatically over time
2090 // is the write queue, thus kick things into action if needed
2091 if (!writeQueue.empty() && !nextReqEvent.scheduled()) {
2092 schedule(nextReqEvent, curTick());
2093 }
2094 }
2095
2096 if (count)
2097 setDrainState(Drainable::Draining);
2098 else
2099 setDrainState(Drainable::Drained);
2100 return count;
2101}
2102
2103DRAMCtrl::MemoryPort::MemoryPort(const std::string& name, DRAMCtrl& _memory)
2104 : QueuedSlavePort(name, &_memory, queue), queue(_memory, *this),
2105 memory(_memory)
2106{ }
2107
2108AddrRangeList
2109DRAMCtrl::MemoryPort::getAddrRanges() const
2110{
2111 AddrRangeList ranges;
2112 ranges.push_back(memory.getAddrRange());
2113 return ranges;
2114}
2115
2116void
2117DRAMCtrl::MemoryPort::recvFunctional(PacketPtr pkt)
2118{
2119 pkt->pushLabel(memory.name());
2120
2121 if (!queue.checkFunctional(pkt)) {
2122 // Default implementation of SimpleTimingPort::recvFunctional()
2123 // calls recvAtomic() and throws away the latency; we can save a
2124 // little here by just not calculating the latency.
2125 memory.recvFunctional(pkt);
2126 }
2127
2128 pkt->popLabel();
2129}
2130
2131Tick
2132DRAMCtrl::MemoryPort::recvAtomic(PacketPtr pkt)
2133{
2134 return memory.recvAtomic(pkt);
2135}
2136
2137bool
2138DRAMCtrl::MemoryPort::recvTimingReq(PacketPtr pkt)
2139{
2140 // pass it to the memory controller
2141 return memory.recvTimingReq(pkt);
2142}
2143
2144DRAMCtrl*
2145DRAMCtrlParams::create()
2146{
2147 return new DRAMCtrl(this);
2148}