tlb_coalescer.cc (12717:2e2c211644d2) tlb_coalescer.cc (13449:2f7efa89c58b)
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Lisa Hsu
34 */
35
36#include "gpu-compute/tlb_coalescer.hh"
37
38#include <cstring>
39
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Lisa Hsu
34 */
35
36#include "gpu-compute/tlb_coalescer.hh"
37
38#include <cstring>
39
40#include "base/logging.hh"
40#include "debug/GPUTLB.hh"
41#include "sim/process.hh"
42
43TLBCoalescer::TLBCoalescer(const Params *p)
44 : MemObject(p),
45 clock(p->clk_domain->clockPeriod()),
46 TLBProbesPerCycle(p->probesPerCycle),
47 coalescingWindow(p->coalescingWindow),
48 disableCoalescing(p->disableCoalescing),
49 probeTLBEvent([this]{ processProbeTLBEvent(); },
50 "Probe the TLB below",
51 false, Event::CPU_Tick_Pri),
52 cleanupEvent([this]{ processCleanupEvent(); },
53 "Cleanup issuedTranslationsTable hashmap",
54 false, Event::Maximum_Pri)
55{
56 // create the slave ports based on the number of connected ports
57 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
58 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
59 this, i));
60 }
61
62 // create the master ports based on the number of connected ports
63 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
64 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
65 this, i));
66 }
67}
68
69BaseSlavePort&
70TLBCoalescer::getSlavePort(const std::string &if_name, PortID idx)
71{
72 if (if_name == "slave") {
73 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
74 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
75 }
76
77 return *cpuSidePort[idx];
78 } else {
79 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
80 }
81}
82
83BaseMasterPort&
84TLBCoalescer::getMasterPort(const std::string &if_name, PortID idx)
85{
86 if (if_name == "master") {
87 if (idx >= static_cast<PortID>(memSidePort.size())) {
88 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
89 }
90
91 return *memSidePort[idx];
92 } else {
93 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
94 }
95}
96
97/*
98 * This method returns true if the <incoming_pkt>
99 * can be coalesced with <coalesced_pkt> and false otherwise.
100 * A given set of rules is checked.
101 * The rules can potentially be modified based on the TLB level.
102 */
103bool
104TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
105{
106 if (disableCoalescing)
107 return false;
108
109 TheISA::GpuTLB::TranslationState *incoming_state =
110 safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
111
112 TheISA::GpuTLB::TranslationState *coalesced_state =
113 safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
114
115 // Rule 1: Coalesce requests only if they
116 // fall within the same virtual page
117 Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
118 TheISA::PageBytes);
119
120 Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
121 TheISA::PageBytes);
122
123 if (incoming_virt_page_addr != coalesced_virt_page_addr)
124 return false;
125
126 //* Rule 2: Coalesce requests only if they
127 // share a TLB Mode, i.e. they are both read
128 // or write requests.
129 BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
130 BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
131
132 if (incoming_mode != coalesced_mode)
133 return false;
134
135 // when we can coalesce a packet update the reqCnt
136 // that is the number of packets represented by
137 // this coalesced packet
138 if (!incoming_state->prefetch)
139 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
140
141 return true;
142}
143
144/*
145 * We need to update the physical addresses of all the translation requests
146 * that were coalesced into the one that just returned.
147 */
148void
149TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
150{
151 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
152
153 DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
154 issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
155
156 TheISA::GpuTLB::TranslationState *sender_state =
157 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
158
159 TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
160 assert(tlb_entry);
161 Addr first_entry_vaddr = tlb_entry->vaddr;
162 Addr first_entry_paddr = tlb_entry->paddr;
163 int page_size = tlb_entry->size();
164 bool uncacheable = tlb_entry->uncacheable;
165 int first_hit_level = sender_state->hitLevel;
166
167 // Get the physical page address of the translated request
168 // Using the page_size specified in the TLBEntry allows us
169 // to support different page sizes.
170 Addr phys_page_paddr = pkt->req->getPaddr();
171 phys_page_paddr &= ~(page_size - 1);
172
173 for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
174 PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
175 TheISA::GpuTLB::TranslationState *sender_state =
176 safe_cast<TheISA::GpuTLB::TranslationState*>(
177 local_pkt->senderState);
178
179 // we are sending the packet back, so pop the reqCnt associated
180 // with this level in the TLB hiearchy
181 if (!sender_state->prefetch)
182 sender_state->reqCnt.pop_back();
183
184 /*
185 * Only the first packet from this coalesced request has been
186 * translated. Grab the translated phys. page addr and update the
187 * physical addresses of the remaining packets with the appropriate
188 * page offsets.
189 */
190 if (i) {
191 Addr paddr = phys_page_paddr;
192 paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
193 local_pkt->req->setPaddr(paddr);
194
195 if (uncacheable)
196 local_pkt->req->setFlags(Request::UNCACHEABLE);
197
198 // update senderState->tlbEntry, so we can insert
199 // the correct TLBEentry in the TLBs above.
200 auto p = sender_state->tc->getProcessPtr();
201 sender_state->tlbEntry =
202 new TheISA::TlbEntry(p->pid(), first_entry_vaddr,
203 first_entry_paddr, false, false);
204
205 // update the hitLevel for all uncoalesced reqs
206 // so that each packet knows where it hit
207 // (used for statistics in the CUs)
208 sender_state->hitLevel = first_hit_level;
209 }
210
211 SlavePort *return_port = sender_state->ports.back();
212 sender_state->ports.pop_back();
213
214 // Translation is done - Convert to a response pkt if necessary and
215 // send the translation back
216 if (local_pkt->isRequest()) {
217 local_pkt->makeTimingResponse();
218 }
219
220 return_port->sendTimingResp(local_pkt);
221 }
222
223 // schedule clean up for end of this cycle
224 // This is a maximum priority event and must be on
225 // the same cycle as GPUTLB cleanup event to prevent
226 // race conditions with an IssueProbeEvent caused by
227 // MemSidePort::recvReqRetry
228 cleanupQueue.push(virt_page_addr);
229
230 if (!cleanupEvent.scheduled())
231 schedule(cleanupEvent, curTick());
232}
233
234// Receive translation requests, create a coalesced request,
235// and send them to the TLB (TLBProbesPerCycle)
236bool
237TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
238{
239 // first packet of a coalesced request
240 PacketPtr first_packet = nullptr;
241 // true if we are able to do coalescing
242 bool didCoalesce = false;
243 // number of coalesced reqs for a given window
244 int coalescedReq_cnt = 0;
245
246 TheISA::GpuTLB::TranslationState *sender_state =
247 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
248
249 // push back the port to remember the path back
250 sender_state->ports.push_back(this);
251
252 bool update_stats = !sender_state->prefetch;
253
254 if (update_stats) {
255 // if reqCnt is empty then this packet does not represent
256 // multiple uncoalesced reqs(pkts) but just a single pkt.
257 // If it does though then the reqCnt for each level in the
258 // hierarchy accumulates the total number of reqs this packet
259 // represents
260 int req_cnt = 1;
261
262 if (!sender_state->reqCnt.empty())
263 req_cnt = sender_state->reqCnt.back();
264
265 sender_state->reqCnt.push_back(req_cnt);
266
267 // update statistics
268 coalescer->uncoalescedAccesses++;
269 req_cnt = sender_state->reqCnt.back();
270 DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
271 coalescer->queuingCycles -= (curTick() * req_cnt);
272 coalescer->localqueuingCycles -= curTick();
273 }
274
275 // FIXME if you want to coalesce not based on the issueTime
276 // of the packets (i.e., from the compute unit's perspective)
277 // but based on when they reached this coalescer then
278 // remove the following if statement and use curTick() or
279 // coalescingWindow for the tick_index.
280 if (!sender_state->issueTime)
281 sender_state->issueTime = curTick();
282
283 // The tick index is used as a key to the coalescerFIFO hashmap.
284 // It is shared by all candidates that fall within the
285 // given coalescingWindow.
286 int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
287
288 if (coalescer->coalescerFIFO.count(tick_index)) {
289 coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
290 }
291
292 // see if we can coalesce the incoming pkt with another
293 // coalesced request with the same tick_index
294 for (int i = 0; i < coalescedReq_cnt; ++i) {
295 first_packet = coalescer->coalescerFIFO[tick_index][i][0];
296
297 if (coalescer->canCoalesce(pkt, first_packet)) {
298 coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
299
300 DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
301 i, tick_index,
302 coalescer->coalescerFIFO[tick_index][i].size());
303
304 didCoalesce = true;
305 break;
306 }
307 }
308
309 // if this is the first request for this tick_index
310 // or we did not manage to coalesce, update stats
311 // and make necessary allocations.
312 if (!coalescedReq_cnt || !didCoalesce) {
313 if (update_stats)
314 coalescer->coalescedAccesses++;
315
316 std::vector<PacketPtr> new_array;
317 new_array.push_back(pkt);
318 coalescer->coalescerFIFO[tick_index].push_back(new_array);
319
320 DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
321 "push\n", tick_index,
322 coalescer->coalescerFIFO[tick_index].size());
323 }
324
325 //schedule probeTLBEvent next cycle to send the
326 //coalesced requests to the TLB
327 if (!coalescer->probeTLBEvent.scheduled()) {
328 coalescer->schedule(coalescer->probeTLBEvent,
329 curTick() + coalescer->ticks(1));
330 }
331
332 return true;
333}
334
335void
336TLBCoalescer::CpuSidePort::recvReqRetry()
337{
41#include "debug/GPUTLB.hh"
42#include "sim/process.hh"
43
44TLBCoalescer::TLBCoalescer(const Params *p)
45 : MemObject(p),
46 clock(p->clk_domain->clockPeriod()),
47 TLBProbesPerCycle(p->probesPerCycle),
48 coalescingWindow(p->coalescingWindow),
49 disableCoalescing(p->disableCoalescing),
50 probeTLBEvent([this]{ processProbeTLBEvent(); },
51 "Probe the TLB below",
52 false, Event::CPU_Tick_Pri),
53 cleanupEvent([this]{ processCleanupEvent(); },
54 "Cleanup issuedTranslationsTable hashmap",
55 false, Event::Maximum_Pri)
56{
57 // create the slave ports based on the number of connected ports
58 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
59 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
60 this, i));
61 }
62
63 // create the master ports based on the number of connected ports
64 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
65 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
66 this, i));
67 }
68}
69
70BaseSlavePort&
71TLBCoalescer::getSlavePort(const std::string &if_name, PortID idx)
72{
73 if (if_name == "slave") {
74 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
75 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
76 }
77
78 return *cpuSidePort[idx];
79 } else {
80 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
81 }
82}
83
84BaseMasterPort&
85TLBCoalescer::getMasterPort(const std::string &if_name, PortID idx)
86{
87 if (if_name == "master") {
88 if (idx >= static_cast<PortID>(memSidePort.size())) {
89 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
90 }
91
92 return *memSidePort[idx];
93 } else {
94 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
95 }
96}
97
98/*
99 * This method returns true if the <incoming_pkt>
100 * can be coalesced with <coalesced_pkt> and false otherwise.
101 * A given set of rules is checked.
102 * The rules can potentially be modified based on the TLB level.
103 */
104bool
105TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
106{
107 if (disableCoalescing)
108 return false;
109
110 TheISA::GpuTLB::TranslationState *incoming_state =
111 safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
112
113 TheISA::GpuTLB::TranslationState *coalesced_state =
114 safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
115
116 // Rule 1: Coalesce requests only if they
117 // fall within the same virtual page
118 Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
119 TheISA::PageBytes);
120
121 Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
122 TheISA::PageBytes);
123
124 if (incoming_virt_page_addr != coalesced_virt_page_addr)
125 return false;
126
127 //* Rule 2: Coalesce requests only if they
128 // share a TLB Mode, i.e. they are both read
129 // or write requests.
130 BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
131 BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
132
133 if (incoming_mode != coalesced_mode)
134 return false;
135
136 // when we can coalesce a packet update the reqCnt
137 // that is the number of packets represented by
138 // this coalesced packet
139 if (!incoming_state->prefetch)
140 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
141
142 return true;
143}
144
145/*
146 * We need to update the physical addresses of all the translation requests
147 * that were coalesced into the one that just returned.
148 */
149void
150TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
151{
152 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
153
154 DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
155 issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
156
157 TheISA::GpuTLB::TranslationState *sender_state =
158 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
159
160 TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
161 assert(tlb_entry);
162 Addr first_entry_vaddr = tlb_entry->vaddr;
163 Addr first_entry_paddr = tlb_entry->paddr;
164 int page_size = tlb_entry->size();
165 bool uncacheable = tlb_entry->uncacheable;
166 int first_hit_level = sender_state->hitLevel;
167
168 // Get the physical page address of the translated request
169 // Using the page_size specified in the TLBEntry allows us
170 // to support different page sizes.
171 Addr phys_page_paddr = pkt->req->getPaddr();
172 phys_page_paddr &= ~(page_size - 1);
173
174 for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
175 PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
176 TheISA::GpuTLB::TranslationState *sender_state =
177 safe_cast<TheISA::GpuTLB::TranslationState*>(
178 local_pkt->senderState);
179
180 // we are sending the packet back, so pop the reqCnt associated
181 // with this level in the TLB hiearchy
182 if (!sender_state->prefetch)
183 sender_state->reqCnt.pop_back();
184
185 /*
186 * Only the first packet from this coalesced request has been
187 * translated. Grab the translated phys. page addr and update the
188 * physical addresses of the remaining packets with the appropriate
189 * page offsets.
190 */
191 if (i) {
192 Addr paddr = phys_page_paddr;
193 paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
194 local_pkt->req->setPaddr(paddr);
195
196 if (uncacheable)
197 local_pkt->req->setFlags(Request::UNCACHEABLE);
198
199 // update senderState->tlbEntry, so we can insert
200 // the correct TLBEentry in the TLBs above.
201 auto p = sender_state->tc->getProcessPtr();
202 sender_state->tlbEntry =
203 new TheISA::TlbEntry(p->pid(), first_entry_vaddr,
204 first_entry_paddr, false, false);
205
206 // update the hitLevel for all uncoalesced reqs
207 // so that each packet knows where it hit
208 // (used for statistics in the CUs)
209 sender_state->hitLevel = first_hit_level;
210 }
211
212 SlavePort *return_port = sender_state->ports.back();
213 sender_state->ports.pop_back();
214
215 // Translation is done - Convert to a response pkt if necessary and
216 // send the translation back
217 if (local_pkt->isRequest()) {
218 local_pkt->makeTimingResponse();
219 }
220
221 return_port->sendTimingResp(local_pkt);
222 }
223
224 // schedule clean up for end of this cycle
225 // This is a maximum priority event and must be on
226 // the same cycle as GPUTLB cleanup event to prevent
227 // race conditions with an IssueProbeEvent caused by
228 // MemSidePort::recvReqRetry
229 cleanupQueue.push(virt_page_addr);
230
231 if (!cleanupEvent.scheduled())
232 schedule(cleanupEvent, curTick());
233}
234
235// Receive translation requests, create a coalesced request,
236// and send them to the TLB (TLBProbesPerCycle)
237bool
238TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
239{
240 // first packet of a coalesced request
241 PacketPtr first_packet = nullptr;
242 // true if we are able to do coalescing
243 bool didCoalesce = false;
244 // number of coalesced reqs for a given window
245 int coalescedReq_cnt = 0;
246
247 TheISA::GpuTLB::TranslationState *sender_state =
248 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
249
250 // push back the port to remember the path back
251 sender_state->ports.push_back(this);
252
253 bool update_stats = !sender_state->prefetch;
254
255 if (update_stats) {
256 // if reqCnt is empty then this packet does not represent
257 // multiple uncoalesced reqs(pkts) but just a single pkt.
258 // If it does though then the reqCnt for each level in the
259 // hierarchy accumulates the total number of reqs this packet
260 // represents
261 int req_cnt = 1;
262
263 if (!sender_state->reqCnt.empty())
264 req_cnt = sender_state->reqCnt.back();
265
266 sender_state->reqCnt.push_back(req_cnt);
267
268 // update statistics
269 coalescer->uncoalescedAccesses++;
270 req_cnt = sender_state->reqCnt.back();
271 DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
272 coalescer->queuingCycles -= (curTick() * req_cnt);
273 coalescer->localqueuingCycles -= curTick();
274 }
275
276 // FIXME if you want to coalesce not based on the issueTime
277 // of the packets (i.e., from the compute unit's perspective)
278 // but based on when they reached this coalescer then
279 // remove the following if statement and use curTick() or
280 // coalescingWindow for the tick_index.
281 if (!sender_state->issueTime)
282 sender_state->issueTime = curTick();
283
284 // The tick index is used as a key to the coalescerFIFO hashmap.
285 // It is shared by all candidates that fall within the
286 // given coalescingWindow.
287 int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
288
289 if (coalescer->coalescerFIFO.count(tick_index)) {
290 coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
291 }
292
293 // see if we can coalesce the incoming pkt with another
294 // coalesced request with the same tick_index
295 for (int i = 0; i < coalescedReq_cnt; ++i) {
296 first_packet = coalescer->coalescerFIFO[tick_index][i][0];
297
298 if (coalescer->canCoalesce(pkt, first_packet)) {
299 coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
300
301 DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
302 i, tick_index,
303 coalescer->coalescerFIFO[tick_index][i].size());
304
305 didCoalesce = true;
306 break;
307 }
308 }
309
310 // if this is the first request for this tick_index
311 // or we did not manage to coalesce, update stats
312 // and make necessary allocations.
313 if (!coalescedReq_cnt || !didCoalesce) {
314 if (update_stats)
315 coalescer->coalescedAccesses++;
316
317 std::vector<PacketPtr> new_array;
318 new_array.push_back(pkt);
319 coalescer->coalescerFIFO[tick_index].push_back(new_array);
320
321 DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
322 "push\n", tick_index,
323 coalescer->coalescerFIFO[tick_index].size());
324 }
325
326 //schedule probeTLBEvent next cycle to send the
327 //coalesced requests to the TLB
328 if (!coalescer->probeTLBEvent.scheduled()) {
329 coalescer->schedule(coalescer->probeTLBEvent,
330 curTick() + coalescer->ticks(1));
331 }
332
333 return true;
334}
335
336void
337TLBCoalescer::CpuSidePort::recvReqRetry()
338{
338 assert(false);
339 panic("recvReqRetry called");
339}
340
341void
342TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
343{
344
345 TheISA::GpuTLB::TranslationState *sender_state =
346 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
347
348 bool update_stats = !sender_state->prefetch;
349
350 if (update_stats)
351 coalescer->uncoalescedAccesses++;
352
353 // If there is a pending timing request for this virtual address
354 // print a warning message. This is a temporary caveat of
355 // the current simulator where atomic and timing requests can
356 // coexist. FIXME remove this check/warning in the future.
357 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
358 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
359
360 if (map_count) {
361 DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
362 "req. pending\n", virt_page_addr);
363 }
364
365 coalescer->memSidePort[0]->sendFunctional(pkt);
366}
367
368AddrRangeList
369TLBCoalescer::CpuSidePort::getAddrRanges() const
370{
371 // currently not checked by the master
372 AddrRangeList ranges;
373
374 return ranges;
375}
376
377bool
378TLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
379{
380 // a translation completed and returned
381 coalescer->updatePhysAddresses(pkt);
382
383 return true;
384}
385
386void
387TLBCoalescer::MemSidePort::recvReqRetry()
388{
389 //we've receeived a retry. Schedule a probeTLBEvent
390 if (!coalescer->probeTLBEvent.scheduled())
391 coalescer->schedule(coalescer->probeTLBEvent,
392 curTick() + coalescer->ticks(1));
393}
394
395void
396TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
397{
398 fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
399}
400
401/*
402 * Here we scan the coalescer FIFO and issue the max
403 * number of permitted probes to the TLB below. We
404 * permit bypassing of coalesced requests for the same
405 * tick_index.
406 *
407 * We do not access the next tick_index unless we've
408 * drained the previous one. The coalesced requests
409 * that are successfully sent are moved to the
410 * issuedTranslationsTable table (the table which keeps
411 * track of the outstanding reqs)
412 */
413void
414TLBCoalescer::processProbeTLBEvent()
415{
416 // number of TLB probes sent so far
417 int sent_probes = 0;
418 // rejected denotes a blocking event
419 bool rejected = false;
420
421 // It is set to true either when the recvTiming of the TLB below
422 // returns false or when there is another outstanding request for the
423 // same virt. page.
424
425 DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);
426
427 for (auto iter = coalescerFIFO.begin();
428 iter != coalescerFIFO.end() && !rejected; ) {
429 int coalescedReq_cnt = iter->second.size();
430 int i = 0;
431 int vector_index = 0;
432
433 DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
434 coalescedReq_cnt, iter->first);
435
436 while (i < coalescedReq_cnt) {
437 ++i;
438 PacketPtr first_packet = iter->second[vector_index][0];
439
440 // compute virtual page address for this request
441 Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
442 TheISA::PageBytes);
443
444 // is there another outstanding request for the same page addr?
445 int pending_reqs =
446 issuedTranslationsTable.count(virt_page_addr);
447
448 if (pending_reqs) {
449 DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
450 "page %#x\n", virt_page_addr);
451
452 ++vector_index;
453 rejected = true;
454
455 continue;
456 }
457
458 // send the coalesced request for virt_page_addr
459 if (!memSidePort[0]->sendTimingReq(first_packet)) {
460 DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
461 virt_page_addr);
462
463 // No need for a retries queue since we are already buffering
464 // the coalesced request in coalescerFIFO.
465 rejected = true;
466 ++vector_index;
467 } else {
468 TheISA::GpuTLB::TranslationState *tmp_sender_state =
469 safe_cast<TheISA::GpuTLB::TranslationState*>
470 (first_packet->senderState);
471
472 bool update_stats = !tmp_sender_state->prefetch;
473
474 if (update_stats) {
475 // req_cnt is total number of packets represented
476 // by the one we just sent counting all the way from
477 // the top of TLB hiearchy (i.e., from the CU)
478 int req_cnt = tmp_sender_state->reqCnt.back();
479 queuingCycles += (curTick() * req_cnt);
480
481 DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
482 name(), req_cnt);
483
484 // pkt_cnt is number of packets we coalesced into the one
485 // we just sent but only at this coalescer level
486 int pkt_cnt = iter->second[vector_index].size();
487 localqueuingCycles += (curTick() * pkt_cnt);
488 }
489
490 DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
491 virt_page_addr);
492
493 //copy coalescedReq to issuedTranslationsTable
494 issuedTranslationsTable[virt_page_addr]
495 = iter->second[vector_index];
496
497 //erase the entry of this coalesced req
498 iter->second.erase(iter->second.begin() + vector_index);
499
500 if (iter->second.empty())
501 assert(i == coalescedReq_cnt);
502
503 sent_probes++;
504 if (sent_probes == TLBProbesPerCycle)
505 return;
506 }
507 }
508
509 //if there are no more coalesced reqs for this tick_index
510 //erase the hash_map with the first iterator
511 if (iter->second.empty()) {
512 coalescerFIFO.erase(iter++);
513 } else {
514 ++iter;
515 }
516 }
517}
518
519void
520TLBCoalescer::processCleanupEvent()
521{
522 while (!cleanupQueue.empty()) {
523 Addr cleanup_addr = cleanupQueue.front();
524 cleanupQueue.pop();
525 issuedTranslationsTable.erase(cleanup_addr);
526
527 DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
528 cleanup_addr);
529 }
530}
531
532void
533TLBCoalescer::regStats()
534{
535 MemObject::regStats();
536
537 uncoalescedAccesses
538 .name(name() + ".uncoalesced_accesses")
539 .desc("Number of uncoalesced TLB accesses")
540 ;
541
542 coalescedAccesses
543 .name(name() + ".coalesced_accesses")
544 .desc("Number of coalesced TLB accesses")
545 ;
546
547 queuingCycles
548 .name(name() + ".queuing_cycles")
549 .desc("Number of cycles spent in queue")
550 ;
551
552 localqueuingCycles
553 .name(name() + ".local_queuing_cycles")
554 .desc("Number of cycles spent in queue for all incoming reqs")
555 ;
556
557 localLatency
558 .name(name() + ".local_latency")
559 .desc("Avg. latency over all incoming pkts")
560 ;
561
562 localLatency = localqueuingCycles / uncoalescedAccesses;
563}
564
565
566TLBCoalescer*
567TLBCoalescerParams::create()
568{
569 return new TLBCoalescer(this);
570}
571
340}
341
342void
343TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
344{
345
346 TheISA::GpuTLB::TranslationState *sender_state =
347 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
348
349 bool update_stats = !sender_state->prefetch;
350
351 if (update_stats)
352 coalescer->uncoalescedAccesses++;
353
354 // If there is a pending timing request for this virtual address
355 // print a warning message. This is a temporary caveat of
356 // the current simulator where atomic and timing requests can
357 // coexist. FIXME remove this check/warning in the future.
358 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
359 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
360
361 if (map_count) {
362 DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
363 "req. pending\n", virt_page_addr);
364 }
365
366 coalescer->memSidePort[0]->sendFunctional(pkt);
367}
368
369AddrRangeList
370TLBCoalescer::CpuSidePort::getAddrRanges() const
371{
372 // currently not checked by the master
373 AddrRangeList ranges;
374
375 return ranges;
376}
377
378bool
379TLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
380{
381 // a translation completed and returned
382 coalescer->updatePhysAddresses(pkt);
383
384 return true;
385}
386
387void
388TLBCoalescer::MemSidePort::recvReqRetry()
389{
390 //we've receeived a retry. Schedule a probeTLBEvent
391 if (!coalescer->probeTLBEvent.scheduled())
392 coalescer->schedule(coalescer->probeTLBEvent,
393 curTick() + coalescer->ticks(1));
394}
395
void
TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
{
    // Functional requests are only expected on the CPU side of the
    // coalescer; receiving one here is a usage error.
    fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
}
401
402/*
403 * Here we scan the coalescer FIFO and issue the max
404 * number of permitted probes to the TLB below. We
405 * permit bypassing of coalesced requests for the same
406 * tick_index.
407 *
408 * We do not access the next tick_index unless we've
409 * drained the previous one. The coalesced requests
410 * that are successfully sent are moved to the
411 * issuedTranslationsTable table (the table which keeps
412 * track of the outstanding reqs)
413 */
414void
415TLBCoalescer::processProbeTLBEvent()
416{
417 // number of TLB probes sent so far
418 int sent_probes = 0;
419 // rejected denotes a blocking event
420 bool rejected = false;
421
422 // It is set to true either when the recvTiming of the TLB below
423 // returns false or when there is another outstanding request for the
424 // same virt. page.
425
426 DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);
427
428 for (auto iter = coalescerFIFO.begin();
429 iter != coalescerFIFO.end() && !rejected; ) {
430 int coalescedReq_cnt = iter->second.size();
431 int i = 0;
432 int vector_index = 0;
433
434 DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
435 coalescedReq_cnt, iter->first);
436
437 while (i < coalescedReq_cnt) {
438 ++i;
439 PacketPtr first_packet = iter->second[vector_index][0];
440
441 // compute virtual page address for this request
442 Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
443 TheISA::PageBytes);
444
445 // is there another outstanding request for the same page addr?
446 int pending_reqs =
447 issuedTranslationsTable.count(virt_page_addr);
448
449 if (pending_reqs) {
450 DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
451 "page %#x\n", virt_page_addr);
452
453 ++vector_index;
454 rejected = true;
455
456 continue;
457 }
458
459 // send the coalesced request for virt_page_addr
460 if (!memSidePort[0]->sendTimingReq(first_packet)) {
461 DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
462 virt_page_addr);
463
464 // No need for a retries queue since we are already buffering
465 // the coalesced request in coalescerFIFO.
466 rejected = true;
467 ++vector_index;
468 } else {
469 TheISA::GpuTLB::TranslationState *tmp_sender_state =
470 safe_cast<TheISA::GpuTLB::TranslationState*>
471 (first_packet->senderState);
472
473 bool update_stats = !tmp_sender_state->prefetch;
474
475 if (update_stats) {
476 // req_cnt is total number of packets represented
477 // by the one we just sent counting all the way from
478 // the top of TLB hiearchy (i.e., from the CU)
479 int req_cnt = tmp_sender_state->reqCnt.back();
480 queuingCycles += (curTick() * req_cnt);
481
482 DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
483 name(), req_cnt);
484
485 // pkt_cnt is number of packets we coalesced into the one
486 // we just sent but only at this coalescer level
487 int pkt_cnt = iter->second[vector_index].size();
488 localqueuingCycles += (curTick() * pkt_cnt);
489 }
490
491 DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
492 virt_page_addr);
493
494 //copy coalescedReq to issuedTranslationsTable
495 issuedTranslationsTable[virt_page_addr]
496 = iter->second[vector_index];
497
498 //erase the entry of this coalesced req
499 iter->second.erase(iter->second.begin() + vector_index);
500
501 if (iter->second.empty())
502 assert(i == coalescedReq_cnt);
503
504 sent_probes++;
505 if (sent_probes == TLBProbesPerCycle)
506 return;
507 }
508 }
509
510 //if there are no more coalesced reqs for this tick_index
511 //erase the hash_map with the first iterator
512 if (iter->second.empty()) {
513 coalescerFIFO.erase(iter++);
514 } else {
515 ++iter;
516 }
517 }
518}
519
520void
521TLBCoalescer::processCleanupEvent()
522{
523 while (!cleanupQueue.empty()) {
524 Addr cleanup_addr = cleanupQueue.front();
525 cleanupQueue.pop();
526 issuedTranslationsTable.erase(cleanup_addr);
527
528 DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
529 cleanup_addr);
530 }
531}
532
533void
534TLBCoalescer::regStats()
535{
536 MemObject::regStats();
537
538 uncoalescedAccesses
539 .name(name() + ".uncoalesced_accesses")
540 .desc("Number of uncoalesced TLB accesses")
541 ;
542
543 coalescedAccesses
544 .name(name() + ".coalesced_accesses")
545 .desc("Number of coalesced TLB accesses")
546 ;
547
548 queuingCycles
549 .name(name() + ".queuing_cycles")
550 .desc("Number of cycles spent in queue")
551 ;
552
553 localqueuingCycles
554 .name(name() + ".local_queuing_cycles")
555 .desc("Number of cycles spent in queue for all incoming reqs")
556 ;
557
558 localLatency
559 .name(name() + ".local_latency")
560 .desc("Avg. latency over all incoming pkts")
561 ;
562
563 localLatency = localqueuingCycles / uncoalescedAccesses;
564}
565
566
567TLBCoalescer*
568TLBCoalescerParams::create()
569{
570 return new TLBCoalescer(this);
571}
572