/* tlb_coalescer.cc (rev 11308:7d8836fd043d, rev 11523:81332eb10367) */
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/tlb_coalescer.hh"
37
38#include <cstring>
39
40#include "debug/GPUTLB.hh"
41
42TLBCoalescer::TLBCoalescer(const Params *p) : MemObject(p),
43 clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle),
44 coalescingWindow(p->coalescingWindow),
45 disableCoalescing(p->disableCoalescing), probeTLBEvent(this),
46 cleanupEvent(this)
47{
48 // create the slave ports based on the number of connected ports
49 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
50 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
51 this, i));
52 }
53
54 // create the master ports based on the number of connected ports
55 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
56 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
57 this, i));
58 }
59}
60
61BaseSlavePort&
62TLBCoalescer::getSlavePort(const std::string &if_name, PortID idx)
63{
64 if (if_name == "slave") {
65 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
66 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
67 }
68
69 return *cpuSidePort[idx];
70 } else {
71 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
72 }
73}
74
75BaseMasterPort&
76TLBCoalescer::getMasterPort(const std::string &if_name, PortID idx)
77{
78 if (if_name == "master") {
79 if (idx >= static_cast<PortID>(memSidePort.size())) {
80 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
81 }
82
83 return *memSidePort[idx];
84 } else {
85 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
86 }
87}
88
89/*
90 * This method returns true if the <incoming_pkt>
91 * can be coalesced with <coalesced_pkt> and false otherwise.
92 * A given set of rules is checked.
93 * The rules can potentially be modified based on the TLB level.
94 */
95bool
96TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
97{
98 if (disableCoalescing)
99 return false;
100
101 TheISA::GpuTLB::TranslationState *incoming_state =
102 safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
103
104 TheISA::GpuTLB::TranslationState *coalesced_state =
105 safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
106
107 // Rule 1: Coalesce requests only if they
108 // fall within the same virtual page
109 Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
110 TheISA::PageBytes);
111
112 Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
113 TheISA::PageBytes);
114
115 if (incoming_virt_page_addr != coalesced_virt_page_addr)
116 return false;
117
118 //* Rule 2: Coalesce requests only if they
119 // share a TLB Mode, i.e. they are both read
120 // or write requests.
121 BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
122 BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
123
124 if (incoming_mode != coalesced_mode)
125 return false;
126
127 // when we can coalesce a packet update the reqCnt
128 // that is the number of packets represented by
129 // this coalesced packet
130 if (!incoming_state->prefetch)
131 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
132
133 return true;
134}
135
136/*
137 * We need to update the physical addresses of all the translation requests
138 * that were coalesced into the one that just returned.
139 */
140void
141TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
142{
143 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
144
145 DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
146 issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
147
148 TheISA::GpuTLB::TranslationState *sender_state =
149 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
150
151 TheISA::GpuTlbEntry *tlb_entry = sender_state->tlbEntry;
152 assert(tlb_entry);
153 Addr first_entry_vaddr = tlb_entry->vaddr;
154 Addr first_entry_paddr = tlb_entry->paddr;
155 int page_size = tlb_entry->size();
156 bool uncacheable = tlb_entry->uncacheable;
157 int first_hit_level = sender_state->hitLevel;
158 bool valid = tlb_entry->valid;
159
160 // Get the physical page address of the translated request
161 // Using the page_size specified in the TLBEntry allows us
162 // to support different page sizes.
163 Addr phys_page_paddr = pkt->req->getPaddr();
164 phys_page_paddr &= ~(page_size - 1);
165
166 for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
167 PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
168 TheISA::GpuTLB::TranslationState *sender_state =
169 safe_cast<TheISA::GpuTLB::TranslationState*>(
170 local_pkt->senderState);
171
172 // we are sending the packet back, so pop the reqCnt associated
173 // with this level in the TLB hiearchy
174 if (!sender_state->prefetch)
175 sender_state->reqCnt.pop_back();
176
177 /*
178 * Only the first packet from this coalesced request has been
179 * translated. Grab the translated phys. page addr and update the
180 * physical addresses of the remaining packets with the appropriate
181 * page offsets.
182 */
183 if (i) {
184 Addr paddr = phys_page_paddr;
185 paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
186 local_pkt->req->setPaddr(paddr);
187
188 if (uncacheable)
189 local_pkt->req->setFlags(Request::UNCACHEABLE);
190
191 // update senderState->tlbEntry, so we can insert
192 // the correct TLBEentry in the TLBs above.
193 sender_state->tlbEntry =
194 new TheISA::GpuTlbEntry(0, first_entry_vaddr, first_entry_paddr,
195 valid);
196
197 // update the hitLevel for all uncoalesced reqs
198 // so that each packet knows where it hit
199 // (used for statistics in the CUs)
200 sender_state->hitLevel = first_hit_level;
201 }
202
203 SlavePort *return_port = sender_state->ports.back();
204 sender_state->ports.pop_back();
205
206 // Translation is done - Convert to a response pkt if necessary and
207 // send the translation back
208 if (local_pkt->isRequest()) {
209 local_pkt->makeTimingResponse();
210 }
211
212 return_port->sendTimingResp(local_pkt);
213 }
214
215 // schedule clean up for end of this cycle
216 // This is a maximum priority event and must be on
217 // the same cycle as GPUTLB cleanup event to prevent
218 // race conditions with an IssueProbeEvent caused by
219 // MemSidePort::recvReqRetry
220 cleanupQueue.push(virt_page_addr);
221
222 if (!cleanupEvent.scheduled())
223 schedule(cleanupEvent, curTick());
224}
225
226// Receive translation requests, create a coalesced request,
227// and send them to the TLB (TLBProbesPerCycle)
bool
TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
    // first packet of a coalesced request
    PacketPtr first_packet = nullptr;
    // true if we are able to do coalescing
    bool didCoalesce = false;
    // number of coalesced reqs for a given window
    int coalescedReq_cnt = 0;

    TheISA::GpuTLB::TranslationState *sender_state =
        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);

    // push back the port to remember the path back
    sender_state->ports.push_back(this);

    // Prefetch requests are excluded from all statistics.
    bool update_stats = !sender_state->prefetch;

    if (update_stats) {
        // if reqCnt is empty then this packet does not represent
        // multiple uncoalesced reqs(pkts) but just a single pkt.
        // If it does though then the reqCnt for each level in the
        // hierarchy accumulates the total number of reqs this packet
        // represents
        int req_cnt = 1;

        if (!sender_state->reqCnt.empty())
            req_cnt = sender_state->reqCnt.back();

        sender_state->reqCnt.push_back(req_cnt);

        // update statistics
        coalescer->uncoalescedAccesses++;
        req_cnt = sender_state->reqCnt.back();
        DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
        // Queueing delay is accumulated as (issue tick - arrival tick):
        // the arrival tick is subtracted here and the issue tick is added
        // later in IssueProbeEvent::process when the request is sent.
        coalescer->queuingCycles -= (curTick() * req_cnt);
        coalescer->localqueuingCycles -= curTick();
    }

    // FIXME if you want to coalesce not based on the issueTime
    // of the packets (i.e., from the compute unit's perspective)
    // but based on when they reached this coalescer then
    // remove the following if statement and use curTick() or
    // coalescingWindow for the tick_index.
    if (!sender_state->issueTime)
        sender_state->issueTime = curTick();

    // The tick index is used as a key to the coalescerFIFO hashmap.
    // It is shared by all candidates that fall within the
    // given coalescingWindow.
    int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;

    // count() guards the operator[] below so that no empty FIFO entry is
    // created just by looking up the window.
    if (coalescer->coalescerFIFO.count(tick_index)) {
        coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
    }

    // see if we can coalesce the incoming pkt with another
    // coalesced request with the same tick_index. Only the first
    // (lead) packet of each coalesced group needs to be checked.
    for (int i = 0; i < coalescedReq_cnt; ++i) {
        first_packet = coalescer->coalescerFIFO[tick_index][i][0];

        if (coalescer->canCoalesce(pkt, first_packet)) {
            coalescer->coalescerFIFO[tick_index][i].push_back(pkt);

            DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
                    i, tick_index,
                    coalescer->coalescerFIFO[tick_index][i].size());

            didCoalesce = true;
            break;
        }
    }

    // if this is the first request for this tick_index
    // or we did not manage to coalesce, update stats
    // and make necessary allocations.
    if (!coalescedReq_cnt || !didCoalesce) {
        if (update_stats)
            coalescer->coalescedAccesses++;

        // Start a new coalesced group containing only this packet.
        std::vector<PacketPtr> new_array;
        new_array.push_back(pkt);
        coalescer->coalescerFIFO[tick_index].push_back(new_array);

        DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
                "push\n", tick_index,
                coalescer->coalescerFIFO[tick_index].size());
    }

    //schedule probeTLBEvent next cycle to send the
    //coalesced requests to the TLB
    if (!coalescer->probeTLBEvent.scheduled()) {
        coalescer->schedule(coalescer->probeTLBEvent,
                curTick() + coalescer->ticks(1));
    }

    // The coalescer never rejects a request (it buffers everything), so
    // callers never need to retry.
    return true;
}
326
327void
328TLBCoalescer::CpuSidePort::recvReqRetry()
329{
330 assert(false);
331}
332
333void
334TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
335{
336
337 TheISA::GpuTLB::TranslationState *sender_state =
338 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
339
340 bool update_stats = !sender_state->prefetch;
341
342 if (update_stats)
343 coalescer->uncoalescedAccesses++;
344
345 // If there is a pending timing request for this virtual address
346 // print a warning message. This is a temporary caveat of
347 // the current simulator where atomic and timing requests can
348 // coexist. FIXME remove this check/warning in the future.
349 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
350 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
351
352 if (map_count) {
353 DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
354 "req. pending\n", virt_page_addr);
355 }
356
357 coalescer->memSidePort[0]->sendFunctional(pkt);
358}
359
360AddrRangeList
361TLBCoalescer::CpuSidePort::getAddrRanges() const
362{
363 // currently not checked by the master
364 AddrRangeList ranges;
365
366 return ranges;
367}
368
bool
TLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
{
    // a translation completed and returned; fan the result out to every
    // request that was coalesced behind it and send the responses back up
    coalescer->updatePhysAddresses(pkt);

    // Always accept responses from the TLB below.
    return true;
}
377
378void
379TLBCoalescer::MemSidePort::recvReqRetry()
380{
381 //we've receeived a retry. Schedule a probeTLBEvent
382 if (!coalescer->probeTLBEvent.scheduled())
383 coalescer->schedule(coalescer->probeTLBEvent,
384 curTick() + coalescer->ticks(1));
385}
386
void
TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
{
    // Functional accesses only flow downward (CPU side -> TLB below);
    // nothing below this coalescer is expected to send one back up.
    fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
}
392
// Event that drains the coalescer FIFO and probes the TLB below.
// Runs at CPU_Tick_Pri, i.e. after the Maximum_Pri CleanupEvent when both
// are scheduled on the same cycle.
TLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer)
    : Event(CPU_Tick_Pri), coalescer(_coalescer)
{
}
397
// Human-readable event name, shown in event-queue debug traces.
const char*
TLBCoalescer::IssueProbeEvent::description() const
{
    return "Probe the TLB below";
}
403
404/*
405 * Here we scan the coalescer FIFO and issue the max
406 * number of permitted probes to the TLB below. We
407 * permit bypassing of coalesced requests for the same
408 * tick_index.
409 *
410 * We do not access the next tick_index unless we've
411 * drained the previous one. The coalesced requests
412 * that are successfully sent are moved to the
413 * issuedTranslationsTable table (the table which keeps
414 * track of the outstanding reqs)
415 */
416void
417TLBCoalescer::IssueProbeEvent::process()
418{
419 // number of TLB probes sent so far
420 int sent_probes = 0;
421 // rejected denotes a blocking event
422 bool rejected = false;
423
424 // It is set to true either when the recvTiming of the TLB below
425 // returns false or when there is another outstanding request for the
426 // same virt. page.
427
428 DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n");
429
430 for (auto iter = coalescer->coalescerFIFO.begin();
431 iter != coalescer->coalescerFIFO.end() && !rejected; ) {
432 int coalescedReq_cnt = iter->second.size();
433 int i = 0;
434 int vector_index = 0;
435
436 DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
437 coalescedReq_cnt, iter->first);
438
439 while (i < coalescedReq_cnt) {
440 ++i;
441 PacketPtr first_packet = iter->second[vector_index][0];
442
443 // compute virtual page address for this request
444 Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
445 TheISA::PageBytes);
446
447 // is there another outstanding request for the same page addr?
448 int pending_reqs =
449 coalescer->issuedTranslationsTable.count(virt_page_addr);
450
451 if (pending_reqs) {
452 DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
453 "page %#x\n", virt_page_addr);
454
455 ++vector_index;
456 rejected = true;
457
458 continue;
459 }
460
461 // send the coalesced request for virt_page_addr
462 if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
463 DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
464 virt_page_addr);
465
466 // No need for a retries queue since we are already buffering
467 // the coalesced request in coalescerFIFO.
468 rejected = true;
469 ++vector_index;
470 } else {
471 TheISA::GpuTLB::TranslationState *tmp_sender_state =
472 safe_cast<TheISA::GpuTLB::TranslationState*>
473 (first_packet->senderState);
474
475 bool update_stats = !tmp_sender_state->prefetch;
476
477 if (update_stats) {
478 // req_cnt is total number of packets represented
479 // by the one we just sent counting all the way from
480 // the top of TLB hiearchy (i.e., from the CU)
481 int req_cnt = tmp_sender_state->reqCnt.back();
482 coalescer->queuingCycles += (curTick() * req_cnt);
483
484 DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
485 coalescer->name(), req_cnt);
486
487 // pkt_cnt is number of packets we coalesced into the one
488 // we just sent but only at this coalescer level
489 int pkt_cnt = iter->second[vector_index].size();
490 coalescer->localqueuingCycles += (curTick() * pkt_cnt);
491 }
492
493 DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
494 virt_page_addr);
495
496 //copy coalescedReq to issuedTranslationsTable
497 coalescer->issuedTranslationsTable[virt_page_addr]
498 = iter->second[vector_index];
499
500 //erase the entry of this coalesced req
501 iter->second.erase(iter->second.begin() + vector_index);
502
503 if (iter->second.empty())
504 assert(i == coalescedReq_cnt);
505
506 sent_probes++;
507 if (sent_probes == coalescer->TLBProbesPerCycle)
508 return;
509 }
510 }
511
512 //if there are no more coalesced reqs for this tick_index
513 //erase the hash_map with the first iterator
514 if (iter->second.empty()) {
515 coalescer->coalescerFIFO.erase(iter++);
516 } else {
517 ++iter;
518 }
519 }
520}
521
// Event that erases serviced entries from issuedTranslationsTable.
// Runs at Maximum_Pri so it precedes any IssueProbeEvent scheduled on the
// same cycle (see the race-condition note in updatePhysAddresses).
TLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer)
    : Event(Maximum_Pri), coalescer(_coalescer)
{
}
526
// Human-readable event name, shown in event-queue debug traces.
const char*
TLBCoalescer::CleanupEvent::description() const
{
    return "Cleanup issuedTranslationsTable hashmap";
}
532
533void
534TLBCoalescer::CleanupEvent::process()
535{
536 while (!coalescer->cleanupQueue.empty()) {
537 Addr cleanup_addr = coalescer->cleanupQueue.front();
538 coalescer->cleanupQueue.pop();
539 coalescer->issuedTranslationsTable.erase(cleanup_addr);
540
541 DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
542 cleanup_addr);
543 }
544}
545
546void
547TLBCoalescer::regStats()
548{
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/tlb_coalescer.hh"
37
38#include <cstring>
39
40#include "debug/GPUTLB.hh"
41
42TLBCoalescer::TLBCoalescer(const Params *p) : MemObject(p),
43 clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle),
44 coalescingWindow(p->coalescingWindow),
45 disableCoalescing(p->disableCoalescing), probeTLBEvent(this),
46 cleanupEvent(this)
47{
48 // create the slave ports based on the number of connected ports
49 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
50 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
51 this, i));
52 }
53
54 // create the master ports based on the number of connected ports
55 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
56 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
57 this, i));
58 }
59}
60
61BaseSlavePort&
62TLBCoalescer::getSlavePort(const std::string &if_name, PortID idx)
63{
64 if (if_name == "slave") {
65 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
66 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
67 }
68
69 return *cpuSidePort[idx];
70 } else {
71 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
72 }
73}
74
75BaseMasterPort&
76TLBCoalescer::getMasterPort(const std::string &if_name, PortID idx)
77{
78 if (if_name == "master") {
79 if (idx >= static_cast<PortID>(memSidePort.size())) {
80 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
81 }
82
83 return *memSidePort[idx];
84 } else {
85 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
86 }
87}
88
89/*
90 * This method returns true if the <incoming_pkt>
91 * can be coalesced with <coalesced_pkt> and false otherwise.
92 * A given set of rules is checked.
93 * The rules can potentially be modified based on the TLB level.
94 */
95bool
96TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
97{
98 if (disableCoalescing)
99 return false;
100
101 TheISA::GpuTLB::TranslationState *incoming_state =
102 safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
103
104 TheISA::GpuTLB::TranslationState *coalesced_state =
105 safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
106
107 // Rule 1: Coalesce requests only if they
108 // fall within the same virtual page
109 Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
110 TheISA::PageBytes);
111
112 Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
113 TheISA::PageBytes);
114
115 if (incoming_virt_page_addr != coalesced_virt_page_addr)
116 return false;
117
118 //* Rule 2: Coalesce requests only if they
119 // share a TLB Mode, i.e. they are both read
120 // or write requests.
121 BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
122 BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
123
124 if (incoming_mode != coalesced_mode)
125 return false;
126
127 // when we can coalesce a packet update the reqCnt
128 // that is the number of packets represented by
129 // this coalesced packet
130 if (!incoming_state->prefetch)
131 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
132
133 return true;
134}
135
136/*
137 * We need to update the physical addresses of all the translation requests
138 * that were coalesced into the one that just returned.
139 */
140void
141TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
142{
143 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
144
145 DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
146 issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
147
148 TheISA::GpuTLB::TranslationState *sender_state =
149 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
150
151 TheISA::GpuTlbEntry *tlb_entry = sender_state->tlbEntry;
152 assert(tlb_entry);
153 Addr first_entry_vaddr = tlb_entry->vaddr;
154 Addr first_entry_paddr = tlb_entry->paddr;
155 int page_size = tlb_entry->size();
156 bool uncacheable = tlb_entry->uncacheable;
157 int first_hit_level = sender_state->hitLevel;
158 bool valid = tlb_entry->valid;
159
160 // Get the physical page address of the translated request
161 // Using the page_size specified in the TLBEntry allows us
162 // to support different page sizes.
163 Addr phys_page_paddr = pkt->req->getPaddr();
164 phys_page_paddr &= ~(page_size - 1);
165
166 for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
167 PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
168 TheISA::GpuTLB::TranslationState *sender_state =
169 safe_cast<TheISA::GpuTLB::TranslationState*>(
170 local_pkt->senderState);
171
172 // we are sending the packet back, so pop the reqCnt associated
173 // with this level in the TLB hiearchy
174 if (!sender_state->prefetch)
175 sender_state->reqCnt.pop_back();
176
177 /*
178 * Only the first packet from this coalesced request has been
179 * translated. Grab the translated phys. page addr and update the
180 * physical addresses of the remaining packets with the appropriate
181 * page offsets.
182 */
183 if (i) {
184 Addr paddr = phys_page_paddr;
185 paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
186 local_pkt->req->setPaddr(paddr);
187
188 if (uncacheable)
189 local_pkt->req->setFlags(Request::UNCACHEABLE);
190
191 // update senderState->tlbEntry, so we can insert
192 // the correct TLBEentry in the TLBs above.
193 sender_state->tlbEntry =
194 new TheISA::GpuTlbEntry(0, first_entry_vaddr, first_entry_paddr,
195 valid);
196
197 // update the hitLevel for all uncoalesced reqs
198 // so that each packet knows where it hit
199 // (used for statistics in the CUs)
200 sender_state->hitLevel = first_hit_level;
201 }
202
203 SlavePort *return_port = sender_state->ports.back();
204 sender_state->ports.pop_back();
205
206 // Translation is done - Convert to a response pkt if necessary and
207 // send the translation back
208 if (local_pkt->isRequest()) {
209 local_pkt->makeTimingResponse();
210 }
211
212 return_port->sendTimingResp(local_pkt);
213 }
214
215 // schedule clean up for end of this cycle
216 // This is a maximum priority event and must be on
217 // the same cycle as GPUTLB cleanup event to prevent
218 // race conditions with an IssueProbeEvent caused by
219 // MemSidePort::recvReqRetry
220 cleanupQueue.push(virt_page_addr);
221
222 if (!cleanupEvent.scheduled())
223 schedule(cleanupEvent, curTick());
224}
225
226// Receive translation requests, create a coalesced request,
227// and send them to the TLB (TLBProbesPerCycle)
228bool
229TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
230{
231 // first packet of a coalesced request
232 PacketPtr first_packet = nullptr;
233 // true if we are able to do coalescing
234 bool didCoalesce = false;
235 // number of coalesced reqs for a given window
236 int coalescedReq_cnt = 0;
237
238 TheISA::GpuTLB::TranslationState *sender_state =
239 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
240
241 // push back the port to remember the path back
242 sender_state->ports.push_back(this);
243
244 bool update_stats = !sender_state->prefetch;
245
246 if (update_stats) {
247 // if reqCnt is empty then this packet does not represent
248 // multiple uncoalesced reqs(pkts) but just a single pkt.
249 // If it does though then the reqCnt for each level in the
250 // hierarchy accumulates the total number of reqs this packet
251 // represents
252 int req_cnt = 1;
253
254 if (!sender_state->reqCnt.empty())
255 req_cnt = sender_state->reqCnt.back();
256
257 sender_state->reqCnt.push_back(req_cnt);
258
259 // update statistics
260 coalescer->uncoalescedAccesses++;
261 req_cnt = sender_state->reqCnt.back();
262 DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
263 coalescer->queuingCycles -= (curTick() * req_cnt);
264 coalescer->localqueuingCycles -= curTick();
265 }
266
267 // FIXME if you want to coalesce not based on the issueTime
268 // of the packets (i.e., from the compute unit's perspective)
269 // but based on when they reached this coalescer then
270 // remove the following if statement and use curTick() or
271 // coalescingWindow for the tick_index.
272 if (!sender_state->issueTime)
273 sender_state->issueTime = curTick();
274
275 // The tick index is used as a key to the coalescerFIFO hashmap.
276 // It is shared by all candidates that fall within the
277 // given coalescingWindow.
278 int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
279
280 if (coalescer->coalescerFIFO.count(tick_index)) {
281 coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
282 }
283
284 // see if we can coalesce the incoming pkt with another
285 // coalesced request with the same tick_index
286 for (int i = 0; i < coalescedReq_cnt; ++i) {
287 first_packet = coalescer->coalescerFIFO[tick_index][i][0];
288
289 if (coalescer->canCoalesce(pkt, first_packet)) {
290 coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
291
292 DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
293 i, tick_index,
294 coalescer->coalescerFIFO[tick_index][i].size());
295
296 didCoalesce = true;
297 break;
298 }
299 }
300
301 // if this is the first request for this tick_index
302 // or we did not manage to coalesce, update stats
303 // and make necessary allocations.
304 if (!coalescedReq_cnt || !didCoalesce) {
305 if (update_stats)
306 coalescer->coalescedAccesses++;
307
308 std::vector<PacketPtr> new_array;
309 new_array.push_back(pkt);
310 coalescer->coalescerFIFO[tick_index].push_back(new_array);
311
312 DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
313 "push\n", tick_index,
314 coalescer->coalescerFIFO[tick_index].size());
315 }
316
317 //schedule probeTLBEvent next cycle to send the
318 //coalesced requests to the TLB
319 if (!coalescer->probeTLBEvent.scheduled()) {
320 coalescer->schedule(coalescer->probeTLBEvent,
321 curTick() + coalescer->ticks(1));
322 }
323
324 return true;
325}
326
327void
328TLBCoalescer::CpuSidePort::recvReqRetry()
329{
330 assert(false);
331}
332
333void
334TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
335{
336
337 TheISA::GpuTLB::TranslationState *sender_state =
338 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
339
340 bool update_stats = !sender_state->prefetch;
341
342 if (update_stats)
343 coalescer->uncoalescedAccesses++;
344
345 // If there is a pending timing request for this virtual address
346 // print a warning message. This is a temporary caveat of
347 // the current simulator where atomic and timing requests can
348 // coexist. FIXME remove this check/warning in the future.
349 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
350 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
351
352 if (map_count) {
353 DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
354 "req. pending\n", virt_page_addr);
355 }
356
357 coalescer->memSidePort[0]->sendFunctional(pkt);
358}
359
360AddrRangeList
361TLBCoalescer::CpuSidePort::getAddrRanges() const
362{
363 // currently not checked by the master
364 AddrRangeList ranges;
365
366 return ranges;
367}
368
369bool
370TLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
371{
372 // a translation completed and returned
373 coalescer->updatePhysAddresses(pkt);
374
375 return true;
376}
377
378void
379TLBCoalescer::MemSidePort::recvReqRetry()
380{
381 //we've receeived a retry. Schedule a probeTLBEvent
382 if (!coalescer->probeTLBEvent.scheduled())
383 coalescer->schedule(coalescer->probeTLBEvent,
384 curTick() + coalescer->ticks(1));
385}
386
387void
388TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
389{
390 fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
391}
392
393TLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer)
394 : Event(CPU_Tick_Pri), coalescer(_coalescer)
395{
396}
397
// Human-readable event name used by the event-queue trace output.
const char*
TLBCoalescer::IssueProbeEvent::description() const
{
    return "Probe the TLB below";
}
403
404/*
405 * Here we scan the coalescer FIFO and issue the max
406 * number of permitted probes to the TLB below. We
407 * permit bypassing of coalesced requests for the same
408 * tick_index.
409 *
410 * We do not access the next tick_index unless we've
411 * drained the previous one. The coalesced requests
412 * that are successfully sent are moved to the
413 * issuedTranslationsTable table (the table which keeps
414 * track of the outstanding reqs)
415 */
416void
417TLBCoalescer::IssueProbeEvent::process()
418{
419 // number of TLB probes sent so far
420 int sent_probes = 0;
421 // rejected denotes a blocking event
422 bool rejected = false;
423
424 // It is set to true either when the recvTiming of the TLB below
425 // returns false or when there is another outstanding request for the
426 // same virt. page.
427
428 DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n");
429
430 for (auto iter = coalescer->coalescerFIFO.begin();
431 iter != coalescer->coalescerFIFO.end() && !rejected; ) {
432 int coalescedReq_cnt = iter->second.size();
433 int i = 0;
434 int vector_index = 0;
435
436 DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
437 coalescedReq_cnt, iter->first);
438
439 while (i < coalescedReq_cnt) {
440 ++i;
441 PacketPtr first_packet = iter->second[vector_index][0];
442
443 // compute virtual page address for this request
444 Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
445 TheISA::PageBytes);
446
447 // is there another outstanding request for the same page addr?
448 int pending_reqs =
449 coalescer->issuedTranslationsTable.count(virt_page_addr);
450
451 if (pending_reqs) {
452 DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
453 "page %#x\n", virt_page_addr);
454
455 ++vector_index;
456 rejected = true;
457
458 continue;
459 }
460
461 // send the coalesced request for virt_page_addr
462 if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
463 DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
464 virt_page_addr);
465
466 // No need for a retries queue since we are already buffering
467 // the coalesced request in coalescerFIFO.
468 rejected = true;
469 ++vector_index;
470 } else {
471 TheISA::GpuTLB::TranslationState *tmp_sender_state =
472 safe_cast<TheISA::GpuTLB::TranslationState*>
473 (first_packet->senderState);
474
475 bool update_stats = !tmp_sender_state->prefetch;
476
477 if (update_stats) {
478 // req_cnt is total number of packets represented
479 // by the one we just sent counting all the way from
480 // the top of TLB hiearchy (i.e., from the CU)
481 int req_cnt = tmp_sender_state->reqCnt.back();
482 coalescer->queuingCycles += (curTick() * req_cnt);
483
484 DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
485 coalescer->name(), req_cnt);
486
487 // pkt_cnt is number of packets we coalesced into the one
488 // we just sent but only at this coalescer level
489 int pkt_cnt = iter->second[vector_index].size();
490 coalescer->localqueuingCycles += (curTick() * pkt_cnt);
491 }
492
493 DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
494 virt_page_addr);
495
496 //copy coalescedReq to issuedTranslationsTable
497 coalescer->issuedTranslationsTable[virt_page_addr]
498 = iter->second[vector_index];
499
500 //erase the entry of this coalesced req
501 iter->second.erase(iter->second.begin() + vector_index);
502
503 if (iter->second.empty())
504 assert(i == coalescedReq_cnt);
505
506 sent_probes++;
507 if (sent_probes == coalescer->TLBProbesPerCycle)
508 return;
509 }
510 }
511
512 //if there are no more coalesced reqs for this tick_index
513 //erase the hash_map with the first iterator
514 if (iter->second.empty()) {
515 coalescer->coalescerFIFO.erase(iter++);
516 } else {
517 ++iter;
518 }
519 }
520}
521
// Cleanup runs at maximum priority so finished translations are
// retired from issuedTranslationsTable before any other event in
// the same tick can observe them as still outstanding.
TLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer)
    : Event(Maximum_Pri), coalescer(_coalescer)
{
}
526
// Human-readable event name used by the event-queue trace output.
const char*
TLBCoalescer::CleanupEvent::description() const
{
    return "Cleanup issuedTranslationsTable hashmap";
}
532
533void
534TLBCoalescer::CleanupEvent::process()
535{
536 while (!coalescer->cleanupQueue.empty()) {
537 Addr cleanup_addr = coalescer->cleanupQueue.front();
538 coalescer->cleanupQueue.pop();
539 coalescer->issuedTranslationsTable.erase(cleanup_addr);
540
541 DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
542 cleanup_addr);
543 }
544}
545
// Register this coalescer's statistics with the stats framework.
// Stat names/descriptions are part of the stats output; registration
// order is preserved as-is.
void
TLBCoalescer::regStats()
{
    MemObject::regStats();

    uncoalescedAccesses
        .name(name() + ".uncoalesced_accesses")
        .desc("Number of uncoalesced TLB accesses")
        ;

    coalescedAccesses
        .name(name() + ".coalesced_accesses")
        .desc("Number of coalesced TLB accesses")
        ;

    queuingCycles
        .name(name() + ".queuing_cycles")
        .desc("Number of cycles spent in queue")
        ;

    localqueuingCycles
        .name(name() + ".local_queuing_cycles")
        .desc("Number of cycles spent in queue for all incoming reqs")
        ;

    localLatency
        .name(name() + ".local_latency")
        .desc("Avg. latency over all incoming pkts")
        ;

    // formula stat: average per-packet latency at this coalescer level
    localLatency = localqueuingCycles / uncoalescedAccesses;
}
576
577
// SimObject factory hook: called by the generated Python params
// object to instantiate the C++ TLBCoalescer.
TLBCoalescer*
TLBCoalescerParams::create()
{
    return new TLBCoalescer(this);
}
583
585