gpu_tlb.cc (13449:2f7efa89c58b)
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */

#include "gpu-compute/gpu_tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/logging.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes a fixed page
         * size of 4KB.
         * If the page size is greater than 4KB (as defined in
         * TheISA::PageBytes), then there are various issues with the
         * current implementation (e.g., the same 8KB page would be
         * replicated in different sets).
         */
        setMask = numSets - 1;

#if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
#endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }
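
    /*
     * A worked example of the set/way indexing above (sizes assumed for
     * illustration, not defaults): with size = 32 entries and assoc = 4,
     * numSets = 8 and setMask = 0x7. Entry (set 2, way 3) lives at
     * tlb[2 * 4 + 3] = tlb[11], and a 4KB-page vaddr such as 0x2a000
     * maps to set (0x2a000 >> 12) & 0x7 = 0x2a & 0x7 = 2.
     */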

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }

    BaseSlavePort&
    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("GpuTLB::getSlavePort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else {
            panic("GpuTLB::getSlavePort: unknown port %s\n", if_name);
        }
    }

    BaseMasterPort&
    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("GpuTLB::getMasterPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("GpuTLB::getMasterPort: unknown port %s\n", if_name);
        }
    }

    TlbEntry*
    GpuTLB::insert(Addr vpn, TlbEntry &entry)
    {
        TlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address.
         * The least significant bits are simply masked.
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }
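
    /*
     * A minimal usage sketch of insert() (assumed call pattern, mirroring
     * the miss path in translate() below):
     *
     *     TlbEntry gpuEntry(p->pid(), alignedVaddr, pte->paddr,
     *                       false, false);
     *     TlbEntry *e = insert(alignedVaddr, gpuEntry);
     *
     * If the set still has a free way it is used; otherwise the LRU
     * entry (the back of entryList[set]) is overwritten, and the new
     * entry becomes the MRU at the front of the list.
     */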

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    TlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }
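
    /*
     * A worked example of the hit check in lookupIt (values assumed):
     * an entry with vaddr = 0x2a000 and size() = 0x1000 covers
     * [0x2a000, 0x2b000), so va = 0x2a7f8 hits while va = 0x2b000 does
     * not. With update_lru set, the hit entry is also moved to the front
     * of entryList[set], keeping the list ordered MRU -> LRU.
     */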

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                TlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {
        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:  regNum = MISCREG_TSC; break;
              case 0x1B:  regNum = MISCREG_APIC_BASE; break;
              case 0xFE:  regNum = MISCREG_MTRRCAP; break;
              case 0x174: regNum = MISCREG_SYSENTER_CS; break;
              case 0x175: regNum = MISCREG_SYSENTER_ESP; break;
              case 0x176: regNum = MISCREG_SYSENTER_EIP; break;
              case 0x179: regNum = MISCREG_MCG_CAP; break;
              case 0x17A: regNum = MISCREG_MCG_STATUS; break;
              case 0x17B: regNum = MISCREG_MCG_CTL; break;
              case 0x1D9: regNum = MISCREG_DEBUG_CTL_MSR; break;
              case 0x1DB: regNum = MISCREG_LAST_BRANCH_FROM_IP; break;
              case 0x1DC: regNum = MISCREG_LAST_BRANCH_TO_IP; break;
              case 0x1DD: regNum = MISCREG_LAST_EXCEPTION_FROM_IP; break;
              case 0x1DE: regNum = MISCREG_LAST_EXCEPTION_TO_IP; break;
              case 0x200: regNum = MISCREG_MTRR_PHYS_BASE_0; break;
              case 0x201: regNum = MISCREG_MTRR_PHYS_MASK_0; break;
              case 0x202: regNum = MISCREG_MTRR_PHYS_BASE_1; break;
              case 0x203: regNum = MISCREG_MTRR_PHYS_MASK_1; break;
              case 0x204: regNum = MISCREG_MTRR_PHYS_BASE_2; break;
              case 0x205: regNum = MISCREG_MTRR_PHYS_MASK_2; break;
              case 0x206: regNum = MISCREG_MTRR_PHYS_BASE_3; break;
              case 0x207: regNum = MISCREG_MTRR_PHYS_MASK_3; break;
              case 0x208: regNum = MISCREG_MTRR_PHYS_BASE_4; break;
              case 0x209: regNum = MISCREG_MTRR_PHYS_MASK_4; break;
              case 0x20A: regNum = MISCREG_MTRR_PHYS_BASE_5; break;
              case 0x20B: regNum = MISCREG_MTRR_PHYS_MASK_5; break;
              case 0x20C: regNum = MISCREG_MTRR_PHYS_BASE_6; break;
              case 0x20D: regNum = MISCREG_MTRR_PHYS_MASK_6; break;
              case 0x20E: regNum = MISCREG_MTRR_PHYS_BASE_7; break;
              case 0x20F: regNum = MISCREG_MTRR_PHYS_MASK_7; break;
              case 0x250: regNum = MISCREG_MTRR_FIX_64K_00000; break;
              case 0x258: regNum = MISCREG_MTRR_FIX_16K_80000; break;
              case 0x259: regNum = MISCREG_MTRR_FIX_16K_A0000; break;
              case 0x268: regNum = MISCREG_MTRR_FIX_4K_C0000; break;
              case 0x269: regNum = MISCREG_MTRR_FIX_4K_C8000; break;
              case 0x26A: regNum = MISCREG_MTRR_FIX_4K_D0000; break;
              case 0x26B: regNum = MISCREG_MTRR_FIX_4K_D8000; break;
              case 0x26C: regNum = MISCREG_MTRR_FIX_4K_E0000; break;
              case 0x26D: regNum = MISCREG_MTRR_FIX_4K_E8000; break;
              case 0x26E: regNum = MISCREG_MTRR_FIX_4K_F0000; break;
              case 0x26F: regNum = MISCREG_MTRR_FIX_4K_F8000; break;
              case 0x277: regNum = MISCREG_PAT; break;
              case 0x2FF: regNum = MISCREG_DEF_TYPE; break;
              case 0x400: regNum = MISCREG_MC0_CTL; break;
              case 0x404: regNum = MISCREG_MC1_CTL; break;
              case 0x408: regNum = MISCREG_MC2_CTL; break;
              case 0x40C: regNum = MISCREG_MC3_CTL; break;
              case 0x410: regNum = MISCREG_MC4_CTL; break;
              case 0x414: regNum = MISCREG_MC5_CTL; break;
              case 0x418: regNum = MISCREG_MC6_CTL; break;
              case 0x41C: regNum = MISCREG_MC7_CTL; break;
              case 0x401: regNum = MISCREG_MC0_STATUS; break;
              case 0x405: regNum = MISCREG_MC1_STATUS; break;
              case 0x409: regNum = MISCREG_MC2_STATUS; break;
              case 0x40D: regNum = MISCREG_MC3_STATUS; break;
              case 0x411: regNum = MISCREG_MC4_STATUS; break;
              case 0x415: regNum = MISCREG_MC5_STATUS; break;
              case 0x419: regNum = MISCREG_MC6_STATUS; break;
              case 0x41D: regNum = MISCREG_MC7_STATUS; break;
              case 0x402: regNum = MISCREG_MC0_ADDR; break;
              case 0x406: regNum = MISCREG_MC1_ADDR; break;
              case 0x40A: regNum = MISCREG_MC2_ADDR; break;
              case 0x40E: regNum = MISCREG_MC3_ADDR; break;
              case 0x412: regNum = MISCREG_MC4_ADDR; break;
              case 0x416: regNum = MISCREG_MC5_ADDR; break;
              case 0x41A: regNum = MISCREG_MC6_ADDR; break;
              case 0x41E: regNum = MISCREG_MC7_ADDR; break;
              case 0x403: regNum = MISCREG_MC0_MISC; break;
              case 0x407: regNum = MISCREG_MC1_MISC; break;
              case 0x40B: regNum = MISCREG_MC2_MISC; break;
              case 0x40F: regNum = MISCREG_MC3_MISC; break;
              case 0x413: regNum = MISCREG_MC4_MISC; break;
              case 0x417: regNum = MISCREG_MC5_MISC; break;
              case 0x41B: regNum = MISCREG_MC6_MISC; break;
              case 0x41F: regNum = MISCREG_MC7_MISC; break;
              case 0xC0000080: regNum = MISCREG_EFER; break;
              case 0xC0000081: regNum = MISCREG_STAR; break;
              case 0xC0000082: regNum = MISCREG_LSTAR; break;
              case 0xC0000083: regNum = MISCREG_CSTAR; break;
              case 0xC0000084: regNum = MISCREG_SF_MASK; break;
              case 0xC0000100: regNum = MISCREG_FS_BASE; break;
              case 0xC0000101: regNum = MISCREG_GS_BASE; break;
              case 0xC0000102: regNum = MISCREG_KERNEL_GS_BASE; break;
              case 0xC0000103: regNum = MISCREG_TSC_AUX; break;
              case 0xC0010000: regNum = MISCREG_PERF_EVT_SEL0; break;
              case 0xC0010001: regNum = MISCREG_PERF_EVT_SEL1; break;
              case 0xC0010002: regNum = MISCREG_PERF_EVT_SEL2; break;
              case 0xC0010003: regNum = MISCREG_PERF_EVT_SEL3; break;
              case 0xC0010004: regNum = MISCREG_PERF_EVT_CTR0; break;
              case 0xC0010005: regNum = MISCREG_PERF_EVT_CTR1; break;
              case 0xC0010006: regNum = MISCREG_PERF_EVT_CTR2; break;
              case 0xC0010007: regNum = MISCREG_PERF_EVT_CTR3; break;
              case 0xC0010010: regNum = MISCREG_SYSCFG; break;
              case 0xC0010016: regNum = MISCREG_IORR_BASE0; break;
              case 0xC0010017: regNum = MISCREG_IORR_BASE1; break;
              case 0xC0010018: regNum = MISCREG_IORR_MASK0; break;
              case 0xC0010019: regNum = MISCREG_IORR_MASK1; break;
              case 0xC001001A: regNum = MISCREG_TOP_MEM; break;
              case 0xC001001D: regNum = MISCREG_TOP_MEM2; break;
              case 0xC0010114: regNum = MISCREG_VM_CR; break;
              case 0xC0010115: regNum = MISCREG_IGNNE; break;
              case 0xC0010116: regNum = MISCREG_SMM_CTL; break;
              case 0xC0010117: regNum = MISCREG_VM_HSAVE_PA; break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            // The index is multiplied by the size of a MiscReg so that
            // any memory dependence calculations will not see these as
            // overlapping.
            req->setPaddr(regNum * sizeof(MiscReg));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }
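
    /*
     * A worked example of the MSR mapping above (register chosen for
     * illustration): an access to MSR 0xC0000080 (EFER) arrives with the
     * MSR number encoded in vaddr; after vaddr >>= 3, the switch selects
     * regNum = MISCREG_EFER, and the request's paddr becomes
     * MISCREG_EFER * sizeof(MiscReg), a byte offset that cannot overlap
     * any other register's pseudo-address.
     */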

    /**
     * tlbLookup only performs a TLB lookup, returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks for different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(const RequestPtr &req,
                      ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
#ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
#endif

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // update LRU stack on a hit
                TlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }
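
    /*
     * A note on the update_stats path above (a reading of the code, not a
     * spec): lookup() is called with update_lru = true before the
     * update_stats check, so even a functional access reorders the LRU
     * stack; update_stats = false only skips the statistics updates.
     */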

    Fault
    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;
                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                TlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                              pte->paddr, false, false);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                    badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }

                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                // Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }
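
    /*
     * A worked example of the address formation in translate() (numbers
     * assumed): on a hit to a 4KB page, page_size = 0x1000, so with
     * entry->paddr = 0x7f000 and vaddr = 0x2a7f8 the physical address is
     * 0x7f000 | (0x2a7f8 & 0xfff) = 0x7f7f8; the page offset is carried
     * over unchanged and only the page frame is translated.
     */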

    Fault
    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
                            Mode mode, int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }

    void
    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }
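
    /*
     * A minimal usage sketch of the two entry points above (assumed
     * caller code, not taken from this file):
     *
     *     int lat = 0;
     *     Fault f = tlb->translateAtomic(req, tc, BaseTLB::Read, lat);
     *
     * Timing-mode callers instead pass a Translation object whose
     * finish() is invoked with the resulting fault; since translate()
     * never sets delayedResponse, finish() runs before translateTiming
     * returns.
     */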

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }

    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;
    }
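
    /*
     * A worked example of the derived stats above (numbers assumed):
     * with localNumTLBAccesses = 200 and localNumTLBMisses = 30,
     * localTLBMissRate = 100 * 30 / 200 = 15 (percent). localLatency
     * is likewise a ratio formula, localCycles / localNumTLBAccesses,
     * evaluated when the stats are dumped.
     */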

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB).
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }
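
    /*
     * A note on the stats arithmetic in issueTLBLookup (a reading of the
     * code, not a spec): accessCycles -= curTick() * req_cnt records the
     * issue time as a negative contribution, presumably matched by a
     * corresponding += when the translation completes (not shown in this
     * excerpt). E.g., issuing at tick 1000 and completing at tick 1400
     * with req_cnt = 2 would leave -2000 + 2800 = 800 in the accumulator,
     * i.e. the coalesced wait time.
     */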

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                               PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do paging protection checks. If a page fault is encountered, an
     * assertion fires.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   TlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
49#include "base/output.hh"
50#include "base/trace.hh"
51#include "cpu/base.hh"
52#include "cpu/thread_context.hh"
53#include "debug/GPUPrefetch.hh"
54#include "debug/GPUTLB.hh"
55#include "mem/packet_access.hh"
56#include "mem/page_table.hh"
57#include "mem/request.hh"
58#include "sim/process.hh"
59
60namespace X86ISA
61{
62
63 GpuTLB::GpuTLB(const Params *p)
64 : MemObject(p), configAddress(0), size(p->size),
65 cleanupEvent([this]{ cleanup(); }, name(), false,
66 Event::Maximum_Pri),
67 exitEvent([this]{ exitCallback(); }, name())
68 {
69 assoc = p->assoc;
70 assert(assoc <= size);
71 numSets = size/assoc;
72 allocationPolicy = p->allocationPolicy;
73 hasMemSidePort = false;
74 accessDistance = p->accessDistance;
75 clock = p->clk_domain->clockPeriod();
76
77 tlb.assign(size, TlbEntry());
78
79 freeList.resize(numSets);
80 entryList.resize(numSets);
81
82 for (int set = 0; set < numSets; ++set) {
83 for (int way = 0; way < assoc; ++way) {
84 int x = set * assoc + way;
85 freeList[set].push_back(&tlb.at(x));
86 }
87 }
88
89 FA = (size == assoc);
90
91 /**
92 * @warning: the set-associative version assumes you have a
93 * fixed page size of 4KB.
94 * If the page size is greather than 4KB (as defined in the
95 * TheISA::PageBytes), then there are various issues w/ the current
96 * implementation (you'd have the same 8KB page being replicated in
97 * different sets etc)
98 */
99 setMask = numSets - 1;
100
101 #if 0
102 // GpuTLB doesn't yet support full system
103 walker = p->walker;
104 walker->setTLB(this);
105 #endif
106
107 maxCoalescedReqs = p->maxOutstandingReqs;
108
109 // Do not allow maxCoalescedReqs to be more than the TLB associativity
110 if (maxCoalescedReqs > assoc) {
111 maxCoalescedReqs = assoc;
112 cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
113 }
114
115 outstandingReqs = 0;
116 hitLatency = p->hitLatency;
117 missLatency1 = p->missLatency1;
118 missLatency2 = p->missLatency2;
119
120 // create the slave ports based on the number of connected ports
121 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
122 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
123 name(), i), this, i));
124 }
125
126 // create the master ports based on the number of connected ports
127 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
128 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
129 name(), i), this, i));
130 }
131 }
132
133 // fixme: this is never called?
134 GpuTLB::~GpuTLB()
135 {
136 // make sure all the hash-maps are empty
137 assert(translationReturnEvent.empty());
138 }
139
140 BaseSlavePort&
141 GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
142 {
143 if (if_name == "slave") {
144 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
145 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
146 }
147
148 return *cpuSidePort[idx];
149 } else {
150 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
151 }
152 }
153
154 BaseMasterPort&
155 GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
156 {
157 if (if_name == "master") {
158 if (idx >= static_cast<PortID>(memSidePort.size())) {
159 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
160 }
161
162 hasMemSidePort = true;
163
164 return *memSidePort[idx];
165 } else {
166 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
167 }
168 }
169
170 TlbEntry*
171 GpuTLB::insert(Addr vpn, TlbEntry &entry)
172 {
173 TlbEntry *newEntry = nullptr;
174
175 /**
176 * vpn holds the virtual page address
177 * The least significant bits are simply masked
178 */
179 int set = (vpn >> TheISA::PageShift) & setMask;
180
181 if (!freeList[set].empty()) {
182 newEntry = freeList[set].front();
183 freeList[set].pop_front();
184 } else {
185 newEntry = entryList[set].back();
186 entryList[set].pop_back();
187 }
188
189 *newEntry = entry;
190 newEntry->vaddr = vpn;
191 entryList[set].push_front(newEntry);
192
193 return newEntry;
194 }
195
196 GpuTLB::EntryList::iterator
197 GpuTLB::lookupIt(Addr va, bool update_lru)
198 {
199 int set = (va >> TheISA::PageShift) & setMask;
200
201 if (FA) {
202 assert(!set);
203 }
204
205 auto entry = entryList[set].begin();
206 for (; entry != entryList[set].end(); ++entry) {
207 int page_size = (*entry)->size();
208
209 if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
210 DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
211 "with size %#x.\n", va, (*entry)->vaddr, page_size);
212
213 if (update_lru) {
214 entryList[set].push_front(*entry);
215 entryList[set].erase(entry);
216 entry = entryList[set].begin();
217 }
218
219 break;
220 }
221 }
222
223 return entry;
224 }
225
226 TlbEntry*
227 GpuTLB::lookup(Addr va, bool update_lru)
228 {
229 int set = (va >> TheISA::PageShift) & setMask;
230
231 auto entry = lookupIt(va, update_lru);
232
233 if (entry == entryList[set].end())
234 return nullptr;
235 else
236 return *entry;
237 }
238
239 void
240 GpuTLB::invalidateAll()
241 {
242 DPRINTF(GPUTLB, "Invalidating all entries.\n");
243
244 for (int i = 0; i < numSets; ++i) {
245 while (!entryList[i].empty()) {
246 TlbEntry *entry = entryList[i].front();
247 entryList[i].pop_front();
248 freeList[i].push_back(entry);
249 }
250 }
251 }
252
253 void
254 GpuTLB::setConfigAddress(uint32_t addr)
255 {
256 configAddress = addr;
257 }
258
259 void
260 GpuTLB::invalidateNonGlobal()
261 {
262 DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
263
264 for (int i = 0; i < numSets; ++i) {
265 for (auto entryIt = entryList[i].begin();
266 entryIt != entryList[i].end();) {
267 if (!(*entryIt)->global) {
268 freeList[i].push_back(*entryIt);
269 entryList[i].erase(entryIt++);
270 } else {
271 ++entryIt;
272 }
273 }
274 }
275 }
276
277 void
278 GpuTLB::demapPage(Addr va, uint64_t asn)
279 {
280
281 int set = (va >> TheISA::PageShift) & setMask;
282 auto entry = lookupIt(va, false);
283
284 if (entry != entryList[set].end()) {
285 freeList[set].push_back(*entry);
286 entryList[set].erase(entry);
287 }
288 }
289
290 Fault
291 GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
292 {
293 DPRINTF(GPUTLB, "Addresses references internal memory.\n");
294 Addr vaddr = req->getVaddr();
295 Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
296
297 if (prefix == IntAddrPrefixCPUID) {
298 panic("CPUID memory space not yet implemented!\n");
299 } else if (prefix == IntAddrPrefixMSR) {
300 vaddr = vaddr >> 3;
301 req->setFlags(Request::MMAPPED_IPR);
302 Addr regNum = 0;
303
304 switch (vaddr & ~IntAddrPrefixMask) {
305 case 0x10:
306 regNum = MISCREG_TSC;
307 break;
308 case 0x1B:
309 regNum = MISCREG_APIC_BASE;
310 break;
311 case 0xFE:
312 regNum = MISCREG_MTRRCAP;
313 break;
314 case 0x174:
315 regNum = MISCREG_SYSENTER_CS;
316 break;
317 case 0x175:
318 regNum = MISCREG_SYSENTER_ESP;
319 break;
320 case 0x176:
321 regNum = MISCREG_SYSENTER_EIP;
322 break;
323 case 0x179:
324 regNum = MISCREG_MCG_CAP;
325 break;
326 case 0x17A:
327 regNum = MISCREG_MCG_STATUS;
328 break;
329 case 0x17B:
330 regNum = MISCREG_MCG_CTL;
331 break;
332 case 0x1D9:
333 regNum = MISCREG_DEBUG_CTL_MSR;
334 break;
335 case 0x1DB:
336 regNum = MISCREG_LAST_BRANCH_FROM_IP;
337 break;
338 case 0x1DC:
339 regNum = MISCREG_LAST_BRANCH_TO_IP;
340 break;
341 case 0x1DD:
342 regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
343 break;
344 case 0x1DE:
345 regNum = MISCREG_LAST_EXCEPTION_TO_IP;
346 break;
347 case 0x200:
348 regNum = MISCREG_MTRR_PHYS_BASE_0;
349 break;
350 case 0x201:
351 regNum = MISCREG_MTRR_PHYS_MASK_0;
352 break;
353 case 0x202:
354 regNum = MISCREG_MTRR_PHYS_BASE_1;
355 break;
356 case 0x203:
357 regNum = MISCREG_MTRR_PHYS_MASK_1;
358 break;
359 case 0x204:
360 regNum = MISCREG_MTRR_PHYS_BASE_2;
361 break;
362 case 0x205:
363 regNum = MISCREG_MTRR_PHYS_MASK_2;
364 break;
365 case 0x206:
366 regNum = MISCREG_MTRR_PHYS_BASE_3;
367 break;
368 case 0x207:
369 regNum = MISCREG_MTRR_PHYS_MASK_3;
370 break;
371 case 0x208:
372 regNum = MISCREG_MTRR_PHYS_BASE_4;
373 break;
374 case 0x209:
375 regNum = MISCREG_MTRR_PHYS_MASK_4;
376 break;
377 case 0x20A:
378 regNum = MISCREG_MTRR_PHYS_BASE_5;
379 break;
380 case 0x20B:
381 regNum = MISCREG_MTRR_PHYS_MASK_5;
382 break;
383 case 0x20C:
384 regNum = MISCREG_MTRR_PHYS_BASE_6;
385 break;
386 case 0x20D:
387 regNum = MISCREG_MTRR_PHYS_MASK_6;
388 break;
389 case 0x20E:
390 regNum = MISCREG_MTRR_PHYS_BASE_7;
391 break;
392 case 0x20F:
393 regNum = MISCREG_MTRR_PHYS_MASK_7;
394 break;
395 case 0x250:
396 regNum = MISCREG_MTRR_FIX_64K_00000;
397 break;
398 case 0x258:
399 regNum = MISCREG_MTRR_FIX_16K_80000;
400 break;
401 case 0x259:
402 regNum = MISCREG_MTRR_FIX_16K_A0000;
403 break;
404 case 0x268:
405 regNum = MISCREG_MTRR_FIX_4K_C0000;
406 break;
407 case 0x269:
408 regNum = MISCREG_MTRR_FIX_4K_C8000;
409 break;
410 case 0x26A:
411 regNum = MISCREG_MTRR_FIX_4K_D0000;
412 break;
413 case 0x26B:
414 regNum = MISCREG_MTRR_FIX_4K_D8000;
415 break;
416 case 0x26C:
417 regNum = MISCREG_MTRR_FIX_4K_E0000;
418 break;
419 case 0x26D:
420 regNum = MISCREG_MTRR_FIX_4K_E8000;
421 break;
422 case 0x26E:
423 regNum = MISCREG_MTRR_FIX_4K_F0000;
424 break;
425 case 0x26F:
426 regNum = MISCREG_MTRR_FIX_4K_F8000;
427 break;
428 case 0x277:
429 regNum = MISCREG_PAT;
430 break;
431 case 0x2FF:
432 regNum = MISCREG_DEF_TYPE;
433 break;
434 case 0x400:
435 regNum = MISCREG_MC0_CTL;
436 break;
437 case 0x404:
438 regNum = MISCREG_MC1_CTL;
439 break;
440 case 0x408:
441 regNum = MISCREG_MC2_CTL;
442 break;
443 case 0x40C:
444 regNum = MISCREG_MC3_CTL;
445 break;
446 case 0x410:
447 regNum = MISCREG_MC4_CTL;
448 break;
449 case 0x414:
450 regNum = MISCREG_MC5_CTL;
451 break;
452 case 0x418:
453 regNum = MISCREG_MC6_CTL;
454 break;
455 case 0x41C:
456 regNum = MISCREG_MC7_CTL;
457 break;
458 case 0x401:
459 regNum = MISCREG_MC0_STATUS;
460 break;
461 case 0x405:
462 regNum = MISCREG_MC1_STATUS;
463 break;
464 case 0x409:
465 regNum = MISCREG_MC2_STATUS;
466 break;
467 case 0x40D:
468 regNum = MISCREG_MC3_STATUS;
469 break;
470 case 0x411:
471 regNum = MISCREG_MC4_STATUS;
472 break;
473 case 0x415:
474 regNum = MISCREG_MC5_STATUS;
475 break;
476 case 0x419:
477 regNum = MISCREG_MC6_STATUS;
478 break;
479 case 0x41D:
480 regNum = MISCREG_MC7_STATUS;
481 break;
482 case 0x402:
483 regNum = MISCREG_MC0_ADDR;
484 break;
485 case 0x406:
486 regNum = MISCREG_MC1_ADDR;
487 break;
488 case 0x40A:
489 regNum = MISCREG_MC2_ADDR;
490 break;
491 case 0x40E:
492 regNum = MISCREG_MC3_ADDR;
493 break;
494 case 0x412:
495 regNum = MISCREG_MC4_ADDR;
496 break;
497 case 0x416:
498 regNum = MISCREG_MC5_ADDR;
499 break;
500 case 0x41A:
501 regNum = MISCREG_MC6_ADDR;
502 break;
503 case 0x41E:
504 regNum = MISCREG_MC7_ADDR;
505 break;
506 case 0x403:
507 regNum = MISCREG_MC0_MISC;
508 break;
509 case 0x407:
510 regNum = MISCREG_MC1_MISC;
511 break;
512 case 0x40B:
513 regNum = MISCREG_MC2_MISC;
514 break;
515 case 0x40F:
516 regNum = MISCREG_MC3_MISC;
517 break;
518 case 0x413:
519 regNum = MISCREG_MC4_MISC;
520 break;
521 case 0x417:
522 regNum = MISCREG_MC5_MISC;
523 break;
524 case 0x41B:
525 regNum = MISCREG_MC6_MISC;
526 break;
527 case 0x41F:
528 regNum = MISCREG_MC7_MISC;
529 break;
530 case 0xC0000080:
531 regNum = MISCREG_EFER;
532 break;
533 case 0xC0000081:
534 regNum = MISCREG_STAR;
535 break;
536 case 0xC0000082:
537 regNum = MISCREG_LSTAR;
538 break;
539 case 0xC0000083:
540 regNum = MISCREG_CSTAR;
541 break;
542 case 0xC0000084:
543 regNum = MISCREG_SF_MASK;
544 break;
545 case 0xC0000100:
546 regNum = MISCREG_FS_BASE;
547 break;
548 case 0xC0000101:
549 regNum = MISCREG_GS_BASE;
550 break;
551 case 0xC0000102:
552 regNum = MISCREG_KERNEL_GS_BASE;
553 break;
554 case 0xC0000103:
555 regNum = MISCREG_TSC_AUX;
556 break;
557 case 0xC0010000:
558 regNum = MISCREG_PERF_EVT_SEL0;
559 break;
560 case 0xC0010001:
561 regNum = MISCREG_PERF_EVT_SEL1;
562 break;
563 case 0xC0010002:
564 regNum = MISCREG_PERF_EVT_SEL2;
565 break;
566 case 0xC0010003:
567 regNum = MISCREG_PERF_EVT_SEL3;
568 break;
569 case 0xC0010004:
570 regNum = MISCREG_PERF_EVT_CTR0;
571 break;
572 case 0xC0010005:
573 regNum = MISCREG_PERF_EVT_CTR1;
574 break;
575 case 0xC0010006:
576 regNum = MISCREG_PERF_EVT_CTR2;
577 break;
578 case 0xC0010007:
579 regNum = MISCREG_PERF_EVT_CTR3;
580 break;
581 case 0xC0010010:
582 regNum = MISCREG_SYSCFG;
583 break;
584 case 0xC0010016:
585 regNum = MISCREG_IORR_BASE0;
586 break;
587 case 0xC0010017:
588 regNum = MISCREG_IORR_BASE1;
589 break;
590 case 0xC0010018:
591 regNum = MISCREG_IORR_MASK0;
592 break;
593 case 0xC0010019:
594 regNum = MISCREG_IORR_MASK1;
595 break;
596 case 0xC001001A:
597 regNum = MISCREG_TOP_MEM;
598 break;
599 case 0xC001001D:
600 regNum = MISCREG_TOP_MEM2;
601 break;
602 case 0xC0010114:
603 regNum = MISCREG_VM_CR;
604 break;
605 case 0xC0010115:
606 regNum = MISCREG_IGNNE;
607 break;
608 case 0xC0010116:
609 regNum = MISCREG_SMM_CTL;
610 break;
611 case 0xC0010117:
612 regNum = MISCREG_VM_HSAVE_PA;
613 break;
614 default:
615 return std::make_shared<GeneralProtection>(0);
616 }
617 //The index is multiplied by the size of a MiscReg so that
618 //any memory dependence calculations will not see these as
619 //overlapping.
620 req->setPaddr(regNum * sizeof(MiscReg));
621 return NoFault;
622 } else if (prefix == IntAddrPrefixIO) {
623 // TODO If CPL > IOPL or in virtual mode, check the I/O permission
624 // bitmap in the TSS.
625
626 Addr IOPort = vaddr & ~IntAddrPrefixMask;
627 // Make sure the address fits in the expected 16 bit IO address
628 // space.
629 assert(!(IOPort & ~0xFFFF));
630
631 if (IOPort == 0xCF8 && req->getSize() == 4) {
632 req->setFlags(Request::MMAPPED_IPR);
633 req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
634 } else if ((IOPort & ~mask(2)) == 0xCFC) {
635 req->setFlags(Request::UNCACHEABLE);
636
637 Addr configAddress =
638 tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
639
640 if (bits(configAddress, 31, 31)) {
641 req->setPaddr(PhysAddrPrefixPciConfig |
642 mbits(configAddress, 30, 2) |
643 (IOPort & mask(2)));
644 } else {
645 req->setPaddr(PhysAddrPrefixIO | IOPort);
646 }
647 } else {
648 req->setFlags(Request::UNCACHEABLE);
649 req->setPaddr(PhysAddrPrefixIO | IOPort);
650 }
651 return NoFault;
652 } else {
653 panic("Access to unrecognized internal address space %#x.\n",
654 prefix);
655 }
656 }
657
658 /**
659 * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
660 * and false on a TLB miss.
661 * Many of the checks about different modes have been converted to
662 * assertions, since these parts of the code are not really used.
663 * On a hit it will update the LRU stack.
664 */
665 bool
666 GpuTLB::tlbLookup(const RequestPtr &req,
667 ThreadContext *tc, bool update_stats)
668 {
669 bool tlb_hit = false;
670 #ifndef NDEBUG
671 uint32_t flags = req->getFlags();
672 int seg = flags & SegmentFlagMask;
673 #endif
674
675 assert(seg != SEGMENT_REG_MS);
676 Addr vaddr = req->getVaddr();
677 DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
678 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
679
680 if (m5Reg.prot) {
681 DPRINTF(GPUTLB, "In protected mode.\n");
682 // make sure we are in 64-bit mode
683 assert(m5Reg.mode == LongMode);
684
685 // If paging is enabled, do the translation.
686 if (m5Reg.paging) {
687 DPRINTF(GPUTLB, "Paging enabled.\n");
688 //update LRU stack on a hit
689 TlbEntry *entry = lookup(vaddr, true);
690
691 if (entry)
692 tlb_hit = true;
693
694 if (!update_stats) {
695 // functional tlb access for memory initialization
696 // i.e., memory seeding or instr. seeding -> don't update
697 // TLB and stats
698 return tlb_hit;
699 }
700
701 localNumTLBAccesses++;
702
703 if (!entry) {
704 localNumTLBMisses++;
705 } else {
706 localNumTLBHits++;
707 }
708 }
709 }
710
711 return tlb_hit;
712 }
713
714 Fault
715 GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
716 Translation *translation, Mode mode,
717 bool &delayedResponse, bool timing, int &latency)
718 {
719 uint32_t flags = req->getFlags();
720 int seg = flags & SegmentFlagMask;
721 bool storeCheck = flags & (StoreCheck << FlagShift);
722
723 // If this is true, we're dealing with a request
724 // to a non-memory address space.
725 if (seg == SEGMENT_REG_MS) {
726 return translateInt(req, tc);
727 }
728
729 delayedResponse = false;
730 Addr vaddr = req->getVaddr();
731 DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
732
733 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
734
735 // If protected mode has been enabled...
736 if (m5Reg.prot) {
737 DPRINTF(GPUTLB, "In protected mode.\n");
738 // If we're not in 64-bit mode, do protection/limit checks
739 if (m5Reg.mode != LongMode) {
740 DPRINTF(GPUTLB, "Not in long mode. Checking segment "
741 "protection.\n");
742
743 // Check for a null segment selector.
744 if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
745 seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
746 && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
747 return std::make_shared<GeneralProtection>(0);
748 }
749
750 bool expandDown = false;
751 SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
752
753 if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
754 if (!attr.writable && (mode == BaseTLB::Write ||
755 storeCheck))
756 return std::make_shared<GeneralProtection>(0);
757
758 if (!attr.readable && mode == BaseTLB::Read)
759 return std::make_shared<GeneralProtection>(0);
760
761 expandDown = attr.expandDown;
762
763 }
764
765 Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
766 Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
767 // This assumes we're not in 64 bit mode. If we were, the
768 // default address size is 64 bits, overridable to 32.
769 int size = 32;
770 bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
771 SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
772
773 if ((csAttr.defaultSize && sizeOverride) ||
774 (!csAttr.defaultSize && !sizeOverride)) {
775 size = 16;
776 }
777
778 Addr offset = bits(vaddr - base, size - 1, 0);
779 Addr endOffset = offset + req->getSize() - 1;
780
781 if (expandDown) {
782 DPRINTF(GPUTLB, "Checking an expand down segment.\n");
783 warn_once("Expand down segments are untested.\n");
784
785 if (offset <= limit || endOffset <= limit)
786 return std::make_shared<GeneralProtection>(0);
787 } else {
788 if (offset > limit || endOffset > limit)
789 return std::make_shared<GeneralProtection>(0);
790 }
791 }
792
793 // If paging is enabled, do the translation.
794 if (m5Reg.paging) {
795 DPRINTF(GPUTLB, "Paging enabled.\n");
796 // The vaddr already has the segment base applied.
797 TlbEntry *entry = lookup(vaddr);
798 localNumTLBAccesses++;
799
800 if (!entry) {
801 localNumTLBMisses++;
802 if (timing) {
803 latency = missLatency1;
804 }
805
806 if (FullSystem) {
807 fatal("GpuTLB doesn't support full-system mode\n");
808 } else {
809 DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
810 "at pc %#x.\n", vaddr, tc->instAddr());
811
812 Process *p = tc->getProcessPtr();
813 const EmulationPageTable::Entry *pte =
814 p->pTable->lookup(vaddr);
815
816 if (!pte && mode != BaseTLB::Execute) {
817 // penalize a "page fault" more
818 if (timing)
819 latency += missLatency2;
820
821 if (p->fixupStackFault(vaddr))
822 pte = p->pTable->lookup(vaddr);
823 }
824
825 if (!pte) {
826 return std::make_shared<PageFault>(vaddr, true,
827 mode, true,
828 false);
829 } else {
830 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
831
832 DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
833 alignedVaddr, pte->paddr);
834
835 TlbEntry gpuEntry(p->pid(), alignedVaddr,
836 pte->paddr, false, false);
837 entry = insert(alignedVaddr, gpuEntry);
838 }
839
840 DPRINTF(GPUTLB, "Miss was serviced.\n");
841 }
842 } else {
843 localNumTLBHits++;
844
845 if (timing) {
846 latency = hitLatency;
847 }
848 }
849
850 // Do paging protection checks.
851 bool inUser = (m5Reg.cpl == 3 &&
852 !(flags & (CPL0FlagBit << FlagShift)));
853
854 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
855 bool badWrite = (!entry->writable && (inUser || cr0.wp));
856
857 if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
858 badWrite)) {
859 // The page must have been present to get into the TLB in
860 // the first place. We'll assume the reserved bits are
861 // fine even though we're not checking them.
862 return std::make_shared<PageFault>(vaddr, true, mode,
863 inUser, false);
864 }
865
866 if (storeCheck && badWrite) {
867 // This would fault if this were a write, so return a page
868 // fault that reflects that happening.
869 return std::make_shared<PageFault>(vaddr, true,
870 BaseTLB::Write,
871 inUser, false);
872 }
873
874
875 DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
876 "checks.\n", entry->paddr);
877
878 int page_size = entry->size();
879 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
880 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
881 req->setPaddr(paddr);
882
883 if (entry->uncacheable)
884 req->setFlags(Request::UNCACHEABLE);
885 } else {
886 //Use the address which already has segmentation applied.
887 DPRINTF(GPUTLB, "Paging disabled.\n");
888 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
889 req->setPaddr(vaddr);
890 }
891 } else {
892 // Real mode
893 DPRINTF(GPUTLB, "In real mode.\n");
894 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
895 req->setPaddr(vaddr);
896 }
897
898 // Check for an access to the local APIC
899 if (FullSystem) {
900 LocalApicBase localApicBase =
901 tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
902
903 Addr baseAddr = localApicBase.base * PageBytes;
904 Addr paddr = req->getPaddr();
905
906 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
907 // Force the access to be uncacheable.
908 req->setFlags(Request::UNCACHEABLE);
909 req->setPaddr(x86LocalAPICAddress(tc->contextId(),
910 paddr - baseAddr));
911 }
912 }
913
914 return NoFault;
915 };
916
917 Fault
918 GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
919 Mode mode, int &latency)
920 {
921 bool delayedResponse;
922
923 return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
924 latency);
925 }
926
927 void
928 GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
929 Translation *translation, Mode mode, int &latency)
930 {
931 bool delayedResponse;
932 assert(translation);
933
934 Fault fault = GpuTLB::translate(req, tc, translation, mode,
935 delayedResponse, true, latency);
936
937 if (!delayedResponse)
938 translation->finish(fault, req, tc, mode);
939 }
940
941 Walker*
942 GpuTLB::getWalker()
943 {
944 return walker;
945 }
946
947
948 void
949 GpuTLB::serialize(CheckpointOut &cp) const
950 {
951 }
952
953 void
954 GpuTLB::unserialize(CheckpointIn &cp)
955 {
956 }
957
958 void
959 GpuTLB::regStats()
960 {
961 MemObject::regStats();
962
963 localNumTLBAccesses
964 .name(name() + ".local_TLB_accesses")
965 .desc("Number of TLB accesses")
966 ;
967
968 localNumTLBHits
969 .name(name() + ".local_TLB_hits")
970 .desc("Number of TLB hits")
971 ;
972
973 localNumTLBMisses
974 .name(name() + ".local_TLB_misses")
975 .desc("Number of TLB misses")
976 ;
977
978 localTLBMissRate
979 .name(name() + ".local_TLB_miss_rate")
980 .desc("TLB miss rate")
981 ;
982
983 accessCycles
984 .name(name() + ".access_cycles")
985 .desc("Cycles spent accessing this TLB level")
986 ;
987
988 pageTableCycles
989 .name(name() + ".page_table_cycles")
990 .desc("Cycles spent accessing the page table")
991 ;
992
993 localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
994
995 numUniquePages
996 .name(name() + ".unique_pages")
997 .desc("Number of unique pages touched")
998 ;
999
1000 localCycles
1001 .name(name() + ".local_cycles")
1002 .desc("Number of cycles spent in queue for all incoming reqs")
1003 ;
1004
1005 localLatency
1006 .name(name() + ".local_latency")
1007 .desc("Avg. latency over incoming coalesced reqs")
1008 ;
1009
1010 localLatency = localCycles / localNumTLBAccesses;
1011
1012 globalNumTLBAccesses
1013 .name(name() + ".global_TLB_accesses")
1014 .desc("Number of TLB accesses")
1015 ;
1016
1017 globalNumTLBHits
1018 .name(name() + ".global_TLB_hits")
1019 .desc("Number of TLB hits")
1020 ;
1021
1022 globalNumTLBMisses
1023 .name(name() + ".global_TLB_misses")
1024 .desc("Number of TLB misses")
1025 ;
1026
1027 globalTLBMissRate
1028 .name(name() + ".global_TLB_miss_rate")
1029 .desc("TLB miss rate")
1030 ;
1031
1032 globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1033
1034 avgReuseDistance
1035 .name(name() + ".avg_reuse_distance")
1036 .desc("avg. reuse distance over all pages (in ticks)")
1037 ;
1038
1039 }
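
Note that the miss-rate and latency assignments above do not perform a
division at registration time: assuming these members are declared as
Stats::Formula (with the counters as Stats::Scalar) in gpu_tlb.hh, the
assignment records the expression, which is evaluated whenever
statistics are dumped. A hedged sketch of the idiom:

    Stats::Scalar  misses, accesses;    // incremented during simulation
    Stats::Formula missRate;            // evaluated at stats-dump time

    missRate = 100 * misses / accesses; // stores the formula, not a value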
1040
1041 /**
1042 * Do the TLB lookup for this coalesced request and schedule
1043 * another event <TLB access latency> cycles later.
1044 */
1045
1046 void
1047 GpuTLB::issueTLBLookup(PacketPtr pkt)
1048 {
1049 assert(pkt);
1050 assert(pkt->senderState);
1051
1052 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1053 TheISA::PageBytes);
1054
1055 TranslationState *sender_state =
1056 safe_cast<TranslationState*>(pkt->senderState);
1057
1058 bool update_stats = !sender_state->prefetch;
1059 ThreadContext * tmp_tc = sender_state->tc;
1060
1061 DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1062 virt_page_addr);
1063
1064 int req_cnt = sender_state->reqCnt.back();
1065
1066 if (update_stats) {
1067 accessCycles -= (curTick() * req_cnt);
1068 localCycles -= curTick();
1069 updatePageFootprint(virt_page_addr);
1070 globalNumTLBAccesses += req_cnt;
1071 }
1072
1073 tlbOutcome lookup_outcome = TLB_MISS;
1074 const RequestPtr &tmp_req = pkt->req;
1075
1076 // Access the TLB and figure out if it's a hit or a miss.
1077 bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1078
1079 if (success) {
1080 lookup_outcome = TLB_HIT;
1081 // Put the entry in SenderState
1082 TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1083 assert(entry);
1084
1085 auto p = sender_state->tc->getProcessPtr();
1086 sender_state->tlbEntry =
1087 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1088 false, false);
1089
1090 if (update_stats) {
1091 // the reqCnt has an entry per level, so its size tells us
1092 // which level we are in
1093 sender_state->hitLevel = sender_state->reqCnt.size();
1094 globalNumTLBHits += req_cnt;
1095 }
1096 } else {
1097 if (update_stats)
1098 globalNumTLBMisses += req_cnt;
1099 }
1100
1101 /*
1102 * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1103 * as the TLB access latency.
1104 *
1105 * We create and schedule a new TLBEvent which will help us take the
1106 * appropriate actions (e.g., update TLB on a hit, send request to lower
1107 * level TLB on a miss, or start a page walk if this was the last-level
1108 * TLB)
1109 */
1110 TLBEvent *tlb_event =
1111 new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1112
1113 if (translationReturnEvent.count(virt_page_addr)) {
1114 panic("Virtual Page Address %#x already has a return event\n",
1115 virt_page_addr);
1116 }
1117
1118 translationReturnEvent[virt_page_addr] = tlb_event;
1119 assert(tlb_event);
1120
1121 DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1122 curTick() + this->ticks(hitLatency));
1123
1124 schedule(tlb_event, curTick() + this->ticks(hitLatency));
1125 }
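
The negative charges to accessCycles and localCycles above are paired
with positive charges of curTick() when the translation returns, so
each statistic accumulates only the elapsed ticks without keeping a
per-request timestamp. A minimal sketch of the pattern (names are
illustrative):

    Stats::Scalar cycles;

    void onIssue()    { cycles -= curTick(); }   // at lookup time
    void onComplete() { cycles += curTick(); }   // when the reply returns
    // Net contribution per request: completion tick - issue tick.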
1126
1127 GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1128 PacketPtr _pkt)
1129 : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1130 outcome(tlb_outcome), pkt(_pkt)
1131 {
1132 }
1133
1134 /**
1135 * Do paging protection checks. If we encounter a page fault,
1136 * the simulator panics.
1137 */
1138 void
1139 GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1140 TlbEntry * tlb_entry, Mode mode)
1141 {
1142 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1143 uint32_t flags = pkt->req->getFlags();
1144 bool storeCheck = flags & (StoreCheck << FlagShift);
1145
1146 // Do paging protection checks.
1147 bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1148 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1149
1150 bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1151
1152 if ((inUser && !tlb_entry->user) ||
1153 (mode == BaseTLB::Write && badWrite)) {
1154 // The page must have been present to get into the TLB in
1155 // the first place. We'll assume the reserved bits are
1156 // fine even though we're not checking them.
1157 panic("Page fault detected");
1158 }
1159
1160 if (storeCheck && badWrite) {
1161 // This would fault if this were a write, so return a page
1162 // fault that reflects that happening.
1163 panic("Page fault detected");
1164 }
1165 }
1166
1167 /**
1168 * handleTranslationReturn is called on a TLB hit, when a TLB miss
1169 * returns, or when a page fault returns. In the latter case it is
1170 * called with TLB_MISS as the tlbOutcome.
1171 */
1172 void
1173 GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1174 PacketPtr pkt)
1175 {
1176
1177 assert(pkt);
1178 Addr vaddr = pkt->req->getVaddr();
1179
1180 TranslationState *sender_state =
1181 safe_cast<TranslationState*>(pkt->senderState);
1182
1183 ThreadContext *tc = sender_state->tc;
1184 Mode mode = sender_state->tlbMode;
1185
1186 TlbEntry *local_entry, *new_entry;
1187
1188 if (tlb_outcome == TLB_HIT) {
1189 DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1190 local_entry = sender_state->tlbEntry;
1191 } else {
1192 DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1193 vaddr);
1194
1195 // We are returning either from a page walk or from a hit at a lower
1196 // TLB level. The senderState should be "carrying" a pointer to the
1197 // correct TLBEntry.
1198 new_entry = sender_state->tlbEntry;
1199 assert(new_entry);
1200 local_entry = new_entry;
1201
1202 if (allocationPolicy) {
1203 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1204 virt_page_addr);
1205
1206 local_entry = insert(virt_page_addr, *new_entry);
1207 }
1208
1209 assert(local_entry);
1210 }
1211
1212 /**
1213 * At this point the packet carries an up-to-date tlbEntry pointer
1214 * in its senderState.
1215 * Next step is to do the paging protection checks.
1216 */
1217 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1218 "while paddr was %#x.\n", local_entry->vaddr,
1219 local_entry->paddr);
1220
1221 pagingProtectionChecks(tc, pkt, local_entry, mode);
1222 int page_size = local_entry->size();
1223 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1224 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1225
1226 // Since this packet will be sent through the cpu side slave port,
1227 // it must be converted to a response pkt if it is not one already
1228 if (pkt->isRequest()) {
1229 pkt->makeTimingResponse();
1230 }
1231
1232 pkt->req->setPaddr(paddr);
1233
1234 if (local_entry->uncacheable) {
1235 pkt->req->setFlags(Request::UNCACHEABLE);
1236 }
1237
1238 //send packet back to coalescer
1239 cpuSidePort[0]->sendTimingResp(pkt);
1240 //schedule cleanup event
1241 cleanupQueue.push(virt_page_addr);
1242
1243 // Schedule this only once per cycle; the check is required
1244 // because we might have multiple translations returning in
1245 // the same cycle.
1246 // This is a maximum-priority event and must be on the same cycle
1247 // as the cleanup event in TLBCoalescer to avoid a race with
1248 // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry.
1249 if (!cleanupEvent.scheduled())
1250 schedule(cleanupEvent, curTick());
1251 }
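
The physical address is formed by OR-ing the page-aligned entry paddr
with the page offset of the virtual address. A worked example, assuming
a 4 KB page (values are illustrative):

    constexpr uint64_t page_size = 0x1000;            // 4 KB, assumed
    constexpr uint64_t vaddr     = 0x7f001234;
    constexpr uint64_t frame     = 0x200000;          // entry->paddr
    constexpr uint64_t paddr     = frame | (vaddr & (page_size - 1));
    static_assert(paddr == 0x200234, "offset bits pass through unchanged");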
1252
1253 /**
1254 * Here we take the appropriate actions based on the result of the
1255 * TLB lookup.
1256 */
1257 void
1258 GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1259 PacketPtr pkt)
1260 {
1261 DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1262
1263 assert(translationReturnEvent[virtPageAddr]);
1264 assert(pkt);
1265
1266 TranslationState *tmp_sender_state =
1267 safe_cast<TranslationState*>(pkt->senderState);
1268
1269 int req_cnt = tmp_sender_state->reqCnt.back();
1270 bool update_stats = !tmp_sender_state->prefetch;
1271
1272
1273 if (outcome == TLB_HIT) {
1274 handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1275
1276 if (update_stats) {
1277 accessCycles += (req_cnt * curTick());
1278 localCycles += curTick();
1279 }
1280
1281 } else if (outcome == TLB_MISS) {
1282
1283 DPRINTF(GPUTLB, "This is a TLB miss\n");
1284 if (update_stats) {
1285 accessCycles += (req_cnt*curTick());
1286 localCycles += curTick();
1287 }
1288
1289 if (hasMemSidePort) {
1290 // the one cycle added here represents the delay from when we
1291 // get the reply back until we propagate it to the coalescer
1292 // above.
1293 if (update_stats) {
1294 accessCycles += (req_cnt * 1);
1295 localCycles += 1;
1296 }
1297
1298 /**
1299 * There is a TLB below. Send the coalesced request.
1300 * We actually send the very first packet of all the
1301 * pending packets for this virtual page address.
1302 */
1303 if (!memSidePort[0]->sendTimingReq(pkt)) {
1304 DPRINTF(GPUTLB, "Failed sending translation request to "
1305 "lower level TLB for addr %#x\n", virtPageAddr);
1306
1307 memSidePort[0]->retries.push_back(pkt);
1308 } else {
1309 DPRINTF(GPUTLB, "Sent translation request to lower level "
1310 "TLB for addr %#x\n", virtPageAddr);
1311 }
1312 } else {
1313 //this is the last level TLB. Start a page walk
1314 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1315 "addr %#x\n", virtPageAddr);
1316
1317 if (update_stats)
1318 pageTableCycles -= (req_cnt*curTick());
1319
1320 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1321 assert(tlb_event);
1322 tlb_event->updateOutcome(PAGE_WALK);
1323 schedule(tlb_event, curTick() + ticks(missLatency2));
1324 }
1325 } else if (outcome == PAGE_WALK) {
1326 if (update_stats)
1327 pageTableCycles += (req_cnt*curTick());
1328
1329 // Need to access the page table and update the TLB
1330 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1331 virtPageAddr);
1332
1333 TranslationState *sender_state =
1334 safe_cast<TranslationState*>(pkt->senderState);
1335
1336 Process *p = sender_state->tc->getProcessPtr();
1337 Addr vaddr = pkt->req->getVaddr();
1338 #ifndef NDEBUG
1339 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1340 assert(alignedVaddr == virtPageAddr);
1341 #endif
1342 const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1343 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1344 p->fixupStackFault(vaddr)) {
1345 pte = p->pTable->lookup(vaddr);
1346 }
1347
1348 if (pte) {
1349 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1350 pte->paddr);
1351
1352 sender_state->tlbEntry =
1353 new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1354 false);
1355 } else {
1356 sender_state->tlbEntry = nullptr;
1357 }
1358
1359 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1360 } else if (outcome == MISS_RETURN) {
1361 /** we add an extra cycle in the return path of the translation
1362 * requests in between the various TLB levels.
1363 */
1364 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1365 } else {
1366 panic("Unexpected TLB outcome %d", outcome);
1367 }
1368 }
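
translationReturn() is effectively a four-state dispatcher on the
lookup outcome; a condensed sketch of the transitions it implements:

    switch (outcome) {
      case TLB_HIT:     /* respond to the coalescer */              break;
      case TLB_MISS:    /* forward down, or schedule PAGE_WALK */   break;
      case PAGE_WALK:   /* consult the page table, reply as miss */ break;
      case MISS_RETURN: /* lower level answered, reply as miss */   break;
      default:          panic("Unexpected TLB outcome %d", outcome);
    }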
1369
1370 void
1371 GpuTLB::TLBEvent::process()
1372 {
1373 tlb->translationReturn(virtPageAddr, outcome, pkt);
1374 }
1375
1376 const char*
1377 GpuTLB::TLBEvent::description() const
1378 {
1379 return "trigger translationDoneEvent";
1380 }
1381
1382 void
1383 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1384 {
1385 outcome = _outcome;
1386 }
1387
1388 Addr
1389 GpuTLB::TLBEvent::getTLBEventVaddr()
1390 {
1391 return virtPageAddr;
1392 }
1393
1394 /*
1395 * recvTiming receives a coalesced timing request from a TLBCoalescer
1396 * and calls issueTLBLookup().
1397 * It rejects the packet only if we have exceeded the maximum
1398 * number of outstanding requests for the TLB.
1399 */
1400 bool
1401 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1402 {
1403 if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1404 tlb->issueTLBLookup(pkt);
1405 // update number of outstanding translation requests
1406 tlb->outstandingReqs++;
1407 return true;
1408 } else {
1409 DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1410 tlb->outstandingReqs);
1411 return false;
1412 }
1413 }
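
The false return value implements backpressure: the sender must hold on
to the packet and resend it after the TLB calls sendRetryReq() from
cleanup(). A sketch of the sender-side contract (port and queue names
are illustrative):

    if (!tlbPort->sendTimingReq(pkt))
        pendingPkts.push_back(pkt);   // resent from recvReqRetry()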
1414
1415 /**
1416 * handleFuncTranslationReturn is called on a TLB hit,
1417 * when a TLB miss returns or when a page fault returns.
1418 * It updates LRU, inserts the TLB entry on a miss
1419 * depending on the allocation policy and does the required
1420 * protection checks. It does NOT create a new packet to
1421 * update the packet's addr; this is done in hsail-gpu code.
1422 */
1423 void
1424 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1425 {
1426 TranslationState *sender_state =
1427 safe_cast<TranslationState*>(pkt->senderState);
1428
1429 ThreadContext *tc = sender_state->tc;
1430 Mode mode = sender_state->tlbMode;
1431 Addr vaddr = pkt->req->getVaddr();
1432
1433 TlbEntry *local_entry, *new_entry;
1434
1435 if (tlb_outcome == TLB_HIT) {
1436 DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1437 "%#x\n", vaddr);
1438
1439 local_entry = sender_state->tlbEntry;
1440 } else {
1441 DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1442 "%#x\n", vaddr);
1443
1444 // We are returning either from a page walk or from a hit at a lower
1445 // TLB level. The senderState should be "carrying" a pointer to the
1446 // correct TLBEntry.
1447 new_entry = sender_state->tlbEntry;
1448 assert(new_entry);
1449 local_entry = new_entry;
1450
1451 if (allocationPolicy) {
1452 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1453
1454 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1455 virt_page_addr);
1456
1457 local_entry = insert(virt_page_addr, *new_entry);
1458 }
1459
1460 assert(local_entry);
1461 }
1462
1463 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1464 "while paddr was %#x.\n", local_entry->vaddr,
1465 local_entry->paddr);
1466
1467 /**
1468 * Do paging checks if it's a normal functional access. If it's for a
1469 * prefetch, then sometimes you can try to prefetch something that
1470 * won't pass protection. We don't actually want to fault because there
1471 * is no demand access to deem this a violation. Just put it in the
1472 * TLB and it will fault if indeed a future demand access touches it in
1473 * violation.
1474 *
1475 * This feature could be used to explore security issues around
1476 * speculative memory accesses.
1477 */
1478 if (!sender_state->prefetch && sender_state->tlbEntry)
1479 pagingProtectionChecks(tc, pkt, local_entry, mode);
1480
1481 int page_size = local_entry->size();
1482 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1483 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1484
1485 pkt->req->setPaddr(paddr);
1486
1487 if (local_entry->uncacheable)
1488 pkt->req->setFlags(Request::UNCACHEABLE);
1489 }
1490
1491 // This is used for atomic translations. Need to
1492 // make it all happen during the same cycle.
1493 void
1494 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1495 {
1496 TranslationState *sender_state =
1497 safe_cast<TranslationState*>(pkt->senderState);
1498
1499 ThreadContext *tc = sender_state->tc;
1500 bool update_stats = !sender_state->prefetch;
1501
1502 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1503 TheISA::PageBytes);
1504
1505 if (update_stats)
1506 tlb->updatePageFootprint(virt_page_addr);
1507
1508 // do the TLB lookup without updating the stats
1509 bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1510 tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1511
1512 // functional mode means no coalescing
1513 // global metrics are the same as the local metrics
1514 if (update_stats) {
1515 tlb->globalNumTLBAccesses++;
1516
1517 if (success) {
1518 sender_state->hitLevel = sender_state->reqCnt.size();
1519 tlb->globalNumTLBHits++;
1520 }
1521 }
1522
1523 if (!success) {
1524 if (update_stats)
1525 tlb->globalNumTLBMisses++;
1526 if (tlb->hasMemSidePort) {
1527 // there is a TLB below -> propagate down the TLB hierarchy
1528 tlb->memSidePort[0]->sendFunctional(pkt);
1529 // If no valid translation from a prefetch, then just return
1530 if (sender_state->prefetch && !pkt->req->hasPaddr())
1531 return;
1532 } else {
1533 // Need to access the page table and update the TLB
1534 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1535 virt_page_addr);
1536
1537 Process *p = tc->getProcessPtr();
1538
1539 Addr vaddr = pkt->req->getVaddr();
1540 #ifndef NDEBUG
1541 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1542 assert(alignedVaddr == virt_page_addr);
1543 #endif
1544
1545 const EmulationPageTable::Entry *pte =
1546 p->pTable->lookup(vaddr);
1547 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1548 p->fixupStackFault(vaddr)) {
1549 pte = p->pTable->lookup(vaddr);
1550 }
1551
1552 if (!sender_state->prefetch) {
1553 // no PageFaults are permitted after
1554 // the second page table lookup
1555 assert(pte);
1556
1557 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1558 pte->paddr);
1559
1560 sender_state->tlbEntry =
1561 new TlbEntry(p->pid(), virt_page_addr,
1562 pte->paddr, false, false);
1563 } else {
1564 // If this was a prefetch, then do the normal thing if it
1565 // was a successful translation. Otherwise, send an empty
1566 // TLB entry back so that it can be figured out as empty and
1567 // handled accordingly.
1568 if (pte) {
1569 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1570 pte->paddr);
1571
1572 sender_state->tlbEntry =
1573 new TlbEntry(p->pid(), virt_page_addr,
1574 pte->paddr, false, false);
1575 } else {
1576 DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1577 alignedVaddr);
1578
1579 sender_state->tlbEntry = nullptr;
1580
1581 return;
1582 }
1583 }
1584 }
1585 } else {
1586 DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1587 tlb->lookup(pkt->req->getVaddr()));
1588
1589 TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1590 update_stats);
1591
1592 assert(entry);
1593
1594 auto p = sender_state->tc->getProcessPtr();
1595 sender_state->tlbEntry =
1596 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1597 false, false);
1598 }
1599 // This is the function that would populate pkt->req with the paddr of
1600 // the translation. But if no translation happens (i.e., the prefetch
1601 // fails), then the early returns in the above code will keep this
1602 // function from executing.
1603 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1604 }
1605
1606 void
1607 GpuTLB::CpuSidePort::recvReqRetry()
1608 {
1609 // The CPUSidePort never sends anything but replies. No retries
1610 // expected.
1611 panic("recvReqRetry called");
1612 }
1613
1614 AddrRangeList
1615 GpuTLB::CpuSidePort::getAddrRanges() const
1616 {
1617 // currently not checked by the master
1618 AddrRangeList ranges;
1619
1620 return ranges;
1621 }
1622
1623 /**
1624 * MemSidePort receives the packet back.
1625 * We need to call the handleTranslationReturn
1626 * and propagate up the hierarchy.
1627 */
1628 bool
1629 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1630 {
1631 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1632 TheISA::PageBytes);
1633
1634 DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1635 virt_page_addr);
1636
1637 TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1638 assert(tlb_event);
1639 assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1640
1641 tlb_event->updateOutcome(MISS_RETURN);
1642 tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1643
1644 return true;
1645 }
1646
1647 void
1648 GpuTLB::MemSidePort::recvReqRetry()
1649 {
1650 // No retries should reach the TLB. The retries
1651 // should only reach the TLBCoalescer.
1652 panic("recvReqRetry called");
1653 }
1654
1655 void
1656 GpuTLB::cleanup()
1657 {
1658 while (!cleanupQueue.empty()) {
1659 Addr cleanup_addr = cleanupQueue.front();
1660 cleanupQueue.pop();
1661
1662 // delete TLBEvent
1663 TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1664 delete old_tlb_event;
1665 translationReturnEvent.erase(cleanup_addr);
1666
1667 // update number of outstanding requests
1668 outstandingReqs--;
1669 }
1670
1671 /** the higher level coalescer should retry if it has
1672 * any pending requests.
1673 */
1674 for (int i = 0; i < cpuSidePort.size(); ++i) {
1675 cpuSidePort[i]->sendRetryReq();
1676 }
1677 }
1678
1679 void
1680 GpuTLB::updatePageFootprint(Addr virt_page_addr)
1681 {
1682
1683 std::pair<AccessPatternTable::iterator, bool> ret;
1684
1685 AccessInfo tmp_access_info;
1686 tmp_access_info.lastTimeAccessed = 0;
1687 tmp_access_info.accessesPerPage = 0;
1688 tmp_access_info.totalReuseDistance = 0;
1689 tmp_access_info.sumDistance = 0;
1690 tmp_access_info.meanDistance = 0;
1691
1692 ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1693 tmp_access_info));
1694
1695 bool first_page_access = ret.second;
1696
1697 if (first_page_access) {
1698 numUniquePages++;
1699 } else {
1700 int accessed_before;
1701 accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1702 ret.first->second.totalReuseDistance += accessed_before;
1703 }
1704
1705 ret.first->second.accessesPerPage++;
1706 ret.first->second.lastTimeAccessed = curTick();
1707
1708 if (accessDistance) {
1709 ret.first->second.localTLBAccesses
1710 .push_back(localNumTLBAccesses.value());
1711 }
1712 }
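
updatePageFootprint() tracks per-page reuse in ticks. A worked example
of the bookkeeping above, with illustrative values:

    AccessInfo info;                      // as inserted on first access
    info.lastTimeAccessed   = 1000;       // first touch at tick 1000
    info.accessesPerPage    = 1;
    info.totalReuseDistance = 0;

    // Second touch at tick 1600:
    info.totalReuseDistance += 1600 - info.lastTimeAccessed;   // += 600
    info.accessesPerPage++;
    info.lastTimeAccessed = 1600;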
1713
1714 void
1715 GpuTLB::exitCallback()
1716 {
1717 std::ostream *page_stat_file = nullptr;
1718
1719 if (accessDistance) {
1720
1721 // print per page statistics to a separate file (.csv format)
1722 // simout is the gem5 output directory (default is m5out or the one
1723 // specified with -d)
1724 page_stat_file = simout.create(name().c_str())->stream();
1725
1726 // print header
1727 *page_stat_file << "page,max_access_distance,mean_access_distance,"
1728 << "stddev_distance" << std::endl;
1729 }
1730
1731 // update avg. reuse distance footprint
1732 AccessPatternTable::iterator iter, iter_begin, iter_end;
1733 unsigned int sum_avg_reuse_distance_per_page = 0;
1734
1735 // iterate through all pages seen by this TLB
1736 for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1737 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1738 iter->second.accessesPerPage;
1739
1740 if (accessDistance) {
1741 unsigned int tmp = iter->second.localTLBAccesses[0];
1742 unsigned int prev = tmp;
1743
1744 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1745 if (i) {
1746 tmp = prev + 1;
1747 }
1748
1749 prev = iter->second.localTLBAccesses[i];
1750 // update the localTLBAccesses value
1751 // with the actual difference
1752 iter->second.localTLBAccesses[i] -= tmp;
1753 // compute the sum of AccessDistance per page
1754 // used later for mean
1755 iter->second.sumDistance +=
1756 iter->second.localTLBAccesses[i];
1757 }
1758
1759 iter->second.meanDistance =
1760 iter->second.sumDistance / iter->second.accessesPerPage;
1761
1762 // compute std_dev and max (we need a second round because we
1763 // need to know the mean value)
1764 unsigned int max_distance = 0;
1765 unsigned int stddev_distance = 0;
1766
1767 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1768 unsigned int tmp_access_distance =
1769 iter->second.localTLBAccesses[i];
1770
1771 if (tmp_access_distance > max_distance) {
1772 max_distance = tmp_access_distance;
1773 }
1774
1775 unsigned int diff =
1776 tmp_access_distance - iter->second.meanDistance;
1777 stddev_distance += pow(diff, 2);
1778
1779 }
1780
1781 stddev_distance =
1782 sqrt(stddev_distance/iter->second.accessesPerPage);
1783
1784 if (page_stat_file) {
1785 *page_stat_file << std::hex << iter->first << ",";
1786 *page_stat_file << std::dec << max_distance << ",";
1787 *page_stat_file << std::dec << iter->second.meanDistance
1788 << ",";
1789 *page_stat_file << std::dec << stddev_distance;
1790 *page_stat_file << std::endl;
1791 }
1792
1793 // erase the localTLBAccesses array
1794 iter->second.localTLBAccesses.clear();
1795 }
1796 }
1797
1798 if (!TLBFootprint.empty()) {
1799 avgReuseDistance =
1800 sum_avg_reuse_distance_per_page / TLBFootprint.size();
1801 }
1802
1803 //clear the TLBFootprint map
1804 TLBFootprint.clear();
1805 }
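
The first loop above converts the recorded access counts into access
distances in place: element i becomes count[i] - count[i-1] - 1 (the
number of intervening local TLB accesses), with the first element
zeroed. A worked example, assuming a page touched when the local access
counter read {3, 5, 9}:

    // counts {3, 5, 9} -> distances {0, 1, 3}
    //   i = 0: 3 - 3       = 0
    //   i = 1: 5 - (3 + 1) = 1
    //   i = 2: 9 - (5 + 1) = 3
    // sumDistance = 4; with accessesPerPage = 3, the integer mean
    // distance is 4 / 3 = 1, matching the division in the code above.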
1806 } // namespace X86ISA
1807
1808X86ISA::GpuTLB*
1809X86GPUTLBParams::create()
1810{
1811 return new X86ISA::GpuTLB(this);
1812}
1813