// gpu_tlb.cc (revisions 11364:1bd9f1b27438 -> 11523:81332eb10367)
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/output.hh"
49#include "base/trace.hh"
50#include "cpu/base.hh"
51#include "cpu/thread_context.hh"
52#include "debug/GPUPrefetch.hh"
53#include "debug/GPUTLB.hh"
54#include "mem/packet_access.hh"
55#include "mem/page_table.hh"
56#include "mem/request.hh"
57#include "sim/process.hh"
58
59namespace X86ISA
60{
61
    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb = new GpuTlbEntry[size];
        std::memset(tlb, 0, sizeof(GpuTlbEntry) * size);

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set*assoc + way;
                freeList[set].push_back(&tlb[x]);
            }
        }

        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greater than 4KB (as defined in
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        setMask = numSets - 1;
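        // The mask only selects the set correctly when numSets is a power
        // of two (implied by size and assoc). For example, size = 64 and
        // assoc = 4 give numSets = 16 and setMask = 0xF, so bits [3:0] of
        // the virtual page number pick the set.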

#if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
#endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.)\n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());

        // delete the TLB
        delete[] tlb;
    }

    BaseSlavePort&
    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("GpuTLB::getSlavePort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else {
            panic("GpuTLB::getSlavePort: unknown port %s\n", if_name);
        }
    }

    BaseMasterPort&
    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("GpuTLB::getMasterPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("GpuTLB::getMasterPort: unknown port %s\n", if_name);
        }
    }

    GpuTlbEntry*
    GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
    {
        GpuTlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address
         * The least significant bits are simply masked
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

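        // Allocate from the free list if one exists; otherwise evict the
        // LRU victim, which is kept at the back of this set's entryList
        // (MRU entries are moved to the front on insert and lookup).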
        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }
        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    GpuTlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                GpuTlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

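        // Entries flagged global (mappings shared across address spaces,
        // e.g. kernel pages) are preserved; everything else is returned to
        // the free list.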
        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {
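        // Note: asn (the address-space number) is accepted for interface
        // compatibility but is not used below; entries are matched on the
        // virtual address alone.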
        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

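            // Map the architectural MSR number (the low bits of the
            // internal address) onto gem5's misc-register indices; e.g.,
            // an access to MSR 0xC0000082 (LSTAR) lands on MISCREG_LSTAR.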
            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(MiscReg));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

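            // Standard x86 PCI configuration mechanism #1: port 0xCF8 holds
            // the config address (bit 31 is the enable bit) and ports
            // 0xCFC-0xCFF form the config-data window. With the enable bit
            // set, the data access is routed to PCI configuration space;
            // otherwise it is treated as plain port I/O.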
            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }

    /**
     * tlbLookup only performs a TLB lookup, returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks about different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
#ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
#endif
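        // seg feeds only the assert below; the #ifndef NDEBUG guard avoids
        // unused-variable warnings when asserts are compiled out.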

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                //update LRU stack on a hit
                GpuTlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }

    Fault
    GpuTLB::translate(RequestPtr req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                      seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;
                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

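                    // Expand-down semantics: valid offsets lie strictly
                    // above the limit, so an offset at or below the limit
                    // is out of bounds.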
                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                GpuTlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

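                        // Syscall-emulation mode: service the miss from the
                        // process's page table rather than a hardware walk.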
                        Process *p = tc->getProcessPtr();
                        GpuTlbEntry newEntry;
                        bool success = p->pTable->lookup(vaddr, newEntry);

                        if (!success && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing) {
                                latency += missLatency2;
                            }

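                            // A miss near the stack may simply mean the
                            // stack needs to grow; let the process try to
                            // extend it before declaring a real fault.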
                            if (p->fixupStackFault(vaddr))
                                success = p->pTable->lookup(vaddr, newEntry);
                        }

                        if (!success) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            newEntry.valid = success;
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, newEntry.pageStart());

                            entry = insert(alignedVaddr, newEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                    badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }

                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                //Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }

    Fault
    GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
                            int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }

    void
    GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }

    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

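        // Formula stats: the expressions assigned below are evaluated at
        // stats-dump time from the scalar counters registered above.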
        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;
    }

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext *tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

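        // Latency accounting: subtract the start tick here; the matching
        // "+= curTick()" in translationReturn() makes each stat accumulate
        // (finish_tick - start_tick) for the request.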
        if (update_stats) {
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        RequestPtr tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            sender_state->tlbEntry =
                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr,
                               tlbOutcome tlb_outcome, PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do paging protection checks. Encountering a page fault here fires
     * an assertion, since faults are expected to have been handled
     * earlier in the translation flow.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   GpuTlbEntry *tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 &&
                       !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            assert(false);
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so return a page
            // fault that reflects that happening.
            assert(false);
        }
    }

    /**
     * handleTranslationReturn is called on a TLB hit, when a TLB miss
     * returns, or when a page fault returns. The latter cases invoke it
     * with TLB_MISS as the tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                    tlbOutcome tlb_outcome, PacketPtr pkt)
    {
        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        GpuTlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                    vaddr);
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a
            // pointer to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

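            // Allocate-on-miss: when the allocation policy is enabled,
            // install the returned translation in this level too;
            // otherwise just pass the entry through without caching it.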
1198 if (allocationPolicy) {
1199 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1200 virt_page_addr);
1201
1202 local_entry = insert(virt_page_addr, *new_entry);
1203 }
1204
1205 assert(local_entry);
1206 }
1207
1208 /**
1209 * At this point the packet carries an up-to-date tlbEntry pointer
1210 * in its senderState.
1211 * Next step is to do the paging protection checks.
1212 */
1213 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1214 "while paddr was %#x.\n", local_entry->vaddr,
1215 local_entry->paddr);
1216
1217 pagingProtectionChecks(tc, pkt, local_entry, mode);
1218 int page_size = local_entry->size();
1219 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1220 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1221
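        // Worked example (editorial, assuming 4 KB pages): page_size - 1 ==
        // 0xfff, so an entry with paddr 0x4000 translates vaddr 0x10a34 to
        // 0x4000 | (0x10a34 & 0xfff) == 0x4a34, i.e. the page frame base
        // OR'd with the in-page offset.
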
        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
            pkt->req->setFlags(Request::UNCACHEABLE);
        }

        // send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        // schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // Schedule this only once per cycle; the check is required because
        // multiple translations may return in the same cycle. This is a
        // maximum-priority event and must be on the same cycle as the
        // cleanup event in TLBCoalescer to avoid a race with IssueProbeEvent
        // caused by TLBCoalescer::MemSidePort::recvReqRetry.
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }

    /**
     * Here we take the appropriate actions based on the result of the
     * TLB lookup.
     */
    void
    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                              PacketPtr pkt)
    {
        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

        assert(translationReturnEvent[virtPageAddr]);
        assert(pkt);

        TranslationState *tmp_sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        int req_cnt = tmp_sender_state->reqCnt.back();
        bool update_stats = !tmp_sender_state->prefetch;

        if (outcome == TLB_HIT) {
            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

        } else if (outcome == TLB_MISS) {

            DPRINTF(GPUTLB, "This is a TLB miss\n");
            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

            if (hasMemSidePort) {
                // the one cycle added here represents the delay from when we
                // get the reply back until we propagate it to the coalescer
                // above.
                if (update_stats) {
                    accessCycles += (req_cnt * 1);
                    localCycles += 1;
                }

                /**
                 * There is a TLB below. Send the coalesced request.
                 * We actually send the very first packet of all the
                 * pending packets for this virtual page address.
                 */
                if (!memSidePort[0]->sendTimingReq(pkt)) {
                    DPRINTF(GPUTLB, "Failed sending translation request to "
                            "lower level TLB for addr %#x\n", virtPageAddr);

                    memSidePort[0]->retries.push_back(pkt);
                } else {
                    DPRINTF(GPUTLB, "Sent translation request to lower level "
                            "TLB for addr %#x\n", virtPageAddr);
                }
            } else {
                // this is the last level TLB. Start a page walk
                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                        "addr %#x\n", virtPageAddr);

                if (update_stats)
                    pageTableCycles -= (req_cnt * curTick());

                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                assert(tlb_event);
                tlb_event->updateOutcome(PAGE_WALK);
                schedule(tlb_event, curTick() + ticks(missLatency2));
            }
        } else if (outcome == PAGE_WALK) {
            if (update_stats)
                pageTableCycles += (req_cnt * curTick());

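            // Editorial note: together with the matching
            // "pageTableCycles -= (req_cnt * curTick())" issued when the
            // walk was scheduled above, the "+=" here implements a
            // start/stop idiom: the stat accumulates
            // req_cnt * (end_tick - start_tick), i.e. the ticks spent on
            // the page walk, weighted by the number of coalesced requests.
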
            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virtPageAddr);

            TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

            Process *p = sender_state->tc->getProcessPtr();
            TlbEntry newEntry;
            Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virtPageAddr);
#endif
            bool success = p->pTable->lookup(vaddr, newEntry);
            if (!success && sender_state->tlbMode != BaseTLB::Execute) {
                if (p->fixupStackFault(vaddr)) {
                    success = p->pTable->lookup(vaddr, newEntry);
                }
            }

            DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                    newEntry.pageStart());

            sender_state->tlbEntry =
                new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);

            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else if (outcome == MISS_RETURN) {
            /** We add an extra cycle in the return path of the translation
             * requests in between the various TLB levels.
             */
            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else {
            assert(false);
        }
    }

    void
    GpuTLB::TLBEvent::process()
    {
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }

    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }

    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }

    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }

    /*
     * recvTimingReq receives a coalesced timing request from a TLBCoalescer
     * and calls issueTLBLookup(). It only rejects the packet if we have
     * exceeded the maximum number of outstanding requests for the TLB.
     */
    bool
    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
    {
        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
            tlb->issueTLBLookup(pkt);
            // update number of outstanding translation requests
            tlb->outstandingReqs++;
            return true;
        } else {
            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                    tlb->outstandingReqs);
            return false;
        }
    }

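    /*
     * Editorial note: when recvTimingReq returns false, the TLBCoalescer is
     * expected to hold on to the packet and retry after this TLB calls
     * sendRetryReq() on its cpu-side ports from cleanup(), once
     * outstandingReqs has been decremented.
     */
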
    /**
     * handleFuncTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * It updates LRU, inserts the TLB entry on a miss
     * depending on the allocation policy and does the required
     * protection checks. It does NOT create a new packet to
     * update the packet's addr; this is done in hsail-gpu code.
     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        GpuTlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        // Do paging checks if it's a normal functional access. If it's for a
        // prefetch, then sometimes you can try to prefetch something that
        // won't pass protection. We don't actually want to fault because
        // there is no demand access to deem this a violation. Just put it in
        // the TLB and it will fault if indeed a future demand access touches
        // it in violation.
        if (!sender_state->prefetch && sender_state->tlbEntry->valid)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
            pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // Do the TLB lookup; stats are updated only for non-prefetch
        // (demand) accesses.
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;
            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);
                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();
                TlbEntry newEntry;

                Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);
#endif

                bool success = p->pTable->lookup(vaddr, newEntry);
                if (!success && sender_state->tlbMode != BaseTLB::Execute) {
                    if (p->fixupStackFault(vaddr))
                        success = p->pTable->lookup(vaddr, newEntry);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
                    assert(success);

                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            newEntry.pageStart());

                    sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr,
                                                             newEntry.paddr,
                                                             success);
                } else {
                    // If this was a prefetch, then do the normal thing if it
                    // was a successful translation. Otherwise, send back an
                    // empty TLB entry so the caller can recognize the failed
                    // prefetch and handle it accordingly.
                    if (success) {
                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                                newEntry.pageStart());

                        sender_state->tlbEntry = new GpuTlbEntry(0,
                                                                 newEntry.vaddr,
                                                                 newEntry.paddr,
                                                                 success);
                    } else {
                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                alignedVaddr);

                        sender_state->tlbEntry = new GpuTlbEntry();

                        return;
                    }
                }
            }
        } else {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    tlb->lookup(pkt->req->getVaddr()));

            GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                             update_stats);

            assert(entry);

            sender_state->tlbEntry =
                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
        }
        // This is the function that would populate pkt->req with the paddr
        // of the translation. But if no translation happens (i.e., the
        // prefetch fails) then the early returns in the code above will keep
        // this function from executing.
        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
    }

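    /*
     * Editorial summary: unlike the timing path, the functional path above
     * completes entirely within the call. It never allocates a TLBEvent,
     * walks the page table inline at the last level, and leaves the packet's
     * senderState carrying a freshly allocated GpuTlbEntry, which the caller
     * is presumably responsible for freeing.
     */
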
    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CPUSidePort never sends anything but replies. No retries
        // expected.
        assert(false);
    }

    AddrRangeList
    GpuTLB::CpuSidePort::getAddrRanges() const
    {
        // currently not checked by the master
        AddrRangeList ranges;

        return ranges;
    }

    /**
     * MemSidePort receives the packet back.
     * We need to call handleTranslationReturn
     * and propagate up the hierarchy.
     */
    bool
    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
    {
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                virt_page_addr);

        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
        assert(tlb_event);
        assert(virt_page_addr == tlb_event->getTLBEventVaddr());

        tlb_event->updateOutcome(MISS_RETURN);
        tlb->schedule(tlb_event, curTick() + tlb->ticks(1));

        return true;
    }

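    /*
     * Editorial note: the one-tick delay used above when rescheduling the
     * event as MISS_RETURN appears to be the same "extra cycle in the return
     * path" that translationReturn() accounts for with its
     * "accessCycles += (req_cnt * 1)" adjustment.
     */
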
    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        assert(false);
    }

    void
    GpuTLB::cleanup()
    {
        while (!cleanupQueue.empty()) {
            Addr cleanup_addr = cleanupQueue.front();
            cleanupQueue.pop();

            // delete TLBEvent
            TLBEvent *old_tlb_event = translationReturnEvent[cleanup_addr];
            delete old_tlb_event;
            translationReturnEvent.erase(cleanup_addr);

            // update number of outstanding requests
            outstandingReqs--;
        }

        /** The higher level coalescer should retry if it has
         * any pending requests.
         */
        for (int i = 0; i < cpuSidePort.size(); ++i) {
            cpuSidePort[i]->sendRetryReq();
        }
    }

    void
    GpuTLB::updatePageFootprint(Addr virt_page_addr)
    {
        std::pair<AccessPatternTable::iterator, bool> ret;

        AccessInfo tmp_access_info;
        tmp_access_info.lastTimeAccessed = 0;
        tmp_access_info.accessesPerPage = 0;
        tmp_access_info.totalReuseDistance = 0;
        tmp_access_info.sumDistance = 0;
        tmp_access_info.meanDistance = 0;

        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
                                  tmp_access_info));

        bool first_page_access = ret.second;

        if (first_page_access) {
            numUniquePages++;
        } else {
            int accessed_before =
                curTick() - ret.first->second.lastTimeAccessed;
            ret.first->second.totalReuseDistance += accessed_before;
        }

        ret.first->second.accessesPerPage++;
        ret.first->second.lastTimeAccessed = curTick();

        if (accessDistance) {
            ret.first->second.localTLBAccesses
                .push_back(localNumTLBAccesses.value());
        }
    }

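    /*
     * Worked example (editorial): if a page was last touched at tick 1000
     * and is touched again at tick 1500, accessed_before == 500 is added to
     * its totalReuseDistance; the page's average reuse distance is then
     * totalReuseDistance / accessesPerPage. With accessDistance enabled, the
     * running localNumTLBAccesses counter is also sampled on every access,
     * to be converted into inter-access distances in exitCallback().
     */
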
    void
    GpuTLB::exitCallback()
    {
        std::ostream *page_stat_file = nullptr;

        if (accessDistance) {
            // print per-page statistics to a separate file (.csv format);
            // simout is the gem5 output directory (default is m5out, or the
            // one specified with -d)
            page_stat_file = simout.create(name().c_str())->stream();

            // print header
            *page_stat_file << "page,max_access_distance,mean_access_distance,"
                            << "stddev_distance" << std::endl;
        }

        // update avg. reuse distance footprint
        AccessPatternTable::iterator iter;
        unsigned int sum_avg_reuse_distance_per_page = 0;

        // iterate through all pages seen by this TLB
        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
                iter->second.accessesPerPage;

            if (accessDistance) {
                unsigned int tmp = iter->second.localTLBAccesses[0];
                unsigned int prev = tmp;

                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
                    if (i) {
                        tmp = prev + 1;
                    }

                    prev = iter->second.localTLBAccesses[i];
                    // update the localTLBAccesses value
                    // with the actual difference
                    iter->second.localTLBAccesses[i] -= tmp;
                    // compute the sum of AccessDistance per page
                    // used later for mean
                    iter->second.sumDistance +=
                        iter->second.localTLBAccesses[i];
                }

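                // Worked trace (editorial): localTLBAccesses holds the global
                // access counter sampled at each touch of this page, e.g.
                // [3, 5, 9]. The loop above rewrites each element to the
                // number of *other* TLB accesses since the previous touch:
                //   i = 0: 3 - 3       -> 0
                //   i = 1: 5 - (3 + 1) -> 1
                //   i = 2: 9 - (5 + 1) -> 3
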
                iter->second.meanDistance =
                    iter->second.sumDistance / iter->second.accessesPerPage;

                // compute std_dev and max (we need a second pass because we
                // need to know the mean value)
                unsigned int max_distance = 0;
                unsigned int stddev_distance = 0;

                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
                    unsigned int tmp_access_distance =
                        iter->second.localTLBAccesses[i];

                    if (tmp_access_distance > max_distance) {
                        max_distance = tmp_access_distance;
                    }

                    // use a signed difference here; an unsigned one would
                    // wrap around whenever the distance is below the mean
                    int diff = (int)tmp_access_distance -
                        (int)iter->second.meanDistance;
                    stddev_distance += pow(diff, 2);
                }

                stddev_distance =
                    sqrt(stddev_distance / iter->second.accessesPerPage);

                if (page_stat_file) {
                    *page_stat_file << std::hex << iter->first << ",";
                    *page_stat_file << std::dec << max_distance << ",";
                    *page_stat_file << std::dec << iter->second.meanDistance
                                    << ",";
                    *page_stat_file << std::dec << stddev_distance;
                    *page_stat_file << std::endl;
                }

                // erase the localTLBAccesses array
                iter->second.localTLBAccesses.clear();
            }
        }

        if (!TLBFootprint.empty()) {
            avgReuseDistance =
                sum_avg_reuse_distance_per_page / TLBFootprint.size();
        }

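        // i.e. (editorial), avgReuseDistance = (1/P) * sum over the P pages
        // in TLBFootprint of (totalReuseDistance / accessesPerPage): a mean
        // of per-page mean reuse distances, expressed in ticks.
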
        // clear the TLBFootprint map
        TLBFootprint.clear();
    }
} // namespace X86ISA

X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    return new X86ISA::GpuTLB(this);
}