/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */

#include "gpu-compute/gpu_tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

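        // Pre-allocate every TLB entry up front; freeList and entryList
        // hold raw pointers into this backing vector, so it must never be
        // resized after this point.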
        tlb.assign(size, GpuTlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes a fixed page size
         * of 4KB.
         * If the page size (TheISA::PageBytes) is greater than 4KB, the
         * current implementation has various issues (e.g., the same 8KB
         * page would be replicated in different sets).
         */
        setMask = numSets - 1;
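        // Note: masking with (numSets - 1) selects the set correctly only
        // when numSets is a power of two.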

#if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
#endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.)\n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }

    BaseSlavePort&
    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("GpuTLB::getSlavePort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else {
            panic("GpuTLB::getSlavePort: unknown port %s\n", if_name);
        }
    }

    BaseMasterPort&
    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("GpuTLB::getMasterPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("GpuTLB::getMasterPort: unknown port %s\n", if_name);
        }
    }

    GpuTlbEntry*
    GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
    {
        GpuTlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address.
         * The least significant bits are simply masked.
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
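            // No free way left in this set: evict the least-recently-used
            // entry, which sits at the back of the LRU list.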
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
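                    // Move the matched entry to the front of the list so
                    // the head is always the most-recently-used entry.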
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    GpuTlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                GpuTlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {
        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(MiscReg));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }

    /**
     * tlbLookup only performs a TLB lookup, returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks about different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
#ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
#endif

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                //update LRU stack on a hit
                GpuTlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }

    Fault
    GpuTLB::translate(RequestPtr req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;
                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                GpuTlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }
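                    // missLatency1 covers the miss in this TLB level; if the
                    // first page-table lookup below also fails, missLatency2
                    // is added on top as an extra penalty.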

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            GpuTlbEntry gpuEntry(
                                p->pTable->pid(), alignedVaddr,
                                pte->paddr, true);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                    badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }

                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                //Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }

    Fault
    GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
                            int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }

    void
    GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }

    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;
    }

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
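        // Prefetch translations are speculative, so they must not perturb
        // the TLB statistics; stats are only updated for demand requests.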
        ThreadContext *tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
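            // Subtract the issue tick now; translationReturn() adds
            // curTick() back when the lookup completes, so these stats
            // accumulate elapsed ticks rather than absolute times.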
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        RequestPtr tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            sender_state->tlbEntry =
                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

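        // The TLB access latency (hitLatency) is charged up front by
        // scheduling the event that many ticks in the future; the actual
        // hit/miss handling happens in translationReturn() when it fires.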
        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr,
                               tlbOutcome tlb_outcome, PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do the paging protection checks. If a page fault is encountered,
     * the simulator asserts, since page faults are not expected on this
     * path.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   GpuTlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 &&
                       !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            assert(false);
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so return a page
            // fault that reflects that happening.
            assert(false);
        }
    }

    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns, or when a page fault returns.
     * The page-fault path invokes it with TLB_MISS as the tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                    tlbOutcome tlb_outcome, PacketPtr pkt)
    {
        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        GpuTlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                    vaddr);
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a
            // pointer to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        pagingProtectionChecks(tc, pkt, local_entry, mode);
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
            pkt->req->setFlags(Request::UNCACHEABLE);
        }

        //send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        //schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle
        // this is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }

    /**
     * Here we take the appropriate actions based on the result of the
     * TLB lookup.
     */
    void
    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                              PacketPtr pkt)
    {
        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

        assert(translationReturnEvent[virtPageAddr]);
        assert(pkt);

        TranslationState *tmp_sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        int req_cnt = tmp_sender_state->reqCnt.back();
        bool update_stats = !tmp_sender_state->prefetch;

        if (outcome == TLB_HIT) {
            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }
        } else if (outcome == TLB_MISS) {
            DPRINTF(GPUTLB, "This is a TLB miss\n");

            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

            if (hasMemSidePort) {
                // the one cycle added here represents the delay from when
                // we get the reply back till when we propagate it to the
                // coalescer above.
                if (update_stats) {
                    accessCycles += (req_cnt * 1);
                    localCycles += 1;
                }

                /**
                 * There is a TLB below. Send the coalesced request.
                 * We actually send the very first packet of all the
                 * pending packets for this virtual page address.
                 */
                if (!memSidePort[0]->sendTimingReq(pkt)) {
                    DPRINTF(GPUTLB, "Failed sending translation request to "
                            "lower level TLB for addr %#x\n", virtPageAddr);

                    memSidePort[0]->retries.push_back(pkt);
                } else {
                    DPRINTF(GPUTLB, "Sent translation request to lower level "
                            "TLB for addr %#x\n", virtPageAddr);
                }
            } else {
                //this is the last level TLB. Start a page walk
                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                        "addr %#x\n", virtPageAddr);

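                // Mark the start of the walk: subtract the current tick
                // now; the matching addition below, when the rescheduled
                // event fires with a PAGE_WALK outcome, yields the walk's
                // elapsed ticks.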
                if (update_stats)
                    pageTableCycles -= (req_cnt * curTick());

                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                assert(tlb_event);
                tlb_event->updateOutcome(PAGE_WALK);
                schedule(tlb_event, curTick() + ticks(missLatency2));
            }
        } else if (outcome == PAGE_WALK) {
            if (update_stats)
                pageTableCycles += (req_cnt * curTick());

            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virtPageAddr);

            TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

            Process *p = sender_state->tc->getProcessPtr();
            Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virtPageAddr);
#endif
            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                p->fixupStackFault(vaddr)) {
                pte = p->pTable->lookup(vaddr);
            }

            if (pte) {
                // Use virtPageAddr here (asserted equal to alignedVaddr
                // above), since alignedVaddr only exists in debug builds.
                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", virtPageAddr,
                        pte->paddr);

                sender_state->tlbEntry =
                    new GpuTlbEntry(0, virtPageAddr, pte->paddr, true);
            } else {
                sender_state->tlbEntry =
                    new GpuTlbEntry(0, 0, 0, false);
            }

            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else if (outcome == MISS_RETURN) {
            /** we add an extra cycle in the return path of the translation
             * requests in between the various TLB levels.
             */
            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else {
            assert(false);
        }
    }

    void
    GpuTLB::TLBEvent::process()
    {
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }

    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }

    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }

    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }

    /*
     * recvTiming receives a coalesced timing request from a TLBCoalescer
     * and calls issueTLBLookup().
     * It only rejects the packet if we have exceeded the max
     * outstanding number of requests for the TLB.
     */
    bool
    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
    {
        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
            tlb->issueTLBLookup(pkt);
            // update number of outstanding translation requests
            tlb->outstandingReqs++;
            return true;
        } else {
            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                    tlb->outstandingReqs);
            return false;
        }
    }

    /**
     * handleFuncTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * It updates LRU, inserts the TLB entry on a miss
     * depending on the allocation policy and does the required
     * protection checks. It does NOT create a new packet to
     * update the packet's addr; this is done in hsail-gpu code.
     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        GpuTlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a
            // pointer to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        // Do paging checks if it's a normal functional access. If it's for
        // a prefetch, then sometimes you can try to prefetch something that
        // won't pass protection. We don't actually want to fault because
        // there is no demand access to deem this a violation. Just put it
        // in the TLB and it will fault if indeed a future demand access
        // touches it in violation.
        if (!sender_state->prefetch && sender_state->tlbEntry->valid)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
            pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // do the TLB lookup; stats are updated only for demand
        // (non-prefetch) accesses
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;
            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);
                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();

                Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);
#endif

                const EmulationPageTable::Entry *pte =
                    p->pTable->lookup(vaddr);
                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                    p->fixupStackFault(vaddr)) {
                    pte = p->pTable->lookup(vaddr);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/output.hh"
49#include "base/trace.hh"
50#include "cpu/base.hh"
51#include "cpu/thread_context.hh"
52#include "debug/GPUPrefetch.hh"
53#include "debug/GPUTLB.hh"
54#include "mem/packet_access.hh"
55#include "mem/page_table.hh"
56#include "mem/request.hh"
57#include "sim/process.hh"
58
59namespace X86ISA
60{
61
62 GpuTLB::GpuTLB(const Params *p)
63 : MemObject(p), configAddress(0), size(p->size),
64 cleanupEvent([this]{ cleanup(); }, name(), false,
65 Event::Maximum_Pri),
66 exitEvent([this]{ exitCallback(); }, name())
67 {
68 assoc = p->assoc;
69 assert(assoc <= size);
70 numSets = size/assoc;
71 allocationPolicy = p->allocationPolicy;
72 hasMemSidePort = false;
73 accessDistance = p->accessDistance;
74 clock = p->clk_domain->clockPeriod();
75
76 tlb.assign(size, GpuTlbEntry());
77
78 freeList.resize(numSets);
79 entryList.resize(numSets);
80
81 for (int set = 0; set < numSets; ++set) {
82 for (int way = 0; way < assoc; ++way) {
83 int x = set * assoc + way;
84 freeList[set].push_back(&tlb.at(x));
85 }
86 }
87
88 FA = (size == assoc);
89
90 /**
91 * @warning: the set-associative version assumes you have a
92 * fixed page size of 4KB.
93 * If the page size is greather than 4KB (as defined in the
94 * TheISA::PageBytes), then there are various issues w/ the current
95 * implementation (you'd have the same 8KB page being replicated in
96 * different sets etc)
97 */
98 setMask = numSets - 1;
99
100 #if 0
101 // GpuTLB doesn't yet support full system
102 walker = p->walker;
103 walker->setTLB(this);
104 #endif
105
106 maxCoalescedReqs = p->maxOutstandingReqs;
107
108 // Do not allow maxCoalescedReqs to be more than the TLB associativity
109 if (maxCoalescedReqs > assoc) {
110 maxCoalescedReqs = assoc;
111 cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
112 }
113
114 outstandingReqs = 0;
115 hitLatency = p->hitLatency;
116 missLatency1 = p->missLatency1;
117 missLatency2 = p->missLatency2;
118
119 // create the slave ports based on the number of connected ports
120 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
121 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
122 name(), i), this, i));
123 }
124
125 // create the master ports based on the number of connected ports
126 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
127 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
128 name(), i), this, i));
129 }
130 }
131
132 // fixme: this is never called?
133 GpuTLB::~GpuTLB()
134 {
135 // make sure all the hash-maps are empty
136 assert(translationReturnEvent.empty());
137 }
138
139 BaseSlavePort&
140 GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
141 {
142 if (if_name == "slave") {
143 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
144 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
145 }
146
147 return *cpuSidePort[idx];
148 } else {
149 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
150 }
151 }
152
153 BaseMasterPort&
154 GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
155 {
156 if (if_name == "master") {
157 if (idx >= static_cast<PortID>(memSidePort.size())) {
158 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
159 }
160
161 hasMemSidePort = true;
162
163 return *memSidePort[idx];
164 } else {
165 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
166 }
167 }
168
169 GpuTlbEntry*
170 GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
171 {
172 GpuTlbEntry *newEntry = nullptr;
173
174 /**
175 * vpn holds the virtual page address
176 * The least significant bits are simply masked
177 */
178 int set = (vpn >> TheISA::PageShift) & setMask;
179
180 if (!freeList[set].empty()) {
181 newEntry = freeList[set].front();
182 freeList[set].pop_front();
183 } else {
184 newEntry = entryList[set].back();
185 entryList[set].pop_back();
186 }
187
188 *newEntry = entry;
189 newEntry->vaddr = vpn;
190 entryList[set].push_front(newEntry);
191
192 return newEntry;
193 }
194
195 GpuTLB::EntryList::iterator
196 GpuTLB::lookupIt(Addr va, bool update_lru)
197 {
198 int set = (va >> TheISA::PageShift) & setMask;
199
200 if (FA) {
201 assert(!set);
202 }
203
204 auto entry = entryList[set].begin();
205 for (; entry != entryList[set].end(); ++entry) {
206 int page_size = (*entry)->size();
207
208 if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
209 DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
210 "with size %#x.\n", va, (*entry)->vaddr, page_size);
211
212 if (update_lru) {
213 entryList[set].push_front(*entry);
214 entryList[set].erase(entry);
215 entry = entryList[set].begin();
216 }
217
218 break;
219 }
220 }
221
222 return entry;
223 }
224
225 GpuTlbEntry*
226 GpuTLB::lookup(Addr va, bool update_lru)
227 {
228 int set = (va >> TheISA::PageShift) & setMask;
229
230 auto entry = lookupIt(va, update_lru);
231
232 if (entry == entryList[set].end())
233 return nullptr;
234 else
235 return *entry;
236 }
237
238 void
239 GpuTLB::invalidateAll()
240 {
241 DPRINTF(GPUTLB, "Invalidating all entries.\n");
242
243 for (int i = 0; i < numSets; ++i) {
244 while (!entryList[i].empty()) {
245 GpuTlbEntry *entry = entryList[i].front();
246 entryList[i].pop_front();
247 freeList[i].push_back(entry);
248 }
249 }
250 }
251
252 void
253 GpuTLB::setConfigAddress(uint32_t addr)
254 {
255 configAddress = addr;
256 }
257
258 void
259 GpuTLB::invalidateNonGlobal()
260 {
261 DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
262
263 for (int i = 0; i < numSets; ++i) {
264 for (auto entryIt = entryList[i].begin();
265 entryIt != entryList[i].end();) {
266 if (!(*entryIt)->global) {
267 freeList[i].push_back(*entryIt);
268 entryList[i].erase(entryIt++);
269 } else {
270 ++entryIt;
271 }
272 }
273 }
274 }
275
276 void
277 GpuTLB::demapPage(Addr va, uint64_t asn)
278 {
279
280 int set = (va >> TheISA::PageShift) & setMask;
281 auto entry = lookupIt(va, false);
282
283 if (entry != entryList[set].end()) {
284 freeList[set].push_back(*entry);
285 entryList[set].erase(entry);
286 }
287 }
288
289 Fault
290 GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
291 {
292 DPRINTF(GPUTLB, "Addresses references internal memory.\n");
293 Addr vaddr = req->getVaddr();
294 Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
295
296 if (prefix == IntAddrPrefixCPUID) {
297 panic("CPUID memory space not yet implemented!\n");
298 } else if (prefix == IntAddrPrefixMSR) {
299 vaddr = vaddr >> 3;
300 req->setFlags(Request::MMAPPED_IPR);
301 Addr regNum = 0;
302
303 switch (vaddr & ~IntAddrPrefixMask) {
304 case 0x10:
305 regNum = MISCREG_TSC;
306 break;
307 case 0x1B:
308 regNum = MISCREG_APIC_BASE;
309 break;
310 case 0xFE:
311 regNum = MISCREG_MTRRCAP;
312 break;
313 case 0x174:
314 regNum = MISCREG_SYSENTER_CS;
315 break;
316 case 0x175:
317 regNum = MISCREG_SYSENTER_ESP;
318 break;
319 case 0x176:
320 regNum = MISCREG_SYSENTER_EIP;
321 break;
322 case 0x179:
323 regNum = MISCREG_MCG_CAP;
324 break;
325 case 0x17A:
326 regNum = MISCREG_MCG_STATUS;
327 break;
328 case 0x17B:
329 regNum = MISCREG_MCG_CTL;
330 break;
331 case 0x1D9:
332 regNum = MISCREG_DEBUG_CTL_MSR;
333 break;
334 case 0x1DB:
335 regNum = MISCREG_LAST_BRANCH_FROM_IP;
336 break;
337 case 0x1DC:
338 regNum = MISCREG_LAST_BRANCH_TO_IP;
339 break;
340 case 0x1DD:
341 regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
342 break;
343 case 0x1DE:
344 regNum = MISCREG_LAST_EXCEPTION_TO_IP;
345 break;
346 case 0x200:
347 regNum = MISCREG_MTRR_PHYS_BASE_0;
348 break;
349 case 0x201:
350 regNum = MISCREG_MTRR_PHYS_MASK_0;
351 break;
352 case 0x202:
353 regNum = MISCREG_MTRR_PHYS_BASE_1;
354 break;
355 case 0x203:
356 regNum = MISCREG_MTRR_PHYS_MASK_1;
357 break;
358 case 0x204:
359 regNum = MISCREG_MTRR_PHYS_BASE_2;
360 break;
361 case 0x205:
362 regNum = MISCREG_MTRR_PHYS_MASK_2;
363 break;
364 case 0x206:
365 regNum = MISCREG_MTRR_PHYS_BASE_3;
366 break;
367 case 0x207:
368 regNum = MISCREG_MTRR_PHYS_MASK_3;
369 break;
370 case 0x208:
371 regNum = MISCREG_MTRR_PHYS_BASE_4;
372 break;
373 case 0x209:
374 regNum = MISCREG_MTRR_PHYS_MASK_4;
375 break;
376 case 0x20A:
377 regNum = MISCREG_MTRR_PHYS_BASE_5;
378 break;
379 case 0x20B:
380 regNum = MISCREG_MTRR_PHYS_MASK_5;
381 break;
382 case 0x20C:
383 regNum = MISCREG_MTRR_PHYS_BASE_6;
384 break;
385 case 0x20D:
386 regNum = MISCREG_MTRR_PHYS_MASK_6;
387 break;
388 case 0x20E:
389 regNum = MISCREG_MTRR_PHYS_BASE_7;
390 break;
391 case 0x20F:
392 regNum = MISCREG_MTRR_PHYS_MASK_7;
393 break;
394 case 0x250:
395 regNum = MISCREG_MTRR_FIX_64K_00000;
396 break;
397 case 0x258:
398 regNum = MISCREG_MTRR_FIX_16K_80000;
399 break;
400 case 0x259:
401 regNum = MISCREG_MTRR_FIX_16K_A0000;
402 break;
403 case 0x268:
404 regNum = MISCREG_MTRR_FIX_4K_C0000;
405 break;
406 case 0x269:
407 regNum = MISCREG_MTRR_FIX_4K_C8000;
408 break;
409 case 0x26A:
410 regNum = MISCREG_MTRR_FIX_4K_D0000;
411 break;
412 case 0x26B:
413 regNum = MISCREG_MTRR_FIX_4K_D8000;
414 break;
415 case 0x26C:
416 regNum = MISCREG_MTRR_FIX_4K_E0000;
417 break;
418 case 0x26D:
419 regNum = MISCREG_MTRR_FIX_4K_E8000;
420 break;
421 case 0x26E:
422 regNum = MISCREG_MTRR_FIX_4K_F0000;
423 break;
424 case 0x26F:
425 regNum = MISCREG_MTRR_FIX_4K_F8000;
426 break;
427 case 0x277:
428 regNum = MISCREG_PAT;
429 break;
430 case 0x2FF:
431 regNum = MISCREG_DEF_TYPE;
432 break;
433 case 0x400:
434 regNum = MISCREG_MC0_CTL;
435 break;
436 case 0x404:
437 regNum = MISCREG_MC1_CTL;
438 break;
439 case 0x408:
440 regNum = MISCREG_MC2_CTL;
441 break;
442 case 0x40C:
443 regNum = MISCREG_MC3_CTL;
444 break;
445 case 0x410:
446 regNum = MISCREG_MC4_CTL;
447 break;
448 case 0x414:
449 regNum = MISCREG_MC5_CTL;
450 break;
451 case 0x418:
452 regNum = MISCREG_MC6_CTL;
453 break;
454 case 0x41C:
455 regNum = MISCREG_MC7_CTL;
456 break;
457 case 0x401:
458 regNum = MISCREG_MC0_STATUS;
459 break;
460 case 0x405:
461 regNum = MISCREG_MC1_STATUS;
462 break;
463 case 0x409:
464 regNum = MISCREG_MC2_STATUS;
465 break;
466 case 0x40D:
467 regNum = MISCREG_MC3_STATUS;
468 break;
469 case 0x411:
470 regNum = MISCREG_MC4_STATUS;
471 break;
472 case 0x415:
473 regNum = MISCREG_MC5_STATUS;
474 break;
475 case 0x419:
476 regNum = MISCREG_MC6_STATUS;
477 break;
478 case 0x41D:
479 regNum = MISCREG_MC7_STATUS;
480 break;
481 case 0x402:
482 regNum = MISCREG_MC0_ADDR;
483 break;
484 case 0x406:
485 regNum = MISCREG_MC1_ADDR;
486 break;
487 case 0x40A:
488 regNum = MISCREG_MC2_ADDR;
489 break;
490 case 0x40E:
491 regNum = MISCREG_MC3_ADDR;
492 break;
493 case 0x412:
494 regNum = MISCREG_MC4_ADDR;
495 break;
496 case 0x416:
497 regNum = MISCREG_MC5_ADDR;
498 break;
499 case 0x41A:
500 regNum = MISCREG_MC6_ADDR;
501 break;
502 case 0x41E:
503 regNum = MISCREG_MC7_ADDR;
504 break;
505 case 0x403:
506 regNum = MISCREG_MC0_MISC;
507 break;
508 case 0x407:
509 regNum = MISCREG_MC1_MISC;
510 break;
511 case 0x40B:
512 regNum = MISCREG_MC2_MISC;
513 break;
514 case 0x40F:
515 regNum = MISCREG_MC3_MISC;
516 break;
517 case 0x413:
518 regNum = MISCREG_MC4_MISC;
519 break;
520 case 0x417:
521 regNum = MISCREG_MC5_MISC;
522 break;
523 case 0x41B:
524 regNum = MISCREG_MC6_MISC;
525 break;
526 case 0x41F:
527 regNum = MISCREG_MC7_MISC;
528 break;
529 case 0xC0000080:
530 regNum = MISCREG_EFER;
531 break;
532 case 0xC0000081:
533 regNum = MISCREG_STAR;
534 break;
535 case 0xC0000082:
536 regNum = MISCREG_LSTAR;
537 break;
538 case 0xC0000083:
539 regNum = MISCREG_CSTAR;
540 break;
541 case 0xC0000084:
542 regNum = MISCREG_SF_MASK;
543 break;
544 case 0xC0000100:
545 regNum = MISCREG_FS_BASE;
546 break;
547 case 0xC0000101:
548 regNum = MISCREG_GS_BASE;
549 break;
550 case 0xC0000102:
551 regNum = MISCREG_KERNEL_GS_BASE;
552 break;
553 case 0xC0000103:
554 regNum = MISCREG_TSC_AUX;
555 break;
556 case 0xC0010000:
557 regNum = MISCREG_PERF_EVT_SEL0;
558 break;
559 case 0xC0010001:
560 regNum = MISCREG_PERF_EVT_SEL1;
561 break;
562 case 0xC0010002:
563 regNum = MISCREG_PERF_EVT_SEL2;
564 break;
565 case 0xC0010003:
566 regNum = MISCREG_PERF_EVT_SEL3;
567 break;
568 case 0xC0010004:
569 regNum = MISCREG_PERF_EVT_CTR0;
570 break;
571 case 0xC0010005:
572 regNum = MISCREG_PERF_EVT_CTR1;
573 break;
574 case 0xC0010006:
575 regNum = MISCREG_PERF_EVT_CTR2;
576 break;
577 case 0xC0010007:
578 regNum = MISCREG_PERF_EVT_CTR3;
579 break;
580 case 0xC0010010:
581 regNum = MISCREG_SYSCFG;
582 break;
583 case 0xC0010016:
584 regNum = MISCREG_IORR_BASE0;
585 break;
586 case 0xC0010017:
587 regNum = MISCREG_IORR_BASE1;
588 break;
589 case 0xC0010018:
590 regNum = MISCREG_IORR_MASK0;
591 break;
592 case 0xC0010019:
593 regNum = MISCREG_IORR_MASK1;
594 break;
595 case 0xC001001A:
596 regNum = MISCREG_TOP_MEM;
597 break;
598 case 0xC001001D:
599 regNum = MISCREG_TOP_MEM2;
600 break;
601 case 0xC0010114:
602 regNum = MISCREG_VM_CR;
603 break;
604 case 0xC0010115:
605 regNum = MISCREG_IGNNE;
606 break;
607 case 0xC0010116:
608 regNum = MISCREG_SMM_CTL;
609 break;
610 case 0xC0010117:
611 regNum = MISCREG_VM_HSAVE_PA;
612 break;
613 default:
614 return std::make_shared<GeneralProtection>(0);
615 }
616 //The index is multiplied by the size of a MiscReg so that
617 //any memory dependence calculations will not see these as
618 //overlapping.
619 req->setPaddr(regNum * sizeof(MiscReg));
620 return NoFault;
621 } else if (prefix == IntAddrPrefixIO) {
622 // TODO If CPL > IOPL or in virtual mode, check the I/O permission
623 // bitmap in the TSS.
624
625 Addr IOPort = vaddr & ~IntAddrPrefixMask;
626 // Make sure the address fits in the expected 16 bit IO address
627 // space.
628 assert(!(IOPort & ~0xFFFF));
629
630 if (IOPort == 0xCF8 && req->getSize() == 4) {
631 req->setFlags(Request::MMAPPED_IPR);
632 req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
633 } else if ((IOPort & ~mask(2)) == 0xCFC) {
634 req->setFlags(Request::UNCACHEABLE);
635
636 Addr configAddress =
637 tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
638
639 if (bits(configAddress, 31, 31)) {
640 req->setPaddr(PhysAddrPrefixPciConfig |
641 mbits(configAddress, 30, 2) |
642 (IOPort & mask(2)));
643 } else {
644 req->setPaddr(PhysAddrPrefixIO | IOPort);
645 }
646 } else {
647 req->setFlags(Request::UNCACHEABLE);
648 req->setPaddr(PhysAddrPrefixIO | IOPort);
649 }
650 return NoFault;
651 } else {
652 panic("Access to unrecognized internal address space %#x.\n",
653 prefix);
654 }
655 }
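 /**
  * For illustration: the mapping above gives each MSR a distinct,
  * non-overlapping fake physical address. E.g., an access to MSR
  * 0xC0000080 (EFER) ends up with
  *     req->setPaddr(MISCREG_EFER * sizeof(MiscReg));
  * Similarly, a 4-byte access to IO port 0xCF8 targets the PCI config
  * address register, while ports 0xCFC-0xCFF are routed either to PCI
  * config space (if bit 31 of the config address is set) or to plain
  * port IO.
  */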
656
657     /**
658      * tlbLookup only performs a TLB lookup, returning true on a TLB hit
659      * and false on a TLB miss.
660      * Many of the checks for the different operating modes have been
661      * converted to assertions, since those code paths are not really used.
662      * On a hit it also updates the LRU stack.
663      */
664 bool
665 GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
666 {
667 bool tlb_hit = false;
668 #ifndef NDEBUG
669 uint32_t flags = req->getFlags();
670 int seg = flags & SegmentFlagMask;
671 #endif
672
673 assert(seg != SEGMENT_REG_MS);
674 Addr vaddr = req->getVaddr();
675 DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
676 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
677
678 if (m5Reg.prot) {
679 DPRINTF(GPUTLB, "In protected mode.\n");
680 // make sure we are in 64-bit mode
681 assert(m5Reg.mode == LongMode);
682
683 // If paging is enabled, do the translation.
684 if (m5Reg.paging) {
685 DPRINTF(GPUTLB, "Paging enabled.\n");
686 //update LRU stack on a hit
687 GpuTlbEntry *entry = lookup(vaddr, true);
688
689 if (entry)
690 tlb_hit = true;
691
692 if (!update_stats) {
693 // functional tlb access for memory initialization
694 // i.e., memory seeding or instr. seeding -> don't update
695 // TLB and stats
696 return tlb_hit;
697 }
698
699 localNumTLBAccesses++;
700
701 if (!entry) {
702 localNumTLBMisses++;
703 } else {
704 localNumTLBHits++;
705 }
706 }
707 }
708
709 return tlb_hit;
710 }
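 /**
  * Usage sketch (illustrative): callers that must not perturb the
  * counters, e.g. prefetch probes, pass update_stats == false:
  *     bool hit = tlbLookup(req, tc, false); // probe only
  * Otherwise localNumTLBAccesses and the hit/miss counters are bumped.
  */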
711
712 Fault
713 GpuTLB::translate(RequestPtr req, ThreadContext *tc,
714 Translation *translation, Mode mode,
715 bool &delayedResponse, bool timing, int &latency)
716 {
717 uint32_t flags = req->getFlags();
718 int seg = flags & SegmentFlagMask;
719 bool storeCheck = flags & (StoreCheck << FlagShift);
720
721 // If this is true, we're dealing with a request
722 // to a non-memory address space.
723 if (seg == SEGMENT_REG_MS) {
724 return translateInt(req, tc);
725 }
726
727 delayedResponse = false;
728 Addr vaddr = req->getVaddr();
729 DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
730
731 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
732
733 // If protected mode has been enabled...
734 if (m5Reg.prot) {
735 DPRINTF(GPUTLB, "In protected mode.\n");
736 // If we're not in 64-bit mode, do protection/limit checks
737 if (m5Reg.mode != LongMode) {
738 DPRINTF(GPUTLB, "Not in long mode. Checking segment "
739 "protection.\n");
740
741 // Check for a null segment selector.
742 if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
743 seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
744 && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
745 return std::make_shared<GeneralProtection>(0);
746 }
747
748 bool expandDown = false;
749 SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
750
751 if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
752 if (!attr.writable && (mode == BaseTLB::Write ||
753 storeCheck))
754 return std::make_shared<GeneralProtection>(0);
755
756 if (!attr.readable && mode == BaseTLB::Read)
757 return std::make_shared<GeneralProtection>(0);
758
759 expandDown = attr.expandDown;
760
761 }
762
763 Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
764 Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
765                // This assumes we're not in 64-bit mode. If we were, the
766                // default address size would be 64 bits, overridable to 32.
767 int size = 32;
768 bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
769 SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
770
771 if ((csAttr.defaultSize && sizeOverride) ||
772 (!csAttr.defaultSize && !sizeOverride)) {
773 size = 16;
774 }
775
776 Addr offset = bits(vaddr - base, size - 1, 0);
777 Addr endOffset = offset + req->getSize() - 1;
778
779 if (expandDown) {
780 DPRINTF(GPUTLB, "Checking an expand down segment.\n");
781 warn_once("Expand down segments are untested.\n");
782
783 if (offset <= limit || endOffset <= limit)
784 return std::make_shared<GeneralProtection>(0);
785 } else {
786 if (offset > limit || endOffset > limit)
787 return std::make_shared<GeneralProtection>(0);
788 }
789 }
790
791 // If paging is enabled, do the translation.
792 if (m5Reg.paging) {
793 DPRINTF(GPUTLB, "Paging enabled.\n");
794 // The vaddr already has the segment base applied.
795 GpuTlbEntry *entry = lookup(vaddr);
796 localNumTLBAccesses++;
797
798 if (!entry) {
799 localNumTLBMisses++;
800 if (timing) {
801 latency = missLatency1;
802 }
803
804 if (FullSystem) {
805 fatal("GpuTLB doesn't support full-system mode\n");
806 } else {
807 DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
808 "at pc %#x.\n", vaddr, tc->instAddr());
809
810 Process *p = tc->getProcessPtr();
811 const EmulationPageTable::Entry *pte =
812 p->pTable->lookup(vaddr);
813
814 if (!pte && mode != BaseTLB::Execute) {
815 // penalize a "page fault" more
816 if (timing)
817 latency += missLatency2;
818
819 if (p->fixupStackFault(vaddr))
820 pte = p->pTable->lookup(vaddr);
821 }
822
823 if (!pte) {
824 return std::make_shared<PageFault>(vaddr, true,
825 mode, true,
826 false);
827 } else {
828 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
829
830 DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
831 alignedVaddr, pte->paddr);
832
833 GpuTlbEntry gpuEntry(
834 p->pTable->pid(), alignedVaddr,
835 pte->paddr, true);
836 entry = insert(alignedVaddr, gpuEntry);
837 }
838
839 DPRINTF(GPUTLB, "Miss was serviced.\n");
840 }
841 } else {
842 localNumTLBHits++;
843
844 if (timing) {
845 latency = hitLatency;
846 }
847 }
848
849 // Do paging protection checks.
850 bool inUser = (m5Reg.cpl == 3 &&
851 !(flags & (CPL0FlagBit << FlagShift)));
852
853 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
854 bool badWrite = (!entry->writable && (inUser || cr0.wp));
855
856 if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
857 badWrite)) {
858 // The page must have been present to get into the TLB in
859 // the first place. We'll assume the reserved bits are
860 // fine even though we're not checking them.
861 return std::make_shared<PageFault>(vaddr, true, mode,
862 inUser, false);
863 }
864
865 if (storeCheck && badWrite) {
866 // This would fault if this were a write, so return a page
867 // fault that reflects that happening.
868 return std::make_shared<PageFault>(vaddr, true,
869 BaseTLB::Write,
870 inUser, false);
871 }
872
873
874 DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
875 "checks.\n", entry->paddr);
876
877 int page_size = entry->size();
878 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
879 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
880 req->setPaddr(paddr);
881
882 if (entry->uncacheable)
883 req->setFlags(Request::UNCACHEABLE);
884 } else {
885 //Use the address which already has segmentation applied.
886 DPRINTF(GPUTLB, "Paging disabled.\n");
887 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
888 req->setPaddr(vaddr);
889 }
890 } else {
891 // Real mode
892 DPRINTF(GPUTLB, "In real mode.\n");
893 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
894 req->setPaddr(vaddr);
895 }
896
897 // Check for an access to the local APIC
898 if (FullSystem) {
899 LocalApicBase localApicBase =
900 tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
901
902 Addr baseAddr = localApicBase.base * PageBytes;
903 Addr paddr = req->getPaddr();
904
905 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
906 // Force the access to be uncacheable.
907 req->setFlags(Request::UNCACHEABLE);
908 req->setPaddr(x86LocalAPICAddress(tc->contextId(),
909 paddr - baseAddr));
910 }
911 }
912
913 return NoFault;
914    }
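 /*
  * Worked example (hypothetical values): with a 4KB page,
  * page_size - 1 == 0xFFF, so for vaddr 0x400123 and a TLB entry with
  * paddr 0x80000000 the code above computes
  *     paddr = 0x80000000 | (0x400123 & 0xFFF) = 0x80000123
  * i.e., the entry supplies the page frame and the vaddr supplies the
  * page offset.
  */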
915
916 Fault
917 GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
918 int &latency)
919 {
920 bool delayedResponse;
921
922 return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
923 latency);
924 }
925
926 void
927 GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
928 Translation *translation, Mode mode, int &latency)
929 {
930 bool delayedResponse;
931 assert(translation);
932
933 Fault fault = GpuTLB::translate(req, tc, translation, mode,
934 delayedResponse, true, latency);
935
936 if (!delayedResponse)
937 translation->finish(fault, req, tc, mode);
938 }
939
940 Walker*
941 GpuTLB::getWalker()
942 {
943 return walker;
944 }
945
946
947 void
948 GpuTLB::serialize(CheckpointOut &cp) const
949 {
950 }
951
952 void
953 GpuTLB::unserialize(CheckpointIn &cp)
954 {
955 }
956
957 void
958 GpuTLB::regStats()
959 {
960 MemObject::regStats();
961
962 localNumTLBAccesses
963 .name(name() + ".local_TLB_accesses")
964 .desc("Number of TLB accesses")
965 ;
966
967 localNumTLBHits
968 .name(name() + ".local_TLB_hits")
969 .desc("Number of TLB hits")
970 ;
971
972 localNumTLBMisses
973 .name(name() + ".local_TLB_misses")
974 .desc("Number of TLB misses")
975 ;
976
977 localTLBMissRate
978 .name(name() + ".local_TLB_miss_rate")
979 .desc("TLB miss rate")
980 ;
981
982 accessCycles
983 .name(name() + ".access_cycles")
984 .desc("Cycles spent accessing this TLB level")
985 ;
986
987 pageTableCycles
988 .name(name() + ".page_table_cycles")
989 .desc("Cycles spent accessing the page table")
990 ;
991
992 localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
993
994 numUniquePages
995 .name(name() + ".unique_pages")
996 .desc("Number of unique pages touched")
997 ;
998
999 localCycles
1000 .name(name() + ".local_cycles")
1001 .desc("Number of cycles spent in queue for all incoming reqs")
1002 ;
1003
1004 localLatency
1005 .name(name() + ".local_latency")
1006 .desc("Avg. latency over incoming coalesced reqs")
1007 ;
1008
1009 localLatency = localCycles / localNumTLBAccesses;
1010
1011 globalNumTLBAccesses
1012 .name(name() + ".global_TLB_accesses")
1013 .desc("Number of TLB accesses")
1014 ;
1015
1016 globalNumTLBHits
1017 .name(name() + ".global_TLB_hits")
1018 .desc("Number of TLB hits")
1019 ;
1020
1021 globalNumTLBMisses
1022 .name(name() + ".global_TLB_misses")
1023 .desc("Number of TLB misses")
1024 ;
1025
1026 globalTLBMissRate
1027 .name(name() + ".global_TLB_miss_rate")
1028 .desc("TLB miss rate")
1029 ;
1030
1031 globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1032
1033 avgReuseDistance
1034 .name(name() + ".avg_reuse_distance")
1035 .desc("avg. reuse distance over all pages (in ticks)")
1036 ;
1037
1038 }
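 /*
  * The formula stats above are evaluated at dump time. For example,
  * with 80 local hits out of 100 local accesses,
  *     local_TLB_miss_rate = 100 * 20 / 100 = 20 (percent)
  * and local_latency averages the queueing cycles over all incoming
  * coalesced requests.
  */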
1039
1040 /**
1041 * Do the TLB lookup for this coalesced request and schedule
1042 * another event <TLB access latency> cycles later.
1043 */
1044
1045 void
1046 GpuTLB::issueTLBLookup(PacketPtr pkt)
1047 {
1048 assert(pkt);
1049 assert(pkt->senderState);
1050
1051 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1052 TheISA::PageBytes);
1053
1054 TranslationState *sender_state =
1055 safe_cast<TranslationState*>(pkt->senderState);
1056
1057 bool update_stats = !sender_state->prefetch;
1058 ThreadContext * tmp_tc = sender_state->tc;
1059
1060 DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1061 virt_page_addr);
1062
1063 int req_cnt = sender_state->reqCnt.back();
1064
1065 if (update_stats) {
1066 accessCycles -= (curTick() * req_cnt);
1067 localCycles -= curTick();
1068 updatePageFootprint(virt_page_addr);
1069 globalNumTLBAccesses += req_cnt;
1070 }
1071
1072 tlbOutcome lookup_outcome = TLB_MISS;
1073 RequestPtr tmp_req = pkt->req;
1074
1075 // Access the TLB and figure out if it's a hit or a miss.
1076 bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1077
1078 if (success) {
1079 lookup_outcome = TLB_HIT;
1080 // Put the entry in SenderState
1081 GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1082 assert(entry);
1083
1084 sender_state->tlbEntry =
1085 new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1086
1087 if (update_stats) {
1088 // the reqCnt has an entry per level, so its size tells us
1089 // which level we are in
1090 sender_state->hitLevel = sender_state->reqCnt.size();
1091 globalNumTLBHits += req_cnt;
1092 }
1093 } else {
1094 if (update_stats)
1095 globalNumTLBMisses += req_cnt;
1096 }
1097
1098 /*
1099 * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1100 * as the TLB access latency.
1101 *
1102 * We create and schedule a new TLBEvent which will help us take the
1103 * appropriate actions (e.g., update TLB on a hit, send request to lower
1104 * level TLB on a miss, or start a page walk if this was the last-level
1105 * TLB)
1106 */
1107 TLBEvent *tlb_event =
1108 new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1109
1110 if (translationReturnEvent.count(virt_page_addr)) {
1111 panic("Virtual Page Address %#x already has a return event\n",
1112 virt_page_addr);
1113 }
1114
1115 translationReturnEvent[virt_page_addr] = tlb_event;
1116 assert(tlb_event);
1117
1118 DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1119 curTick() + this->ticks(hitLatency));
1120
1121 schedule(tlb_event, curTick() + this->ticks(hitLatency));
1122 }
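 /*
  * Note on the stats arithmetic above: accessCycles and localCycles are
  * charged as (finish_tick - start_tick) in two halves. issueTLBLookup()
  * subtracts curTick() when the access is issued, and translationReturn()
  * adds curTick() back when the outcome is handled, so, e.g., a lookup
  * issued at tick 1000 and handled at tick 1400 contributes
  * 1400 - 1000 = 400 ticks (scaled by req_cnt for accessCycles).
  */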
1123
1124 GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1125 PacketPtr _pkt)
1126 : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1127 outcome(tlb_outcome), pkt(_pkt)
1128 {
1129 }
1130
1131    /**
1132     * Do the paging protection checks. If we encounter a page fault, an
1133     * assertion fails.
1134     */
1135 void
1136 GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1137 GpuTlbEntry * tlb_entry, Mode mode)
1138 {
1139 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1140 uint32_t flags = pkt->req->getFlags();
1141 bool storeCheck = flags & (StoreCheck << FlagShift);
1142
1143 // Do paging protection checks.
1144 bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1145 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1146
1147 bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1148
1149 if ((inUser && !tlb_entry->user) ||
1150 (mode == BaseTLB::Write && badWrite)) {
1151 // The page must have been present to get into the TLB in
1152 // the first place. We'll assume the reserved bits are
1153 // fine even though we're not checking them.
1154 assert(false);
1155 }
1156
1157 if (storeCheck && badWrite) {
1158 // This would fault if this were a write, so return a page
1159 // fault that reflects that happening.
1160 assert(false);
1161 }
1162 }
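 /*
  * Illustrative case: in user mode (cpl == 3 without the CPL0 flag), a
  * write to a page whose entry has writable == false makes badWrite true,
  * so a Write-mode access trips the assertion; the same write would also
  * be rejected in kernel mode if CR0.WP is set.
  */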
1163
1164    /**
1165     * handleTranslationReturn is called on a TLB hit,
1166     * when a TLB miss returns, or when a page fault returns.
1167     * The latter two call handleTranslationReturn with TLB_MISS as tlbOutcome.
1168     */
1169 void
1170 GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1171 PacketPtr pkt)
1172 {
1173
1174 assert(pkt);
1175 Addr vaddr = pkt->req->getVaddr();
1176
1177 TranslationState *sender_state =
1178 safe_cast<TranslationState*>(pkt->senderState);
1179
1180 ThreadContext *tc = sender_state->tc;
1181 Mode mode = sender_state->tlbMode;
1182
1183 GpuTlbEntry *local_entry, *new_entry;
1184
1185 if (tlb_outcome == TLB_HIT) {
1186 DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1187 local_entry = sender_state->tlbEntry;
1188 } else {
1189 DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1190 vaddr);
1191
1192 // We are returning either from a page walk or from a hit at a lower
1193 // TLB level. The senderState should be "carrying" a pointer to the
1194 // correct TLBEntry.
1195 new_entry = sender_state->tlbEntry;
1196 assert(new_entry);
1197 local_entry = new_entry;
1198
1199 if (allocationPolicy) {
1200 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1201 virt_page_addr);
1202
1203 local_entry = insert(virt_page_addr, *new_entry);
1204 }
1205
1206 assert(local_entry);
1207 }
1208
1209 /**
1210 * At this point the packet carries an up-to-date tlbEntry pointer
1211 * in its senderState.
1212 * Next step is to do the paging protection checks.
1213 */
1214 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1215 "while paddr was %#x.\n", local_entry->vaddr,
1216 local_entry->paddr);
1217
1218 pagingProtectionChecks(tc, pkt, local_entry, mode);
1219 int page_size = local_entry->size();
1220 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1221 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1222
1223 // Since this packet will be sent through the cpu side slave port,
1224 // it must be converted to a response pkt if it is not one already
1225 if (pkt->isRequest()) {
1226 pkt->makeTimingResponse();
1227 }
1228
1229 pkt->req->setPaddr(paddr);
1230
1231 if (local_entry->uncacheable) {
1232 pkt->req->setFlags(Request::UNCACHEABLE);
1233 }
1234
1235 //send packet back to coalescer
1236 cpuSidePort[0]->sendTimingResp(pkt);
1237 //schedule cleanup event
1238 cleanupQueue.push(virt_page_addr);
1239
1240 // schedule this only once per cycle.
1241 // The check is required because we might have multiple translations
1242        // returning the same cycle.
1243        // This is a maximum priority event and must be on the same cycle
1244 // as the cleanup event in TLBCoalescer to avoid a race with
1245 // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1246 if (!cleanupEvent.scheduled())
1247 schedule(cleanupEvent, curTick());
1248 }
1249
1250 /**
1251 * Here we take the appropriate actions based on the result of the
1252 * TLB lookup.
1253 */
1254 void
1255 GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1256 PacketPtr pkt)
1257 {
1258 DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1259
1260 assert(translationReturnEvent[virtPageAddr]);
1261 assert(pkt);
1262
1263 TranslationState *tmp_sender_state =
1264 safe_cast<TranslationState*>(pkt->senderState);
1265
1266 int req_cnt = tmp_sender_state->reqCnt.back();
1267 bool update_stats = !tmp_sender_state->prefetch;
1268
1269
1270 if (outcome == TLB_HIT) {
1271 handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1272
1273 if (update_stats) {
1274 accessCycles += (req_cnt * curTick());
1275 localCycles += curTick();
1276 }
1277
1278 } else if (outcome == TLB_MISS) {
1279
1280 DPRINTF(GPUTLB, "This is a TLB miss\n");
1281 if (update_stats) {
1282 accessCycles += (req_cnt*curTick());
1283 localCycles += curTick();
1284 }
1285
1286 if (hasMemSidePort) {
1287                // the one cycle added here represents the delay from when
1288                // we get the reply back until we propagate it to the
1289                // coalescer above.
1290 if (update_stats) {
1291 accessCycles += (req_cnt * 1);
1292 localCycles += 1;
1293 }
1294
1295 /**
1296 * There is a TLB below. Send the coalesced request.
1297 * We actually send the very first packet of all the
1298 * pending packets for this virtual page address.
1299 */
1300 if (!memSidePort[0]->sendTimingReq(pkt)) {
1301 DPRINTF(GPUTLB, "Failed sending translation request to "
1302 "lower level TLB for addr %#x\n", virtPageAddr);
1303
1304 memSidePort[0]->retries.push_back(pkt);
1305 } else {
1306 DPRINTF(GPUTLB, "Sent translation request to lower level "
1307 "TLB for addr %#x\n", virtPageAddr);
1308 }
1309 } else {
1310 //this is the last level TLB. Start a page walk
1311 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1312 "addr %#x\n", virtPageAddr);
1313
1314 if (update_stats)
1315 pageTableCycles -= (req_cnt*curTick());
1316
1317 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1318 assert(tlb_event);
1319 tlb_event->updateOutcome(PAGE_WALK);
1320 schedule(tlb_event, curTick() + ticks(missLatency2));
1321 }
1322 } else if (outcome == PAGE_WALK) {
1323 if (update_stats)
1324 pageTableCycles += (req_cnt*curTick());
1325
1326 // Need to access the page table and update the TLB
1327 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1328 virtPageAddr);
1329
1330 TranslationState *sender_state =
1331 safe_cast<TranslationState*>(pkt->senderState);
1332
1333 Process *p = sender_state->tc->getProcessPtr();
1334 Addr vaddr = pkt->req->getVaddr();
1335 #ifndef NDEBUG
1336 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1337 assert(alignedVaddr == virtPageAddr);
1338 #endif
1339 const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1340 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1341 p->fixupStackFault(vaddr)) {
1342 pte = p->pTable->lookup(vaddr);
1343 }
1344
1345 if (pte) {
1346 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1347 pte->paddr);
1348
1349 sender_state->tlbEntry =
1350 new GpuTlbEntry(0, virtPageAddr, pte->paddr, true);
1351 } else {
1352 sender_state->tlbEntry =
1353 new GpuTlbEntry(0, 0, 0, false);
1354 }
1355
1356 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1357 } else if (outcome == MISS_RETURN) {
1358 /** we add an extra cycle in the return path of the translation
1359 * requests in between the various TLB levels.
1360 */
1361 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1362 } else {
1363 assert(false);
1364 }
1365 }
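 /*
  * Outcome summary for the TLBEvent state machine implemented above:
  *   TLB_HIT     -> respond to the coalescer via handleTranslationReturn
  *   TLB_MISS    -> forward to the lower TLB level, or reschedule the
  *                  event as PAGE_WALK if this is the last level
  *   PAGE_WALK   -> consult the page table, then respond as a miss
  *   MISS_RETURN -> a lower level answered; respond one cycle later
  */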
1366
1367 void
1368 GpuTLB::TLBEvent::process()
1369 {
1370 tlb->translationReturn(virtPageAddr, outcome, pkt);
1371 }
1372
1373 const char*
1374 GpuTLB::TLBEvent::description() const
1375 {
1376 return "trigger translationDoneEvent";
1377 }
1378
1379 void
1380 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1381 {
1382 outcome = _outcome;
1383 }
1384
1385 Addr
1386 GpuTLB::TLBEvent::getTLBEventVaddr()
1387 {
1388 return virtPageAddr;
1389 }
1390
1391    /*
1392     * recvTimingReq receives a coalesced timing request from a TLBCoalescer
1393     * and calls issueTLBLookup().
1394     * It only rejects the packet if we have exceeded the maximum number
1395     * of outstanding requests for the TLB.
1396     */
1397 bool
1398 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1399 {
1400 if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1401 tlb->issueTLBLookup(pkt);
1402 // update number of outstanding translation requests
1403 tlb->outstandingReqs++;
1404 return true;
1405 } else {
1406 DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1407 tlb->outstandingReqs);
1408 return false;
1409 }
1410 }
1411
1412 /**
1413 * handleFuncTranslationReturn is called on a TLB hit,
1414 * when a TLB miss returns or when a page fault returns.
1415 * It updates LRU, inserts the TLB entry on a miss
1416 * depending on the allocation policy and does the required
1417 * protection checks. It does NOT create a new packet to
1418 * update the packet's addr; this is done in hsail-gpu code.
1419 */
1420 void
1421 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1422 {
1423 TranslationState *sender_state =
1424 safe_cast<TranslationState*>(pkt->senderState);
1425
1426 ThreadContext *tc = sender_state->tc;
1427 Mode mode = sender_state->tlbMode;
1428 Addr vaddr = pkt->req->getVaddr();
1429
1430 GpuTlbEntry *local_entry, *new_entry;
1431
1432 if (tlb_outcome == TLB_HIT) {
1433 DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1434 "%#x\n", vaddr);
1435
1436 local_entry = sender_state->tlbEntry;
1437 } else {
1438 DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1439 "%#x\n", vaddr);
1440
1441 // We are returning either from a page walk or from a hit at a lower
1442 // TLB level. The senderState should be "carrying" a pointer to the
1443 // correct TLBEntry.
1444 new_entry = sender_state->tlbEntry;
1445 assert(new_entry);
1446 local_entry = new_entry;
1447
1448 if (allocationPolicy) {
1449 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1450
1451 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1452 virt_page_addr);
1453
1454 local_entry = insert(virt_page_addr, *new_entry);
1455 }
1456
1457 assert(local_entry);
1458 }
1459
1460 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1461 "while paddr was %#x.\n", local_entry->vaddr,
1462 local_entry->paddr);
1463
1464 // Do paging checks if it's a normal functional access. If it's for a
1465 // prefetch, then sometimes you can try to prefetch something that won't
1466        // pass protection. We don't actually want to fault because there is no
1467 // demand access to deem this a violation. Just put it in the TLB and
1468 // it will fault if indeed a future demand access touches it in
1469 // violation.
1470 if (!sender_state->prefetch && sender_state->tlbEntry->valid)
1471 pagingProtectionChecks(tc, pkt, local_entry, mode);
1472
1473 int page_size = local_entry->size();
1474 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1475 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1476
1477 pkt->req->setPaddr(paddr);
1478
1479 if (local_entry->uncacheable)
1480 pkt->req->setFlags(Request::UNCACHEABLE);
1481 }
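 /*
  * Unlike handleTranslationReturn(), this functional variant neither
  * converts the packet into a timing response nor schedules a cleanup
  * event; it only fixes up pkt->req (paddr and flags) in place.
  */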
1482
1483 // This is used for atomic translations. Need to
1484 // make it all happen during the same cycle.
1485 void
1486 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1487 {
1488 TranslationState *sender_state =
1489 safe_cast<TranslationState*>(pkt->senderState);
1490
1491 ThreadContext *tc = sender_state->tc;
1492 bool update_stats = !sender_state->prefetch;
1493
1494 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1495 TheISA::PageBytes);
1496
1497 if (update_stats)
1498 tlb->updatePageFootprint(virt_page_addr);
1499
1500 // do the TLB lookup without updating the stats
1501 bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1502 tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1503
1504 // functional mode means no coalescing
1505 // global metrics are the same as the local metrics
1506 if (update_stats) {
1507 tlb->globalNumTLBAccesses++;
1508
1509 if (success) {
1510 sender_state->hitLevel = sender_state->reqCnt.size();
1511 tlb->globalNumTLBHits++;
1512 }
1513 }
1514
1515 if (!success) {
1516 if (update_stats)
1517 tlb->globalNumTLBMisses++;
1518 if (tlb->hasMemSidePort) {
1519 // there is a TLB below -> propagate down the TLB hierarchy
1520 tlb->memSidePort[0]->sendFunctional(pkt);
1521 // If no valid translation from a prefetch, then just return
1522 if (sender_state->prefetch && !pkt->req->hasPaddr())
1523 return;
1524 } else {
1525 // Need to access the page table and update the TLB
1526 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1527 virt_page_addr);
1528
1529 Process *p = tc->getProcessPtr();
1530
1531 Addr vaddr = pkt->req->getVaddr();
1532 #ifndef NDEBUG
1533 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1534 assert(alignedVaddr == virt_page_addr);
1535 #endif
1536
1537 const EmulationPageTable::Entry *pte =
1538 p->pTable->lookup(vaddr);
1539 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1540 p->fixupStackFault(vaddr)) {
1541 pte = p->pTable->lookup(vaddr);
1542 }
1543
1544 if (!sender_state->prefetch) {
1545 // no PageFaults are permitted after
1546 // the second page table lookup
1547                    assert(pte);
1548
1549                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1550                            pte->paddr);
1551
1552                    sender_state->tlbEntry =
1553                        new GpuTlbEntry(0, virt_page_addr,
1554                                        pte->paddr, true);
1555                } else {
1556                    // If this was a prefetch, then do the normal thing if it
1557                    // was a successful translation. Otherwise, send an empty
1558                    // TLB entry back so that it can be figured out as empty and
1559                    // handled accordingly.
1560                    if (pte) {
1561                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1562                                pte->paddr);
1563
1564                        sender_state->tlbEntry =
1565                            new GpuTlbEntry(0, virt_page_addr,
1566                                            pte->paddr, true);
1567 } else {
1568 DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1569 alignedVaddr);
1570
1571 sender_state->tlbEntry = new GpuTlbEntry();
1572
1573 return;
1574 }
1575 }
1576 }
1577 } else {
1578 DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1579 tlb->lookup(pkt->req->getVaddr()));
1580
1581 GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1582 update_stats);
1583
1584 assert(entry);
1585
1586 sender_state->tlbEntry =
1587 new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1588 }
1589 // This is the function that would populate pkt->req with the paddr of
1590        // the translation. But if no translation happens (i.e., the prefetch
1591        // fails), then the early returns in the above code will keep this
1592        // function from executing.
1593 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1594 }
1595
1596 void
1597 GpuTLB::CpuSidePort::recvReqRetry()
1598 {
1599 // The CPUSidePort never sends anything but replies. No retries
1600 // expected.
1601 assert(false);
1602 }
1603
1604 AddrRangeList
1605 GpuTLB::CpuSidePort::getAddrRanges() const
1606 {
1607 // currently not checked by the master
1608 AddrRangeList ranges;
1609
1610 return ranges;
1611 }
1612
1613 /**
1614 * MemSidePort receives the packet back.
1615     * We need to call handleTranslationReturn
1616 * and propagate up the hierarchy.
1617 */
1618 bool
1619 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1620 {
1621 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1622 TheISA::PageBytes);
1623
1624 DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1625 virt_page_addr);
1626
1627 TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1628 assert(tlb_event);
1629 assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1630
1631 tlb_event->updateOutcome(MISS_RETURN);
1632 tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1633
1634 return true;
1635 }
1636
1637 void
1638 GpuTLB::MemSidePort::recvReqRetry()
1639 {
1640 // No retries should reach the TLB. The retries
1641 // should only reach the TLBCoalescer.
1642 assert(false);
1643 }
1644
1645 void
1646 GpuTLB::cleanup()
1647 {
1648 while (!cleanupQueue.empty()) {
1649 Addr cleanup_addr = cleanupQueue.front();
1650 cleanupQueue.pop();
1651
1652 // delete TLBEvent
1653 TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1654 delete old_tlb_event;
1655 translationReturnEvent.erase(cleanup_addr);
1656
1657 // update number of outstanding requests
1658 outstandingReqs--;
1659 }
1660
1661 /** the higher level coalescer should retry if it has
1662 * any pending requests.
1663 */
1664 for (int i = 0; i < cpuSidePort.size(); ++i) {
1665 cpuSidePort[i]->sendRetryReq();
1666 }
1667 }
1668
1669 void
1670 GpuTLB::updatePageFootprint(Addr virt_page_addr)
1671 {
1672
1673 std::pair<AccessPatternTable::iterator, bool> ret;
1674
1675 AccessInfo tmp_access_info;
1676 tmp_access_info.lastTimeAccessed = 0;
1677 tmp_access_info.accessesPerPage = 0;
1678 tmp_access_info.totalReuseDistance = 0;
1679 tmp_access_info.sumDistance = 0;
1680 tmp_access_info.meanDistance = 0;
1681
1682 ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1683 tmp_access_info));
1684
1685 bool first_page_access = ret.second;
1686
1687 if (first_page_access) {
1688 numUniquePages++;
1689 } else {
1690 int accessed_before;
1691 accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1692 ret.first->second.totalReuseDistance += accessed_before;
1693 }
1694
1695 ret.first->second.accessesPerPage++;
1696 ret.first->second.lastTimeAccessed = curTick();
1697
1698 if (accessDistance) {
1699 ret.first->second.localTLBAccesses
1700 .push_back(localNumTLBAccesses.value());
1701 }
1702 }
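 /*
  * Example of the reuse-distance bookkeeping above (hypothetical ticks):
  * if a page is touched at tick 1000 and again at tick 1500,
  * accessed_before = 1500 - 1000 = 500 is added to totalReuseDistance;
  * exitCallback() later divides by accessesPerPage to get the per-page
  * average.
  */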
1703
1704 void
1705 GpuTLB::exitCallback()
1706 {
1707 std::ostream *page_stat_file = nullptr;
1708
1709 if (accessDistance) {
1710
1711 // print per page statistics to a separate file (.csv format)
1712            // simout is the gem5 output directory (default is m5out or the
1713            // one specified with -d)
1714 page_stat_file = simout.create(name().c_str())->stream();
1715
1716 // print header
1717 *page_stat_file << "page,max_access_distance,mean_access_distance, "
1718 << "stddev_distance" << std::endl;
1719 }
1720
1721 // update avg. reuse distance footprint
1722 AccessPatternTable::iterator iter, iter_begin, iter_end;
1723 unsigned int sum_avg_reuse_distance_per_page = 0;
1724
1725 // iterate through all pages seen by this TLB
1726 for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1727 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1728 iter->second.accessesPerPage;
1729
1730 if (accessDistance) {
1731 unsigned int tmp = iter->second.localTLBAccesses[0];
1732 unsigned int prev = tmp;
1733
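                // Sketch of the delta computation below: localTLBAccesses
                // holds the cumulative localNumTLBAccesses value at each
                // touch of this page, e.g. [3, 7, 12]. Subtracting the
                // previous sample + 1 turns it into the number of other
                // TLB accesses between consecutive touches: [0, 3, 4].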
1734 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1735 if (i) {
1736 tmp = prev + 1;
1737 }
1738
1739 prev = iter->second.localTLBAccesses[i];
1740 // update the localTLBAccesses value
1741                    // with the actual difference
1742 iter->second.localTLBAccesses[i] -= tmp;
1743 // compute the sum of AccessDistance per page
1744 // used later for mean
1745 iter->second.sumDistance +=
1746 iter->second.localTLBAccesses[i];
1747 }
1748
1749 iter->second.meanDistance =
1750 iter->second.sumDistance / iter->second.accessesPerPage;
1751
1752 // compute std_dev and max (we need a second round because we
1753            // need to know the mean value)
1754 unsigned int max_distance = 0;
1755 unsigned int stddev_distance = 0;
1756
1757 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1758 unsigned int tmp_access_distance =
1759 iter->second.localTLBAccesses[i];
1760
1761 if (tmp_access_distance > max_distance) {
1762 max_distance = tmp_access_distance;
1763 }
1764
1765 unsigned int diff =
1766 tmp_access_distance - iter->second.meanDistance;
1767 stddev_distance += pow(diff, 2);
1768
1769 }
1770
1771 stddev_distance =
1772 sqrt(stddev_distance/iter->second.accessesPerPage);
1773
1774 if (page_stat_file) {
1775 *page_stat_file << std::hex << iter->first << ",";
1776 *page_stat_file << std::dec << max_distance << ",";
1777 *page_stat_file << std::dec << iter->second.meanDistance
1778 << ",";
1779 *page_stat_file << std::dec << stddev_distance;
1780 *page_stat_file << std::endl;
1781 }
1782
1783 // erase the localTLBAccesses array
1784 iter->second.localTLBAccesses.clear();
1785 }
1786 }
1787
1788 if (!TLBFootprint.empty()) {
1789 avgReuseDistance =
1790 sum_avg_reuse_distance_per_page / TLBFootprint.size();
1791 }
1792
1793 //clear the TLBFootprint map
1794 TLBFootprint.clear();
1795 }
1796} // namespace X86ISA
1797
1798X86ISA::GpuTLB*
1799X86GPUTLBParams::create()
1800{
1801 return new X86ISA::GpuTLB(this);
1802}
1803