gpu_tlb.cc (13892:0182a0601f66)
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */

#include "gpu-compute/gpu_tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/logging.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    GpuTLB::GpuTLB(const Params *p)
        : ClockedObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greater than 4KB (as defined in
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        setMask = numSets - 1;
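        // Worked example (added for illustration; the values are not from
        // the original source): with size = 64 and assoc = 8, numSets = 8
        // and setMask = 0x7, so a request for vaddr 0x7ffff3000 maps to set
        // (0x7ffff3000 >> PageShift) & setMask = 0x7ffff3 & 0x7 = 3.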

    #if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
    #endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }

    Port &
    GpuTLB::getPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("TLBCoalescer::getPort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("TLBCoalescer::getPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
        }
    }

    TlbEntry*
    GpuTLB::insert(Addr vpn, TlbEntry &entry)
    {
        TlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address
         * The least significant bits are simply masked
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }
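
    // Note (added for clarity, not in the original source): entryList is
    // kept in MRU-to-LRU order, so when a set's free list is empty the
    // entry recycled above (entryList[set].back()) is the least recently
    // used way in that set.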

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    TlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }
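
    // Usage sketch (added for illustration, not in the original source):
    // tlbLookup() calls lookup(vaddr, true) so a real translation promotes
    // the matching entry to the MRU position, while issueTLBLookup() uses
    // lookup(vaddr, false) to copy an entry without perturbing LRU state.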

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                TlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {

        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            // The index is multiplied by the size of a MiscReg so that
            // any memory dependence calculations will not see these as
            // overlapping.
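            // Worked example (added for illustration): with 8-byte RegVals,
            // regNum 5 maps to paddr 40 and regNum 6 to paddr 48, so two
            // 8-byte MSR accesses can never appear to overlap.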
            req->setPaddr(regNum * sizeof(RegVal));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }

    /**
     * tlbLookup will only perform a TLB lookup, returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks about different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(const RequestPtr &req,
                      ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
    #ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
    #endif

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // update LRU stack on a hit
                TlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }

    Fault
    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;

                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                TlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                              pte->paddr, false, false);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                     badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }


                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
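                // Worked example (added for illustration): for a 4KB page,
                // page_size - 1 == 0xFFF, so entry->paddr 0x4000 and vaddr
                // 0x7234 combine to paddr 0x4000 | 0x234 = 0x4234.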
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                // Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }

    Fault
    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
                            Mode mode, int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
                                 latency);
    }

    void
    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }


    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        ClockedObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */

    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext *tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

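        // Note (added for clarity): ticks() is assumed to scale a latency
        // expressed in cycles by the clock period cached in the constructor,
        // e.g., with a 1000-tick clock period and hitLatency == 1 the event
        // below is scheduled 1000 ticks from now.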
        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                               PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do the paging protection checks. If a page fault is encountered,
     * a panic is triggered.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   TlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            panic("Page fault detected");
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so treat it as a
            // page fault here as well.
            panic("Page fault detected");
        }
    }

    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * The latter calls this function with TLB_MISS as the tlbOutcome.
     */
1164 void
1165 GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1166 PacketPtr pkt)
1167 {
1168
1169 assert(pkt);
1170 Addr vaddr = pkt->req->getVaddr();
1171
1172 TranslationState *sender_state =
1173 safe_cast<TranslationState*>(pkt->senderState);
1174
1175 ThreadContext *tc = sender_state->tc;
1176 Mode mode = sender_state->tlbMode;
1177
1178 TlbEntry *local_entry, *new_entry;
1179
1180 if (tlb_outcome == TLB_HIT) {
1181 DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1182 local_entry = sender_state->tlbEntry;
1183 } else {
1184 DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1185 vaddr);
1186
1187 // We are returning either from a page walk or from a hit at a lower
1188 // TLB level. The senderState should be "carrying" a pointer to the
1189 // correct TLBEntry.
1190 new_entry = sender_state->tlbEntry;
1191 assert(new_entry);
1192 local_entry = new_entry;
1193
1194 if (allocationPolicy) {
1195 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1196 virt_page_addr);
1197
1198 local_entry = insert(virt_page_addr, *new_entry);
1199 }
1200
1201 assert(local_entry);
1202 }
1203
1204 /**
1205 * At this point the packet carries an up-to-date tlbEntry pointer
1206 * in its senderState.
1207 * Next step is to do the paging protection checks.
1208 */
1209 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1210 "while paddr was %#x.\n", local_entry->vaddr,
1211 local_entry->paddr);
1212
1213 pagingProtectionChecks(tc, pkt, local_entry, mode);
1214 int page_size = local_entry->size();
1215 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1216 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1217
1218 // Since this packet will be sent through the cpu side slave port,
1219 // it must be converted to a response pkt if it is not one already
1220 if (pkt->isRequest()) {
1221 pkt->makeTimingResponse();
1222 }
1223
1224 pkt->req->setPaddr(paddr);
1225
1226 if (local_entry->uncacheable) {
1227 pkt->req->setFlags(Request::UNCACHEABLE);
1228 }
1229
1230 //send packet back to coalescer
1231 cpuSidePort[0]->sendTimingResp(pkt);
1232 //schedule cleanup event
1233 cleanupQueue.push(virt_page_addr);
1234
 1235         // Schedule the cleanup event only once per cycle; the check is
 1236         // required because multiple translations may return in the same
 1237         // cycle.
 1238         // This is a maximum-priority event and must be in the same cycle
 1239         // as the cleanup event in the TLBCoalescer to avoid a race with
 1240         // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry.
1241 if (!cleanupEvent.scheduled())
1242 schedule(cleanupEvent, curTick());
1243 }
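
    /*
     * Editor's note: above, the physical address is formed by OR-ing the
     * page-aligned frame address with the in-page offset, which assumes
     * page_size is a power of two and the entry's paddr is page-aligned.
     * A concrete worked example (illustrative only, kept out of the build
     * with #if 0):
     */
#if 0
#include <cassert>
#include <cstdint>

int
main()
{
    const uint64_t page_size = 4096;        // 4KB page
    const uint64_t frame = 0x12345000;      // page-aligned physical frame
    const uint64_t vaddr = 0x00007abc;      // in-page offset is 0xabc

    // Same composition as in handleTranslationReturn():
    uint64_t paddr = frame | (vaddr & (page_size - 1));
    assert(paddr == 0x12345abc);
    return 0;
}
#endif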
1244
1245 /**
1246 * Here we take the appropriate actions based on the result of the
1247 * TLB lookup.
1248 */
1249 void
1250 GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1251 PacketPtr pkt)
1252 {
1253 DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1254
1255 assert(translationReturnEvent[virtPageAddr]);
1256 assert(pkt);
1257
1258 TranslationState *tmp_sender_state =
1259 safe_cast<TranslationState*>(pkt->senderState);
1260
1261 int req_cnt = tmp_sender_state->reqCnt.back();
1262 bool update_stats = !tmp_sender_state->prefetch;
1263
1264
1265 if (outcome == TLB_HIT) {
1266 handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1267
1268 if (update_stats) {
1269 accessCycles += (req_cnt * curTick());
1270 localCycles += curTick();
1271 }
1272
1273 } else if (outcome == TLB_MISS) {
1274
1275 DPRINTF(GPUTLB, "This is a TLB miss\n");
1276 if (update_stats) {
1277 accessCycles += (req_cnt*curTick());
1278 localCycles += curTick();
1279 }
1280
1281 if (hasMemSidePort) {
 1282                 // the one cycle added here represents the delay from when
 1283                 // we get the reply back until we propagate it to the
 1284                 // coalescer above.
1285 if (update_stats) {
1286 accessCycles += (req_cnt * 1);
1287 localCycles += 1;
1288 }
1289
1290 /**
1291 * There is a TLB below. Send the coalesced request.
1292 * We actually send the very first packet of all the
1293 * pending packets for this virtual page address.
1294 */
1295 if (!memSidePort[0]->sendTimingReq(pkt)) {
1296 DPRINTF(GPUTLB, "Failed sending translation request to "
1297 "lower level TLB for addr %#x\n", virtPageAddr);
1298
1299 memSidePort[0]->retries.push_back(pkt);
1300 } else {
1301 DPRINTF(GPUTLB, "Sent translation request to lower level "
1302 "TLB for addr %#x\n", virtPageAddr);
1303 }
1304 } else {
1305 //this is the last level TLB. Start a page walk
1306 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1307 "addr %#x\n", virtPageAddr);
1308
1309 if (update_stats)
1310 pageTableCycles -= (req_cnt*curTick());
1311
1312 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1313 assert(tlb_event);
1314 tlb_event->updateOutcome(PAGE_WALK);
1315 schedule(tlb_event, curTick() + ticks(missLatency2));
1316 }
1317 } else if (outcome == PAGE_WALK) {
1318 if (update_stats)
1319 pageTableCycles += (req_cnt*curTick());
1320
1321 // Need to access the page table and update the TLB
1322 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1323 virtPageAddr);
1324
1325 TranslationState *sender_state =
1326 safe_cast<TranslationState*>(pkt->senderState);
1327
1328 Process *p = sender_state->tc->getProcessPtr();
1329 Addr vaddr = pkt->req->getVaddr();
1330 #ifndef NDEBUG
1331 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1332 assert(alignedVaddr == virtPageAddr);
1333 #endif
1334 const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1335 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1336 p->fixupStackFault(vaddr)) {
1337 pte = p->pTable->lookup(vaddr);
1338 }
1339
1340 if (pte) {
1341 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1342 pte->paddr);
1343
1344 sender_state->tlbEntry =
1345 new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1346 false);
1347 } else {
1348 sender_state->tlbEntry = nullptr;
1349 }
1350
1351 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1352 } else if (outcome == MISS_RETURN) {
1353 /** we add an extra cycle in the return path of the translation
1354 * requests in between the various TLB levels.
1355 */
1356 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1357 } else {
1358 panic("Unexpected TLB outcome %d", outcome);
1359 }
1360 }
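
    /*
     * Editor's note: the curTick()-based stat updates in translationReturn()
     * pair with the matching subtractions in issueTLBLookup(): decrementing
     * an accumulator by req_cnt * curTick() at issue time and incrementing
     * it by req_cnt * curTick() at return time leaves a net contribution of
     * req_cnt * elapsed ticks. A tiny sketch of the idiom (illustrative
     * only, kept out of the build with #if 0):
     */
#if 0
#include <cassert>
#include <cstdint>

int
main()
{
    int64_t accessCycles = 0;
    const int64_t req_cnt = 4;

    const int64_t issue_tick = 1000;
    accessCycles -= req_cnt * issue_tick;    // at issueTLBLookup()

    const int64_t return_tick = 1250;
    accessCycles += req_cnt * return_tick;   // at translationReturn()

    // Net contribution: req_cnt * (return_tick - issue_tick).
    assert(accessCycles == req_cnt * 250);
    return 0;
}
#endif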
1361
1362 void
1363 GpuTLB::TLBEvent::process()
1364 {
1365 tlb->translationReturn(virtPageAddr, outcome, pkt);
1366 }
1367
1368 const char*
1369 GpuTLB::TLBEvent::description() const
1370 {
1371 return "trigger translationDoneEvent";
1372 }
1373
1374 void
1375 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1376 {
1377 outcome = _outcome;
1378 }
1379
1380 Addr
1381 GpuTLB::TLBEvent::getTLBEventVaddr()
1382 {
1383 return virtPageAddr;
1384 }
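
    /*
     * Editor's note: a single TLBEvent is reused across the phases of one
     * translation: on a miss at the last level the same event is re-armed
     * via updateOutcome(PAGE_WALK) and re-scheduled, while a reply from a
     * lower TLB re-arms it with MISS_RETURN in MemSidePort::recvTimingResp.
     * A simplified, event-loop-free sketch of that update-and-reprocess
     * pattern (illustrative only, kept out of the build with #if 0):
     */
#if 0
#include <cassert>

enum Outcome { TLB_MISS, PAGE_WALK, MISS_RETURN, DONE };

// One object, advanced phase by phase, mirroring
// updateOutcome() + schedule() on the same TLBEvent.
struct WalkEvent {
    Outcome outcome = TLB_MISS;

    void process() {
        switch (outcome) {
          case TLB_MISS:    outcome = PAGE_WALK; break; // last-level miss
          case MISS_RETURN:                             // lower TLB replied
          case PAGE_WALK:   outcome = DONE;      break; // reply sent upward
          default:                               break;
        }
    }
};

int
main()
{
    WalkEvent ev;
    while (ev.outcome != DONE)
        ev.process();    // in gem5 this would be a re-schedule
    assert(ev.outcome == DONE);
    return 0;
}
#endif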
1385
1386 /*
1387 * recvTiming receives a coalesced timing request from a TLBCoalescer
 1388      * and calls issueTLBLookup().
 1389      * It rejects the packet only if we have exceeded the maximum
 1390      * number of outstanding requests for the TLB.
1391 */
1392 bool
1393 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1394 {
1395 if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1396 tlb->issueTLBLookup(pkt);
1397 // update number of outstanding translation requests
1398 tlb->outstandingReqs++;
1399 return true;
1400 } else {
1401 DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1402 tlb->outstandingReqs);
1403 return false;
1404 }
1405 }
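
    /*
     * Editor's note: this is standard gem5-style timing-port backpressure:
     * returning false NACKs the packet, and the sender must hold it until
     * sendRetryReq() arrives (issued from cleanup() once a translation
     * completes and outstandingReqs drops). A minimal sketch of that
     * accept/NACK/retry handshake (illustrative names, kept out of the
     * build with #if 0):
     */
#if 0
#include <cassert>

struct SimpleTLB {
    int outstanding = 0;
    const int maxOutstanding = 2;
    bool retryPending = false;

    bool recvTimingReq() {
        if (outstanding < maxOutstanding) {
            ++outstanding;        // accepted
            return true;
        }
        retryPending = true;      // NACK; sender must wait for a retry
        return false;
    }

    void cleanup() {
        --outstanding;            // one translation completed
        if (retryPending)
            retryPending = false; // signal the sender to retry now
    }
};

int
main()
{
    SimpleTLB tlb;
    assert(tlb.recvTimingReq());
    assert(tlb.recvTimingReq());
    assert(!tlb.recvTimingReq()); // over the limit: NACKed
    tlb.cleanup();                // frees a slot and signals retry
    assert(tlb.recvTimingReq());  // the retry now succeeds
    return 0;
}
#endif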
1406
1407 /**
1408 * handleFuncTranslationReturn is called on a TLB hit,
1409 * when a TLB miss returns or when a page fault returns.
1410 * It updates LRU, inserts the TLB entry on a miss
1411 * depending on the allocation policy and does the required
1412 * protection checks. It does NOT create a new packet to
1413 * update the packet's addr; this is done in hsail-gpu code.
1414 */
1415 void
1416 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1417 {
1418 TranslationState *sender_state =
1419 safe_cast<TranslationState*>(pkt->senderState);
1420
1421 ThreadContext *tc = sender_state->tc;
1422 Mode mode = sender_state->tlbMode;
1423 Addr vaddr = pkt->req->getVaddr();
1424
1425 TlbEntry *local_entry, *new_entry;
1426
1427 if (tlb_outcome == TLB_HIT) {
1428 DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1429 "%#x\n", vaddr);
1430
1431 local_entry = sender_state->tlbEntry;
1432 } else {
1433 DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1434 "%#x\n", vaddr);
1435
1436 // We are returning either from a page walk or from a hit at a lower
1437 // TLB level. The senderState should be "carrying" a pointer to the
1438 // correct TLBEntry.
1439 new_entry = sender_state->tlbEntry;
1440 assert(new_entry);
1441 local_entry = new_entry;
1442
1443 if (allocationPolicy) {
1444 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1445
1446 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1447 virt_page_addr);
1448
1449 local_entry = insert(virt_page_addr, *new_entry);
1450 }
1451
1452 assert(local_entry);
1453 }
1454
1455 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1456 "while paddr was %#x.\n", local_entry->vaddr,
1457 local_entry->paddr);
1458
1459 /**
1460 * Do paging checks if it's a normal functional access. If it's for a
1461 * prefetch, then sometimes you can try to prefetch something that
 1462              * won't pass protection. We don't actually want to fault because there
1463 * is no demand access to deem this a violation. Just put it in the
1464 * TLB and it will fault if indeed a future demand access touches it in
1465 * violation.
1466 *
1467 * This feature could be used to explore security issues around
1468 * speculative memory accesses.
1469 */
1470 if (!sender_state->prefetch && sender_state->tlbEntry)
1471 pagingProtectionChecks(tc, pkt, local_entry, mode);
1472
1473 int page_size = local_entry->size();
1474 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1475 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1476
1477 pkt->req->setPaddr(paddr);
1478
1479 if (local_entry->uncacheable)
1480 pkt->req->setFlags(Request::UNCACHEABLE);
1481 }
1482
1483 // This is used for atomic translations. Need to
1484 // make it all happen during the same cycle.
1485 void
1486 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1487 {
1488 TranslationState *sender_state =
1489 safe_cast<TranslationState*>(pkt->senderState);
1490
1491 ThreadContext *tc = sender_state->tc;
1492 bool update_stats = !sender_state->prefetch;
1493
1494 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1495 TheISA::PageBytes);
1496
1497 if (update_stats)
1498 tlb->updatePageFootprint(virt_page_addr);
1499
 1500         // do the TLB lookup; stats are updated only for non-prefetch requests
1501 bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1502 tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1503
1504 // functional mode means no coalescing
1505 // global metrics are the same as the local metrics
1506 if (update_stats) {
1507 tlb->globalNumTLBAccesses++;
1508
1509 if (success) {
1510 sender_state->hitLevel = sender_state->reqCnt.size();
1511 tlb->globalNumTLBHits++;
1512 }
1513 }
1514
1515 if (!success) {
1516 if (update_stats)
1517 tlb->globalNumTLBMisses++;
1518 if (tlb->hasMemSidePort) {
1519 // there is a TLB below -> propagate down the TLB hierarchy
1520 tlb->memSidePort[0]->sendFunctional(pkt);
1521 // If no valid translation from a prefetch, then just return
1522 if (sender_state->prefetch && !pkt->req->hasPaddr())
1523 return;
1524 } else {
1525 // Need to access the page table and update the TLB
1526 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1527 virt_page_addr);
1528
1529 Process *p = tc->getProcessPtr();
1530
1531 Addr vaddr = pkt->req->getVaddr();
1532 #ifndef NDEBUG
1533 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1534 assert(alignedVaddr == virt_page_addr);
1535 #endif
1536
1537 const EmulationPageTable::Entry *pte =
1538 p->pTable->lookup(vaddr);
1539 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1540 p->fixupStackFault(vaddr)) {
1541 pte = p->pTable->lookup(vaddr);
1542 }
1543
1544 if (!sender_state->prefetch) {
1545 // no PageFaults are permitted after
1546 // the second page table lookup
1547 assert(pte);
1548
1549 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1550 pte->paddr);
1551
1552 sender_state->tlbEntry =
1553 new TlbEntry(p->pid(), virt_page_addr,
1554 pte->paddr, false, false);
1555 } else {
1556 // If this was a prefetch, then do the normal thing if it
 1557                     // was a successful translation. Otherwise, send back an
 1558                     // empty TLB entry so that the failure can be detected
 1559                     // and handled accordingly.
1560 if (pte) {
1561 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1562 pte->paddr);
1563
1564 sender_state->tlbEntry =
1565 new TlbEntry(p->pid(), virt_page_addr,
1566 pte->paddr, false, false);
1567 } else {
1568 DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1569 alignedVaddr);
1570
1571 sender_state->tlbEntry = nullptr;
1572
1573 return;
1574 }
1575 }
1576 }
1577 } else {
1578 DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1579 tlb->lookup(pkt->req->getVaddr()));
1580
1581 TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1582 update_stats);
1583
1584 assert(entry);
1585
1586 auto p = sender_state->tc->getProcessPtr();
1587 sender_state->tlbEntry =
1588 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1589 false, false);
1590 }
 1591         // This is the function that populates pkt->req with the paddr of
 1592         // the translation. But if no translation happens (i.e., the prefetch
 1593         // fails) then the early returns in the code above keep this function
 1594         // from executing.
1595 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1596 }
1597
1598 void
1599 GpuTLB::CpuSidePort::recvReqRetry()
1600 {
1601 // The CPUSidePort never sends anything but replies. No retries
1602 // expected.
1603 panic("recvReqRetry called");
1604 }
1605
1606 AddrRangeList
1607 GpuTLB::CpuSidePort::getAddrRanges() const
1608 {
1609 // currently not checked by the master
1610 AddrRangeList ranges;
1611
1612 return ranges;
1613 }
1614
1615 /**
1616 * MemSidePort receives the packet back.
 1617      * We need to call handleTranslationReturn
1618 * and propagate up the hierarchy.
1619 */
1620 bool
1621 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1622 {
1623 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1624 TheISA::PageBytes);
1625
1626 DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1627 virt_page_addr);
1628
1629 TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1630 assert(tlb_event);
1631 assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1632
1633 tlb_event->updateOutcome(MISS_RETURN);
1634 tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1635
1636 return true;
1637 }
1638
1639 void
1640 GpuTLB::MemSidePort::recvReqRetry()
1641 {
1642 // No retries should reach the TLB. The retries
1643 // should only reach the TLBCoalescer.
1644 panic("recvReqRetry called");
1645 }
1646
1647 void
1648 GpuTLB::cleanup()
1649 {
1650 while (!cleanupQueue.empty()) {
1651 Addr cleanup_addr = cleanupQueue.front();
1652 cleanupQueue.pop();
1653
1654 // delete TLBEvent
1655 TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1656 delete old_tlb_event;
1657 translationReturnEvent.erase(cleanup_addr);
1658
1659 // update number of outstanding requests
1660 outstandingReqs--;
1661 }
1662
1663 /** the higher level coalescer should retry if it has
1664 * any pending requests.
1665 */
1666 for (int i = 0; i < cpuSidePort.size(); ++i) {
1667 cpuSidePort[i]->sendRetryReq();
1668 }
1669 }
1670
1671 void
1672 GpuTLB::updatePageFootprint(Addr virt_page_addr)
1673 {
1674
1675 std::pair<AccessPatternTable::iterator, bool> ret;
1676
1677 AccessInfo tmp_access_info;
1678 tmp_access_info.lastTimeAccessed = 0;
1679 tmp_access_info.accessesPerPage = 0;
1680 tmp_access_info.totalReuseDistance = 0;
1681 tmp_access_info.sumDistance = 0;
1682 tmp_access_info.meanDistance = 0;
1683
1684 ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1685 tmp_access_info));
1686
1687 bool first_page_access = ret.second;
1688
1689 if (first_page_access) {
1690 numUniquePages++;
1691 } else {
 1692             int accessed_before =
 1693                 curTick() - ret.first->second.lastTimeAccessed;
1694 ret.first->second.totalReuseDistance += accessed_before;
1695 }
1696
1697 ret.first->second.accessesPerPage++;
1698 ret.first->second.lastTimeAccessed = curTick();
1699
1700 if (accessDistance) {
1701 ret.first->second.localTLBAccesses
1702 .push_back(localNumTLBAccesses.value());
1703 }
1704 }
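
    /*
     * Editor's note: the footprint update above leans on std::map::insert
     * returning a pair<iterator, bool>: the bool distinguishes the first
     * touch of a page from a reuse (the existing entry is not overwritten
     * on reuse), and the iterator is valid either way. A stripped-down
     * sketch of the same idiom (illustrative only, kept out of the build
     * with #if 0):
     */
#if 0
#include <cassert>
#include <map>

int
main()
{
    std::map<unsigned long, int> accesses_per_page;

    auto ret = accesses_per_page.insert({0x1000UL, 0});
    assert(ret.second);           // first access to this page
    ret.first->second++;

    ret = accesses_per_page.insert({0x1000UL, 0});
    assert(!ret.second);          // already tracked; insert is a no-op
    ret.first->second++;

    assert(accesses_per_page[0x1000UL] == 2);
    return 0;
}
#endif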
1705
1706 void
1707 GpuTLB::exitCallback()
1708 {
1709 std::ostream *page_stat_file = nullptr;
1710
1711 if (accessDistance) {
1712
1713 // print per page statistics to a separate file (.csv format)
1714 // simout is the gem5 output directory (default is m5out or the one
 1715             // specified with -d)
1716 page_stat_file = simout.create(name().c_str())->stream();
1717
1718 // print header
 1719             *page_stat_file << "page,max_access_distance,mean_access_distance,"
1720 << "stddev_distance" << std::endl;
1721 }
1722
1723 // update avg. reuse distance footprint
1724 AccessPatternTable::iterator iter, iter_begin, iter_end;
1725 unsigned int sum_avg_reuse_distance_per_page = 0;
1726
1727 // iterate through all pages seen by this TLB
1728 for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1729 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1730 iter->second.accessesPerPage;
1731
1732 if (accessDistance) {
1733 unsigned int tmp = iter->second.localTLBAccesses[0];
1734 unsigned int prev = tmp;
1735
1736 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1737 if (i) {
1738 tmp = prev + 1;
1739 }
1740
1741 prev = iter->second.localTLBAccesses[i];
1742 // update the localTLBAccesses value
 1743                     // with the actual difference
1744 iter->second.localTLBAccesses[i] -= tmp;
1745 // compute the sum of AccessDistance per page
1746 // used later for mean
1747 iter->second.sumDistance +=
1748 iter->second.localTLBAccesses[i];
1749 }
1750
1751 iter->second.meanDistance =
1752 iter->second.sumDistance / iter->second.accessesPerPage;
1753
 1754                 // compute std_dev and max (we need a second pass because
 1755                 // we need to know the mean value first)
1756 unsigned int max_distance = 0;
1757 unsigned int stddev_distance = 0;
1758
1759 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1760 unsigned int tmp_access_distance =
1761 iter->second.localTLBAccesses[i];
1762
1763 if (tmp_access_distance > max_distance) {
1764 max_distance = tmp_access_distance;
1765 }
1766
1767 unsigned int diff =
1768 tmp_access_distance - iter->second.meanDistance;
1769 stddev_distance += pow(diff, 2);
1770
1771 }
1772
1773 stddev_distance =
1774 sqrt(stddev_distance/iter->second.accessesPerPage);
1775
1776 if (page_stat_file) {
1777 *page_stat_file << std::hex << iter->first << ",";
1778 *page_stat_file << std::dec << max_distance << ",";
1779 *page_stat_file << std::dec << iter->second.meanDistance
1780 << ",";
1781 *page_stat_file << std::dec << stddev_distance;
1782 *page_stat_file << std::endl;
1783 }
1784
1785 // erase the localTLBAccesses array
1786 iter->second.localTLBAccesses.clear();
1787 }
1788 }
1789
1790 if (!TLBFootprint.empty()) {
1791 avgReuseDistance =
1792 sum_avg_reuse_distance_per_page / TLBFootprint.size();
1793 }
1794
1795 //clear the TLBFootprint map
1796 TLBFootprint.clear();
1797 }
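
    /*
     * Editor's note: the first inner loop of exitCallback() rewrites
     * localTLBAccesses in place, turning the sampled global access counts
     * into access distances: the number of accesses to *other* pages
     * between consecutive touches of this page (the first touch maps to
     * 0). A standalone sketch of that transform with concrete numbers
     * (illustrative only, kept out of the build with #if 0):
     */
#if 0
#include <cassert>
#include <vector>

int
main()
{
    // Global TLB access counter sampled at each touch of one page.
    std::vector<unsigned> dist = {3, 7, 9};

    unsigned tmp = dist[0];
    unsigned prev = tmp;

    for (size_t i = 0; i < dist.size(); ++i) {
        if (i)
            tmp = prev + 1;
        prev = dist[i];
        dist[i] -= tmp;    // distance since the previous touch
    }

    // First touch -> 0; then 3 and 1 intervening accesses.
    assert(dist[0] == 0 && dist[1] == 3 && dist[2] == 1);
    return 0;
}
#endif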
1798} // namespace X86ISA
1799
1800X86ISA::GpuTLB*
1801X86GPUTLBParams::create()
1802{
1803 return new X86ISA::GpuTLB(this);
1804}
1805