// gpu_tlb.cc (revisions 11364:1bd9f1b27438 -> 11523:81332eb10367)
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/output.hh"
49#include "base/trace.hh"
50#include "cpu/base.hh"
51#include "cpu/thread_context.hh"
52#include "debug/GPUPrefetch.hh"
53#include "debug/GPUTLB.hh"
54#include "mem/packet_access.hh"
55#include "mem/page_table.hh"
56#include "mem/request.hh"
57#include "sim/process.hh"
58
59namespace X86ISA
60{
61
    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb = new GpuTlbEntry[size];
        std::memset(tlb, 0, sizeof(GpuTlbEntry) * size);

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set*assoc + way;
                freeList[set].push_back(&tlb[x]);
            }
        }

        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greater than 4KB (as defined in
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        setMask = numSets - 1;
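        // The mask only selects the set correctly when numSets is a power
        // of two (implied by size and assoc). For example, size = 64 and
        // assoc = 4 give numSets = 16 and setMask = 0xF, so bits [3:0] of
        // the virtual page number pick the set.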

#if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
#endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.)\n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());

        // delete the TLB
        delete[] tlb;
    }

    BaseSlavePort&
    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("GpuTLB::getSlavePort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else {
            panic("GpuTLB::getSlavePort: unknown port %s\n", if_name);
        }
    }

    BaseMasterPort&
    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("GpuTLB::getMasterPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("GpuTLB::getMasterPort: unknown port %s\n", if_name);
        }
    }

    GpuTlbEntry*
    GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
    {
        GpuTlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address
         * The least significant bits are simply masked
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

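        // Allocate from the free list if one exists; otherwise evict the
        // LRU victim, which is kept at the back of this set's entryList
        // (MRU entries are moved to the front on insert and lookup).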
        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }
        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    GpuTlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                GpuTlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

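        // Entries flagged global (mappings shared across address spaces,
        // e.g. kernel pages) are preserved; everything else is returned to
        // the free list.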
        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {
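        // Note: asn (the address-space number) is accepted for interface
        // compatibility but is not used below; entries are matched on the
        // virtual address alone.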
        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

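            // Map the architectural MSR number (the low bits of the
            // internal address) onto gem5's misc-register indices; e.g.,
            // an access to MSR 0xC0000082 (LSTAR) lands on MISCREG_LSTAR.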
            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(MiscReg));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

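            // Standard x86 PCI configuration mechanism #1: port 0xCF8 holds
            // the config address (bit 31 is the enable bit) and ports
            // 0xCFC-0xCFF form the config-data window. With the enable bit
            // set, the data access is routed to PCI configuration space;
            // otherwise it is treated as plain port I/O.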
            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }

    /**
     * tlbLookup only performs a TLB lookup, returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks about different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
#ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
#endif
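        // seg feeds only the assert below; the #ifndef NDEBUG guard avoids
        // unused-variable warnings when asserts are compiled out.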

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                //update LRU stack on a hit
                GpuTlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }

    Fault
    GpuTLB::translate(RequestPtr req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                      seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;
                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

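                    // Expand-down semantics: valid offsets lie strictly
                    // above the limit, so an offset at or below the limit
                    // is out of bounds.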
                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                GpuTlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

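                        // Syscall-emulation mode: service the miss from the
                        // process's page table rather than a hardware walk.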
                        Process *p = tc->getProcessPtr();
                        GpuTlbEntry newEntry;
                        bool success = p->pTable->lookup(vaddr, newEntry);

                        if (!success && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing) {
                                latency += missLatency2;
                            }

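                            // A miss near the stack may simply mean the
                            // stack needs to grow; let the process try to
                            // extend it before declaring a real fault.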
                            if (p->fixupStackFault(vaddr))
                                success = p->pTable->lookup(vaddr, newEntry);
                        }

                        if (!success) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            newEntry.valid = success;
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, newEntry.pageStart());

                            entry = insert(alignedVaddr, newEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                    badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }

                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                //Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }

    Fault
    GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
                            int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }

    void
    GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }

    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

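        // Formula stats: the expressions assigned below are evaluated at
        // stats-dump time from the scalar counters registered above.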
        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;
    }

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext *tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

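        // Latency accounting: subtract the start tick here; the matching
        // "+= curTick()" in translationReturn() makes each stat accumulate
        // (finish_tick - start_tick) for the request.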
        if (update_stats) {
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        RequestPtr tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            sender_state->tlbEntry =
                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr,
                               tlbOutcome tlb_outcome, PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do paging protection checks. Encountering a page fault here fires
     * an assertion, since faults are expected to have been handled
     * earlier in the translation flow.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   GpuTlbEntry *tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 &&
                       !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            assert(false);
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so return a page
            // fault that reflects that happening.
            assert(false);
        }
    }

    /**
     * handleTranslationReturn is called on a TLB hit, when a TLB miss
     * returns, or when a page fault returns. The latter cases invoke it
     * with TLB_MISS as the tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                    tlbOutcome tlb_outcome, PacketPtr pkt)
    {
        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        GpuTlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                    vaddr);
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a
            // pointer to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

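            // Allocate-on-miss: when the allocation policy is enabled,
            // install the returned translation in this level too;
            // otherwise just pass the entry through without caching it.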
1198 if (allocationPolicy) {
1199 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1200 virt_page_addr);
1201
1202 local_entry = insert(virt_page_addr, *new_entry);
1203 }
1204
1205 assert(local_entry);
1206 }
1207
1208 /**
1209 * At this point the packet carries an up-to-date tlbEntry pointer
1210 * in its senderState.
1211 * Next step is to do the paging protection checks.
1212 */
1213 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1214 "while paddr was %#x.\n", local_entry->vaddr,
1215 local_entry->paddr);
1216
1217 pagingProtectionChecks(tc, pkt, local_entry, mode);
1218 int page_size = local_entry->size();
1219 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1220 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1221
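        // Worked example (editorial, assuming 4 KB pages): page_size - 1 ==
        // 0xfff, so an entry with paddr 0x4000 translates vaddr 0x10a34 to
        // 0x4000 | (0x10a34 & 0xfff) == 0x4a34, i.e. the page frame base
        // OR'd with the in-page offset.
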
        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
            pkt->req->setFlags(Request::UNCACHEABLE);
        }

        // send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        // schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // Schedule this only once per cycle; the check is required because
        // multiple translations may return in the same cycle. This is a
        // maximum-priority event and must be on the same cycle as the
        // cleanup event in TLBCoalescer to avoid a race with IssueProbeEvent
        // caused by TLBCoalescer::MemSidePort::recvReqRetry.
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }

    /**
     * Here we take the appropriate actions based on the result of the
     * TLB lookup.
     */
    void
    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                              PacketPtr pkt)
    {
        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

        assert(translationReturnEvent[virtPageAddr]);
        assert(pkt);

        TranslationState *tmp_sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        int req_cnt = tmp_sender_state->reqCnt.back();
        bool update_stats = !tmp_sender_state->prefetch;

        if (outcome == TLB_HIT) {
            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

        } else if (outcome == TLB_MISS) {

            DPRINTF(GPUTLB, "This is a TLB miss\n");
            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

            if (hasMemSidePort) {
                // the one cycle added here represents the delay from when we
                // get the reply back until we propagate it to the coalescer
                // above.
                if (update_stats) {
                    accessCycles += (req_cnt * 1);
                    localCycles += 1;
                }

                /**
                 * There is a TLB below. Send the coalesced request.
                 * We actually send the very first packet of all the
                 * pending packets for this virtual page address.
                 */
                if (!memSidePort[0]->sendTimingReq(pkt)) {
                    DPRINTF(GPUTLB, "Failed sending translation request to "
                            "lower level TLB for addr %#x\n", virtPageAddr);

                    memSidePort[0]->retries.push_back(pkt);
                } else {
                    DPRINTF(GPUTLB, "Sent translation request to lower level "
                            "TLB for addr %#x\n", virtPageAddr);
                }
            } else {
                // this is the last level TLB. Start a page walk
                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                        "addr %#x\n", virtPageAddr);

                if (update_stats)
                    pageTableCycles -= (req_cnt * curTick());

                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                assert(tlb_event);
                tlb_event->updateOutcome(PAGE_WALK);
                schedule(tlb_event, curTick() + ticks(missLatency2));
            }
        } else if (outcome == PAGE_WALK) {
            if (update_stats)
                pageTableCycles += (req_cnt * curTick());

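            // Editorial note: together with the matching
            // "pageTableCycles -= (req_cnt * curTick())" issued when the
            // walk was scheduled above, the "+=" here implements a
            // start/stop idiom: the stat accumulates
            // req_cnt * (end_tick - start_tick), i.e. the ticks spent on
            // the page walk, weighted by the number of coalesced requests.
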
            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virtPageAddr);

            TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

            Process *p = sender_state->tc->getProcessPtr();
            TlbEntry newEntry;
            Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virtPageAddr);
#endif
            bool success = p->pTable->lookup(vaddr, newEntry);
            if (!success && sender_state->tlbMode != BaseTLB::Execute) {
                if (p->fixupStackFault(vaddr)) {
                    success = p->pTable->lookup(vaddr, newEntry);
                }
            }

            DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                    newEntry.pageStart());

            sender_state->tlbEntry =
                new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);

            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else if (outcome == MISS_RETURN) {
            /** We add an extra cycle in the return path of the translation
             * requests in between the various TLB levels.
             */
            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else {
            assert(false);
        }
    }

    void
    GpuTLB::TLBEvent::process()
    {
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }

    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }

    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }

    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }

    /*
     * recvTimingReq receives a coalesced timing request from a TLBCoalescer
     * and calls issueTLBLookup(). It only rejects the packet if we have
     * exceeded the maximum number of outstanding requests for the TLB.
     */
    bool
    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
    {
        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
            tlb->issueTLBLookup(pkt);
            // update number of outstanding translation requests
            tlb->outstandingReqs++;
            return true;
        } else {
            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                    tlb->outstandingReqs);
            return false;
        }
    }

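    /*
     * Editorial note: when recvTimingReq returns false, the TLBCoalescer is
     * expected to hold on to the packet and retry after this TLB calls
     * sendRetryReq() on its cpu-side ports from cleanup(), once
     * outstandingReqs has been decremented.
     */
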
    /**
     * handleFuncTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * It updates LRU, inserts the TLB entry on a miss
     * depending on the allocation policy and does the required
     * protection checks. It does NOT create a new packet to
     * update the packet's addr; this is done in hsail-gpu code.
     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        GpuTlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        // Do paging checks if it's a normal functional access. If it's for a
        // prefetch, then sometimes you can try to prefetch something that
        // won't pass protection. We don't actually want to fault because
        // there is no demand access to deem this a violation. Just put it in
        // the TLB and it will fault if indeed a future demand access touches
        // it in violation.
        if (!sender_state->prefetch && sender_state->tlbEntry->valid)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
            pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // Do the TLB lookup; stats are updated only for non-prefetch
        // (demand) accesses.
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;
            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);
                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();
                TlbEntry newEntry;

                Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);
#endif

                bool success = p->pTable->lookup(vaddr, newEntry);
                if (!success && sender_state->tlbMode != BaseTLB::Execute) {
                    if (p->fixupStackFault(vaddr))
                        success = p->pTable->lookup(vaddr, newEntry);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
                    assert(success);

                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            newEntry.pageStart());

                    sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr,
                                                             newEntry.paddr,
                                                             success);
                } else {
                    // If this was a prefetch, then do the normal thing if it
                    // was a successful translation. Otherwise, send back an
                    // empty TLB entry so the caller can recognize the failed
                    // prefetch and handle it accordingly.
                    if (success) {
                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                                newEntry.pageStart());

                        sender_state->tlbEntry = new GpuTlbEntry(0,
                                                                 newEntry.vaddr,
                                                                 newEntry.paddr,
                                                                 success);
                    } else {
                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                alignedVaddr);

                        sender_state->tlbEntry = new GpuTlbEntry();

                        return;
                    }
                }
            }
        } else {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    tlb->lookup(pkt->req->getVaddr()));

            GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                             update_stats);

            assert(entry);

            sender_state->tlbEntry =
                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
        }
        // This is the function that would populate pkt->req with the paddr
        // of the translation. But if no translation happens (i.e., the
        // prefetch fails) then the early returns in the code above will keep
        // this function from executing.
        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
    }

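    /*
     * Editorial summary: unlike the timing path, the functional path above
     * completes entirely within the call. It never allocates a TLBEvent,
     * walks the page table inline at the last level, and leaves the packet's
     * senderState carrying a freshly allocated GpuTlbEntry, which the caller
     * is presumably responsible for freeing.
     */
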
    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CPUSidePort never sends anything but replies. No retries
        // expected.
        assert(false);
    }

    AddrRangeList
    GpuTLB::CpuSidePort::getAddrRanges() const
    {
        // currently not checked by the master
        AddrRangeList ranges;

        return ranges;
    }

    /**
     * MemSidePort receives the packet back.
     * We need to call handleTranslationReturn
     * and propagate up the hierarchy.
     */
    bool
    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
    {
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                virt_page_addr);

        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
        assert(tlb_event);
        assert(virt_page_addr == tlb_event->getTLBEventVaddr());

        tlb_event->updateOutcome(MISS_RETURN);
        tlb->schedule(tlb_event, curTick() + tlb->ticks(1));

        return true;
    }

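    /*
     * Editorial note: the one-tick delay used above when rescheduling the
     * event as MISS_RETURN appears to be the same "extra cycle in the return
     * path" that translationReturn() accounts for with its
     * "accessCycles += (req_cnt * 1)" adjustment.
     */
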
    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        assert(false);
    }

    void
    GpuTLB::cleanup()
    {
        while (!cleanupQueue.empty()) {
            Addr cleanup_addr = cleanupQueue.front();
            cleanupQueue.pop();

            // delete TLBEvent
            TLBEvent *old_tlb_event = translationReturnEvent[cleanup_addr];
            delete old_tlb_event;
            translationReturnEvent.erase(cleanup_addr);

            // update number of outstanding requests
            outstandingReqs--;
        }

        /** The higher level coalescer should retry if it has
         * any pending requests.
         */
        for (int i = 0; i < cpuSidePort.size(); ++i) {
            cpuSidePort[i]->sendRetryReq();
        }
    }

    void
    GpuTLB::updatePageFootprint(Addr virt_page_addr)
    {
        std::pair<AccessPatternTable::iterator, bool> ret;

        AccessInfo tmp_access_info;
        tmp_access_info.lastTimeAccessed = 0;
        tmp_access_info.accessesPerPage = 0;
        tmp_access_info.totalReuseDistance = 0;
        tmp_access_info.sumDistance = 0;
        tmp_access_info.meanDistance = 0;

        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
                                  tmp_access_info));

        bool first_page_access = ret.second;

        if (first_page_access) {
            numUniquePages++;
        } else {
            int accessed_before =
                curTick() - ret.first->second.lastTimeAccessed;
            ret.first->second.totalReuseDistance += accessed_before;
        }

        ret.first->second.accessesPerPage++;
        ret.first->second.lastTimeAccessed = curTick();

        if (accessDistance) {
            ret.first->second.localTLBAccesses
                .push_back(localNumTLBAccesses.value());
        }
    }

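    /*
     * Worked example (editorial): if a page was last touched at tick 1000
     * and is touched again at tick 1500, accessed_before == 500 is added to
     * its totalReuseDistance; the page's average reuse distance is then
     * totalReuseDistance / accessesPerPage. With accessDistance enabled, the
     * running localNumTLBAccesses counter is also sampled on every access,
     * to be converted into inter-access distances in exitCallback().
     */
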
    void
    GpuTLB::exitCallback()
    {
        std::ostream *page_stat_file = nullptr;

        if (accessDistance) {
            // print per-page statistics to a separate file (.csv format);
            // simout is the gem5 output directory (default is m5out, or the
            // one specified with -d)
            page_stat_file = simout.create(name().c_str())->stream();

            // print header
            *page_stat_file << "page,max_access_distance,mean_access_distance,"
                            << "stddev_distance" << std::endl;
        }

        // update avg. reuse distance footprint
        AccessPatternTable::iterator iter;
        unsigned int sum_avg_reuse_distance_per_page = 0;

        // iterate through all pages seen by this TLB
        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
                iter->second.accessesPerPage;

            if (accessDistance) {
                unsigned int tmp = iter->second.localTLBAccesses[0];
                unsigned int prev = tmp;

                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
                    if (i) {
                        tmp = prev + 1;
                    }

                    prev = iter->second.localTLBAccesses[i];
                    // update the localTLBAccesses value
                    // with the actual difference
                    iter->second.localTLBAccesses[i] -= tmp;
                    // compute the sum of AccessDistance per page
                    // used later for mean
                    iter->second.sumDistance +=
                        iter->second.localTLBAccesses[i];
                }

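                // Worked trace (editorial): localTLBAccesses holds the global
                // access counter sampled at each touch of this page, e.g.
                // [3, 5, 9]. The loop above rewrites each element to the
                // number of *other* TLB accesses since the previous touch:
                //   i = 0: 3 - 3       -> 0
                //   i = 1: 5 - (3 + 1) -> 1
                //   i = 2: 9 - (5 + 1) -> 3
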
                iter->second.meanDistance =
                    iter->second.sumDistance / iter->second.accessesPerPage;

                // compute std_dev and max (we need a second pass because we
                // need to know the mean value)
                unsigned int max_distance = 0;
                unsigned int stddev_distance = 0;

                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
                    unsigned int tmp_access_distance =
                        iter->second.localTLBAccesses[i];

                    if (tmp_access_distance > max_distance) {
                        max_distance = tmp_access_distance;
                    }

                    // use a signed difference here; an unsigned one would
                    // wrap around whenever the distance is below the mean
                    int diff = (int)tmp_access_distance -
                        (int)iter->second.meanDistance;
                    stddev_distance += pow(diff, 2);
                }

                stddev_distance =
                    sqrt(stddev_distance / iter->second.accessesPerPage);

                if (page_stat_file) {
                    *page_stat_file << std::hex << iter->first << ",";
                    *page_stat_file << std::dec << max_distance << ",";
                    *page_stat_file << std::dec << iter->second.meanDistance
                                    << ",";
                    *page_stat_file << std::dec << stddev_distance;
                    *page_stat_file << std::endl;
                }

                // erase the localTLBAccesses array
                iter->second.localTLBAccesses.clear();
            }
        }

        if (!TLBFootprint.empty()) {
            avgReuseDistance =
                sum_avg_reuse_distance_per_page / TLBFootprint.size();
        }

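        // i.e. (editorial), avgReuseDistance = (1/P) * sum over the P pages
        // in TLBFootprint of (totalReuseDistance / accessesPerPage): a mean
        // of per-page mean reuse distances, expressed in ticks.
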
        // clear the TLBFootprint map
        TLBFootprint.clear();
    }
} // namespace X86ISA

X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    return new X86ISA::GpuTLB(this);
}