gpu_tlb.cc revision 13892
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */

#include "gpu-compute/gpu_tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/logging.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    GpuTLB::GpuTLB(const Params *p)
        : ClockedObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greater than 4KB (as defined in
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        setMask = numSets - 1;

    #if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
    #endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }

    Port &
    GpuTLB::getPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("GpuTLB::getPort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("GpuTLB::getPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("GpuTLB::getPort: unknown port %s\n", if_name);
        }
    }

    TlbEntry*
    GpuTLB::insert(Addr vpn, TlbEntry &entry)
    {
        TlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address
         * The least significant bits are simply masked
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    TlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                TlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {

        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(RegVal));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O
            // permission bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }

    /**
     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks about different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(const RequestPtr &req,
                      ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
    #ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
    #endif

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                //update LRU stack on a hit
                TlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization,
                    // i.e., memory seeding or instr. seeding -> don't
                    // update TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }

    Fault
    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                      seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;

                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
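                // Look the vaddr up in this TLB; on a miss we fall back to
                // the process page table (SE mode only) and, if the walk
                // succeeds, insert the translation below.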
                TlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                              pte->paddr, false, false);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                    badWrite)) {
                    // The page must have been present to get into the TLB
                    // in the first place. We'll assume the reserved bits
                    // are fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a
                    // page fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }


                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing "
                        "protection checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                //Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
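                // The paddr is also remapped into the per-context local
                // APIC address range via x86LocalAPICAddress().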
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }

    Fault
    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
                            Mode mode, int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }

    void
    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }


    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        ClockedObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */

    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;
        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            // the cycle stats are debited with the issue tick here and
            // credited with the completion tick in translationReturn()
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr,
                               tlbOutcome tlb_outcome, PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do Paging protection checks. If we encounter a page fault,
     * the simulator panics.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   TlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 &&
                       !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            panic("Page fault detected");
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so return a page
            // fault that reflects that happening.
            panic("Page fault detected");
        }
    }

    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * The page fault path calls handleTranslationReturn with TLB_MISS as
     * the tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                    tlbOutcome tlb_outcome, PacketPtr pkt)
    {

        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                    vaddr);
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a
            // pointer to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        pagingProtectionChecks(tc, pkt, local_entry, mode);
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
            pkt->req->setFlags(Request::UNCACHEABLE);
        }

        //send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        //schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle.
        // This is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }

    /**
     * Here we take the appropriate actions based on the result of the
     * TLB lookup.
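     * On a TLB_HIT we reply to the coalescer right away. On a TLB_MISS we
     * either forward the request to the lower TLB level (if a mem-side
     * port is connected) or, at the last level, schedule a PAGE_WALK.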
     */
    void
    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                              PacketPtr pkt)
    {
        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

        assert(translationReturnEvent[virtPageAddr]);
        assert(pkt);

        TranslationState *tmp_sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        int req_cnt = tmp_sender_state->reqCnt.back();
        bool update_stats = !tmp_sender_state->prefetch;


        if (outcome == TLB_HIT) {
            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

        } else if (outcome == TLB_MISS) {

            DPRINTF(GPUTLB, "This is a TLB miss\n");
            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

            if (hasMemSidePort) {
                // the one cycle added here represents the delay from when
                // we get the reply back till when we propagate it to the
                // coalescer above.
                if (update_stats) {
                    accessCycles += (req_cnt * 1);
                    localCycles += 1;
                }

                /**
                 * There is a TLB below. Send the coalesced request.
                 * We actually send the very first packet of all the
                 * pending packets for this virtual page address.
                 */
                if (!memSidePort[0]->sendTimingReq(pkt)) {
                    DPRINTF(GPUTLB, "Failed sending translation request to "
                            "lower level TLB for addr %#x\n", virtPageAddr);

                    memSidePort[0]->retries.push_back(pkt);
                } else {
                    DPRINTF(GPUTLB, "Sent translation request to lower level "
                            "TLB for addr %#x\n", virtPageAddr);
                }
            } else {
                //this is the last level TLB. Start a page walk
                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                        "addr %#x\n", virtPageAddr);

                if (update_stats)
                    pageTableCycles -= (req_cnt * curTick());

                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                assert(tlb_event);
                tlb_event->updateOutcome(PAGE_WALK);
                schedule(tlb_event, curTick() + ticks(missLatency2));
            }
        } else if (outcome == PAGE_WALK) {
            if (update_stats)
                pageTableCycles += (req_cnt * curTick());

            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virtPageAddr);

            TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

            Process *p = sender_state->tc->getProcessPtr();
            Addr vaddr = pkt->req->getVaddr();
    #ifndef NDEBUG
            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virtPageAddr);
    #endif
            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                p->fixupStackFault(vaddr)) {
                pte = p->pTable->lookup(vaddr);
            }

            if (pte) {
                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                        pte->paddr);

                sender_state->tlbEntry =
                    new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
                                 false);
            } else {
                sender_state->tlbEntry = nullptr;
            }

            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else if (outcome == MISS_RETURN) {
            /** we add an extra cycle in the return path of the translation
             * requests in between the various TLB levels.
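             * MISS_RETURN is set by MemSidePort::recvTimingResp, which
             * schedules this event one TLB cycle after the response
             * arrives.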
             */
            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else {
            panic("Unexpected TLB outcome %d", outcome);
        }
    }

    void
    GpuTLB::TLBEvent::process()
    {
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }

    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }

    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }

    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }

    /*
     * recvTimingReq receives a coalesced timing request from a TLBCoalescer
     * and calls issueTLBLookup().
     * It only rejects the packet if we have exceeded the max
     * outstanding number of requests for the TLB
     */
    bool
    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
    {
        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
            tlb->issueTLBLookup(pkt);
            // update number of outstanding translation requests
            tlb->outstandingReqs++;
            return true;
        } else {
            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                    tlb->outstandingReqs);
            return false;
        }
    }

    /**
     * handleFuncTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * It updates LRU, inserts the TLB entry on a miss
     * depending on the allocation policy and does the required
     * protection checks. It does NOT create a new packet to
     * update the packet's addr; this is done in hsail-gpu code.
     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a
            // pointer to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        /**
         * Do paging checks if it's a normal functional access. If it's for
         * a prefetch, then sometimes you can try to prefetch something that
         * won't pass protection. We don't actually want to fault because
         * there is no demand access to deem this a violation. Just put it
         * in the TLB and it will fault if indeed a future demand access
         * touches it in violation.
         *
         * This feature could be used to explore security issues around
         * speculative memory accesses.
         */
        if (!sender_state->prefetch && sender_state->tlbEntry)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
            pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // do the TLB lookup without updating the stats
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;
            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);
                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();

                Addr vaddr = pkt->req->getVaddr();
    #ifndef NDEBUG
                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);
    #endif

                const EmulationPageTable::Entry *pte =
                    p->pTable->lookup(vaddr);
                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                    p->fixupStackFault(vaddr)) {
                    pte = p->pTable->lookup(vaddr);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
                    assert(pte);

                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            pte->paddr);

                    sender_state->tlbEntry =
                        new TlbEntry(p->pid(), virt_page_addr,
                                     pte->paddr, false, false);
                } else {
                    // If this was a prefetch, then do the normal thing if
                    // it was a successful translation. Otherwise, send an
                    // empty TLB entry back so that it can be figured out
                    // as empty and handled accordingly.
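                    // A null tlbEntry pointer in the senderState is what
                    // signals the failed prefetch to the caller.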
                    if (pte) {
                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                                pte->paddr);

                        sender_state->tlbEntry =
                            new TlbEntry(p->pid(), virt_page_addr,
                                         pte->paddr, false, false);
                    } else {
                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                alignedVaddr);

                        sender_state->tlbEntry = nullptr;

                        return;
                    }
                }
            }
        } else {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    tlb->lookup(pkt->req->getVaddr()));

            TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                          update_stats);

            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);
        }
        // This is the function that would populate pkt->req with the paddr
        // of the translation. But if no translation happens (i.e. Prefetch
        // fails) then the early returns in the above code will keep this
        // function from executing.
        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
    }

    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CPUSidePort never sends anything but replies. No retries
        // expected.
        panic("recvReqRetry called");
    }

    AddrRangeList
    GpuTLB::CpuSidePort::getAddrRanges() const
    {
        // currently not checked by the master
        AddrRangeList ranges;

        return ranges;
    }

    /**
     * MemSidePort receives the packet back.
     * We need to call the handleTranslationReturn
     * and propagate up the hierarchy.
     */
    bool
    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
    {
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                virt_page_addr);

        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
        assert(tlb_event);
        assert(virt_page_addr == tlb_event->getTLBEventVaddr());

        tlb_event->updateOutcome(MISS_RETURN);
        tlb->schedule(tlb_event, curTick() + tlb->ticks(1));

        return true;
    }

    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        panic("recvReqRetry called");
    }

    void
    GpuTLB::cleanup()
    {
        while (!cleanupQueue.empty()) {
            Addr cleanup_addr = cleanupQueue.front();
            cleanupQueue.pop();

            // delete TLBEvent
            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
            delete old_tlb_event;
            translationReturnEvent.erase(cleanup_addr);

            // update number of outstanding requests
            outstandingReqs--;
        }

        /** the higher level coalescer should retry if it has
         * any pending requests.
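         * Every CPU-side port is poked here; a coalescer with nothing
         * pending is expected to treat the retry as a no-op.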
         */
        for (int i = 0; i < cpuSidePort.size(); ++i) {
            cpuSidePort[i]->sendRetryReq();
        }
    }

    void
    GpuTLB::updatePageFootprint(Addr virt_page_addr)
    {

        std::pair<AccessPatternTable::iterator, bool> ret;

        AccessInfo tmp_access_info;
        tmp_access_info.lastTimeAccessed = 0;
        tmp_access_info.accessesPerPage = 0;
        tmp_access_info.totalReuseDistance = 0;
        tmp_access_info.sumDistance = 0;
        tmp_access_info.meanDistance = 0;

        ret = TLBFootprint.insert(
                AccessPatternTable::value_type(virt_page_addr,
                                               tmp_access_info));

        bool first_page_access = ret.second;

        if (first_page_access) {
            numUniquePages++;
        } else {
            int accessed_before;
            accessed_before = curTick() - ret.first->second.lastTimeAccessed;
            ret.first->second.totalReuseDistance += accessed_before;
        }

        ret.first->second.accessesPerPage++;
        ret.first->second.lastTimeAccessed = curTick();

        if (accessDistance) {
            ret.first->second.localTLBAccesses
                .push_back(localNumTLBAccesses.value());
        }
    }

    void
    GpuTLB::exitCallback()
    {
        std::ostream *page_stat_file = nullptr;

        if (accessDistance) {

            // print per page statistics to a separate file (.csv format)
            // simout is the gem5 output directory (default is m5out or the
            // one specified with -d)
            page_stat_file = simout.create(name().c_str())->stream();

            // print header
            *page_stat_file
                << "page,max_access_distance,mean_access_distance, "
                << "stddev_distance" << std::endl;
        }

        // update avg. reuse distance footprint
        AccessPatternTable::iterator iter, iter_begin, iter_end;
        unsigned int sum_avg_reuse_distance_per_page = 0;

        // iterate through all pages seen by this TLB
        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end();
             iter++) {
            sum_avg_reuse_distance_per_page +=
                iter->second.totalReuseDistance /
                iter->second.accessesPerPage;

            if (accessDistance) {
                unsigned int tmp = iter->second.localTLBAccesses[0];
                unsigned int prev = tmp;

                for (int i = 0; i < iter->second.localTLBAccesses.size();
                     ++i) {
                    if (i) {
                        tmp = prev + 1;
                    }

                    prev = iter->second.localTLBAccesses[i];
                    // update the localTLBAccesses value
                    // with the actual difference
                    iter->second.localTLBAccesses[i] -= tmp;
                    // compute the sum of AccessDistance per page
                    // used later for mean
                    iter->second.sumDistance +=
                        iter->second.localTLBAccesses[i];
                }

                iter->second.meanDistance =
                    iter->second.sumDistance / iter->second.accessesPerPage;

                // compute std_dev and max (we need a second round because
                // we need to know the mean value)
                unsigned int max_distance = 0;
                unsigned int stddev_distance = 0;

                for (int i = 0; i < iter->second.localTLBAccesses.size();
                     ++i) {
                    unsigned int tmp_access_distance =
                        iter->second.localTLBAccesses[i];

                    if (tmp_access_distance > max_distance) {
                        max_distance = tmp_access_distance;
                    }

                    unsigned int diff =
                        tmp_access_distance - iter->second.meanDistance;
                    stddev_distance += pow(diff, 2);

                }

                stddev_distance =
                    sqrt(stddev_distance / iter->second.accessesPerPage);

                if (page_stat_file) {
                    *page_stat_file << std::hex << iter->first << ",";
                    *page_stat_file << std::dec << max_distance << ",";
                    *page_stat_file << std::dec << iter->second.meanDistance
                                    << ",";
                    *page_stat_file << std::dec << stddev_distance;
                    *page_stat_file << std::endl;
                }

                // erase the localTLBAccesses array
                iter->second.localTLBAccesses.clear();
            }
        }

        if (!TLBFootprint.empty()) {
            avgReuseDistance =
                sum_avg_reuse_distance_per_page / TLBFootprint.size();
        }

        //clear the TLBFootprint map
        TLBFootprint.clear();
    }
} // namespace X86ISA

X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    return new X86ISA::GpuTLB(this);
}