gpu_tlb.cc revision 12717
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */

#include "gpu-compute/gpu_tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        FA = (size == assoc);
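        // Illustrative example of the geometry above: with size == 64
        // entries and assoc == 4, the TLB has numSets == 16; tlb[0..3]
        // back set 0, tlb[4..7] back set 1, and so on. When size == assoc
        // there is a single set and the TLB is fully associative.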
        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greater than 4KB (as defined by
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        setMask = numSets - 1;

#if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
#endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }

    BaseSlavePort&
    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("GpuTLB::getSlavePort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else {
            panic("GpuTLB::getSlavePort: unknown port %s\n", if_name);
        }
    }

    BaseMasterPort&
    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("GpuTLB::getMasterPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("GpuTLB::getMasterPort: unknown port %s\n", if_name);
        }
    }

    TlbEntry*
    GpuTLB::insert(Addr vpn, TlbEntry &entry)
    {
        TlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address
         * The least significant bits are simply masked
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }
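    /*
     * Worked example (illustrative): with 4KB pages (TheISA::PageShift ==
     * 12) and numSets == 16 (setMask == 0xF), vpn 0x7f8d3000 selects set
     * (0x7f8d3000 >> 12) & 0xF == 0x3. lookupIt() below applies the same
     * masking, so insert() and lookup() always agree on the set.
     */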
    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    TlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                TlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {

        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;
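            // The switch below maps an architectural MSR number (the low
            // bits of the shifted vaddr) to the corresponding gem5 MiscReg
            // index, e.g. MSR 0x10 -> MISCREG_TSC and MSR 0xC0000080 ->
            // MISCREG_EFER. Unrecognized MSRs fall through to a general
            // protection fault.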
            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
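            // Illustrative note: MiscReg is a 64-bit type, so with
            // sizeof(MiscReg) == 8 MISCREG_TSC lands at paddr
            // MISCREG_TSC * 8, MISCREG_APIC_BASE at MISCREG_APIC_BASE * 8,
            // and so on; every MSR gets its own disjoint 8-byte slot.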
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(MiscReg));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));
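            // Illustrative example of the legacy PCI config mechanism
            // handled below: writing 0x80001008 to port 0xCF8 sets the
            // enable bit (bit 31) and latches a config address; a
            // subsequent access to port 0xCFC is then routed to
            // PhysAddrPrefixPciConfig | 0x1008.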
            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }

    /**
     * tlbLookup will only perform a TLB lookup returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks about different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
#ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
#endif

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                //update LRU stack on a hit
                TlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }

    Fault
    GpuTLB::translate(RequestPtr req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;

                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }
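            // Illustrative: for a normal segment with limit 0xFFFF, an
            // access at offset 0x10000 is rejected above; for an
            // expand-down segment the valid offsets lie above the limit
            // instead, which is why the comparison is inverted.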
            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                TlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                              pte->paddr, false, false);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                    badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }


                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);
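                // Worked example (illustrative): for a 4KB page,
                // entry->paddr == 0x2b000 and vaddr == 0x7f8d3abc give
                // paddr == 0x2b000 | 0xabc == 0x2babc below.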
                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                //Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }

    Fault
    GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
                            int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }

    void
    GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }


    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }
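    /**
     * Note on the cycle accounting used below (illustrative): counters such
     * as accessCycles and localCycles are decremented by curTick() when a
     * lookup is issued and incremented by curTick() when it completes
     * (scaled by the coalesced request count for the access stats), so over
     * a full issue/complete pair each request contributes exactly its
     * in-flight time.
     */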
    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */

    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        RequestPtr tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr,
                               tlbOutcome tlb_outcome, PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do Paging protection checks. If we encounter a page fault, then
     * an assertion is fired.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   TlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 &&
                       !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            assert(false);
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so return a page
            // fault that reflects that happening.
            assert(false);
        }
    }
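    // In other words, a protection violation on this timing path is treated
    // as a modeling error rather than an architectural fault: the GPU TLB
    // does not deliver faults from here, so the assertions above fire
    // instead.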
    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * In the latter cases it is invoked with TLB_MISS as the tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                    tlbOutcome tlb_outcome, PacketPtr pkt)
    {

        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                    vaddr);
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a
            // pointer to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        pagingProtectionChecks(tc, pkt, local_entry, mode);
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
            pkt->req->setFlags(Request::UNCACHEABLE);
        }

        //send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        //schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle
        // this is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }
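    /*
     * Outcome progression handled below (illustrative): TLB_HIT -> respond
     * to the coalescer; TLB_MISS -> forward to a lower-level TLB if one is
     * connected, otherwise reschedule this event as PAGE_WALK; PAGE_WALK ->
     * consult the page table and respond as a miss; MISS_RETURN -> respond
     * as a miss after the extra inter-level cycle.
     */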
    /**
     * Here we take the appropriate actions based on the result of the
     * TLB lookup.
     */
    void
    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                              PacketPtr pkt)
    {
        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

        assert(translationReturnEvent[virtPageAddr]);
        assert(pkt);

        TranslationState *tmp_sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        int req_cnt = tmp_sender_state->reqCnt.back();
        bool update_stats = !tmp_sender_state->prefetch;


        if (outcome == TLB_HIT) {
            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

        } else if (outcome == TLB_MISS) {

            DPRINTF(GPUTLB, "This is a TLB miss\n");
            if (update_stats) {
                accessCycles += (req_cnt*curTick());
                localCycles += curTick();
            }

            if (hasMemSidePort) {
                // the one cycle added here represents the delay from when
                // we get the reply back till when we propagate it to the
                // coalescer above.
                if (update_stats) {
                    accessCycles += (req_cnt * 1);
                    localCycles += 1;
                }

                /**
                 * There is a TLB below. Send the coalesced request.
                 * We actually send the very first packet of all the
                 * pending packets for this virtual page address.
                 */
                if (!memSidePort[0]->sendTimingReq(pkt)) {
                    DPRINTF(GPUTLB, "Failed sending translation request to "
                            "lower level TLB for addr %#x\n", virtPageAddr);

                    memSidePort[0]->retries.push_back(pkt);
                } else {
                    DPRINTF(GPUTLB, "Sent translation request to lower level "
                            "TLB for addr %#x\n", virtPageAddr);
                }
            } else {
                //this is the last level TLB. Start a page walk
                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                        "addr %#x\n", virtPageAddr);

                if (update_stats)
                    pageTableCycles -= (req_cnt*curTick());

                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                assert(tlb_event);
                tlb_event->updateOutcome(PAGE_WALK);
                schedule(tlb_event, curTick() + ticks(missLatency2));
            }
        } else if (outcome == PAGE_WALK) {
            if (update_stats)
                pageTableCycles += (req_cnt*curTick());

            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virtPageAddr);

            TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

            Process *p = sender_state->tc->getProcessPtr();
            Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virtPageAddr);
#endif
            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                p->fixupStackFault(vaddr)) {
                pte = p->pTable->lookup(vaddr);
            }

            if (pte) {
                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                        pte->paddr);

                sender_state->tlbEntry =
                    new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
                                 false);
            } else {
                sender_state->tlbEntry = nullptr;
            }

            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else if (outcome == MISS_RETURN) {
            /** we add an extra cycle in the return path of the translation
             * requests in between the various TLB levels.
             */
            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else {
            assert(false);
        }
    }
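    /*
     * Note (illustrative): in syscall-emulation mode the PAGE_WALK outcome
     * above is modeled as a fixed missLatency2 delay followed by a
     * functional lookup in the process page table, not as a real x86
     * hardware table walk.
     */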
    void
    GpuTLB::TLBEvent::process()
    {
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }

    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }

    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }

    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }

    /*
     * recvTiming receives a coalesced timing request from a TLBCoalescer
     * and calls issueTLBLookup().
     * It only rejects the packet if we have exceeded the max
     * outstanding number of requests for the TLB
     */
    bool
    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
    {
        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
            tlb->issueTLBLookup(pkt);
            // update number of outstanding translation requests
            tlb->outstandingReqs++;
            return true;
        } else {
            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                    tlb->outstandingReqs);
            return false;
        }
    }
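    // Flow-control note: the TLB accepts at most maxCoalescedReqs in-flight
    // translations; once recvTimingReq() starts returning false, the
    // coalescer's retry is triggered by cleanup() calling sendRetryReq()
    // on the CPU-side ports.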
    /**
     * handleFuncTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * It updates LRU, inserts the TLB entry on a miss
     * depending on the allocation policy and does the required
     * protection checks. It does NOT create a new packet to
     * update the packet's addr; this is done in hsail-gpu code.
     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a
            // pointer to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        /**
         * Do paging checks if it's a normal functional access. If it's for a
         * prefetch, then sometimes you can try to prefetch something that
         * won't pass protection. We don't actually want to fault because
         * there is no demand access to deem this a violation. Just put it
         * in the TLB and it will fault if indeed a future demand access
         * touches it in violation.
         *
         * This feature could be used to explore security issues around
         * speculative memory accesses.
         */
        if (!sender_state->prefetch && sender_state->tlbEntry)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
            pkt->req->setFlags(Request::UNCACHEABLE);
    }
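    // The functional path below mirrors the timing path above but completes
    // within a single call: misses recurse down the TLB hierarchy via
    // sendFunctional() and, at the last level, fall back to the process
    // page table. (Descriptive note; see recvFunctional() for the details.)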
    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // do the TLB lookup without updating the stats
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;
            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);
                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();

                Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);
#endif

                const EmulationPageTable::Entry *pte =
                    p->pTable->lookup(vaddr);
                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                    p->fixupStackFault(vaddr)) {
                    pte = p->pTable->lookup(vaddr);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
                    assert(pte);

                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            pte->paddr);

                    sender_state->tlbEntry =
                        new TlbEntry(p->pid(), virt_page_addr,
                                     pte->paddr, false, false);
                } else {
                    // If this was a prefetch, then do the normal thing if it
                    // was a successful translation. Otherwise, send an empty
                    // TLB entry back so that it can be figured out as empty
                    // and handled accordingly.
                    if (pte) {
                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                                pte->paddr);

                        sender_state->tlbEntry =
                            new TlbEntry(p->pid(), virt_page_addr,
                                         pte->paddr, false, false);
                    } else {
                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                alignedVaddr);

                        sender_state->tlbEntry = nullptr;

                        return;
                    }
                }
            }
        } else {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    tlb->lookup(pkt->req->getVaddr()));

            TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                          update_stats);

            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);
        }
        // This is the function that would populate pkt->req with the paddr
        // of the translation. But if no translation happens (i.e., Prefetch
        // fails) then the early returns in the above code will keep this
        // function from executing.
        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
    }

    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CpuSidePort never sends anything but replies. No retries
        // expected.
        assert(false);
    }

    AddrRangeList
    GpuTLB::CpuSidePort::getAddrRanges() const
    {
        // currently not checked by the master
        AddrRangeList ranges;

        return ranges;
    }

    /**
     * MemSidePort receives the packet back.
     * We need to call the handleTranslationReturn
     * and propagate up the hierarchy.
     */
    bool
    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
    {
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                virt_page_addr);

        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
        assert(tlb_event);
        assert(virt_page_addr == tlb_event->getTLBEventVaddr());

        tlb_event->updateOutcome(MISS_RETURN);
        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));

        return true;
    }

    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        assert(false);
    }
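    /*
     * Deferred cleanup (descriptive note): finished TLBEvents are not
     * deleted inside their own process() call; they are queued on
     * cleanupQueue and reclaimed here, at maximum event priority, which
     * also frees an outstanding-request slot for the coalescer's next
     * request.
     */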
    void
    GpuTLB::cleanup()
    {
        while (!cleanupQueue.empty()) {
            Addr cleanup_addr = cleanupQueue.front();
            cleanupQueue.pop();

            // delete TLBEvent
            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
            delete old_tlb_event;
            translationReturnEvent.erase(cleanup_addr);

            // update number of outstanding requests
            outstandingReqs--;
        }

        /** the higher level coalescer should retry if it has
         * any pending requests.
         */
        for (int i = 0; i < cpuSidePort.size(); ++i) {
            cpuSidePort[i]->sendRetryReq();
        }
    }

    void
    GpuTLB::updatePageFootprint(Addr virt_page_addr)
    {

        std::pair<AccessPatternTable::iterator, bool> ret;

        AccessInfo tmp_access_info;
        tmp_access_info.lastTimeAccessed = 0;
        tmp_access_info.accessesPerPage = 0;
        tmp_access_info.totalReuseDistance = 0;
        tmp_access_info.sumDistance = 0;
        tmp_access_info.meanDistance = 0;

        ret = TLBFootprint.insert(
            AccessPatternTable::value_type(virt_page_addr, tmp_access_info));

        bool first_page_access = ret.second;

        if (first_page_access) {
            numUniquePages++;
        } else {
            int accessed_before;
            accessed_before = curTick() - ret.first->second.lastTimeAccessed;
            ret.first->second.totalReuseDistance += accessed_before;
        }

        ret.first->second.accessesPerPage++;
        ret.first->second.lastTimeAccessed = curTick();

        if (accessDistance) {
            ret.first->second.localTLBAccesses
                .push_back(localNumTLBAccesses.value());
        }
    }
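    /*
     * Illustrative example of the access-distance bookkeeping above: if a
     * page is touched when localNumTLBAccesses is 5, 9, and 17, its
     * localTLBAccesses vector holds {5, 9, 17}; exitCallback() below
     * converts these to distances {0, 3, 7} (the number of accesses to
     * other pages in between) before computing the max, mean, and standard
     * deviation per page.
     */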
    void
    GpuTLB::exitCallback()
    {
        std::ostream *page_stat_file = nullptr;

        if (accessDistance) {

            // print per page statistics to a separate file (.csv format)
            // simout is the gem5 output directory (default is m5out or the
            // one specified with -d)
            page_stat_file = simout.create(name().c_str())->stream();

            // print header
            *page_stat_file << "page,max_access_distance,mean_access_distance, "
                            << "stddev_distance" << std::endl;
        }

        // update avg. reuse distance footprint
        AccessPatternTable::iterator iter, iter_begin, iter_end;
        unsigned int sum_avg_reuse_distance_per_page = 0;

        // iterate through all pages seen by this TLB
        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
            sum_avg_reuse_distance_per_page +=
                iter->second.totalReuseDistance /
                iter->second.accessesPerPage;

            if (accessDistance) {
                unsigned int tmp = iter->second.localTLBAccesses[0];
                unsigned int prev = tmp;

                for (int i = 0; i < iter->second.localTLBAccesses.size();
                     ++i) {
                    if (i) {
                        tmp = prev + 1;
                    }

                    prev = iter->second.localTLBAccesses[i];
                    // update the localTLBAccesses value
                    // with the actual difference
                    iter->second.localTLBAccesses[i] -= tmp;
                    // compute the sum of AccessDistance per page
                    // used later for mean
                    iter->second.sumDistance +=
                        iter->second.localTLBAccesses[i];
                }

                iter->second.meanDistance =
                    iter->second.sumDistance / iter->second.accessesPerPage;

                // compute std_dev and max (we need a second round because
                // we need to know the mean value)
                unsigned int max_distance = 0;
                unsigned int stddev_distance = 0;

                for (int i = 0; i < iter->second.localTLBAccesses.size();
                     ++i) {
                    unsigned int tmp_access_distance =
                        iter->second.localTLBAccesses[i];

                    if (tmp_access_distance > max_distance) {
                        max_distance = tmp_access_distance;
                    }

                    unsigned int diff =
                        tmp_access_distance - iter->second.meanDistance;
                    stddev_distance += pow(diff, 2);

                }

                stddev_distance =
                    sqrt(stddev_distance/iter->second.accessesPerPage);

                if (page_stat_file) {
                    *page_stat_file << std::hex << iter->first << ",";
                    *page_stat_file << std::dec << max_distance << ",";
                    *page_stat_file << std::dec << iter->second.meanDistance
                                    << ",";
                    *page_stat_file << std::dec << stddev_distance;
                    *page_stat_file << std::endl;
                }

                // erase the localTLBAccesses array
                iter->second.localTLBAccesses.clear();
            }
        }

        if (!TLBFootprint.empty()) {
            avgReuseDistance =
                sum_avg_reuse_distance_per_page / TLBFootprint.size();
        }

        //clear the TLBFootprint map
        TLBFootprint.clear();
    }
} // namespace X86ISA

X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    return new X86ISA::GpuTLB(this);
}