1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
 *
 * Author: Lisa Hsu
 */

#include "gpu-compute/gpu_tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/logging.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    /**
     * Construct a set-associative GPU TLB from the Python params.
     * The backing storage is a flat vector of TlbEntry objects; each set
     * owns a freeList (unused ways) and an entryList (allocated ways in
     * LRU order, most-recently-used at the front).
     */
    GpuTLB::GpuTLB(const Params *p)
        : ClockedObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        // Carve the flat entry vector into per-set free lists; entry
        // (set, way) lives at index set * assoc + way.
        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        // Fully associative iff there is a single set.
        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greather than 4KB (as defined in the
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        // NOTE(review): this mask only selects a set correctly when
        // numSets is a power of two — presumably guaranteed by the
        // Python config; verify if sizes/assoc become configurable.
        setMask = numSets - 1;

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
128 GpuTLB::~GpuTLB() 129 { 130 // make sure all the hash-maps are empty 131 assert(translationReturnEvent.empty()); 132 } 133 134 Port & 135 GpuTLB::getPort(const std::string &if_name, PortID idx) 136 { 137 if (if_name == "slave") { 138 if (idx >= static_cast<PortID>(cpuSidePort.size())) { 139 panic("TLBCoalescer::getPort: unknown index %d\n", idx); 140 } 141 142 return *cpuSidePort[idx]; 143 } else if (if_name == "master") { 144 if (idx >= static_cast<PortID>(memSidePort.size())) { 145 panic("TLBCoalescer::getPort: unknown index %d\n", idx); 146 } 147 148 hasMemSidePort = true; 149 150 return *memSidePort[idx]; 151 } else { 152 panic("TLBCoalescer::getPort: unknown port %s\n", if_name); 153 } 154 } 155 156 TlbEntry* 157 GpuTLB::insert(Addr vpn, TlbEntry &entry) 158 { 159 TlbEntry *newEntry = nullptr; 160 161 /** 162 * vpn holds the virtual page address 163 * The least significant bits are simply masked 164 */ 165 int set = (vpn >> TheISA::PageShift) & setMask; 166 167 if (!freeList[set].empty()) { 168 newEntry = freeList[set].front(); 169 freeList[set].pop_front(); 170 } else { 171 newEntry = entryList[set].back(); 172 entryList[set].pop_back(); 173 } 174 175 *newEntry = entry; 176 newEntry->vaddr = vpn; 177 entryList[set].push_front(newEntry); 178 179 return newEntry; 180 } 181 182 GpuTLB::EntryList::iterator 183 GpuTLB::lookupIt(Addr va, bool update_lru) 184 { 185 int set = (va >> TheISA::PageShift) & setMask; 186 187 if (FA) { 188 assert(!set); 189 } 190 191 auto entry = entryList[set].begin(); 192 for (; entry != entryList[set].end(); ++entry) { 193 int page_size = (*entry)->size(); 194 195 if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) { 196 DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x " 197 "with size %#x.\n", va, (*entry)->vaddr, page_size); 198 199 if (update_lru) { 200 entryList[set].push_front(*entry); 201 entryList[set].erase(entry); 202 entry = entryList[set].begin(); 203 } 204 205 break; 206 } 207 } 208 209 return 
entry; 210 } 211 212 TlbEntry* 213 GpuTLB::lookup(Addr va, bool update_lru) 214 { 215 int set = (va >> TheISA::PageShift) & setMask; 216 217 auto entry = lookupIt(va, update_lru); 218 219 if (entry == entryList[set].end()) 220 return nullptr; 221 else 222 return *entry; 223 } 224 225 void 226 GpuTLB::invalidateAll() 227 { 228 DPRINTF(GPUTLB, "Invalidating all entries.\n"); 229 230 for (int i = 0; i < numSets; ++i) { 231 while (!entryList[i].empty()) { 232 TlbEntry *entry = entryList[i].front(); 233 entryList[i].pop_front(); 234 freeList[i].push_back(entry); 235 } 236 } 237 } 238 239 void 240 GpuTLB::setConfigAddress(uint32_t addr) 241 { 242 configAddress = addr; 243 } 244 245 void 246 GpuTLB::invalidateNonGlobal() 247 { 248 DPRINTF(GPUTLB, "Invalidating all non global entries.\n"); 249 250 for (int i = 0; i < numSets; ++i) { 251 for (auto entryIt = entryList[i].begin(); 252 entryIt != entryList[i].end();) { 253 if (!(*entryIt)->global) { 254 freeList[i].push_back(*entryIt); 255 entryList[i].erase(entryIt++); 256 } else { 257 ++entryIt; 258 } 259 } 260 } 261 } 262 263 void 264 GpuTLB::demapPage(Addr va, uint64_t asn) 265 { 266 267 int set = (va >> TheISA::PageShift) & setMask; 268 auto entry = lookupIt(va, false); 269 270 if (entry != entryList[set].end()) { 271 freeList[set].push_back(*entry); 272 entryList[set].erase(entry); 273 } 274 } 275 276 Fault 277 GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc) 278 { 279 DPRINTF(GPUTLB, "Addresses references internal memory.\n"); 280 Addr vaddr = req->getVaddr(); 281 Addr prefix = (vaddr >> 3) & IntAddrPrefixMask; 282 283 if (prefix == IntAddrPrefixCPUID) { 284 panic("CPUID memory space not yet implemented!\n"); 285 } else if (prefix == IntAddrPrefixMSR) { 286 vaddr = vaddr >> 3; 287 req->setFlags(Request::MMAPPED_IPR); 288 Addr regNum = 0; 289 290 switch (vaddr & ~IntAddrPrefixMask) { 291 case 0x10: 292 regNum = MISCREG_TSC; 293 break; 294 case 0x1B: 295 regNum = MISCREG_APIC_BASE; 296 break; 297 case 0xFE: 
298 regNum = MISCREG_MTRRCAP; 299 break; 300 case 0x174: 301 regNum = MISCREG_SYSENTER_CS; 302 break; 303 case 0x175: 304 regNum = MISCREG_SYSENTER_ESP; 305 break; 306 case 0x176: 307 regNum = MISCREG_SYSENTER_EIP; 308 break; 309 case 0x179: 310 regNum = MISCREG_MCG_CAP; 311 break; 312 case 0x17A: 313 regNum = MISCREG_MCG_STATUS; 314 break; 315 case 0x17B: 316 regNum = MISCREG_MCG_CTL; 317 break; 318 case 0x1D9: 319 regNum = MISCREG_DEBUG_CTL_MSR; 320 break; 321 case 0x1DB: 322 regNum = MISCREG_LAST_BRANCH_FROM_IP; 323 break; 324 case 0x1DC: 325 regNum = MISCREG_LAST_BRANCH_TO_IP; 326 break; 327 case 0x1DD: 328 regNum = MISCREG_LAST_EXCEPTION_FROM_IP; 329 break; 330 case 0x1DE: 331 regNum = MISCREG_LAST_EXCEPTION_TO_IP; 332 break; 333 case 0x200: 334 regNum = MISCREG_MTRR_PHYS_BASE_0; 335 break; 336 case 0x201: 337 regNum = MISCREG_MTRR_PHYS_MASK_0; 338 break; 339 case 0x202: 340 regNum = MISCREG_MTRR_PHYS_BASE_1; 341 break; 342 case 0x203: 343 regNum = MISCREG_MTRR_PHYS_MASK_1; 344 break; 345 case 0x204: 346 regNum = MISCREG_MTRR_PHYS_BASE_2; 347 break; 348 case 0x205: 349 regNum = MISCREG_MTRR_PHYS_MASK_2; 350 break; 351 case 0x206: 352 regNum = MISCREG_MTRR_PHYS_BASE_3; 353 break; 354 case 0x207: 355 regNum = MISCREG_MTRR_PHYS_MASK_3; 356 break; 357 case 0x208: 358 regNum = MISCREG_MTRR_PHYS_BASE_4; 359 break; 360 case 0x209: 361 regNum = MISCREG_MTRR_PHYS_MASK_4; 362 break; 363 case 0x20A: 364 regNum = MISCREG_MTRR_PHYS_BASE_5; 365 break; 366 case 0x20B: 367 regNum = MISCREG_MTRR_PHYS_MASK_5; 368 break; 369 case 0x20C: 370 regNum = MISCREG_MTRR_PHYS_BASE_6; 371 break; 372 case 0x20D: 373 regNum = MISCREG_MTRR_PHYS_MASK_6; 374 break; 375 case 0x20E: 376 regNum = MISCREG_MTRR_PHYS_BASE_7; 377 break; 378 case 0x20F: 379 regNum = MISCREG_MTRR_PHYS_MASK_7; 380 break; 381 case 0x250: 382 regNum = MISCREG_MTRR_FIX_64K_00000; 383 break; 384 case 0x258: 385 regNum = MISCREG_MTRR_FIX_16K_80000; 386 break; 387 case 0x259: 388 regNum = MISCREG_MTRR_FIX_16K_A0000; 389 
break; 390 case 0x268: 391 regNum = MISCREG_MTRR_FIX_4K_C0000; 392 break; 393 case 0x269: 394 regNum = MISCREG_MTRR_FIX_4K_C8000; 395 break; 396 case 0x26A: 397 regNum = MISCREG_MTRR_FIX_4K_D0000; 398 break; 399 case 0x26B: 400 regNum = MISCREG_MTRR_FIX_4K_D8000; 401 break; 402 case 0x26C: 403 regNum = MISCREG_MTRR_FIX_4K_E0000; 404 break; 405 case 0x26D: 406 regNum = MISCREG_MTRR_FIX_4K_E8000; 407 break; 408 case 0x26E: 409 regNum = MISCREG_MTRR_FIX_4K_F0000; 410 break; 411 case 0x26F: 412 regNum = MISCREG_MTRR_FIX_4K_F8000; 413 break; 414 case 0x277: 415 regNum = MISCREG_PAT; 416 break; 417 case 0x2FF: 418 regNum = MISCREG_DEF_TYPE; 419 break; 420 case 0x400: 421 regNum = MISCREG_MC0_CTL; 422 break; 423 case 0x404: 424 regNum = MISCREG_MC1_CTL; 425 break; 426 case 0x408: 427 regNum = MISCREG_MC2_CTL; 428 break; 429 case 0x40C: 430 regNum = MISCREG_MC3_CTL; 431 break; 432 case 0x410: 433 regNum = MISCREG_MC4_CTL; 434 break; 435 case 0x414: 436 regNum = MISCREG_MC5_CTL; 437 break; 438 case 0x418: 439 regNum = MISCREG_MC6_CTL; 440 break; 441 case 0x41C: 442 regNum = MISCREG_MC7_CTL; 443 break; 444 case 0x401: 445 regNum = MISCREG_MC0_STATUS; 446 break; 447 case 0x405: 448 regNum = MISCREG_MC1_STATUS; 449 break; 450 case 0x409: 451 regNum = MISCREG_MC2_STATUS; 452 break; 453 case 0x40D: 454 regNum = MISCREG_MC3_STATUS; 455 break; 456 case 0x411: 457 regNum = MISCREG_MC4_STATUS; 458 break; 459 case 0x415: 460 regNum = MISCREG_MC5_STATUS; 461 break; 462 case 0x419: 463 regNum = MISCREG_MC6_STATUS; 464 break; 465 case 0x41D: 466 regNum = MISCREG_MC7_STATUS; 467 break; 468 case 0x402: 469 regNum = MISCREG_MC0_ADDR; 470 break; 471 case 0x406: 472 regNum = MISCREG_MC1_ADDR; 473 break; 474 case 0x40A: 475 regNum = MISCREG_MC2_ADDR; 476 break; 477 case 0x40E: 478 regNum = MISCREG_MC3_ADDR; 479 break; 480 case 0x412: 481 regNum = MISCREG_MC4_ADDR; 482 break; 483 case 0x416: 484 regNum = MISCREG_MC5_ADDR; 485 break; 486 case 0x41A: 487 regNum = MISCREG_MC6_ADDR; 488 break; 489 
case 0x41E: 490 regNum = MISCREG_MC7_ADDR; 491 break; 492 case 0x403: 493 regNum = MISCREG_MC0_MISC; 494 break; 495 case 0x407: 496 regNum = MISCREG_MC1_MISC; 497 break; 498 case 0x40B: 499 regNum = MISCREG_MC2_MISC; 500 break; 501 case 0x40F: 502 regNum = MISCREG_MC3_MISC; 503 break; 504 case 0x413: 505 regNum = MISCREG_MC4_MISC; 506 break; 507 case 0x417: 508 regNum = MISCREG_MC5_MISC; 509 break; 510 case 0x41B: 511 regNum = MISCREG_MC6_MISC; 512 break; 513 case 0x41F: 514 regNum = MISCREG_MC7_MISC; 515 break; 516 case 0xC0000080: 517 regNum = MISCREG_EFER; 518 break; 519 case 0xC0000081: 520 regNum = MISCREG_STAR; 521 break; 522 case 0xC0000082: 523 regNum = MISCREG_LSTAR; 524 break; 525 case 0xC0000083: 526 regNum = MISCREG_CSTAR; 527 break; 528 case 0xC0000084: 529 regNum = MISCREG_SF_MASK; 530 break; 531 case 0xC0000100: 532 regNum = MISCREG_FS_BASE; 533 break; 534 case 0xC0000101: 535 regNum = MISCREG_GS_BASE; 536 break; 537 case 0xC0000102: 538 regNum = MISCREG_KERNEL_GS_BASE; 539 break; 540 case 0xC0000103: 541 regNum = MISCREG_TSC_AUX; 542 break; 543 case 0xC0010000: 544 regNum = MISCREG_PERF_EVT_SEL0; 545 break; 546 case 0xC0010001: 547 regNum = MISCREG_PERF_EVT_SEL1; 548 break; 549 case 0xC0010002: 550 regNum = MISCREG_PERF_EVT_SEL2; 551 break; 552 case 0xC0010003: 553 regNum = MISCREG_PERF_EVT_SEL3; 554 break; 555 case 0xC0010004: 556 regNum = MISCREG_PERF_EVT_CTR0; 557 break; 558 case 0xC0010005: 559 regNum = MISCREG_PERF_EVT_CTR1; 560 break; 561 case 0xC0010006: 562 regNum = MISCREG_PERF_EVT_CTR2; 563 break; 564 case 0xC0010007: 565 regNum = MISCREG_PERF_EVT_CTR3; 566 break; 567 case 0xC0010010: 568 regNum = MISCREG_SYSCFG; 569 break; 570 case 0xC0010016: 571 regNum = MISCREG_IORR_BASE0; 572 break; 573 case 0xC0010017: 574 regNum = MISCREG_IORR_BASE1; 575 break; 576 case 0xC0010018: 577 regNum = MISCREG_IORR_MASK0; 578 break; 579 case 0xC0010019: 580 regNum = MISCREG_IORR_MASK1; 581 break; 582 case 0xC001001A: 583 regNum = MISCREG_TOP_MEM; 584 break; 
585 case 0xC001001D: 586 regNum = MISCREG_TOP_MEM2; 587 break; 588 case 0xC0010114: 589 regNum = MISCREG_VM_CR; 590 break; 591 case 0xC0010115: 592 regNum = MISCREG_IGNNE; 593 break; 594 case 0xC0010116: 595 regNum = MISCREG_SMM_CTL; 596 break; 597 case 0xC0010117: 598 regNum = MISCREG_VM_HSAVE_PA; 599 break; 600 default: 601 return std::make_shared<GeneralProtection>(0); 602 } 603 //The index is multiplied by the size of a MiscReg so that 604 //any memory dependence calculations will not see these as 605 //overlapping. 606 req->setPaddr(regNum * sizeof(RegVal)); 607 return NoFault; 608 } else if (prefix == IntAddrPrefixIO) { 609 // TODO If CPL > IOPL or in virtual mode, check the I/O permission 610 // bitmap in the TSS. 611 612 Addr IOPort = vaddr & ~IntAddrPrefixMask; 613 // Make sure the address fits in the expected 16 bit IO address 614 // space. 615 assert(!(IOPort & ~0xFFFF)); 616 617 if (IOPort == 0xCF8 && req->getSize() == 4) { 618 req->setFlags(Request::MMAPPED_IPR); 619 req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal)); 620 } else if ((IOPort & ~mask(2)) == 0xCFC) { 621 req->setFlags(Request::UNCACHEABLE); 622 623 Addr configAddress = 624 tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS); 625 626 if (bits(configAddress, 31, 31)) { 627 req->setPaddr(PhysAddrPrefixPciConfig | 628 mbits(configAddress, 30, 2) | 629 (IOPort & mask(2))); 630 } else { 631 req->setPaddr(PhysAddrPrefixIO | IOPort); 632 } 633 } else { 634 req->setFlags(Request::UNCACHEABLE); 635 req->setPaddr(PhysAddrPrefixIO | IOPort); 636 } 637 return NoFault; 638 } else { 639 panic("Access to unrecognized internal address space %#x.\n", 640 prefix); 641 } 642 } 643 644 /** 645 * TLB_lookup will only perform a TLB lookup returning true on a TLB hit 646 * and false on a TLB miss. 647 * Many of the checks about different modes have been converted to 648 * assertions, since these parts of the code are not really used. 649 * On a hit it will update the LRU stack. 
     */
    bool
    GpuTLB::tlbLookup(const RequestPtr &req,
                      ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
    #ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
    #endif

        // Internal (non-memory) address spaces never reach the TLB.
        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                //update LRU stack on a hit
                TlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }

    /**
     * Full x86 translation: segmentation checks (outside long mode),
     * then paging via the TLB with a functional page-table fallback on
     * a miss. Sets req's paddr on success; fills @p latency with the
     * hit/miss latency when @p timing is set. @p delayedResponse is
     * always left false here (this path never defers).
     */
    Fault
    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;

                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                // Effective address size is 16 bits when the CS default
                // size is toggled by the address-size override prefix.
                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                TlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        // SE mode: consult the process page table directly.
                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                              pte->paddr, false, false);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                     badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }


                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                // Combine the physical frame with the page offset.
                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                //Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    };

    /**
     * Atomic-mode translation: same as translate() but never defers.
     */
    Fault
    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
                            Mode mode, int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
                                 latency);
    }

    /**
     * Timing-mode translation; invokes the Translation callback
     * immediately unless the translation was deferred.
     */
    void
    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }


    // No TLB state is checkpointed; entries are rebuilt after restore.
    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    /**
     * Register all statistics for this TLB level, including derived
     * formulas (miss rates, average local latency).
     */
    void
    GpuTLB::regStats()
    {
        ClockedObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */

    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

        // Prefetch requests do not contribute to the statistics.
        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            // Subtract the issue tick now; the matching add happens when
            // the translation returns, so the stat accumulates elapsed
            // ticks per request.
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as well
         * as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to lower
         * level TLB on a miss, or start a page walk if this was the last-level
         * TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }

    // Event carrying the per-page translation outcome and the packet it
    // belongs to; processed <hitLatency> cycles after the lookup.
    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                               PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do Paging protection checks. If we encounter a page fault, then
     * an assertion is fired.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
            TlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            panic("Page fault detected");
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so return a page
            // fault that reflects that happening.
            panic("Page fault detected");
        }
    }

    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * The latter calls handelHit with TLB miss as tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
            PacketPtr pkt)
    {

        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        pagingProtectionChecks(tc, pkt, local_entry, mode);
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
            pkt->req->setFlags(Request::UNCACHEABLE);
        }

        //send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        //schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle
        // this is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }

    /**
     * Here we take the appropriate actions based on the result of the
     * TLB lookup.
1242 */ 1243 void 1244 GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome, 1245 PacketPtr pkt) 1246 { 1247 DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr); 1248 1249 assert(translationReturnEvent[virtPageAddr]); 1250 assert(pkt); 1251 1252 TranslationState *tmp_sender_state = 1253 safe_cast<TranslationState*>(pkt->senderState); 1254 1255 int req_cnt = tmp_sender_state->reqCnt.back(); 1256 bool update_stats = !tmp_sender_state->prefetch; 1257 1258 1259 if (outcome == TLB_HIT) { 1260 handleTranslationReturn(virtPageAddr, TLB_HIT, pkt); 1261 1262 if (update_stats) { 1263 accessCycles += (req_cnt * curTick()); 1264 localCycles += curTick(); 1265 } 1266 1267 } else if (outcome == TLB_MISS) { 1268 1269 DPRINTF(GPUTLB, "This is a TLB miss\n"); 1270 if (update_stats) { 1271 accessCycles += (req_cnt*curTick()); 1272 localCycles += curTick(); 1273 } 1274 1275 if (hasMemSidePort) { 1276 // the one cyle added here represent the delay from when we get 1277 // the reply back till when we propagate it to the coalescer 1278 // above. 1279 if (update_stats) { 1280 accessCycles += (req_cnt * 1); 1281 localCycles += 1; 1282 } 1283 1284 /** 1285 * There is a TLB below. Send the coalesced request. 1286 * We actually send the very first packet of all the 1287 * pending packets for this virtual page address. 1288 */ 1289 if (!memSidePort[0]->sendTimingReq(pkt)) { 1290 DPRINTF(GPUTLB, "Failed sending translation request to " 1291 "lower level TLB for addr %#x\n", virtPageAddr); 1292 1293 memSidePort[0]->retries.push_back(pkt); 1294 } else { 1295 DPRINTF(GPUTLB, "Sent translation request to lower level " 1296 "TLB for addr %#x\n", virtPageAddr); 1297 } 1298 } else { 1299 //this is the last level TLB. 
Start a page walk 1300 DPRINTF(GPUTLB, "Last level TLB - start a page walk for " 1301 "addr %#x\n", virtPageAddr); 1302 1303 if (update_stats) 1304 pageTableCycles -= (req_cnt*curTick()); 1305 1306 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr]; 1307 assert(tlb_event); 1308 tlb_event->updateOutcome(PAGE_WALK); 1309 schedule(tlb_event, curTick() + ticks(missLatency2)); 1310 } 1311 } else if (outcome == PAGE_WALK) { 1312 if (update_stats) 1313 pageTableCycles += (req_cnt*curTick()); 1314 1315 // Need to access the page table and update the TLB 1316 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n", 1317 virtPageAddr); 1318 1319 TranslationState *sender_state = 1320 safe_cast<TranslationState*>(pkt->senderState); 1321 1322 Process *p = sender_state->tc->getProcessPtr(); 1323 Addr vaddr = pkt->req->getVaddr(); 1324 #ifndef NDEBUG 1325 Addr alignedVaddr = p->pTable->pageAlign(vaddr); 1326 assert(alignedVaddr == virtPageAddr); 1327 #endif 1328 const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr); 1329 if (!pte && sender_state->tlbMode != BaseTLB::Execute && 1330 p->fixupStackFault(vaddr)) { 1331 pte = p->pTable->lookup(vaddr); 1332 } 1333 1334 if (pte) { 1335 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, 1336 pte->paddr); 1337 1338 sender_state->tlbEntry = 1339 new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false, 1340 false); 1341 } else { 1342 sender_state->tlbEntry = nullptr; 1343 } 1344 1345 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt); 1346 } else if (outcome == MISS_RETURN) { 1347 /** we add an extra cycle in the return path of the translation 1348 * requests in between the various TLB levels. 
1349 */ 1350 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt); 1351 } else { 1352 panic("Unexpected TLB outcome %d", outcome); 1353 } 1354 } 1355 1356 void 1357 GpuTLB::TLBEvent::process() 1358 { 1359 tlb->translationReturn(virtPageAddr, outcome, pkt); 1360 } 1361 1362 const char* 1363 GpuTLB::TLBEvent::description() const 1364 { 1365 return "trigger translationDoneEvent"; 1366 } 1367 1368 void 1369 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome) 1370 { 1371 outcome = _outcome; 1372 } 1373 1374 Addr 1375 GpuTLB::TLBEvent::getTLBEventVaddr() 1376 { 1377 return virtPageAddr; 1378 } 1379 1380 /* 1381 * recvTiming receives a coalesced timing request from a TLBCoalescer 1382 * and it calls issueTLBLookup() 1383 * It only rejects the packet if we have exceeded the max 1384 * outstanding number of requests for the TLB 1385 */ 1386 bool 1387 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt) 1388 { 1389 if (tlb->outstandingReqs < tlb->maxCoalescedReqs) { 1390 tlb->issueTLBLookup(pkt); 1391 // update number of outstanding translation requests 1392 tlb->outstandingReqs++; 1393 return true; 1394 } else { 1395 DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n", 1396 tlb->outstandingReqs); 1397 return false; 1398 } 1399 } 1400 1401 /** 1402 * handleFuncTranslationReturn is called on a TLB hit, 1403 * when a TLB miss returns or when a page fault returns. 1404 * It updates LRU, inserts the TLB entry on a miss 1405 * depending on the allocation policy and does the required 1406 * protection checks. It does NOT create a new packet to 1407 * update the packet's addr; this is done in hsail-gpu code. 
1408 */ 1409 void 1410 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome) 1411 { 1412 TranslationState *sender_state = 1413 safe_cast<TranslationState*>(pkt->senderState); 1414 1415 ThreadContext *tc = sender_state->tc; 1416 Mode mode = sender_state->tlbMode; 1417 Addr vaddr = pkt->req->getVaddr(); 1418 1419 TlbEntry *local_entry, *new_entry; 1420 1421 if (tlb_outcome == TLB_HIT) { 1422 DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr " 1423 "%#x\n", vaddr); 1424 1425 local_entry = sender_state->tlbEntry; 1426 } else { 1427 DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr " 1428 "%#x\n", vaddr); 1429 1430 // We are returning either from a page walk or from a hit at a lower 1431 // TLB level. The senderState should be "carrying" a pointer to the 1432 // correct TLBEntry. 1433 new_entry = sender_state->tlbEntry; 1434 assert(new_entry); 1435 local_entry = new_entry; 1436 1437 if (allocationPolicy) { 1438 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes); 1439 1440 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n", 1441 virt_page_addr); 1442 1443 local_entry = insert(virt_page_addr, *new_entry); 1444 } 1445 1446 assert(local_entry); 1447 } 1448 1449 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks " 1450 "while paddr was %#x.\n", local_entry->vaddr, 1451 local_entry->paddr); 1452 1453 /** 1454 * Do paging checks if it's a normal functional access. If it's for a 1455 * prefetch, then sometimes you can try to prefetch something that 1456 * won't pass protection. We don't actually want to fault becuase there 1457 * is no demand access to deem this a violation. Just put it in the 1458 * TLB and it will fault if indeed a future demand access touches it in 1459 * violation. 1460 * 1461 * This feature could be used to explore security issues around 1462 * speculative memory accesses. 
1463 */ 1464 if (!sender_state->prefetch && sender_state->tlbEntry) 1465 pagingProtectionChecks(tc, pkt, local_entry, mode); 1466 1467 int page_size = local_entry->size(); 1468 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1)); 1469 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); 1470 1471 pkt->req->setPaddr(paddr); 1472 1473 if (local_entry->uncacheable) 1474 pkt->req->setFlags(Request::UNCACHEABLE); 1475 } 1476 1477 // This is used for atomic translations. Need to 1478 // make it all happen during the same cycle. 1479 void 1480 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt) 1481 { 1482 TranslationState *sender_state = 1483 safe_cast<TranslationState*>(pkt->senderState); 1484 1485 ThreadContext *tc = sender_state->tc; 1486 bool update_stats = !sender_state->prefetch; 1487 1488 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), 1489 TheISA::PageBytes); 1490 1491 if (update_stats) 1492 tlb->updatePageFootprint(virt_page_addr); 1493 1494 // do the TLB lookup without updating the stats 1495 bool success = tlb->tlbLookup(pkt->req, tc, update_stats); 1496 tlbOutcome tlb_outcome = success ? 
TLB_HIT : TLB_MISS; 1497 1498 // functional mode means no coalescing 1499 // global metrics are the same as the local metrics 1500 if (update_stats) { 1501 tlb->globalNumTLBAccesses++; 1502 1503 if (success) { 1504 sender_state->hitLevel = sender_state->reqCnt.size(); 1505 tlb->globalNumTLBHits++; 1506 } 1507 } 1508 1509 if (!success) { 1510 if (update_stats) 1511 tlb->globalNumTLBMisses++; 1512 if (tlb->hasMemSidePort) { 1513 // there is a TLB below -> propagate down the TLB hierarchy 1514 tlb->memSidePort[0]->sendFunctional(pkt); 1515 // If no valid translation from a prefetch, then just return 1516 if (sender_state->prefetch && !pkt->req->hasPaddr()) 1517 return; 1518 } else { 1519 // Need to access the page table and update the TLB 1520 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n", 1521 virt_page_addr); 1522 1523 Process *p = tc->getProcessPtr(); 1524 1525 Addr vaddr = pkt->req->getVaddr(); 1526 #ifndef NDEBUG 1527 Addr alignedVaddr = p->pTable->pageAlign(vaddr); 1528 assert(alignedVaddr == virt_page_addr); 1529 #endif 1530 1531 const EmulationPageTable::Entry *pte = 1532 p->pTable->lookup(vaddr); 1533 if (!pte && sender_state->tlbMode != BaseTLB::Execute && 1534 p->fixupStackFault(vaddr)) { 1535 pte = p->pTable->lookup(vaddr); 1536 } 1537 1538 if (!sender_state->prefetch) { 1539 // no PageFaults are permitted after 1540 // the second page table lookup 1541 assert(pte); 1542 1543 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, 1544 pte->paddr); 1545 1546 sender_state->tlbEntry = 1547 new TlbEntry(p->pid(), virt_page_addr, 1548 pte->paddr, false, false); 1549 } else { 1550 // If this was a prefetch, then do the normal thing if it 1551 // was a successful translation. Otherwise, send an empty 1552 // TLB entry back so that it can be figured out as empty and 1553 // handled accordingly. 
1554 if (pte) { 1555 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, 1556 pte->paddr); 1557 1558 sender_state->tlbEntry = 1559 new TlbEntry(p->pid(), virt_page_addr, 1560 pte->paddr, false, false); 1561 } else { 1562 DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", 1563 alignedVaddr); 1564 1565 sender_state->tlbEntry = nullptr; 1566 1567 return; 1568 } 1569 } 1570 } 1571 } else { 1572 DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n", 1573 tlb->lookup(pkt->req->getVaddr())); 1574 1575 TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(), 1576 update_stats); 1577 1578 assert(entry); 1579 1580 auto p = sender_state->tc->getProcessPtr(); 1581 sender_state->tlbEntry = 1582 new TlbEntry(p->pid(), entry->vaddr, entry->paddr, 1583 false, false); 1584 } 1585 // This is the function that would populate pkt->req with the paddr of 1586 // the translation. But if no translation happens (i.e Prefetch fails) 1587 // then the early returns in the above code wiill keep this function 1588 // from executing. 1589 tlb->handleFuncTranslationReturn(pkt, tlb_outcome); 1590 } 1591 1592 void 1593 GpuTLB::CpuSidePort::recvReqRetry() 1594 { 1595 // The CPUSidePort never sends anything but replies. No retries 1596 // expected. 1597 panic("recvReqRetry called"); 1598 } 1599 1600 AddrRangeList 1601 GpuTLB::CpuSidePort::getAddrRanges() const 1602 { 1603 // currently not checked by the master 1604 AddrRangeList ranges; 1605 1606 return ranges; 1607 } 1608 1609 /** 1610 * MemSidePort receives the packet back. 1611 * We need to call the handleTranslationReturn 1612 * and propagate up the hierarchy. 
1613 */ 1614 bool 1615 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt) 1616 { 1617 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), 1618 TheISA::PageBytes); 1619 1620 DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n", 1621 virt_page_addr); 1622 1623 TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr]; 1624 assert(tlb_event); 1625 assert(virt_page_addr == tlb_event->getTLBEventVaddr()); 1626 1627 tlb_event->updateOutcome(MISS_RETURN); 1628 tlb->schedule(tlb_event, curTick()+tlb->ticks(1)); 1629 1630 return true; 1631 } 1632 1633 void 1634 GpuTLB::MemSidePort::recvReqRetry() 1635 { 1636 // No retries should reach the TLB. The retries 1637 // should only reach the TLBCoalescer. 1638 panic("recvReqRetry called"); 1639 } 1640 1641 void 1642 GpuTLB::cleanup() 1643 { 1644 while (!cleanupQueue.empty()) { 1645 Addr cleanup_addr = cleanupQueue.front(); 1646 cleanupQueue.pop(); 1647 1648 // delete TLBEvent 1649 TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr]; 1650 delete old_tlb_event; 1651 translationReturnEvent.erase(cleanup_addr); 1652 1653 // update number of outstanding requests 1654 outstandingReqs--; 1655 } 1656 1657 /** the higher level coalescer should retry if it has 1658 * any pending requests. 
1659 */ 1660 for (int i = 0; i < cpuSidePort.size(); ++i) { 1661 cpuSidePort[i]->sendRetryReq(); 1662 } 1663 } 1664 1665 void 1666 GpuTLB::updatePageFootprint(Addr virt_page_addr) 1667 { 1668 1669 std::pair<AccessPatternTable::iterator, bool> ret; 1670 1671 AccessInfo tmp_access_info; 1672 tmp_access_info.lastTimeAccessed = 0; 1673 tmp_access_info.accessesPerPage = 0; 1674 tmp_access_info.totalReuseDistance = 0; 1675 tmp_access_info.sumDistance = 0; 1676 tmp_access_info.meanDistance = 0; 1677 1678 ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr, 1679 tmp_access_info)); 1680 1681 bool first_page_access = ret.second; 1682 1683 if (first_page_access) { 1684 numUniquePages++; 1685 } else { 1686 int accessed_before; 1687 accessed_before = curTick() - ret.first->second.lastTimeAccessed; 1688 ret.first->second.totalReuseDistance += accessed_before; 1689 } 1690 1691 ret.first->second.accessesPerPage++; 1692 ret.first->second.lastTimeAccessed = curTick(); 1693 1694 if (accessDistance) { 1695 ret.first->second.localTLBAccesses 1696 .push_back(localNumTLBAccesses.value()); 1697 } 1698 } 1699 1700 void 1701 GpuTLB::exitCallback() 1702 { 1703 std::ostream *page_stat_file = nullptr; 1704 1705 if (accessDistance) { 1706 1707 // print per page statistics to a separate file (.csv format) 1708 // simout is the gem5 output directory (default is m5out or the one 1709 // specified with -d 1710 page_stat_file = simout.create(name().c_str())->stream(); 1711 1712 // print header 1713 *page_stat_file << "page,max_access_distance,mean_access_distance, " 1714 << "stddev_distance" << std::endl; 1715 } 1716 1717 // update avg. 
reuse distance footprint 1718 AccessPatternTable::iterator iter, iter_begin, iter_end; 1719 unsigned int sum_avg_reuse_distance_per_page = 0; 1720 1721 // iterate through all pages seen by this TLB 1722 for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) { 1723 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance / 1724 iter->second.accessesPerPage; 1725 1726 if (accessDistance) { 1727 unsigned int tmp = iter->second.localTLBAccesses[0]; 1728 unsigned int prev = tmp; 1729 1730 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) { 1731 if (i) { 1732 tmp = prev + 1; 1733 } 1734 1735 prev = iter->second.localTLBAccesses[i]; 1736 // update the localTLBAccesses value 1737 // with the actual differece 1738 iter->second.localTLBAccesses[i] -= tmp; 1739 // compute the sum of AccessDistance per page 1740 // used later for mean 1741 iter->second.sumDistance += 1742 iter->second.localTLBAccesses[i]; 1743 } 1744 1745 iter->second.meanDistance = 1746 iter->second.sumDistance / iter->second.accessesPerPage; 1747 1748 // compute std_dev and max (we need a second round because we 1749 // need to know the mean value 1750 unsigned int max_distance = 0; 1751 unsigned int stddev_distance = 0; 1752 1753 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) { 1754 unsigned int tmp_access_distance = 1755 iter->second.localTLBAccesses[i]; 1756 1757 if (tmp_access_distance > max_distance) { 1758 max_distance = tmp_access_distance; 1759 } 1760 1761 unsigned int diff = 1762 tmp_access_distance - iter->second.meanDistance; 1763 stddev_distance += pow(diff, 2); 1764 1765 } 1766 1767 stddev_distance = 1768 sqrt(stddev_distance/iter->second.accessesPerPage); 1769 1770 if (page_stat_file) { 1771 *page_stat_file << std::hex << iter->first << ","; 1772 *page_stat_file << std::dec << max_distance << ","; 1773 *page_stat_file << std::dec << iter->second.meanDistance 1774 << ","; 1775 *page_stat_file << std::dec << stddev_distance; 1776 
*page_stat_file << std::endl; 1777 } 1778 1779 // erase the localTLBAccesses array 1780 iter->second.localTLBAccesses.clear(); 1781 } 1782 } 1783 1784 if (!TLBFootprint.empty()) { 1785 avgReuseDistance = 1786 sum_avg_reuse_distance_per_page / TLBFootprint.size(); 1787 } 1788 1789 //clear the TLBFootprint map 1790 TLBFootprint.clear(); 1791 } 1792} // namespace X86ISA 1793 1794X86ISA::GpuTLB* 1795X86GPUTLBParams::create() 1796{ 1797 return new X86ISA::GpuTLB(this); 1798} 1799 1800