gpu_tlb.cc revision 11704
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */

#include "gpu-compute/gpu_tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb.assign(size, GpuTlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        FA = (size == assoc);
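
        /*
         * Layout note: entry (set, way) lives at tlb[set * assoc + way] and
         * starts out on that set's free list. For example, a 64-entry,
         * 4-way TLB has 16 sets; the fully-associative case is
         * size == assoc (FA == true), which leaves a single set and a
         * setMask of 0.
         */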

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greater than 4KB (as defined in
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        setMask = numSets - 1;

#if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
#endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }

    BaseSlavePort&
    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else {
            panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
        }
    }

    BaseMasterPort&
    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
        }
    }

    GpuTlbEntry*
    GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
    {
        GpuTlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address
         * The least significant bits are simply masked
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }
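
    /**
     * Look up the entry that maps va. This is a thin wrapper around
     * lookupIt(): it optionally refreshes the LRU ordering of the matching
     * set and returns nullptr on a miss.
     */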
    GpuTlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                GpuTlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {

        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }
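
    /**
     * translateInt handles requests that target the internal, non-memory
     * address space: MSR accesses are turned into MMAPPED_IPR requests whose
     * pseudo physical address is regNum * sizeof(MiscReg), while legacy I/O
     * ports (including the 0xCF8/0xCFC PCI config pair) are mapped into the
     * corresponding physical-address prefixes.
     */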
    Fault
    GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(MiscReg));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }

    /**
     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks about different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
#ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
#endif

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                //update LRU stack on a hit
                GpuTlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }

    Fault
    GpuTLB::translate(RequestPtr req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;

                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                GpuTlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        Process *p = tc->getProcessPtr();
                        GpuTlbEntry newEntry;
                        bool success = p->pTable->lookup(vaddr, newEntry);

                        if (!success && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing) {
                                latency += missLatency2;
                            }

                            if (p->fixupStackFault(vaddr))
                                success = p->pTable->lookup(vaddr, newEntry);
                        }

                        if (!success) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            newEntry.valid = success;
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, newEntry.pageStart());

                            entry = insert(alignedVaddr, newEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                     badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }


                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                //Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    };

    Fault
    GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
                            int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }

    void
    GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }


    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }
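
    /**
     * Note on the cycle accounting used below: accessCycles, localCycles and
     * pageTableCycles are decremented by the issue tick and incremented by
     * the completion tick (scaled by the number of coalesced requests where
     * appropriate), so after a request finishes they have accumulated its
     * elapsed latency.
     */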

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */

    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        RequestPtr tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            sender_state->tlbEntry =
                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr,
                               tlbOutcome tlb_outcome, PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do Paging protection checks. If we encounter a page fault, then
     * an assertion is fired.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
            GpuTlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 &&
                       !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            assert(false);
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so return a page
            // fault that reflects that happening.
            assert(false);
        }
    }

    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * The latter calls handleTranslationReturn with TLB_MISS as the
     * tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr,
            tlbOutcome tlb_outcome, PacketPtr pkt)
    {

        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        GpuTlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                    vaddr);
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a pointer
            // to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        pagingProtectionChecks(tc, pkt, local_entry, mode);
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
            pkt->req->setFlags(Request::UNCACHEABLE);
        }

        //send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        //schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle
        // this is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }
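
    /**
     * Outcome handling in translationReturn below: TLB_HIT responds
     * immediately; TLB_MISS forwards the request to a lower-level TLB if one
     * is connected, otherwise schedules a PAGE_WALK; PAGE_WALK consults the
     * page table and completes the translation as a miss; MISS_RETURN
     * handles a response coming back from a lower TLB level.
     */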

    /**
     * Here we take the appropriate actions based on the result of the
     * TLB lookup.
     */
    void
    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
            PacketPtr pkt)
    {
        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

        assert(translationReturnEvent[virtPageAddr]);
        assert(pkt);

        TranslationState *tmp_sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        int req_cnt = tmp_sender_state->reqCnt.back();
        bool update_stats = !tmp_sender_state->prefetch;


        if (outcome == TLB_HIT) {
            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

        } else if (outcome == TLB_MISS) {

            DPRINTF(GPUTLB, "This is a TLB miss\n");
            if (update_stats) {
                accessCycles += (req_cnt*curTick());
                localCycles += curTick();
            }

            if (hasMemSidePort) {
                // the one cycle added here represents the delay from when we
                // get the reply back till when we propagate it to the
                // coalescer above.
                if (update_stats) {
                    accessCycles += (req_cnt * 1);
                    localCycles += 1;
                }

                /**
                 * There is a TLB below. Send the coalesced request.
                 * We actually send the very first packet of all the
                 * pending packets for this virtual page address.
                 */
                if (!memSidePort[0]->sendTimingReq(pkt)) {
                    DPRINTF(GPUTLB, "Failed sending translation request to "
                            "lower level TLB for addr %#x\n", virtPageAddr);

                    memSidePort[0]->retries.push_back(pkt);
                } else {
                    DPRINTF(GPUTLB, "Sent translation request to lower level "
                            "TLB for addr %#x\n", virtPageAddr);
                }
            } else {
                //this is the last level TLB. Start a page walk
                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                        "addr %#x\n", virtPageAddr);

                if (update_stats)
                    pageTableCycles -= (req_cnt*curTick());

                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                assert(tlb_event);
                tlb_event->updateOutcome(PAGE_WALK);
                schedule(tlb_event, curTick() + ticks(missLatency2));
            }
        } else if (outcome == PAGE_WALK) {
            if (update_stats)
                pageTableCycles += (req_cnt*curTick());

            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virtPageAddr);

            TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

            Process *p = sender_state->tc->getProcessPtr();
            TlbEntry newEntry;
            Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virtPageAddr);
#endif
            bool success;
            success = p->pTable->lookup(vaddr, newEntry);
            if (!success && sender_state->tlbMode != BaseTLB::Execute) {
                if (p->fixupStackFault(vaddr)) {
                    success = p->pTable->lookup(vaddr, newEntry);
                }
            }

            DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                    newEntry.pageStart());

            sender_state->tlbEntry =
                new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);

            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else if (outcome == MISS_RETURN) {
            /** we add an extra cycle in the return path of the translation
             * requests in between the various TLB levels.
             */
            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else {
            assert(false);
        }
    }

    void
    GpuTLB::TLBEvent::process()
    {
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }

    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }

    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }

    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }

    /*
     * recvTiming receives a coalesced timing request from a TLBCoalescer
     * and it calls issueTLBLookup()
     * It only rejects the packet if we have exceeded the max
     * outstanding number of requests for the TLB
     */
    bool
    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
    {
        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
            tlb->issueTLBLookup(pkt);
            // update number of outstanding translation requests
            tlb->outstandingReqs++;
            return true;
        } else {
            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                    tlb->outstandingReqs);
            return false;
        }
    }

    /**
     * handleFuncTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * It updates LRU, inserts the TLB entry on a miss
     * depending on the allocation policy and does the required
     * protection checks. It does NOT create a new packet to
     * update the packet's addr; this is done in hsail-gpu code.
     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        GpuTlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a pointer
            // to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        // Do paging checks if it's a normal functional access. If it's for a
        // prefetch, then sometimes you can try to prefetch something that
        // won't pass protection. We don't actually want to fault because
        // there is no demand access to deem this a violation. Just put it in
        // the TLB and it will fault if indeed a future demand access touches
        // it in violation.
        if (!sender_state->prefetch && sender_state->tlbEntry->valid)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
            pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // do the TLB lookup without updating the stats
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;
            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);
                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();
                TlbEntry newEntry;

                Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);
#endif

                bool success = p->pTable->lookup(vaddr, newEntry);
                if (!success && sender_state->tlbMode != BaseTLB::Execute) {
                    if (p->fixupStackFault(vaddr))
                        success = p->pTable->lookup(vaddr, newEntry);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
                    assert(success);

                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            newEntry.pageStart());

                    sender_state->tlbEntry = new GpuTlbEntry(0,
                                                             newEntry.vaddr,
                                                             newEntry.paddr,
                                                             success);
                } else {
                    // If this was a prefetch, then do the normal thing if it
                    // was a successful translation. Otherwise, send an empty
                    // TLB entry back so that it can be figured out as empty
                    // and handled accordingly.
                    if (success) {
                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                                newEntry.pageStart());

                        sender_state->tlbEntry = new GpuTlbEntry(0,
                                newEntry.vaddr, newEntry.paddr, success);
                    } else {
                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                alignedVaddr);

                        sender_state->tlbEntry = new GpuTlbEntry();

                        return;
                    }
                }
            }
        } else {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    tlb->lookup(pkt->req->getVaddr()));

            GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                             update_stats);

            assert(entry);

            sender_state->tlbEntry =
                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
        }
        // This is the function that would populate pkt->req with the paddr
        // of the translation. But if no translation happens (i.e., prefetch
        // fails) then the early returns in the above code will keep this
        // function from executing.
        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
    }

    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CPUSidePort never sends anything but replies. No retries
        // expected.
        assert(false);
    }

    AddrRangeList
    GpuTLB::CpuSidePort::getAddrRanges() const
    {
        // currently not checked by the master
        AddrRangeList ranges;

        return ranges;
    }

    /**
     * MemSidePort receives the packet back.
     * We need to call the handleTranslationReturn
     * and propagate up the hierarchy.
     */
    bool
    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
    {
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                virt_page_addr);

        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
        assert(tlb_event);
        assert(virt_page_addr == tlb_event->getTLBEventVaddr());

        tlb_event->updateOutcome(MISS_RETURN);
        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));

        return true;
    }

    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        assert(false);
    }

    void
    GpuTLB::cleanup()
    {
        while (!cleanupQueue.empty()) {
            Addr cleanup_addr = cleanupQueue.front();
            cleanupQueue.pop();

            // delete TLBEvent
            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
            delete old_tlb_event;
            translationReturnEvent.erase(cleanup_addr);

            // update number of outstanding requests
            outstandingReqs--;
        }

        /** the higher level coalescer should retry if it has
         * any pending requests.
         */
        for (int i = 0; i < cpuSidePort.size(); ++i) {
            cpuSidePort[i]->sendRetryReq();
        }
    }

    void
    GpuTLB::updatePageFootprint(Addr virt_page_addr)
    {

        std::pair<AccessPatternTable::iterator, bool> ret;

        AccessInfo tmp_access_info;
        tmp_access_info.lastTimeAccessed = 0;
        tmp_access_info.accessesPerPage = 0;
        tmp_access_info.totalReuseDistance = 0;
        tmp_access_info.sumDistance = 0;
        tmp_access_info.meanDistance = 0;

        ret = TLBFootprint.insert(
            AccessPatternTable::value_type(virt_page_addr, tmp_access_info));

        bool first_page_access = ret.second;

        if (first_page_access) {
            numUniquePages++;
        } else {
            int accessed_before;
            accessed_before = curTick() - ret.first->second.lastTimeAccessed;
            ret.first->second.totalReuseDistance += accessed_before;
        }

        ret.first->second.accessesPerPage++;
        ret.first->second.lastTimeAccessed = curTick();

        if (accessDistance) {
            ret.first->second.localTLBAccesses
                .push_back(localNumTLBAccesses.value());
        }
    }

    void
    GpuTLB::exitCallback()
    {
        std::ostream *page_stat_file = nullptr;

        if (accessDistance) {

            // print per page statistics to a separate file (.csv format)
            // simout is the gem5 output directory (default is m5out or the
            // one specified with -d)
            page_stat_file = simout.create(name().c_str())->stream();

            // print header
            *page_stat_file << "page,max_access_distance,mean_access_distance, "
                            << "stddev_distance" << std::endl;
        }

        // update avg. reuse distance footprint
        AccessPatternTable::iterator iter, iter_begin, iter_end;
        unsigned int sum_avg_reuse_distance_per_page = 0;

        // iterate through all pages seen by this TLB
        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
            sum_avg_reuse_distance_per_page +=
                iter->second.totalReuseDistance / iter->second.accessesPerPage;

            if (accessDistance) {
                unsigned int tmp = iter->second.localTLBAccesses[0];
                unsigned int prev = tmp;

                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
                    if (i) {
                        tmp = prev + 1;
                    }

                    prev = iter->second.localTLBAccesses[i];
                    // update the localTLBAccesses value
                    // with the actual difference
                    iter->second.localTLBAccesses[i] -= tmp;
                    // compute the sum of AccessDistance per page
                    // used later for mean
                    iter->second.sumDistance +=
                        iter->second.localTLBAccesses[i];
                }

                iter->second.meanDistance =
                    iter->second.sumDistance / iter->second.accessesPerPage;

                // compute std_dev and max (we need a second round because we
                // need to know the mean value)
                unsigned int max_distance = 0;
                unsigned int stddev_distance = 0;

                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
                    unsigned int tmp_access_distance =
                        iter->second.localTLBAccesses[i];

                    if (tmp_access_distance > max_distance) {
                        max_distance = tmp_access_distance;
                    }

                    unsigned int diff =
                        tmp_access_distance - iter->second.meanDistance;
                    stddev_distance += pow(diff, 2);

                }

                stddev_distance =
                    sqrt(stddev_distance/iter->second.accessesPerPage);

                if (page_stat_file) {
                    *page_stat_file << std::hex << iter->first << ",";
                    *page_stat_file << std::dec << max_distance << ",";
                    *page_stat_file << std::dec <<
                        iter->second.meanDistance << ",";
                    *page_stat_file << std::dec << stddev_distance;
                    *page_stat_file << std::endl;
                }

                // erase the localTLBAccesses array
                iter->second.localTLBAccesses.clear();
            }
        }

        if (!TLBFootprint.empty()) {
            avgReuseDistance =
                sum_avg_reuse_distance_per_page / TLBFootprint.size();
        }

        //clear the TLBFootprint map
        TLBFootprint.clear();
    }
} // namespace X86ISA

X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    return new X86ISA::GpuTLB(this);
}