gpu_tlb.cc (13449:2f7efa89c58b)
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */

#include "gpu-compute/gpu_tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/logging.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes a fixed page
         * size of 4KB.
         * If the page size is greater than 4KB (as defined in
         * TheISA::PageBytes), then there are various issues with the
         * current implementation (e.g., the same 8KB page would be
         * replicated in different sets).
         */
        setMask = numSets - 1;

#if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
#endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }
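
    /*
     * A worked example of the set/way indexing above (sizes assumed for
     * illustration, not defaults): with size = 32 entries and assoc = 4,
     * numSets = 8 and setMask = 0x7. Entry (set 2, way 3) lives at
     * tlb[2 * 4 + 3] = tlb[11], and a 4KB-page vaddr such as 0x2a000
     * maps to set (0x2a000 >> 12) & 0x7 = 0x2a & 0x7 = 2.
     */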

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }

    BaseSlavePort&
    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("GpuTLB::getSlavePort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else {
            panic("GpuTLB::getSlavePort: unknown port %s\n", if_name);
        }
    }

    BaseMasterPort&
    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("GpuTLB::getMasterPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("GpuTLB::getMasterPort: unknown port %s\n", if_name);
        }
    }

    TlbEntry*
    GpuTLB::insert(Addr vpn, TlbEntry &entry)
    {
        TlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address.
         * The least significant bits are simply masked.
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }
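
    /*
     * A minimal usage sketch of insert() (assumed call pattern, mirroring
     * the miss path in translate() below):
     *
     *     TlbEntry gpuEntry(p->pid(), alignedVaddr, pte->paddr,
     *                       false, false);
     *     TlbEntry *e = insert(alignedVaddr, gpuEntry);
     *
     * If the set still has a free way it is used; otherwise the LRU
     * entry (the back of entryList[set]) is overwritten, and the new
     * entry becomes the MRU at the front of the list.
     */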

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    TlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }
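
    /*
     * A worked example of the hit check in lookupIt (values assumed):
     * an entry with vaddr = 0x2a000 and size() = 0x1000 covers
     * [0x2a000, 0x2b000), so va = 0x2a7f8 hits while va = 0x2b000 does
     * not. With update_lru set, the hit entry is also moved to the front
     * of entryList[set], keeping the list ordered MRU -> LRU.
     */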

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                TlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {
        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Address references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:  regNum = MISCREG_TSC; break;
              case 0x1B:  regNum = MISCREG_APIC_BASE; break;
              case 0xFE:  regNum = MISCREG_MTRRCAP; break;
              case 0x174: regNum = MISCREG_SYSENTER_CS; break;
              case 0x175: regNum = MISCREG_SYSENTER_ESP; break;
              case 0x176: regNum = MISCREG_SYSENTER_EIP; break;
              case 0x179: regNum = MISCREG_MCG_CAP; break;
              case 0x17A: regNum = MISCREG_MCG_STATUS; break;
              case 0x17B: regNum = MISCREG_MCG_CTL; break;
              case 0x1D9: regNum = MISCREG_DEBUG_CTL_MSR; break;
              case 0x1DB: regNum = MISCREG_LAST_BRANCH_FROM_IP; break;
              case 0x1DC: regNum = MISCREG_LAST_BRANCH_TO_IP; break;
              case 0x1DD: regNum = MISCREG_LAST_EXCEPTION_FROM_IP; break;
              case 0x1DE: regNum = MISCREG_LAST_EXCEPTION_TO_IP; break;
              case 0x200: regNum = MISCREG_MTRR_PHYS_BASE_0; break;
              case 0x201: regNum = MISCREG_MTRR_PHYS_MASK_0; break;
              case 0x202: regNum = MISCREG_MTRR_PHYS_BASE_1; break;
              case 0x203: regNum = MISCREG_MTRR_PHYS_MASK_1; break;
              case 0x204: regNum = MISCREG_MTRR_PHYS_BASE_2; break;
              case 0x205: regNum = MISCREG_MTRR_PHYS_MASK_2; break;
              case 0x206: regNum = MISCREG_MTRR_PHYS_BASE_3; break;
              case 0x207: regNum = MISCREG_MTRR_PHYS_MASK_3; break;
              case 0x208: regNum = MISCREG_MTRR_PHYS_BASE_4; break;
              case 0x209: regNum = MISCREG_MTRR_PHYS_MASK_4; break;
              case 0x20A: regNum = MISCREG_MTRR_PHYS_BASE_5; break;
              case 0x20B: regNum = MISCREG_MTRR_PHYS_MASK_5; break;
              case 0x20C: regNum = MISCREG_MTRR_PHYS_BASE_6; break;
              case 0x20D: regNum = MISCREG_MTRR_PHYS_MASK_6; break;
              case 0x20E: regNum = MISCREG_MTRR_PHYS_BASE_7; break;
              case 0x20F: regNum = MISCREG_MTRR_PHYS_MASK_7; break;
              case 0x250: regNum = MISCREG_MTRR_FIX_64K_00000; break;
              case 0x258: regNum = MISCREG_MTRR_FIX_16K_80000; break;
              case 0x259: regNum = MISCREG_MTRR_FIX_16K_A0000; break;
              case 0x268: regNum = MISCREG_MTRR_FIX_4K_C0000; break;
              case 0x269: regNum = MISCREG_MTRR_FIX_4K_C8000; break;
              case 0x26A: regNum = MISCREG_MTRR_FIX_4K_D0000; break;
              case 0x26B: regNum = MISCREG_MTRR_FIX_4K_D8000; break;
              case 0x26C: regNum = MISCREG_MTRR_FIX_4K_E0000; break;
              case 0x26D: regNum = MISCREG_MTRR_FIX_4K_E8000; break;
              case 0x26E: regNum = MISCREG_MTRR_FIX_4K_F0000; break;
              case 0x26F: regNum = MISCREG_MTRR_FIX_4K_F8000; break;
              case 0x277: regNum = MISCREG_PAT; break;
              case 0x2FF: regNum = MISCREG_DEF_TYPE; break;
              case 0x400: regNum = MISCREG_MC0_CTL; break;
              case 0x404: regNum = MISCREG_MC1_CTL; break;
              case 0x408: regNum = MISCREG_MC2_CTL; break;
              case 0x40C: regNum = MISCREG_MC3_CTL; break;
              case 0x410: regNum = MISCREG_MC4_CTL; break;
              case 0x414: regNum = MISCREG_MC5_CTL; break;
              case 0x418: regNum = MISCREG_MC6_CTL; break;
              case 0x41C: regNum = MISCREG_MC7_CTL; break;
              case 0x401: regNum = MISCREG_MC0_STATUS; break;
              case 0x405: regNum = MISCREG_MC1_STATUS; break;
              case 0x409: regNum = MISCREG_MC2_STATUS; break;
              case 0x40D: regNum = MISCREG_MC3_STATUS; break;
              case 0x411: regNum = MISCREG_MC4_STATUS; break;
              case 0x415: regNum = MISCREG_MC5_STATUS; break;
              case 0x419: regNum = MISCREG_MC6_STATUS; break;
              case 0x41D: regNum = MISCREG_MC7_STATUS; break;
              case 0x402: regNum = MISCREG_MC0_ADDR; break;
              case 0x406: regNum = MISCREG_MC1_ADDR; break;
              case 0x40A: regNum = MISCREG_MC2_ADDR; break;
              case 0x40E: regNum = MISCREG_MC3_ADDR; break;
              case 0x412: regNum = MISCREG_MC4_ADDR; break;
              case 0x416: regNum = MISCREG_MC5_ADDR; break;
              case 0x41A: regNum = MISCREG_MC6_ADDR; break;
              case 0x41E: regNum = MISCREG_MC7_ADDR; break;
              case 0x403: regNum = MISCREG_MC0_MISC; break;
              case 0x407: regNum = MISCREG_MC1_MISC; break;
              case 0x40B: regNum = MISCREG_MC2_MISC; break;
              case 0x40F: regNum = MISCREG_MC3_MISC; break;
              case 0x413: regNum = MISCREG_MC4_MISC; break;
              case 0x417: regNum = MISCREG_MC5_MISC; break;
              case 0x41B: regNum = MISCREG_MC6_MISC; break;
              case 0x41F: regNum = MISCREG_MC7_MISC; break;
              case 0xC0000080: regNum = MISCREG_EFER; break;
              case 0xC0000081: regNum = MISCREG_STAR; break;
              case 0xC0000082: regNum = MISCREG_LSTAR; break;
              case 0xC0000083: regNum = MISCREG_CSTAR; break;
              case 0xC0000084: regNum = MISCREG_SF_MASK; break;
              case 0xC0000100: regNum = MISCREG_FS_BASE; break;
              case 0xC0000101: regNum = MISCREG_GS_BASE; break;
              case 0xC0000102: regNum = MISCREG_KERNEL_GS_BASE; break;
              case 0xC0000103: regNum = MISCREG_TSC_AUX; break;
              case 0xC0010000: regNum = MISCREG_PERF_EVT_SEL0; break;
              case 0xC0010001: regNum = MISCREG_PERF_EVT_SEL1; break;
              case 0xC0010002: regNum = MISCREG_PERF_EVT_SEL2; break;
              case 0xC0010003: regNum = MISCREG_PERF_EVT_SEL3; break;
              case 0xC0010004: regNum = MISCREG_PERF_EVT_CTR0; break;
              case 0xC0010005: regNum = MISCREG_PERF_EVT_CTR1; break;
              case 0xC0010006: regNum = MISCREG_PERF_EVT_CTR2; break;
              case 0xC0010007: regNum = MISCREG_PERF_EVT_CTR3; break;
              case 0xC0010010: regNum = MISCREG_SYSCFG; break;
              case 0xC0010016: regNum = MISCREG_IORR_BASE0; break;
              case 0xC0010017: regNum = MISCREG_IORR_BASE1; break;
              case 0xC0010018: regNum = MISCREG_IORR_MASK0; break;
              case 0xC0010019: regNum = MISCREG_IORR_MASK1; break;
              case 0xC001001A: regNum = MISCREG_TOP_MEM; break;
              case 0xC001001D: regNum = MISCREG_TOP_MEM2; break;
              case 0xC0010114: regNum = MISCREG_VM_CR; break;
              case 0xC0010115: regNum = MISCREG_IGNNE; break;
              case 0xC0010116: regNum = MISCREG_SMM_CTL; break;
              case 0xC0010117: regNum = MISCREG_VM_HSAVE_PA; break;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            // The index is multiplied by the size of a MiscReg so that
            // any memory dependence calculations will not see these as
            // overlapping.
            req->setPaddr(regNum * sizeof(MiscReg));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }
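
    /*
     * A worked example of the MSR mapping above (register chosen for
     * illustration): an access to MSR 0xC0000080 (EFER) arrives with the
     * MSR number encoded in vaddr; after vaddr >>= 3, the switch selects
     * regNum = MISCREG_EFER, and the request's paddr becomes
     * MISCREG_EFER * sizeof(MiscReg), a byte offset that cannot overlap
     * any other register's pseudo-address.
     */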

    /**
     * tlbLookup only performs a TLB lookup, returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks for different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(const RequestPtr &req,
                      ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
#ifndef NDEBUG
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
#endif

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // update LRU stack on a hit
                TlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // TLB and stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                } else {
                    localNumTLBHits++;
                }
            }
        }

        return tlb_hit;
    }
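
    /*
     * A note on the update_stats path above (a reading of the code, not a
     * spec): lookup() is called with update_lru = true before the
     * update_stats check, so even a functional access reorders the LRU
     * stack; update_stats = false only skips the statistics updates.
     */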

    Fault
    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;
                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                TlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                              pte->paddr, false, false);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                    badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }

                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                // Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }
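
    /*
     * A worked example of the address formation in translate() (numbers
     * assumed): on a hit to a 4KB page, page_size = 0x1000, so with
     * entry->paddr = 0x7f000 and vaddr = 0x2a7f8 the physical address is
     * 0x7f000 | (0x2a7f8 & 0xfff) = 0x7f7f8; the page offset is carried
     * over unchanged and only the page frame is translated.
     */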

    Fault
    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
                            Mode mode, int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }

    void
    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
                            Translation *translation, Mode mode, int &latency)
    {
        bool delayedResponse;
        assert(translation);

        Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                        delayedResponse, true, latency);

        if (!delayedResponse)
            translation->finish(fault, req, tc, mode);
    }
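
    /*
     * A minimal usage sketch of the two entry points above (assumed
     * caller code, not taken from this file):
     *
     *     int lat = 0;
     *     Fault f = tlb->translateAtomic(req, tc, BaseTLB::Read, lat);
     *
     * Timing-mode callers instead pass a Translation object whose
     * finish() is invoked with the resulting fault; since translate()
     * never sets delayedResponse, finish() runs before translateTiming
     * returns.
     */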

    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }

    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;
    }
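
    /*
     * A worked example of the derived stats above (numbers assumed):
     * with localNumTLBAccesses = 200 and localNumTLBMisses = 30,
     * localTLBMissRate = 100 * 30 / 200 = 15 (percent). localLatency
     * is likewise a ratio formula, localCycles / localNumTLBAccesses,
     * evaluated when the stats are dumped.
     */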

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB).
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }
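
    /*
     * A note on the stats arithmetic in issueTLBLookup (a reading of the
     * code, not a spec): accessCycles -= curTick() * req_cnt records the
     * issue time as a negative contribution, presumably matched by a
     * corresponding += when the translation completes (not shown in this
     * excerpt). E.g., issuing at tick 1000 and completing at tick 1400
     * with req_cnt = 2 would leave -2000 + 2800 = 800 in the accumulator,
     * i.e. the coalesced wait time.
     */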

    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                               PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do paging protection checks. If a page fault is encountered, an
     * assertion fires.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   TlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
49#include "base/output.hh"
50#include "base/trace.hh"
51#include "cpu/base.hh"
52#include "cpu/thread_context.hh"
53#include "debug/GPUPrefetch.hh"
54#include "debug/GPUTLB.hh"
55#include "mem/packet_access.hh"
56#include "mem/page_table.hh"
57#include "mem/request.hh"
58#include "sim/process.hh"
59
60namespace X86ISA
61{
62
63 GpuTLB::GpuTLB(const Params *p)
64 : MemObject(p), configAddress(0), size(p->size),
65 cleanupEvent([this]{ cleanup(); }, name(), false,
66 Event::Maximum_Pri),
67 exitEvent([this]{ exitCallback(); }, name())
68 {
69 assoc = p->assoc;
70 assert(assoc <= size);
71 numSets = size/assoc;
72 allocationPolicy = p->allocationPolicy;
73 hasMemSidePort = false;
74 accessDistance = p->accessDistance;
75 clock = p->clk_domain->clockPeriod();
76
77 tlb.assign(size, TlbEntry());
78
79 freeList.resize(numSets);
80 entryList.resize(numSets);
81
82 for (int set = 0; set < numSets; ++set) {
83 for (int way = 0; way < assoc; ++way) {
84 int x = set * assoc + way;
85 freeList[set].push_back(&tlb.at(x));
86 }
87 }
88
89 FA = (size == assoc);
90
91 /**
92 * @warning: the set-associative version assumes you have a
93 * fixed page size of 4KB.
94 * If the page size is greather than 4KB (as defined in the
95 * TheISA::PageBytes), then there are various issues w/ the current
96 * implementation (you'd have the same 8KB page being replicated in
97 * different sets etc)
98 */
99 setMask = numSets - 1;
100
101 #if 0
102 // GpuTLB doesn't yet support full system
103 walker = p->walker;
104 walker->setTLB(this);
105 #endif
106
107 maxCoalescedReqs = p->maxOutstandingReqs;
108
109 // Do not allow maxCoalescedReqs to be more than the TLB associativity
110 if (maxCoalescedReqs > assoc) {
111 maxCoalescedReqs = assoc;
112 cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
113 }
114
115 outstandingReqs = 0;
116 hitLatency = p->hitLatency;
117 missLatency1 = p->missLatency1;
118 missLatency2 = p->missLatency2;
119
120 // create the slave ports based on the number of connected ports
121 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
122 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
123 name(), i), this, i));
124 }
125
126 // create the master ports based on the number of connected ports
127 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
128 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
129 name(), i), this, i));
130 }
131 }
132
133 // fixme: this is never called?
134 GpuTLB::~GpuTLB()
135 {
136 // make sure all the hash-maps are empty
137 assert(translationReturnEvent.empty());
138 }
139
140 BaseSlavePort&
141 GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
142 {
143 if (if_name == "slave") {
144 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
145 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
146 }
147
148 return *cpuSidePort[idx];
149 } else {
150 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
151 }
152 }
153
154 BaseMasterPort&
155 GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
156 {
157 if (if_name == "master") {
158 if (idx >= static_cast<PortID>(memSidePort.size())) {
159 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
160 }
161
162 hasMemSidePort = true;
163
164 return *memSidePort[idx];
165 } else {
166 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
167 }
168 }
169
170 TlbEntry*
171 GpuTLB::insert(Addr vpn, TlbEntry &entry)
172 {
173 TlbEntry *newEntry = nullptr;
174
175 /**
176 * vpn holds the virtual page address
177 * The least significant bits are simply masked
178 */
179 int set = (vpn >> TheISA::PageShift) & setMask;
180
181 if (!freeList[set].empty()) {
182 newEntry = freeList[set].front();
183 freeList[set].pop_front();
184 } else {
185 newEntry = entryList[set].back();
186 entryList[set].pop_back();
187 }
188
189 *newEntry = entry;
190 newEntry->vaddr = vpn;
191 entryList[set].push_front(newEntry);
192
193 return newEntry;
194 }
195
196 GpuTLB::EntryList::iterator
197 GpuTLB::lookupIt(Addr va, bool update_lru)
198 {
199 int set = (va >> TheISA::PageShift) & setMask;
200
201 if (FA) {
202 assert(!set);
203 }
204
205 auto entry = entryList[set].begin();
206 for (; entry != entryList[set].end(); ++entry) {
207 int page_size = (*entry)->size();
208
209 if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
210 DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
211 "with size %#x.\n", va, (*entry)->vaddr, page_size);
212
213 if (update_lru) {
214 entryList[set].push_front(*entry);
215 entryList[set].erase(entry);
216 entry = entryList[set].begin();
217 }
218
219 break;
220 }
221 }
222
223 return entry;
224 }
225
226 TlbEntry*
227 GpuTLB::lookup(Addr va, bool update_lru)
228 {
229 int set = (va >> TheISA::PageShift) & setMask;
230
231 auto entry = lookupIt(va, update_lru);
232
233 if (entry == entryList[set].end())
234 return nullptr;
235 else
236 return *entry;
237 }
238
239 void
240 GpuTLB::invalidateAll()
241 {
242 DPRINTF(GPUTLB, "Invalidating all entries.\n");
243
244 for (int i = 0; i < numSets; ++i) {
245 while (!entryList[i].empty()) {
246 TlbEntry *entry = entryList[i].front();
247 entryList[i].pop_front();
248 freeList[i].push_back(entry);
249 }
250 }
251 }
252
253 void
254 GpuTLB::setConfigAddress(uint32_t addr)
255 {
256 configAddress = addr;
257 }
258
259 void
260 GpuTLB::invalidateNonGlobal()
261 {
262 DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
263
264 for (int i = 0; i < numSets; ++i) {
265 for (auto entryIt = entryList[i].begin();
266 entryIt != entryList[i].end();) {
267 if (!(*entryIt)->global) {
268 freeList[i].push_back(*entryIt);
269 entryList[i].erase(entryIt++);
270 } else {
271 ++entryIt;
272 }
273 }
274 }
275 }
276
277 void
278 GpuTLB::demapPage(Addr va, uint64_t asn)
279 {
280
281 int set = (va >> TheISA::PageShift) & setMask;
282 auto entry = lookupIt(va, false);
283
284 if (entry != entryList[set].end()) {
285 freeList[set].push_back(*entry);
286 entryList[set].erase(entry);
287 }
288 }
289
290 Fault
291 GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
292 {
293 DPRINTF(GPUTLB, "Addresses references internal memory.\n");
294 Addr vaddr = req->getVaddr();
295 Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
296
297 if (prefix == IntAddrPrefixCPUID) {
298 panic("CPUID memory space not yet implemented!\n");
299 } else if (prefix == IntAddrPrefixMSR) {
300 vaddr = vaddr >> 3;
301 req->setFlags(Request::MMAPPED_IPR);
302 Addr regNum = 0;
303
304 switch (vaddr & ~IntAddrPrefixMask) {
305 case 0x10:
306 regNum = MISCREG_TSC;
307 break;
308 case 0x1B:
309 regNum = MISCREG_APIC_BASE;
310 break;
311 case 0xFE:
312 regNum = MISCREG_MTRRCAP;
313 break;
314 case 0x174:
315 regNum = MISCREG_SYSENTER_CS;
316 break;
317 case 0x175:
318 regNum = MISCREG_SYSENTER_ESP;
319 break;
320 case 0x176:
321 regNum = MISCREG_SYSENTER_EIP;
322 break;
323 case 0x179:
324 regNum = MISCREG_MCG_CAP;
325 break;
326 case 0x17A:
327 regNum = MISCREG_MCG_STATUS;
328 break;
329 case 0x17B:
330 regNum = MISCREG_MCG_CTL;
331 break;
332 case 0x1D9:
333 regNum = MISCREG_DEBUG_CTL_MSR;
334 break;
335 case 0x1DB:
336 regNum = MISCREG_LAST_BRANCH_FROM_IP;
337 break;
338 case 0x1DC:
339 regNum = MISCREG_LAST_BRANCH_TO_IP;
340 break;
341 case 0x1DD:
342 regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
343 break;
344 case 0x1DE:
345 regNum = MISCREG_LAST_EXCEPTION_TO_IP;
346 break;
347 case 0x200:
348 regNum = MISCREG_MTRR_PHYS_BASE_0;
349 break;
350 case 0x201:
351 regNum = MISCREG_MTRR_PHYS_MASK_0;
352 break;
353 case 0x202:
354 regNum = MISCREG_MTRR_PHYS_BASE_1;
355 break;
356 case 0x203:
357 regNum = MISCREG_MTRR_PHYS_MASK_1;
358 break;
359 case 0x204:
360 regNum = MISCREG_MTRR_PHYS_BASE_2;
361 break;
362 case 0x205:
363 regNum = MISCREG_MTRR_PHYS_MASK_2;
364 break;
365 case 0x206:
366 regNum = MISCREG_MTRR_PHYS_BASE_3;
367 break;
368 case 0x207:
369 regNum = MISCREG_MTRR_PHYS_MASK_3;
370 break;
371 case 0x208:
372 regNum = MISCREG_MTRR_PHYS_BASE_4;
373 break;
374 case 0x209:
375 regNum = MISCREG_MTRR_PHYS_MASK_4;
376 break;
377 case 0x20A:
378 regNum = MISCREG_MTRR_PHYS_BASE_5;
379 break;
380 case 0x20B:
381 regNum = MISCREG_MTRR_PHYS_MASK_5;
382 break;
383 case 0x20C:
384 regNum = MISCREG_MTRR_PHYS_BASE_6;
385 break;
386 case 0x20D:
387 regNum = MISCREG_MTRR_PHYS_MASK_6;
388 break;
389 case 0x20E:
390 regNum = MISCREG_MTRR_PHYS_BASE_7;
391 break;
392 case 0x20F:
393 regNum = MISCREG_MTRR_PHYS_MASK_7;
394 break;
395 case 0x250:
396 regNum = MISCREG_MTRR_FIX_64K_00000;
397 break;
398 case 0x258:
399 regNum = MISCREG_MTRR_FIX_16K_80000;
400 break;
401 case 0x259:
402 regNum = MISCREG_MTRR_FIX_16K_A0000;
403 break;
404 case 0x268:
405 regNum = MISCREG_MTRR_FIX_4K_C0000;
406 break;
407 case 0x269:
408 regNum = MISCREG_MTRR_FIX_4K_C8000;
409 break;
410 case 0x26A:
411 regNum = MISCREG_MTRR_FIX_4K_D0000;
412 break;
413 case 0x26B:
414 regNum = MISCREG_MTRR_FIX_4K_D8000;
415 break;
416 case 0x26C:
417 regNum = MISCREG_MTRR_FIX_4K_E0000;
418 break;
419 case 0x26D:
420 regNum = MISCREG_MTRR_FIX_4K_E8000;
421 break;
422 case 0x26E:
423 regNum = MISCREG_MTRR_FIX_4K_F0000;
424 break;
425 case 0x26F:
426 regNum = MISCREG_MTRR_FIX_4K_F8000;
427 break;
428 case 0x277:
429 regNum = MISCREG_PAT;
430 break;
431 case 0x2FF:
432 regNum = MISCREG_DEF_TYPE;
433 break;
434 case 0x400:
435 regNum = MISCREG_MC0_CTL;
436 break;
437 case 0x404:
438 regNum = MISCREG_MC1_CTL;
439 break;
440 case 0x408:
441 regNum = MISCREG_MC2_CTL;
442 break;
443 case 0x40C:
444 regNum = MISCREG_MC3_CTL;
445 break;
446 case 0x410:
447 regNum = MISCREG_MC4_CTL;
448 break;
449 case 0x414:
450 regNum = MISCREG_MC5_CTL;
451 break;
452 case 0x418:
453 regNum = MISCREG_MC6_CTL;
454 break;
455 case 0x41C:
456 regNum = MISCREG_MC7_CTL;
457 break;
458 case 0x401:
459 regNum = MISCREG_MC0_STATUS;
460 break;
461 case 0x405:
462 regNum = MISCREG_MC1_STATUS;
463 break;
464 case 0x409:
465 regNum = MISCREG_MC2_STATUS;
466 break;
467 case 0x40D:
468 regNum = MISCREG_MC3_STATUS;
469 break;
470 case 0x411:
471 regNum = MISCREG_MC4_STATUS;
472 break;
473 case 0x415:
474 regNum = MISCREG_MC5_STATUS;
475 break;
476 case 0x419:
477 regNum = MISCREG_MC6_STATUS;
478 break;
479 case 0x41D:
480 regNum = MISCREG_MC7_STATUS;
481 break;
482 case 0x402:
483 regNum = MISCREG_MC0_ADDR;
484 break;
485 case 0x406:
486 regNum = MISCREG_MC1_ADDR;
487 break;
488 case 0x40A:
489 regNum = MISCREG_MC2_ADDR;
490 break;
491 case 0x40E:
492 regNum = MISCREG_MC3_ADDR;
493 break;
494 case 0x412:
495 regNum = MISCREG_MC4_ADDR;
496 break;
497 case 0x416:
498 regNum = MISCREG_MC5_ADDR;
499 break;
500 case 0x41A:
501 regNum = MISCREG_MC6_ADDR;
502 break;
503 case 0x41E:
504 regNum = MISCREG_MC7_ADDR;
505 break;
506 case 0x403:
507 regNum = MISCREG_MC0_MISC;
508 break;
509 case 0x407:
510 regNum = MISCREG_MC1_MISC;
511 break;
512 case 0x40B:
513 regNum = MISCREG_MC2_MISC;
514 break;
515 case 0x40F:
516 regNum = MISCREG_MC3_MISC;
517 break;
518 case 0x413:
519 regNum = MISCREG_MC4_MISC;
520 break;
521 case 0x417:
522 regNum = MISCREG_MC5_MISC;
523 break;
524 case 0x41B:
525 regNum = MISCREG_MC6_MISC;
526 break;
527 case 0x41F:
528 regNum = MISCREG_MC7_MISC;
529 break;
530 case 0xC0000080:
531 regNum = MISCREG_EFER;
532 break;
533 case 0xC0000081:
534 regNum = MISCREG_STAR;
535 break;
536 case 0xC0000082:
537 regNum = MISCREG_LSTAR;
538 break;
539 case 0xC0000083:
540 regNum = MISCREG_CSTAR;
541 break;
542 case 0xC0000084:
543 regNum = MISCREG_SF_MASK;
544 break;
545 case 0xC0000100:
546 regNum = MISCREG_FS_BASE;
547 break;
548 case 0xC0000101:
549 regNum = MISCREG_GS_BASE;
550 break;
551 case 0xC0000102:
552 regNum = MISCREG_KERNEL_GS_BASE;
553 break;
554 case 0xC0000103:
555 regNum = MISCREG_TSC_AUX;
556 break;
557 case 0xC0010000:
558 regNum = MISCREG_PERF_EVT_SEL0;
559 break;
560 case 0xC0010001:
561 regNum = MISCREG_PERF_EVT_SEL1;
562 break;
563 case 0xC0010002:
564 regNum = MISCREG_PERF_EVT_SEL2;
565 break;
566 case 0xC0010003:
567 regNum = MISCREG_PERF_EVT_SEL3;
568 break;
569 case 0xC0010004:
570 regNum = MISCREG_PERF_EVT_CTR0;
571 break;
572 case 0xC0010005:
573 regNum = MISCREG_PERF_EVT_CTR1;
574 break;
575 case 0xC0010006:
576 regNum = MISCREG_PERF_EVT_CTR2;
577 break;
578 case 0xC0010007:
579 regNum = MISCREG_PERF_EVT_CTR3;
580 break;
581 case 0xC0010010:
582 regNum = MISCREG_SYSCFG;
583 break;
584 case 0xC0010016:
585 regNum = MISCREG_IORR_BASE0;
586 break;
587 case 0xC0010017:
588 regNum = MISCREG_IORR_BASE1;
589 break;
590 case 0xC0010018:
591 regNum = MISCREG_IORR_MASK0;
592 break;
593 case 0xC0010019:
594 regNum = MISCREG_IORR_MASK1;
595 break;
596 case 0xC001001A:
597 regNum = MISCREG_TOP_MEM;
598 break;
599 case 0xC001001D:
600 regNum = MISCREG_TOP_MEM2;
601 break;
602 case 0xC0010114:
603 regNum = MISCREG_VM_CR;
604 break;
605 case 0xC0010115:
606 regNum = MISCREG_IGNNE;
607 break;
608 case 0xC0010116:
609 regNum = MISCREG_SMM_CTL;
610 break;
611 case 0xC0010117:
612 regNum = MISCREG_VM_HSAVE_PA;
613 break;
614 default:
615 return std::make_shared<GeneralProtection>(0);
616 }
617 //The index is multiplied by the size of a MiscReg so that
618 //any memory dependence calculations will not see these as
619 //overlapping.
620 req->setPaddr(regNum * sizeof(MiscReg));
621 return NoFault;
622 } else if (prefix == IntAddrPrefixIO) {
623 // TODO If CPL > IOPL or in virtual mode, check the I/O permission
624 // bitmap in the TSS.
625
626 Addr IOPort = vaddr & ~IntAddrPrefixMask;
627 // Make sure the address fits in the expected 16 bit IO address
628 // space.
629 assert(!(IOPort & ~0xFFFF));
630
631 if (IOPort == 0xCF8 && req->getSize() == 4) {
632 req->setFlags(Request::MMAPPED_IPR);
633 req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
634 } else if ((IOPort & ~mask(2)) == 0xCFC) {
635 req->setFlags(Request::UNCACHEABLE);
636
637 Addr configAddress =
638 tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
639
640 if (bits(configAddress, 31, 31)) {
641 req->setPaddr(PhysAddrPrefixPciConfig |
642 mbits(configAddress, 30, 2) |
643 (IOPort & mask(2)));
644 } else {
645 req->setPaddr(PhysAddrPrefixIO | IOPort);
646 }
647 } else {
648 req->setFlags(Request::UNCACHEABLE);
649 req->setPaddr(PhysAddrPrefixIO | IOPort);
650 }
651 return NoFault;
652 } else {
653 panic("Access to unrecognized internal address space %#x.\n",
654 prefix);
655 }
656 }
657
658 /**
659 * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
660 * and false on a TLB miss.
661 * Many of the checks about different modes have been converted to
662 * assertions, since these parts of the code are not really used.
663 * On a hit it will update the LRU stack.
664 */
665 bool
666 GpuTLB::tlbLookup(const RequestPtr &req,
667 ThreadContext *tc, bool update_stats)
668 {
669 bool tlb_hit = false;
670 #ifndef NDEBUG
671 uint32_t flags = req->getFlags();
672 int seg = flags & SegmentFlagMask;
673 #endif
674
675 assert(seg != SEGMENT_REG_MS);
676 Addr vaddr = req->getVaddr();
677 DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
678 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
679
680 if (m5Reg.prot) {
681 DPRINTF(GPUTLB, "In protected mode.\n");
682 // make sure we are in 64-bit mode
683 assert(m5Reg.mode == LongMode);
684
685 // If paging is enabled, do the translation.
686 if (m5Reg.paging) {
687 DPRINTF(GPUTLB, "Paging enabled.\n");
688 //update LRU stack on a hit
689 TlbEntry *entry = lookup(vaddr, true);
690
691 if (entry)
692 tlb_hit = true;
693
694 if (!update_stats) {
695 // functional tlb access for memory initialization
696 // i.e., memory seeding or instr. seeding -> don't update
697 // TLB and stats
698 return tlb_hit;
699 }
700
701 localNumTLBAccesses++;
702
703 if (!entry) {
704 localNumTLBMisses++;
705 } else {
706 localNumTLBHits++;
707 }
708 }
709 }
710
711 return tlb_hit;
712 }
713
714 Fault
715 GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
716 Translation *translation, Mode mode,
717 bool &delayedResponse, bool timing, int &latency)
718 {
719 uint32_t flags = req->getFlags();
720 int seg = flags & SegmentFlagMask;
721 bool storeCheck = flags & (StoreCheck << FlagShift);
722
723 // If this is true, we're dealing with a request
724 // to a non-memory address space.
725 if (seg == SEGMENT_REG_MS) {
726 return translateInt(req, tc);
727 }
728
729 delayedResponse = false;
730 Addr vaddr = req->getVaddr();
731 DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
732
733 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
734
735 // If protected mode has been enabled...
736 if (m5Reg.prot) {
737 DPRINTF(GPUTLB, "In protected mode.\n");
738 // If we're not in 64-bit mode, do protection/limit checks
739 if (m5Reg.mode != LongMode) {
740 DPRINTF(GPUTLB, "Not in long mode. Checking segment "
741 "protection.\n");
742
743 // Check for a null segment selector.
744 if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
745 seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
746 && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
747 return std::make_shared<GeneralProtection>(0);
748 }
749
750 bool expandDown = false;
751 SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
752
753 if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
754 if (!attr.writable && (mode == BaseTLB::Write ||
755 storeCheck))
756 return std::make_shared<GeneralProtection>(0);
757
758 if (!attr.readable && mode == BaseTLB::Read)
759 return std::make_shared<GeneralProtection>(0);
760
761 expandDown = attr.expandDown;
762
763 }
764
765 Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
766 Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
767 // This assumes we're not in 64 bit mode. If we were, the
768 // default address size is 64 bits, overridable to 32.
769 int size = 32;
770 bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
771 SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
772
773 if ((csAttr.defaultSize && sizeOverride) ||
774 (!csAttr.defaultSize && !sizeOverride)) {
775 size = 16;
776 }
777
778 Addr offset = bits(vaddr - base, size - 1, 0);
779 Addr endOffset = offset + req->getSize() - 1;
780
781 if (expandDown) {
782 DPRINTF(GPUTLB, "Checking an expand down segment.\n");
783 warn_once("Expand down segments are untested.\n");
784
785 if (offset <= limit || endOffset <= limit)
786 return std::make_shared<GeneralProtection>(0);
787 } else {
788 if (offset > limit || endOffset > limit)
789 return std::make_shared<GeneralProtection>(0);
790 }
791 }
792
793 // If paging is enabled, do the translation.
794 if (m5Reg.paging) {
795 DPRINTF(GPUTLB, "Paging enabled.\n");
796 // The vaddr already has the segment base applied.
797 TlbEntry *entry = lookup(vaddr);
798 localNumTLBAccesses++;
799
800 if (!entry) {
801 localNumTLBMisses++;
802 if (timing) {
803 latency = missLatency1;
804 }
805
806 if (FullSystem) {
807 fatal("GpuTLB doesn't support full-system mode\n");
808 } else {
809 DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
810 "at pc %#x.\n", vaddr, tc->instAddr());
811
812 Process *p = tc->getProcessPtr();
813 const EmulationPageTable::Entry *pte =
814 p->pTable->lookup(vaddr);
815
816 if (!pte && mode != BaseTLB::Execute) {
817 // penalize a "page fault" more
818 if (timing)
819 latency += missLatency2;
820
821 if (p->fixupStackFault(vaddr))
822 pte = p->pTable->lookup(vaddr);
823 }
824
825 if (!pte) {
826 return std::make_shared<PageFault>(vaddr, true,
827 mode, true,
828 false);
829 } else {
830 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
831
832 DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
833 alignedVaddr, pte->paddr);
834
835 TlbEntry gpuEntry(p->pid(), alignedVaddr,
836 pte->paddr, false, false);
837 entry = insert(alignedVaddr, gpuEntry);
838 }
839
840 DPRINTF(GPUTLB, "Miss was serviced.\n");
841 }
842 } else {
843 localNumTLBHits++;
844
845 if (timing) {
846 latency = hitLatency;
847 }
848 }
849
850 // Do paging protection checks.
851 bool inUser = (m5Reg.cpl == 3 &&
852 !(flags & (CPL0FlagBit << FlagShift)));
853
854 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
855 bool badWrite = (!entry->writable && (inUser || cr0.wp));
856
857 if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
858 badWrite)) {
859 // The page must have been present to get into the TLB in
860 // the first place. We'll assume the reserved bits are
861 // fine even though we're not checking them.
862 return std::make_shared<PageFault>(vaddr, true, mode,
863 inUser, false);
864 }
865
866 if (storeCheck && badWrite) {
867 // This would fault if this were a write, so return a page
868 // fault that reflects that happening.
869 return std::make_shared<PageFault>(vaddr, true,
870 BaseTLB::Write,
871 inUser, false);
872 }
873
874
875 DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
876 "checks.\n", entry->paddr);
877
878 int page_size = entry->size();
879 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
880 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
881 req->setPaddr(paddr);
882
883 if (entry->uncacheable)
884 req->setFlags(Request::UNCACHEABLE);
885 } else {
886 //Use the address which already has segmentation applied.
887 DPRINTF(GPUTLB, "Paging disabled.\n");
888 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
889 req->setPaddr(vaddr);
890 }
891 } else {
892 // Real mode
893 DPRINTF(GPUTLB, "In real mode.\n");
894 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
895 req->setPaddr(vaddr);
896 }
897
898 // Check for an access to the local APIC
899 if (FullSystem) {
900 LocalApicBase localApicBase =
901 tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
902
903 Addr baseAddr = localApicBase.base * PageBytes;
904 Addr paddr = req->getPaddr();
905
906 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
907 // Force the access to be uncacheable.
908 req->setFlags(Request::UNCACHEABLE);
909 req->setPaddr(x86LocalAPICAddress(tc->contextId(),
910 paddr - baseAddr));
911 }
912 }
913
914 return NoFault;
915 };
916
917 Fault
918 GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
919 Mode mode, int &latency)
920 {
921 bool delayedResponse;
922
923 return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
924 latency);
925 }
926
927 void
928 GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
929 Translation *translation, Mode mode, int &latency)
930 {
931 bool delayedResponse;
932 assert(translation);
933
934 Fault fault = GpuTLB::translate(req, tc, translation, mode,
935 delayedResponse, true, latency);
936
937 if (!delayedResponse)
938 translation->finish(fault, req, tc, mode);
939 }
940
941 Walker*
942 GpuTLB::getWalker()
943 {
944 return walker;
945 }
946
947
948 void
949 GpuTLB::serialize(CheckpointOut &cp) const
950 {
951 }
952
953 void
954 GpuTLB::unserialize(CheckpointIn &cp)
955 {
956 }
957
958 void
959 GpuTLB::regStats()
960 {
961 MemObject::regStats();
962
963 localNumTLBAccesses
964 .name(name() + ".local_TLB_accesses")
965 .desc("Number of TLB accesses")
966 ;
967
968 localNumTLBHits
969 .name(name() + ".local_TLB_hits")
970 .desc("Number of TLB hits")
971 ;
972
973 localNumTLBMisses
974 .name(name() + ".local_TLB_misses")
975 .desc("Number of TLB misses")
976 ;
977
978 localTLBMissRate
979 .name(name() + ".local_TLB_miss_rate")
980 .desc("TLB miss rate")
981 ;
982
983 accessCycles
984 .name(name() + ".access_cycles")
985 .desc("Cycles spent accessing this TLB level")
986 ;
987
988 pageTableCycles
989 .name(name() + ".page_table_cycles")
990 .desc("Cycles spent accessing the page table")
991 ;
992
993 localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
994
995 numUniquePages
996 .name(name() + ".unique_pages")
997 .desc("Number of unique pages touched")
998 ;
999
1000 localCycles
1001 .name(name() + ".local_cycles")
1002 .desc("Number of cycles spent in queue for all incoming reqs")
1003 ;
1004
1005 localLatency
1006 .name(name() + ".local_latency")
1007 .desc("Avg. latency over incoming coalesced reqs")
1008 ;
1009
1010 localLatency = localCycles / localNumTLBAccesses;
1011
1012 globalNumTLBAccesses
1013 .name(name() + ".global_TLB_accesses")
1014 .desc("Number of TLB accesses")
1015 ;
1016
1017 globalNumTLBHits
1018 .name(name() + ".global_TLB_hits")
1019 .desc("Number of TLB hits")
1020 ;
1021
1022 globalNumTLBMisses
1023 .name(name() + ".global_TLB_misses")
1024 .desc("Number of TLB misses")
1025 ;
1026
1027 globalTLBMissRate
1028 .name(name() + ".global_TLB_miss_rate")
1029 .desc("TLB miss rate")
1030 ;
1031
1032 globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1033
1034 avgReuseDistance
1035 .name(name() + ".avg_reuse_distance")
1036 .desc("avg. reuse distance over all pages (in ticks)")
1037 ;
1038
1039 }
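
Note that the miss-rate and latency assignments above do not perform a
division at registration time: assuming these members are declared as
Stats::Formula (with the counters as Stats::Scalar) in gpu_tlb.hh, the
assignment records the expression, which is evaluated whenever
statistics are dumped. A hedged sketch of the idiom:

    Stats::Scalar  misses, accesses;    // incremented during simulation
    Stats::Formula missRate;            // evaluated at stats-dump time

    missRate = 100 * misses / accesses; // stores the formula, not a value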
1040
1041 /**
1042 * Do the TLB lookup for this coalesced request and schedule
1043 * another event <TLB access latency> cycles later.
1044 */
1045
1046 void
1047 GpuTLB::issueTLBLookup(PacketPtr pkt)
1048 {
1049 assert(pkt);
1050 assert(pkt->senderState);
1051
1052 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1053 TheISA::PageBytes);
1054
1055 TranslationState *sender_state =
1056 safe_cast<TranslationState*>(pkt->senderState);
1057
1058 bool update_stats = !sender_state->prefetch;
1059 ThreadContext * tmp_tc = sender_state->tc;
1060
1061 DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1062 virt_page_addr);
1063
1064 int req_cnt = sender_state->reqCnt.back();
1065
1066 if (update_stats) {
1067 accessCycles -= (curTick() * req_cnt);
1068 localCycles -= curTick();
1069 updatePageFootprint(virt_page_addr);
1070 globalNumTLBAccesses += req_cnt;
1071 }
1072
1073 tlbOutcome lookup_outcome = TLB_MISS;
1074 const RequestPtr &tmp_req = pkt->req;
1075
1076 // Access the TLB and figure out if it's a hit or a miss.
1077 bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1078
1079 if (success) {
1080 lookup_outcome = TLB_HIT;
1081 // Put the entry in SenderState
1082 TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1083 assert(entry);
1084
1085 auto p = sender_state->tc->getProcessPtr();
1086 sender_state->tlbEntry =
1087 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1088 false, false);
1089
1090 if (update_stats) {
1091 // the reqCnt has an entry per level, so its size tells us
1092 // which level we are in
1093 sender_state->hitLevel = sender_state->reqCnt.size();
1094 globalNumTLBHits += req_cnt;
1095 }
1096 } else {
1097 if (update_stats)
1098 globalNumTLBMisses += req_cnt;
1099 }
1100
1101 /*
1102 * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1103 * as the TLB access latency.
1104 *
1105 * We create and schedule a new TLBEvent which will help us take the
1106 * appropriate actions (e.g., update TLB on a hit, send request to lower
1107 * level TLB on a miss, or start a page walk if this was the last-level
1108 * TLB)
1109 */
1110 TLBEvent *tlb_event =
1111 new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1112
1113 if (translationReturnEvent.count(virt_page_addr)) {
1114 panic("Virtual Page Address %#x already has a return event\n",
1115 virt_page_addr);
1116 }
1117
1118 translationReturnEvent[virt_page_addr] = tlb_event;
1119 assert(tlb_event);
1120
1121 DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1122 curTick() + this->ticks(hitLatency));
1123
1124 schedule(tlb_event, curTick() + this->ticks(hitLatency));
1125 }
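
The negative charges to accessCycles and localCycles above are paired
with positive charges of curTick() when the translation returns, so
each statistic accumulates only the elapsed ticks without keeping a
per-request timestamp. A minimal sketch of the pattern (names are
illustrative):

    Stats::Scalar cycles;

    void onIssue()    { cycles -= curTick(); }   // at lookup time
    void onComplete() { cycles += curTick(); }   // when the reply returns
    // Net contribution per request: completion tick - issue tick.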
1126
1127 GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1128 PacketPtr _pkt)
1129 : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1130 outcome(tlb_outcome), pkt(_pkt)
1131 {
1132 }
1133
1134 /**
1135 * Do paging protection checks. If we encounter a page fault,
1136 * the simulator panics.
1137 */
1138 void
1139 GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1140 TlbEntry * tlb_entry, Mode mode)
1141 {
1142 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1143 uint32_t flags = pkt->req->getFlags();
1144 bool storeCheck = flags & (StoreCheck << FlagShift);
1145
1146 // Do paging protection checks.
1147 bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1148 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1149
1150 bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1151
1152 if ((inUser && !tlb_entry->user) ||
1153 (mode == BaseTLB::Write && badWrite)) {
1154 // The page must have been present to get into the TLB in
1155 // the first place. We'll assume the reserved bits are
1156 // fine even though we're not checking them.
1157 panic("Page fault detected");
1158 }
1159
1160 if (storeCheck && badWrite) {
1161 // This would fault if this were a write, so return a page
1162 // fault that reflects that happening.
1163 panic("Page fault detected");
1164 }
1165 }
1166
1167 /**
1168 * handleTranslationReturn is called on a TLB hit, when a TLB miss
1169 * returns, or when a page fault returns. In the latter case it is
1170 * called with TLB_MISS as the tlbOutcome.
1171 */
1172 void
1173 GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1174 PacketPtr pkt)
1175 {
1176
1177 assert(pkt);
1178 Addr vaddr = pkt->req->getVaddr();
1179
1180 TranslationState *sender_state =
1181 safe_cast<TranslationState*>(pkt->senderState);
1182
1183 ThreadContext *tc = sender_state->tc;
1184 Mode mode = sender_state->tlbMode;
1185
1186 TlbEntry *local_entry, *new_entry;
1187
1188 if (tlb_outcome == TLB_HIT) {
1189 DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1190 local_entry = sender_state->tlbEntry;
1191 } else {
1192 DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1193 vaddr);
1194
1195 // We are returning either from a page walk or from a hit at a lower
1196 // TLB level. The senderState should be "carrying" a pointer to the
1197 // correct TLBEntry.
1198 new_entry = sender_state->tlbEntry;
1199 assert(new_entry);
1200 local_entry = new_entry;
1201
1202 if (allocationPolicy) {
1203 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1204 virt_page_addr);
1205
1206 local_entry = insert(virt_page_addr, *new_entry);
1207 }
1208
1209 assert(local_entry);
1210 }
1211
1212 /**
1213 * At this point the packet carries an up-to-date tlbEntry pointer
1214 * in its senderState.
1215 * Next step is to do the paging protection checks.
1216 */
1217 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1218 "while paddr was %#x.\n", local_entry->vaddr,
1219 local_entry->paddr);
1220
1221 pagingProtectionChecks(tc, pkt, local_entry, mode);
1222 int page_size = local_entry->size();
1223 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1224 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1225
1226 // Since this packet will be sent through the cpu side slave port,
1227 // it must be converted to a response pkt if it is not one already
1228 if (pkt->isRequest()) {
1229 pkt->makeTimingResponse();
1230 }
1231
1232 pkt->req->setPaddr(paddr);
1233
1234 if (local_entry->uncacheable) {
1235 pkt->req->setFlags(Request::UNCACHEABLE);
1236 }
1237
1238 //send packet back to coalescer
1239 cpuSidePort[0]->sendTimingResp(pkt);
1240 //schedule cleanup event
1241 cleanupQueue.push(virt_page_addr);
1242
1243 // Schedule this only once per cycle; the check is required
1244 // because we might have multiple translations returning in
1245 // the same cycle.
1246 // This is a maximum-priority event and must be on the same cycle
1247 // as the cleanup event in TLBCoalescer to avoid a race with
1248 // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry.
1249 if (!cleanupEvent.scheduled())
1250 schedule(cleanupEvent, curTick());
1251 }
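
The physical address is formed by OR-ing the page-aligned entry paddr
with the page offset of the virtual address. A worked example, assuming
a 4 KB page (values are illustrative):

    constexpr uint64_t page_size = 0x1000;            // 4 KB, assumed
    constexpr uint64_t vaddr     = 0x7f001234;
    constexpr uint64_t frame     = 0x200000;          // entry->paddr
    constexpr uint64_t paddr     = frame | (vaddr & (page_size - 1));
    static_assert(paddr == 0x200234, "offset bits pass through unchanged");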
1252
1253 /**
1254 * Here we take the appropriate actions based on the result of the
1255 * TLB lookup.
1256 */
1257 void
1258 GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1259 PacketPtr pkt)
1260 {
1261 DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1262
1263 assert(translationReturnEvent[virtPageAddr]);
1264 assert(pkt);
1265
1266 TranslationState *tmp_sender_state =
1267 safe_cast<TranslationState*>(pkt->senderState);
1268
1269 int req_cnt = tmp_sender_state->reqCnt.back();
1270 bool update_stats = !tmp_sender_state->prefetch;
1271
1272
1273 if (outcome == TLB_HIT) {
1274 handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1275
1276 if (update_stats) {
1277 accessCycles += (req_cnt * curTick());
1278 localCycles += curTick();
1279 }
1280
1281 } else if (outcome == TLB_MISS) {
1282
1283 DPRINTF(GPUTLB, "This is a TLB miss\n");
1284 if (update_stats) {
1285 accessCycles += (req_cnt*curTick());
1286 localCycles += curTick();
1287 }
1288
1289 if (hasMemSidePort) {
1290 // the one cycle added here represents the delay from when we
1291 // get the reply back until we propagate it to the coalescer
1292 // above.
1293 if (update_stats) {
1294 accessCycles += (req_cnt * 1);
1295 localCycles += 1;
1296 }
1297
1298 /**
1299 * There is a TLB below. Send the coalesced request.
1300 * We actually send the very first packet of all the
1301 * pending packets for this virtual page address.
1302 */
1303 if (!memSidePort[0]->sendTimingReq(pkt)) {
1304 DPRINTF(GPUTLB, "Failed sending translation request to "
1305 "lower level TLB for addr %#x\n", virtPageAddr);
1306
1307 memSidePort[0]->retries.push_back(pkt);
1308 } else {
1309 DPRINTF(GPUTLB, "Sent translation request to lower level "
1310 "TLB for addr %#x\n", virtPageAddr);
1311 }
1312 } else {
1313 //this is the last level TLB. Start a page walk
1314 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1315 "addr %#x\n", virtPageAddr);
1316
1317 if (update_stats)
1318 pageTableCycles -= (req_cnt*curTick());
1319
1320 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1321 assert(tlb_event);
1322 tlb_event->updateOutcome(PAGE_WALK);
1323 schedule(tlb_event, curTick() + ticks(missLatency2));
1324 }
1325 } else if (outcome == PAGE_WALK) {
1326 if (update_stats)
1327 pageTableCycles += (req_cnt*curTick());
1328
1329 // Need to access the page table and update the TLB
1330 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1331 virtPageAddr);
1332
1333 TranslationState *sender_state =
1334 safe_cast<TranslationState*>(pkt->senderState);
1335
1336 Process *p = sender_state->tc->getProcessPtr();
1337 Addr vaddr = pkt->req->getVaddr();
1338 #ifndef NDEBUG
1339 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1340 assert(alignedVaddr == virtPageAddr);
1341 #endif
1342 const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1343 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1344 p->fixupStackFault(vaddr)) {
1345 pte = p->pTable->lookup(vaddr);
1346 }
1347
1348 if (pte) {
1349 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1350 pte->paddr);
1351
1352 sender_state->tlbEntry =
1353 new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1354 false);
1355 } else {
1356 sender_state->tlbEntry = nullptr;
1357 }
1358
1359 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1360 } else if (outcome == MISS_RETURN) {
1361 /** we add an extra cycle in the return path of the translation
1362 * requests in between the various TLB levels.
1363 */
1364 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1365 } else {
1366 panic("Unexpected TLB outcome %d", outcome);
1367 }
1368 }
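
translationReturn() is effectively a four-state dispatcher on the
lookup outcome; a condensed sketch of the transitions it implements:

    switch (outcome) {
      case TLB_HIT:     /* respond to the coalescer */              break;
      case TLB_MISS:    /* forward down, or schedule PAGE_WALK */   break;
      case PAGE_WALK:   /* consult the page table, reply as miss */ break;
      case MISS_RETURN: /* lower level answered, reply as miss */   break;
      default:          panic("Unexpected TLB outcome %d", outcome);
    }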
1369
1370 void
1371 GpuTLB::TLBEvent::process()
1372 {
1373 tlb->translationReturn(virtPageAddr, outcome, pkt);
1374 }
1375
1376 const char*
1377 GpuTLB::TLBEvent::description() const
1378 {
1379 return "trigger translationDoneEvent";
1380 }
1381
1382 void
1383 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1384 {
1385 outcome = _outcome;
1386 }
1387
1388 Addr
1389 GpuTLB::TLBEvent::getTLBEventVaddr()
1390 {
1391 return virtPageAddr;
1392 }
1393
1394 /*
1395 * recvTiming receives a coalesced timing request from a TLBCoalescer
1396 * and calls issueTLBLookup().
1397 * It rejects the packet only if we have exceeded the maximum
1398 * number of outstanding requests for the TLB.
1399 */
1400 bool
1401 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1402 {
1403 if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1404 tlb->issueTLBLookup(pkt);
1405 // update number of outstanding translation requests
1406 tlb->outstandingReqs++;
1407 return true;
1408 } else {
1409 DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1410 tlb->outstandingReqs);
1411 return false;
1412 }
1413 }
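
The false return value implements backpressure: the sender must hold on
to the packet and resend it after the TLB calls sendRetryReq() from
cleanup(). A sketch of the sender-side contract (port and queue names
are illustrative):

    if (!tlbPort->sendTimingReq(pkt))
        pendingPkts.push_back(pkt);   // resent from recvReqRetry()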
1414
1415 /**
1416 * handleFuncTranslationReturn is called on a TLB hit,
1417 * when a TLB miss returns or when a page fault returns.
1418 * It updates LRU, inserts the TLB entry on a miss
1419 * depending on the allocation policy and does the required
1420 * protection checks. It does NOT create a new packet to
1421 * update the packet's addr; this is done in hsail-gpu code.
1422 */
1423 void
1424 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1425 {
1426 TranslationState *sender_state =
1427 safe_cast<TranslationState*>(pkt->senderState);
1428
1429 ThreadContext *tc = sender_state->tc;
1430 Mode mode = sender_state->tlbMode;
1431 Addr vaddr = pkt->req->getVaddr();
1432
1433 TlbEntry *local_entry, *new_entry;
1434
1435 if (tlb_outcome == TLB_HIT) {
1436 DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1437 "%#x\n", vaddr);
1438
1439 local_entry = sender_state->tlbEntry;
1440 } else {
1441 DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1442 "%#x\n", vaddr);
1443
1444 // We are returning either from a page walk or from a hit at a lower
1445 // TLB level. The senderState should be "carrying" a pointer to the
1446 // correct TLBEntry.
1447 new_entry = sender_state->tlbEntry;
1448 assert(new_entry);
1449 local_entry = new_entry;
1450
1451 if (allocationPolicy) {
1452 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1453
1454 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1455 virt_page_addr);
1456
1457 local_entry = insert(virt_page_addr, *new_entry);
1458 }
1459
1460 assert(local_entry);
1461 }
1462
1463 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1464 "while paddr was %#x.\n", local_entry->vaddr,
1465 local_entry->paddr);
1466
1467 /**
1468 * Do paging checks if it's a normal functional access. If it's for a
1469 * prefetch, then sometimes you can try to prefetch something that
1470 * won't pass protection. We don't actually want to fault because there
1471 * is no demand access to deem this a violation. Just put it in the
1472 * TLB and it will fault if indeed a future demand access touches it in
1473 * violation.
1474 *
1475 * This feature could be used to explore security issues around
1476 * speculative memory accesses.
1477 */
1478 if (!sender_state->prefetch && sender_state->tlbEntry)
1479 pagingProtectionChecks(tc, pkt, local_entry, mode);
1480
1481 int page_size = local_entry->size();
1482 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1483 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1484
1485 pkt->req->setPaddr(paddr);
1486
1487 if (local_entry->uncacheable)
1488 pkt->req->setFlags(Request::UNCACHEABLE);
1489 }
1490
1491 // This is used for atomic translations. Need to
1492 // make it all happen during the same cycle.
1493 void
1494 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1495 {
1496 TranslationState *sender_state =
1497 safe_cast<TranslationState*>(pkt->senderState);
1498
1499 ThreadContext *tc = sender_state->tc;
1500 bool update_stats = !sender_state->prefetch;
1501
1502 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1503 TheISA::PageBytes);
1504
1505 if (update_stats)
1506 tlb->updatePageFootprint(virt_page_addr);
1507
1508 // do the TLB lookup without updating the stats
1509 bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1510 tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1511
1512 // functional mode means no coalescing
1513 // global metrics are the same as the local metrics
1514 if (update_stats) {
1515 tlb->globalNumTLBAccesses++;
1516
1517 if (success) {
1518 sender_state->hitLevel = sender_state->reqCnt.size();
1519 tlb->globalNumTLBHits++;
1520 }
1521 }
1522
1523 if (!success) {
1524 if (update_stats)
1525 tlb->globalNumTLBMisses++;
1526 if (tlb->hasMemSidePort) {
1527 // there is a TLB below -> propagate down the TLB hierarchy
1528 tlb->memSidePort[0]->sendFunctional(pkt);
1529 // If no valid translation from a prefetch, then just return
1530 if (sender_state->prefetch && !pkt->req->hasPaddr())
1531 return;
1532 } else {
1533 // Need to access the page table and update the TLB
1534 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1535 virt_page_addr);
1536
1537 Process *p = tc->getProcessPtr();
1538
1539 Addr vaddr = pkt->req->getVaddr();
1540 #ifndef NDEBUG
1541 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1542 assert(alignedVaddr == virt_page_addr);
1543 #endif
1544
1545 const EmulationPageTable::Entry *pte =
1546 p->pTable->lookup(vaddr);
1547 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1548 p->fixupStackFault(vaddr)) {
1549 pte = p->pTable->lookup(vaddr);
1550 }
1551
1552 if (!sender_state->prefetch) {
1553 // no PageFaults are permitted after
1554 // the second page table lookup
1555 assert(pte);
1556
1557 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1558 pte->paddr);
1559
1560 sender_state->tlbEntry =
1561 new TlbEntry(p->pid(), virt_page_addr,
1562 pte->paddr, false, false);
1563 } else {
1564 // If this was a prefetch, then do the normal thing if it
1565 // was a successful translation. Otherwise, send an empty
1566 // TLB entry back so that it can be figured out as empty and
1567 // handled accordingly.
1568 if (pte) {
1569 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1570 pte->paddr);
1571
1572 sender_state->tlbEntry =
1573 new TlbEntry(p->pid(), virt_page_addr,
1574 pte->paddr, false, false);
1575 } else {
1576 DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1577 alignedVaddr);
1578
1579 sender_state->tlbEntry = nullptr;
1580
1581 return;
1582 }
1583 }
1584 }
1585 } else {
1586 DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1587 tlb->lookup(pkt->req->getVaddr()));
1588
1589 TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1590 update_stats);
1591
1592 assert(entry);
1593
1594 auto p = sender_state->tc->getProcessPtr();
1595 sender_state->tlbEntry =
1596 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1597 false, false);
1598 }
1599 // This is the function that would populate pkt->req with the paddr of
1600 // the translation. But if no translation happens (i.e., the prefetch
1601 // fails), then the early returns in the above code will keep this
1602 // function from executing.
1603 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1604 }
1605
1606 void
1607 GpuTLB::CpuSidePort::recvReqRetry()
1608 {
1609 // The CPUSidePort never sends anything but replies. No retries
1610 // expected.
1611 panic("recvReqRetry called");
1612 }
1613
1614 AddrRangeList
1615 GpuTLB::CpuSidePort::getAddrRanges() const
1616 {
1617 // currently not checked by the master
1618 AddrRangeList ranges;
1619
1620 return ranges;
1621 }
1622
1623 /**
1624 * MemSidePort receives the packet back.
1625 * We need to call the handleTranslationReturn
1626 * and propagate up the hierarchy.
1627 */
1628 bool
1629 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1630 {
1631 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1632 TheISA::PageBytes);
1633
1634 DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1635 virt_page_addr);
1636
1637 TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1638 assert(tlb_event);
1639 assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1640
1641 tlb_event->updateOutcome(MISS_RETURN);
1642 tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1643
1644 return true;
1645 }
1646
1647 void
1648 GpuTLB::MemSidePort::recvReqRetry()
1649 {
1650 // No retries should reach the TLB. The retries
1651 // should only reach the TLBCoalescer.
1652 panic("recvReqRetry called");
1653 }
1654
1655 void
1656 GpuTLB::cleanup()
1657 {
1658 while (!cleanupQueue.empty()) {
1659 Addr cleanup_addr = cleanupQueue.front();
1660 cleanupQueue.pop();
1661
1662 // delete TLBEvent
1663 TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1664 delete old_tlb_event;
1665 translationReturnEvent.erase(cleanup_addr);
1666
1667 // update number of outstanding requests
1668 outstandingReqs--;
1669 }
1670
1671 /** the higher level coalescer should retry if it has
1672 * any pending requests.
1673 */
1674 for (int i = 0; i < cpuSidePort.size(); ++i) {
1675 cpuSidePort[i]->sendRetryReq();
1676 }
1677 }
1678
1679 void
1680 GpuTLB::updatePageFootprint(Addr virt_page_addr)
1681 {
1682
1683 std::pair<AccessPatternTable::iterator, bool> ret;
1684
1685 AccessInfo tmp_access_info;
1686 tmp_access_info.lastTimeAccessed = 0;
1687 tmp_access_info.accessesPerPage = 0;
1688 tmp_access_info.totalReuseDistance = 0;
1689 tmp_access_info.sumDistance = 0;
1690 tmp_access_info.meanDistance = 0;
1691
1692 ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1693 tmp_access_info));
1694
1695 bool first_page_access = ret.second;
1696
1697 if (first_page_access) {
1698 numUniquePages++;
1699 } else {
1700 int accessed_before;
1701 accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1702 ret.first->second.totalReuseDistance += accessed_before;
1703 }
1704
1705 ret.first->second.accessesPerPage++;
1706 ret.first->second.lastTimeAccessed = curTick();
1707
1708 if (accessDistance) {
1709 ret.first->second.localTLBAccesses
1710 .push_back(localNumTLBAccesses.value());
1711 }
1712 }
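
updatePageFootprint() tracks per-page reuse in ticks. A worked example
of the bookkeeping above, with illustrative values:

    AccessInfo info;                      // as inserted on first access
    info.lastTimeAccessed   = 1000;       // first touch at tick 1000
    info.accessesPerPage    = 1;
    info.totalReuseDistance = 0;

    // Second touch at tick 1600:
    info.totalReuseDistance += 1600 - info.lastTimeAccessed;   // += 600
    info.accessesPerPage++;
    info.lastTimeAccessed = 1600;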
1713
1714 void
1715 GpuTLB::exitCallback()
1716 {
1717 std::ostream *page_stat_file = nullptr;
1718
1719 if (accessDistance) {
1720
1721 // print per page statistics to a separate file (.csv format)
1722 // simout is the gem5 output directory (default is m5out or the one
1723 // specified with -d)
1724 page_stat_file = simout.create(name().c_str())->stream();
1725
1726 // print header
1727 *page_stat_file << "page,max_access_distance,mean_access_distance,"
1728 << "stddev_distance" << std::endl;
1729 }
1730
1731 // update avg. reuse distance footprint
1732 AccessPatternTable::iterator iter, iter_begin, iter_end;
1733 unsigned int sum_avg_reuse_distance_per_page = 0;
1734
1735 // iterate through all pages seen by this TLB
1736 for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1737 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1738 iter->second.accessesPerPage;
1739
1740 if (accessDistance) {
1741 unsigned int tmp = iter->second.localTLBAccesses[0];
1742 unsigned int prev = tmp;
1743
1744 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1745 if (i) {
1746 tmp = prev + 1;
1747 }
1748
1749 prev = iter->second.localTLBAccesses[i];
1750 // update the localTLBAccesses value
1751 // with the actual difference
1752 iter->second.localTLBAccesses[i] -= tmp;
1753 // compute the sum of AccessDistance per page
1754 // used later for mean
1755 iter->second.sumDistance +=
1756 iter->second.localTLBAccesses[i];
1757 }
1758
1759 iter->second.meanDistance =
1760 iter->second.sumDistance / iter->second.accessesPerPage;
1761
1762 // compute std_dev and max (we need a second round because we
1763 // need to know the mean value)
1764 unsigned int max_distance = 0;
1765 unsigned int stddev_distance = 0;
1766
1767 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1768 unsigned int tmp_access_distance =
1769 iter->second.localTLBAccesses[i];
1770
1771 if (tmp_access_distance > max_distance) {
1772 max_distance = tmp_access_distance;
1773 }
1774
1775 unsigned int diff =
1776 tmp_access_distance - iter->second.meanDistance;
1777 stddev_distance += pow(diff, 2);
1778
1779 }
1780
1781 stddev_distance =
1782 sqrt(stddev_distance/iter->second.accessesPerPage);
1783
1784 if (page_stat_file) {
1785 *page_stat_file << std::hex << iter->first << ",";
1786 *page_stat_file << std::dec << max_distance << ",";
1787 *page_stat_file << std::dec << iter->second.meanDistance
1788 << ",";
1789 *page_stat_file << std::dec << stddev_distance;
1790 *page_stat_file << std::endl;
1791 }
1792
1793 // erase the localTLBAccesses array
1794 iter->second.localTLBAccesses.clear();
1795 }
1796 }
1797
1798 if (!TLBFootprint.empty()) {
1799 avgReuseDistance =
1800 sum_avg_reuse_distance_per_page / TLBFootprint.size();
1801 }
1802
1803 //clear the TLBFootprint map
1804 TLBFootprint.clear();
1805 }
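
The first loop above converts the recorded access counts into access
distances in place: element i becomes count[i] - count[i-1] - 1 (the
number of intervening local TLB accesses), with the first element
zeroed. A worked example, assuming a page touched when the local access
counter read {3, 5, 9}:

    // counts {3, 5, 9} -> distances {0, 1, 3}
    //   i = 0: 3 - 3       = 0
    //   i = 1: 5 - (3 + 1) = 1
    //   i = 2: 9 - (5 + 1) = 3
    // sumDistance = 4; with accessesPerPage = 3, the integer mean
    // distance is 4 / 3 = 1, matching the division in the code above.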
1806 } // namespace X86ISA
1807
1808X86ISA::GpuTLB*
1809X86GPUTLBParams::create()
1810{
1811 return new X86ISA::GpuTLB(this);
1812}
1813