// pagetable_walker.cc revision 8232
1/* 2 * Copyright (c) 2007 The Hewlett-Packard Development Company 3 * All rights reserved. 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabe Black 38 */ 39 40#include "arch/x86/pagetable.hh" 41#include "arch/x86/pagetable_walker.hh" 42#include "arch/x86/tlb.hh" 43#include "arch/x86/vtophys.hh" 44#include "base/bitfield.hh" 45#include "cpu/base.hh" 46#include "cpu/thread_context.hh" 47#include "debug/PageTableWalker.hh" 48#include "mem/packet_access.hh" 49#include "mem/request.hh" 50#include "sim/system.hh" 51 52namespace X86ISA { 53 54// Unfortunately, the placement of the base field in a page table entry is 55// very erratic and would make a mess here. It might be moved here at some 56// point in the future. 57BitUnion64(PageTableEntry) 58 Bitfield<63> nx; 59 Bitfield<11, 9> avl; 60 Bitfield<8> g; 61 Bitfield<7> ps; 62 Bitfield<6> d; 63 Bitfield<5> a; 64 Bitfield<4> pcd; 65 Bitfield<3> pwt; 66 Bitfield<2> u; 67 Bitfield<1> w; 68 Bitfield<0> p; 69EndBitUnion(PageTableEntry) 70 71Fault 72Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation, 73 RequestPtr _req, BaseTLB::Mode _mode) 74{ 75 // TODO: in timing mode, instead of blocking when there are other 76 // outstanding requests, see if this request can be coalesced with 77 // another one (i.e. 
either coalesce or start walk) 78 WalkerState * newState = new WalkerState(this, _translation, _req); 79 newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing); 80 if (currStates.size()) { 81 assert(newState->isTiming()); 82 DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size()); 83 currStates.push_back(newState); 84 return NoFault; 85 } else { 86 currStates.push_back(newState); 87 Fault fault = newState->startWalk(); 88 if (!newState->isTiming()) { 89 currStates.pop_front(); 90 delete newState; 91 } 92 return fault; 93 } 94} 95 96Fault 97Walker::startFunctional(ThreadContext * _tc, Addr &addr, Addr &pageSize, 98 BaseTLB::Mode _mode) 99{ 100 funcState.initState(_tc, _mode); 101 return funcState.startFunctional(addr, pageSize); 102} 103 104bool 105Walker::WalkerPort::recvTiming(PacketPtr pkt) 106{ 107 return walker->recvTiming(pkt); 108} 109 110bool 111Walker::recvTiming(PacketPtr pkt) 112{ 113 if (pkt->isResponse() || pkt->wasNacked()) { 114 WalkerSenderState * senderState = 115 dynamic_cast<WalkerSenderState *>(pkt->senderState); 116 pkt->senderState = senderState->saved; 117 WalkerState * senderWalk = senderState->senderWalk; 118 bool walkComplete = senderWalk->recvPacket(pkt); 119 delete senderState; 120 if (walkComplete) { 121 std::list<WalkerState *>::iterator iter; 122 for (iter = currStates.begin(); iter != currStates.end(); iter++) { 123 WalkerState * walkerState = *(iter); 124 if (walkerState == senderWalk) { 125 iter = currStates.erase(iter); 126 break; 127 } 128 } 129 delete senderWalk; 130 // Since we block requests when another is outstanding, we 131 // need to check if there is a waiting request to be serviced 132 if (currStates.size()) { 133 WalkerState * newState = currStates.front(); 134 if (!newState->wasStarted()) 135 newState->startWalk(); 136 } 137 } 138 } else { 139 DPRINTF(PageTableWalker, "Received strange packet\n"); 140 } 141 return true; 142} 143 144Tick 145Walker::WalkerPort::recvAtomic(PacketPtr pkt) 146{ 
147 return 0; 148} 149 150void 151Walker::WalkerPort::recvFunctional(PacketPtr pkt) 152{ 153 return; 154} 155 156void 157Walker::WalkerPort::recvStatusChange(Status status) 158{ 159 if (status == RangeChange) { 160 if (!snoopRangeSent) { 161 snoopRangeSent = true; 162 sendStatusChange(Port::RangeChange); 163 } 164 return; 165 } 166 167 panic("Unexpected recvStatusChange.\n"); 168} 169 170void 171Walker::WalkerPort::recvRetry() 172{ 173 walker->recvRetry(); 174} 175 176void 177Walker::recvRetry() 178{ 179 std::list<WalkerState *>::iterator iter; 180 for (iter = currStates.begin(); iter != currStates.end(); iter++) { 181 WalkerState * walkerState = *(iter); 182 if (walkerState->isRetrying()) { 183 walkerState->retry(); 184 } 185 } 186} 187 188bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt) 189{ 190 pkt->senderState = new WalkerSenderState(sendingState, pkt->senderState); 191 return port.sendTiming(pkt); 192} 193 194Port * 195Walker::getPort(const std::string &if_name, int idx) 196{ 197 if (if_name == "port") 198 return &port; 199 else 200 panic("No page table walker port named %s!\n", if_name); 201} 202 203void 204Walker::WalkerState::initState(ThreadContext * _tc, 205 BaseTLB::Mode _mode, bool _isTiming) 206{ 207 assert(state == Ready); 208 started = false; 209 tc = _tc; 210 mode = _mode; 211 timing = _isTiming; 212} 213 214Fault 215Walker::WalkerState::startWalk() 216{ 217 Fault fault = NoFault; 218 assert(started == false); 219 started = true; 220 setupWalk(req->getVaddr()); 221 if (timing) { 222 nextState = state; 223 state = Waiting; 224 timingFault = NoFault; 225 sendPackets(); 226 } else { 227 do { 228 walker->port.sendAtomic(read); 229 PacketPtr write = NULL; 230 fault = stepWalk(write); 231 assert(fault == NoFault || read == NULL); 232 state = nextState; 233 nextState = Ready; 234 if (write) 235 walker->port.sendAtomic(write); 236 } while(read); 237 state = Ready; 238 nextState = Waiting; 239 } 240 return fault; 241} 242 243Fault 
244Walker::WalkerState::startFunctional(Addr &addr, Addr &pageSize) 245{ 246 Fault fault = NoFault; 247 assert(started == false); 248 started = true; 249 setupWalk(addr); 250 251 do { 252 walker->port.sendFunctional(read); 253 // On a functional access (page table lookup), writes should 254 // not happen so this pointer is ignored after stepWalk 255 PacketPtr write = NULL; 256 fault = stepWalk(write); 257 assert(fault == NoFault || read == NULL); 258 state = nextState; 259 nextState = Ready; 260 } while(read); 261 pageSize = entry.size; 262 addr = entry.paddr; 263 264 return fault; 265} 266 267Fault 268Walker::WalkerState::stepWalk(PacketPtr &write) 269{ 270 assert(state != Ready && state != Waiting); 271 Fault fault = NoFault; 272 write = NULL; 273 PageTableEntry pte; 274 if (dataSize == 8) 275 pte = read->get<uint64_t>(); 276 else 277 pte = read->get<uint32_t>(); 278 VAddr vaddr = entry.vaddr; 279 bool uncacheable = pte.pcd; 280 Addr nextRead = 0; 281 bool doWrite = false; 282 bool doTLBInsert = false; 283 bool doEndWalk = false; 284 bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX; 285 switch(state) { 286 case LongPML4: 287 DPRINTF(PageTableWalker, 288 "Got long mode PML4 entry %#016x.\n", (uint64_t)pte); 289 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize; 290 doWrite = !pte.a; 291 pte.a = 1; 292 entry.writable = pte.w; 293 entry.user = pte.u; 294 if (badNX || !pte.p) { 295 doEndWalk = true; 296 fault = pageFault(pte.p); 297 break; 298 } 299 entry.noExec = pte.nx; 300 nextState = LongPDP; 301 break; 302 case LongPDP: 303 DPRINTF(PageTableWalker, 304 "Got long mode PDP entry %#016x.\n", (uint64_t)pte); 305 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize; 306 doWrite = !pte.a; 307 pte.a = 1; 308 entry.writable = entry.writable && pte.w; 309 entry.user = entry.user && pte.u; 310 if (badNX || !pte.p) { 311 doEndWalk = true; 312 fault = pageFault(pte.p); 313 break; 314 } 315 nextState = LongPD; 316 break; 
317 case LongPD: 318 DPRINTF(PageTableWalker, 319 "Got long mode PD entry %#016x.\n", (uint64_t)pte); 320 doWrite = !pte.a; 321 pte.a = 1; 322 entry.writable = entry.writable && pte.w; 323 entry.user = entry.user && pte.u; 324 if (badNX || !pte.p) { 325 doEndWalk = true; 326 fault = pageFault(pte.p); 327 break; 328 } 329 if (!pte.ps) { 330 // 4 KB page 331 entry.size = 4 * (1 << 10); 332 nextRead = 333 ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize; 334 nextState = LongPTE; 335 break; 336 } else { 337 // 2 MB page 338 entry.size = 2 * (1 << 20); 339 entry.paddr = (uint64_t)pte & (mask(31) << 21); 340 entry.uncacheable = uncacheable; 341 entry.global = pte.g; 342 entry.patBit = bits(pte, 12); 343 entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); 344 doTLBInsert = true; 345 doEndWalk = true; 346 break; 347 } 348 case LongPTE: 349 DPRINTF(PageTableWalker, 350 "Got long mode PTE entry %#016x.\n", (uint64_t)pte); 351 doWrite = !pte.a; 352 pte.a = 1; 353 entry.writable = entry.writable && pte.w; 354 entry.user = entry.user && pte.u; 355 if (badNX || !pte.p) { 356 doEndWalk = true; 357 fault = pageFault(pte.p); 358 break; 359 } 360 entry.paddr = (uint64_t)pte & (mask(40) << 12); 361 entry.uncacheable = uncacheable; 362 entry.global = pte.g; 363 entry.patBit = bits(pte, 12); 364 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); 365 doTLBInsert = true; 366 doEndWalk = true; 367 break; 368 case PAEPDP: 369 DPRINTF(PageTableWalker, 370 "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte); 371 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize; 372 if (!pte.p) { 373 doEndWalk = true; 374 fault = pageFault(pte.p); 375 break; 376 } 377 nextState = PAEPD; 378 break; 379 case PAEPD: 380 DPRINTF(PageTableWalker, 381 "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte); 382 doWrite = !pte.a; 383 pte.a = 1; 384 entry.writable = pte.w; 385 entry.user = pte.u; 386 if (badNX || !pte.p) { 387 doEndWalk = true; 388 fault = pageFault(pte.p); 389 
break; 390 } 391 if (!pte.ps) { 392 // 4 KB page 393 entry.size = 4 * (1 << 10); 394 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize; 395 nextState = PAEPTE; 396 break; 397 } else { 398 // 2 MB page 399 entry.size = 2 * (1 << 20); 400 entry.paddr = (uint64_t)pte & (mask(31) << 21); 401 entry.uncacheable = uncacheable; 402 entry.global = pte.g; 403 entry.patBit = bits(pte, 12); 404 entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); 405 doTLBInsert = true; 406 doEndWalk = true; 407 break; 408 } 409 case PAEPTE: 410 DPRINTF(PageTableWalker, 411 "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte); 412 doWrite = !pte.a; 413 pte.a = 1; 414 entry.writable = entry.writable && pte.w; 415 entry.user = entry.user && pte.u; 416 if (badNX || !pte.p) { 417 doEndWalk = true; 418 fault = pageFault(pte.p); 419 break; 420 } 421 entry.paddr = (uint64_t)pte & (mask(40) << 12); 422 entry.uncacheable = uncacheable; 423 entry.global = pte.g; 424 entry.patBit = bits(pte, 7); 425 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); 426 doTLBInsert = true; 427 doEndWalk = true; 428 break; 429 case PSEPD: 430 DPRINTF(PageTableWalker, 431 "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte); 432 doWrite = !pte.a; 433 pte.a = 1; 434 entry.writable = pte.w; 435 entry.user = pte.u; 436 if (!pte.p) { 437 doEndWalk = true; 438 fault = pageFault(pte.p); 439 break; 440 } 441 if (!pte.ps) { 442 // 4 KB page 443 entry.size = 4 * (1 << 10); 444 nextRead = 445 ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize; 446 nextState = PTE; 447 break; 448 } else { 449 // 4 MB page 450 entry.size = 4 * (1 << 20); 451 entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22; 452 entry.uncacheable = uncacheable; 453 entry.global = pte.g; 454 entry.patBit = bits(pte, 12); 455 entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1); 456 doTLBInsert = true; 457 doEndWalk = true; 458 break; 459 } 460 case PD: 461 DPRINTF(PageTableWalker, 462 "Got legacy mode PD entry 
%#08x.\n", (uint32_t)pte); 463 doWrite = !pte.a; 464 pte.a = 1; 465 entry.writable = pte.w; 466 entry.user = pte.u; 467 if (!pte.p) { 468 doEndWalk = true; 469 fault = pageFault(pte.p); 470 break; 471 } 472 // 4 KB page 473 entry.size = 4 * (1 << 10); 474 nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize; 475 nextState = PTE; 476 break; 477 case PTE: 478 DPRINTF(PageTableWalker, 479 "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte); 480 doWrite = !pte.a; 481 pte.a = 1; 482 entry.writable = pte.w; 483 entry.user = pte.u; 484 if (!pte.p) { 485 doEndWalk = true; 486 fault = pageFault(pte.p); 487 break; 488 } 489 entry.paddr = (uint64_t)pte & (mask(20) << 12); 490 entry.uncacheable = uncacheable; 491 entry.global = pte.g; 492 entry.patBit = bits(pte, 7); 493 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); 494 doTLBInsert = true; 495 doEndWalk = true; 496 break; 497 default: 498 panic("Unknown page table walker state %d!\n"); 499 } 500 if (doEndWalk) { 501 if (doTLBInsert) 502 if (!functional) 503 walker->tlb->insert(entry.vaddr, entry); 504 endWalk(); 505 } else { 506 PacketPtr oldRead = read; 507 //If we didn't return, we're setting up another read. 508 Request::Flags flags = oldRead->req->getFlags(); 509 flags.set(Request::UNCACHEABLE, uncacheable); 510 RequestPtr request = 511 new Request(nextRead, oldRead->getSize(), flags); 512 read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast); 513 read->allocate(); 514 // If we need to write, adjust the read packet to write the modified 515 // value back to memory. 
516 if (doWrite) { 517 write = oldRead; 518 write->set<uint64_t>(pte); 519 write->cmd = MemCmd::WriteReq; 520 write->setDest(Packet::Broadcast); 521 } else { 522 write = NULL; 523 delete oldRead->req; 524 delete oldRead; 525 } 526 } 527 return fault; 528} 529 530void 531Walker::WalkerState::endWalk() 532{ 533 nextState = Ready; 534 delete read->req; 535 delete read; 536 read = NULL; 537} 538 539void 540Walker::WalkerState::setupWalk(Addr vaddr) 541{ 542 VAddr addr = vaddr; 543 CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3); 544 // Check if we're in long mode or not 545 Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); 546 dataSize = 8; 547 Addr topAddr; 548 if (efer.lma) { 549 // Do long mode. 550 state = LongPML4; 551 topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize; 552 enableNX = efer.nxe; 553 } else { 554 // We're in some flavor of legacy mode. 555 CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); 556 if (cr4.pae) { 557 // Do legacy PAE. 558 state = PAEPDP; 559 topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize; 560 enableNX = efer.nxe; 561 } else { 562 dataSize = 4; 563 topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize; 564 if (cr4.pse) { 565 // Do legacy PSE. 566 state = PSEPD; 567 } else { 568 // Do legacy non PSE. 
569 state = PD; 570 } 571 enableNX = false; 572 } 573 } 574 575 nextState = Ready; 576 entry.vaddr = vaddr; 577 578 Request::Flags flags = Request::PHYSICAL; 579 if (cr3.pcd) 580 flags.set(Request::UNCACHEABLE); 581 RequestPtr request = new Request(topAddr, dataSize, flags); 582 read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast); 583 read->allocate(); 584} 585 586bool 587Walker::WalkerState::recvPacket(PacketPtr pkt) 588{ 589 if (pkt->isResponse() && !pkt->wasNacked()) { 590 assert(inflight); 591 assert(state == Waiting); 592 assert(!read); 593 inflight--; 594 if (pkt->isRead()) { 595 state = nextState; 596 nextState = Ready; 597 PacketPtr write = NULL; 598 read = pkt; 599 timingFault = stepWalk(write); 600 state = Waiting; 601 assert(timingFault == NoFault || read == NULL); 602 if (write) { 603 writes.push_back(write); 604 } 605 sendPackets(); 606 } else { 607 sendPackets(); 608 } 609 if (inflight == 0 && read == NULL && writes.size() == 0) { 610 state = Ready; 611 nextState = Waiting; 612 if (timingFault == NoFault) { 613 /* 614 * Finish the translation. Now that we now the right entry is 615 * in the TLB, this should work with no memory accesses. 616 * There could be new faults unrelated to the table walk like 617 * permissions violations, so we'll need the return value as 618 * well. 619 */ 620 bool delayedResponse; 621 Fault fault = walker->tlb->translate(req, tc, NULL, mode, 622 delayedResponse, true); 623 assert(!delayedResponse); 624 // Let the CPU continue. 625 translation->finish(fault, req, tc, mode); 626 } else { 627 // There was a fault during the walk. Let the CPU know. 628 translation->finish(timingFault, req, tc, mode); 629 } 630 return true; 631 } 632 } else if (pkt->wasNacked()) { 633 DPRINTF(PageTableWalker, "Request was nacked. 
Entering retry state\n"); 634 pkt->reinitNacked(); 635 if (!walker->sendTiming(this, pkt)) { 636 inflight--; 637 retrying = true; 638 if (pkt->isWrite()) { 639 writes.push_back(pkt); 640 } else { 641 assert(!read); 642 read = pkt; 643 } 644 } 645 } 646 return false; 647} 648 649void 650Walker::WalkerState::sendPackets() 651{ 652 //If we're already waiting for the port to become available, just return. 653 if (retrying) 654 return; 655 656 //Reads always have priority 657 if (read) { 658 PacketPtr pkt = read; 659 read = NULL; 660 inflight++; 661 if (!walker->sendTiming(this, pkt)) { 662 retrying = true; 663 read = pkt; 664 inflight--; 665 return; 666 } 667 } 668 //Send off as many of the writes as we can. 669 while (writes.size()) { 670 PacketPtr write = writes.back(); 671 writes.pop_back(); 672 inflight++; 673 if (!walker->sendTiming(this, write)) { 674 retrying = true; 675 writes.push_back(write); 676 inflight--; 677 return; 678 } 679 } 680} 681 682bool 683Walker::WalkerState::isRetrying() 684{ 685 return retrying; 686} 687 688bool 689Walker::WalkerState::isTiming() 690{ 691 return timing; 692} 693 694bool 695Walker::WalkerState::wasStarted() 696{ 697 return started; 698} 699 700void 701Walker::WalkerState::retry() 702{ 703 retrying = false; 704 sendPackets(); 705} 706 707Fault 708Walker::WalkerState::pageFault(bool present) 709{ 710 DPRINTF(PageTableWalker, "Raising page fault.\n"); 711 HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); 712 if (mode == BaseTLB::Execute && !enableNX) 713 mode = BaseTLB::Read; 714 return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false); 715} 716 717/* end namespace X86ISA */ } 718 719X86ISA::Walker * 720X86PagetableWalkerParams::create() 721{ 722 return new X86ISA::Walker(this); 723} 724