// pagetable_walker.cc revision 8922
1/* 2 * Copyright (c) 2007 The Hewlett-Packard Development Company 3 * All rights reserved. 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabe Black 38 */ 39 40#include "arch/x86/pagetable.hh" 41#include "arch/x86/pagetable_walker.hh" 42#include "arch/x86/tlb.hh" 43#include "arch/x86/vtophys.hh" 44#include "base/bitfield.hh" 45#include "cpu/base.hh" 46#include "cpu/thread_context.hh" 47#include "debug/PageTableWalker.hh" 48#include "mem/packet_access.hh" 49#include "mem/request.hh" 50#include "sim/system.hh" 51 52namespace X86ISA { 53 54// Unfortunately, the placement of the base field in a page table entry is 55// very erratic and would make a mess here. It might be moved here at some 56// point in the future. 57BitUnion64(PageTableEntry) 58 Bitfield<63> nx; 59 Bitfield<11, 9> avl; 60 Bitfield<8> g; 61 Bitfield<7> ps; 62 Bitfield<6> d; 63 Bitfield<5> a; 64 Bitfield<4> pcd; 65 Bitfield<3> pwt; 66 Bitfield<2> u; 67 Bitfield<1> w; 68 Bitfield<0> p; 69EndBitUnion(PageTableEntry) 70 71Fault 72Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation, 73 RequestPtr _req, BaseTLB::Mode _mode) 74{ 75 // TODO: in timing mode, instead of blocking when there are other 76 // outstanding requests, see if this request can be coalesced with 77 // another one (i.e. 
either coalesce or start walk) 78 WalkerState * newState = new WalkerState(this, _translation, _req); 79 newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing); 80 if (currStates.size()) { 81 assert(newState->isTiming()); 82 DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size()); 83 currStates.push_back(newState); 84 return NoFault; 85 } else { 86 currStates.push_back(newState); 87 Fault fault = newState->startWalk(); 88 if (!newState->isTiming()) { 89 currStates.pop_front(); 90 delete newState; 91 } 92 return fault; 93 } 94} 95 96Fault 97Walker::startFunctional(ThreadContext * _tc, Addr &addr, Addr &pageSize, 98 BaseTLB::Mode _mode) 99{ 100 funcState.initState(_tc, _mode); 101 return funcState.startFunctional(addr, pageSize); 102} 103 104bool 105Walker::WalkerPort::recvTiming(PacketPtr pkt) 106{ 107 return walker->recvTiming(pkt); 108} 109 110bool 111Walker::recvTiming(PacketPtr pkt) 112{ 113 if (pkt->isResponse() || pkt->wasNacked()) { 114 WalkerSenderState * senderState = 115 dynamic_cast<WalkerSenderState *>(pkt->senderState); 116 pkt->senderState = senderState->saved; 117 WalkerState * senderWalk = senderState->senderWalk; 118 bool walkComplete = senderWalk->recvPacket(pkt); 119 delete senderState; 120 if (walkComplete) { 121 std::list<WalkerState *>::iterator iter; 122 for (iter = currStates.begin(); iter != currStates.end(); iter++) { 123 WalkerState * walkerState = *(iter); 124 if (walkerState == senderWalk) { 125 iter = currStates.erase(iter); 126 break; 127 } 128 } 129 delete senderWalk; 130 // Since we block requests when another is outstanding, we 131 // need to check if there is a waiting request to be serviced 132 if (currStates.size()) { 133 WalkerState * newState = currStates.front(); 134 if (!newState->wasStarted()) 135 newState->startWalk(); 136 } 137 } 138 } else { 139 DPRINTF(PageTableWalker, "Received strange packet\n"); 140 } 141 return true; 142} 143 144Tick 145Walker::WalkerPort::recvAtomic(PacketPtr pkt) 146{ 
147 return 0; 148} 149 150void 151Walker::WalkerPort::recvFunctional(PacketPtr pkt) 152{ 153 return; 154} 155 156void 157Walker::WalkerPort::recvRetry() 158{ 159 walker->recvRetry(); 160} 161 162void 163Walker::recvRetry() 164{ 165 std::list<WalkerState *>::iterator iter; 166 for (iter = currStates.begin(); iter != currStates.end(); iter++) { 167 WalkerState * walkerState = *(iter); 168 if (walkerState->isRetrying()) { 169 walkerState->retry(); 170 } 171 } 172} 173 174bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt) 175{ 176 pkt->senderState = new WalkerSenderState(sendingState, pkt->senderState); 177 return port.sendTiming(pkt); 178} 179 180MasterPort & 181Walker::getMasterPort(const std::string &if_name, int idx) 182{ 183 if (if_name == "port") 184 return port; 185 else 186 return MemObject::getMasterPort(if_name, idx); 187} 188 189void 190Walker::WalkerState::initState(ThreadContext * _tc, 191 BaseTLB::Mode _mode, bool _isTiming) 192{ 193 assert(state == Ready); 194 started = false; 195 tc = _tc; 196 mode = _mode; 197 timing = _isTiming; 198} 199 200Fault 201Walker::WalkerState::startWalk() 202{ 203 Fault fault = NoFault; 204 assert(started == false); 205 started = true; 206 setupWalk(req->getVaddr()); 207 if (timing) { 208 nextState = state; 209 state = Waiting; 210 timingFault = NoFault; 211 sendPackets(); 212 } else { 213 do { 214 walker->port.sendAtomic(read); 215 PacketPtr write = NULL; 216 fault = stepWalk(write); 217 assert(fault == NoFault || read == NULL); 218 state = nextState; 219 nextState = Ready; 220 if (write) 221 walker->port.sendAtomic(write); 222 } while(read); 223 state = Ready; 224 nextState = Waiting; 225 } 226 return fault; 227} 228 229Fault 230Walker::WalkerState::startFunctional(Addr &addr, Addr &pageSize) 231{ 232 Fault fault = NoFault; 233 assert(started == false); 234 started = true; 235 setupWalk(addr); 236 237 do { 238 walker->port.sendFunctional(read); 239 // On a functional access (page table lookup), writes should 
240 // not happen so this pointer is ignored after stepWalk 241 PacketPtr write = NULL; 242 fault = stepWalk(write); 243 assert(fault == NoFault || read == NULL); 244 state = nextState; 245 nextState = Ready; 246 } while(read); 247 pageSize = entry.size; 248 addr = entry.paddr; 249 250 return fault; 251} 252 253Fault 254Walker::WalkerState::stepWalk(PacketPtr &write) 255{ 256 assert(state != Ready && state != Waiting); 257 Fault fault = NoFault; 258 write = NULL; 259 PageTableEntry pte; 260 if (dataSize == 8) 261 pte = read->get<uint64_t>(); 262 else 263 pte = read->get<uint32_t>(); 264 VAddr vaddr = entry.vaddr; 265 bool uncacheable = pte.pcd; 266 Addr nextRead = 0; 267 bool doWrite = false; 268 bool doTLBInsert = false; 269 bool doEndWalk = false; 270 bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX; 271 switch(state) { 272 case LongPML4: 273 DPRINTF(PageTableWalker, 274 "Got long mode PML4 entry %#016x.\n", (uint64_t)pte); 275 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize; 276 doWrite = !pte.a; 277 pte.a = 1; 278 entry.writable = pte.w; 279 entry.user = pte.u; 280 if (badNX || !pte.p) { 281 doEndWalk = true; 282 fault = pageFault(pte.p); 283 break; 284 } 285 entry.noExec = pte.nx; 286 nextState = LongPDP; 287 break; 288 case LongPDP: 289 DPRINTF(PageTableWalker, 290 "Got long mode PDP entry %#016x.\n", (uint64_t)pte); 291 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize; 292 doWrite = !pte.a; 293 pte.a = 1; 294 entry.writable = entry.writable && pte.w; 295 entry.user = entry.user && pte.u; 296 if (badNX || !pte.p) { 297 doEndWalk = true; 298 fault = pageFault(pte.p); 299 break; 300 } 301 nextState = LongPD; 302 break; 303 case LongPD: 304 DPRINTF(PageTableWalker, 305 "Got long mode PD entry %#016x.\n", (uint64_t)pte); 306 doWrite = !pte.a; 307 pte.a = 1; 308 entry.writable = entry.writable && pte.w; 309 entry.user = entry.user && pte.u; 310 if (badNX || !pte.p) { 311 doEndWalk = true; 312 fault = 
pageFault(pte.p); 313 break; 314 } 315 if (!pte.ps) { 316 // 4 KB page 317 entry.size = 4 * (1 << 10); 318 nextRead = 319 ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize; 320 nextState = LongPTE; 321 break; 322 } else { 323 // 2 MB page 324 entry.size = 2 * (1 << 20); 325 entry.paddr = (uint64_t)pte & (mask(31) << 21); 326 entry.uncacheable = uncacheable; 327 entry.global = pte.g; 328 entry.patBit = bits(pte, 12); 329 entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); 330 doTLBInsert = true; 331 doEndWalk = true; 332 break; 333 } 334 case LongPTE: 335 DPRINTF(PageTableWalker, 336 "Got long mode PTE entry %#016x.\n", (uint64_t)pte); 337 doWrite = !pte.a; 338 pte.a = 1; 339 entry.writable = entry.writable && pte.w; 340 entry.user = entry.user && pte.u; 341 if (badNX || !pte.p) { 342 doEndWalk = true; 343 fault = pageFault(pte.p); 344 break; 345 } 346 entry.paddr = (uint64_t)pte & (mask(40) << 12); 347 entry.uncacheable = uncacheable; 348 entry.global = pte.g; 349 entry.patBit = bits(pte, 12); 350 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); 351 doTLBInsert = true; 352 doEndWalk = true; 353 break; 354 case PAEPDP: 355 DPRINTF(PageTableWalker, 356 "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte); 357 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize; 358 if (!pte.p) { 359 doEndWalk = true; 360 fault = pageFault(pte.p); 361 break; 362 } 363 nextState = PAEPD; 364 break; 365 case PAEPD: 366 DPRINTF(PageTableWalker, 367 "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte); 368 doWrite = !pte.a; 369 pte.a = 1; 370 entry.writable = pte.w; 371 entry.user = pte.u; 372 if (badNX || !pte.p) { 373 doEndWalk = true; 374 fault = pageFault(pte.p); 375 break; 376 } 377 if (!pte.ps) { 378 // 4 KB page 379 entry.size = 4 * (1 << 10); 380 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize; 381 nextState = PAEPTE; 382 break; 383 } else { 384 // 2 MB page 385 entry.size = 2 * (1 << 20); 386 entry.paddr = 
(uint64_t)pte & (mask(31) << 21); 387 entry.uncacheable = uncacheable; 388 entry.global = pte.g; 389 entry.patBit = bits(pte, 12); 390 entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); 391 doTLBInsert = true; 392 doEndWalk = true; 393 break; 394 } 395 case PAEPTE: 396 DPRINTF(PageTableWalker, 397 "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte); 398 doWrite = !pte.a; 399 pte.a = 1; 400 entry.writable = entry.writable && pte.w; 401 entry.user = entry.user && pte.u; 402 if (badNX || !pte.p) { 403 doEndWalk = true; 404 fault = pageFault(pte.p); 405 break; 406 } 407 entry.paddr = (uint64_t)pte & (mask(40) << 12); 408 entry.uncacheable = uncacheable; 409 entry.global = pte.g; 410 entry.patBit = bits(pte, 7); 411 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); 412 doTLBInsert = true; 413 doEndWalk = true; 414 break; 415 case PSEPD: 416 DPRINTF(PageTableWalker, 417 "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte); 418 doWrite = !pte.a; 419 pte.a = 1; 420 entry.writable = pte.w; 421 entry.user = pte.u; 422 if (!pte.p) { 423 doEndWalk = true; 424 fault = pageFault(pte.p); 425 break; 426 } 427 if (!pte.ps) { 428 // 4 KB page 429 entry.size = 4 * (1 << 10); 430 nextRead = 431 ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize; 432 nextState = PTE; 433 break; 434 } else { 435 // 4 MB page 436 entry.size = 4 * (1 << 20); 437 entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22; 438 entry.uncacheable = uncacheable; 439 entry.global = pte.g; 440 entry.patBit = bits(pte, 12); 441 entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1); 442 doTLBInsert = true; 443 doEndWalk = true; 444 break; 445 } 446 case PD: 447 DPRINTF(PageTableWalker, 448 "Got legacy mode PD entry %#08x.\n", (uint32_t)pte); 449 doWrite = !pte.a; 450 pte.a = 1; 451 entry.writable = pte.w; 452 entry.user = pte.u; 453 if (!pte.p) { 454 doEndWalk = true; 455 fault = pageFault(pte.p); 456 break; 457 } 458 // 4 KB page 459 entry.size = 4 * (1 << 10); 460 nextRead = ((uint64_t)pte 
& (mask(20) << 12)) + vaddr.norml2 * dataSize; 461 nextState = PTE; 462 break; 463 case PTE: 464 DPRINTF(PageTableWalker, 465 "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte); 466 doWrite = !pte.a; 467 pte.a = 1; 468 entry.writable = pte.w; 469 entry.user = pte.u; 470 if (!pte.p) { 471 doEndWalk = true; 472 fault = pageFault(pte.p); 473 break; 474 } 475 entry.paddr = (uint64_t)pte & (mask(20) << 12); 476 entry.uncacheable = uncacheable; 477 entry.global = pte.g; 478 entry.patBit = bits(pte, 7); 479 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); 480 doTLBInsert = true; 481 doEndWalk = true; 482 break; 483 default: 484 panic("Unknown page table walker state %d!\n"); 485 } 486 if (doEndWalk) { 487 if (doTLBInsert) 488 if (!functional) 489 walker->tlb->insert(entry.vaddr, entry); 490 endWalk(); 491 } else { 492 PacketPtr oldRead = read; 493 //If we didn't return, we're setting up another read. 494 Request::Flags flags = oldRead->req->getFlags(); 495 flags.set(Request::UNCACHEABLE, uncacheable); 496 RequestPtr request = 497 new Request(nextRead, oldRead->getSize(), flags, walker->masterId); 498 read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast); 499 read->allocate(); 500 // If we need to write, adjust the read packet to write the modified 501 // value back to memory. 
502 if (doWrite) { 503 write = oldRead; 504 write->set<uint64_t>(pte); 505 write->cmd = MemCmd::WriteReq; 506 write->setDest(Packet::Broadcast); 507 } else { 508 write = NULL; 509 delete oldRead->req; 510 delete oldRead; 511 } 512 } 513 return fault; 514} 515 516void 517Walker::WalkerState::endWalk() 518{ 519 nextState = Ready; 520 delete read->req; 521 delete read; 522 read = NULL; 523} 524 525void 526Walker::WalkerState::setupWalk(Addr vaddr) 527{ 528 VAddr addr = vaddr; 529 CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3); 530 // Check if we're in long mode or not 531 Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); 532 dataSize = 8; 533 Addr topAddr; 534 if (efer.lma) { 535 // Do long mode. 536 state = LongPML4; 537 topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize; 538 enableNX = efer.nxe; 539 } else { 540 // We're in some flavor of legacy mode. 541 CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); 542 if (cr4.pae) { 543 // Do legacy PAE. 544 state = PAEPDP; 545 topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize; 546 enableNX = efer.nxe; 547 } else { 548 dataSize = 4; 549 topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize; 550 if (cr4.pse) { 551 // Do legacy PSE. 552 state = PSEPD; 553 } else { 554 // Do legacy non PSE. 
555 state = PD; 556 } 557 enableNX = false; 558 } 559 } 560 561 nextState = Ready; 562 entry.vaddr = vaddr; 563 564 Request::Flags flags = Request::PHYSICAL; 565 if (cr3.pcd) 566 flags.set(Request::UNCACHEABLE); 567 RequestPtr request = new Request(topAddr, dataSize, flags, walker->masterId); 568 read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast); 569 read->allocate(); 570} 571 572bool 573Walker::WalkerState::recvPacket(PacketPtr pkt) 574{ 575 if (pkt->isResponse() && !pkt->wasNacked()) { 576 assert(inflight); 577 assert(state == Waiting); 578 assert(!read); 579 inflight--; 580 if (pkt->isRead()) { 581 state = nextState; 582 nextState = Ready; 583 PacketPtr write = NULL; 584 read = pkt; 585 timingFault = stepWalk(write); 586 state = Waiting; 587 assert(timingFault == NoFault || read == NULL); 588 if (write) { 589 writes.push_back(write); 590 } 591 sendPackets(); 592 } else { 593 sendPackets(); 594 } 595 if (inflight == 0 && read == NULL && writes.size() == 0) { 596 state = Ready; 597 nextState = Waiting; 598 if (timingFault == NoFault) { 599 /* 600 * Finish the translation. Now that we now the right entry is 601 * in the TLB, this should work with no memory accesses. 602 * There could be new faults unrelated to the table walk like 603 * permissions violations, so we'll need the return value as 604 * well. 605 */ 606 bool delayedResponse; 607 Fault fault = walker->tlb->translate(req, tc, NULL, mode, 608 delayedResponse, true); 609 assert(!delayedResponse); 610 // Let the CPU continue. 611 translation->finish(fault, req, tc, mode); 612 } else { 613 // There was a fault during the walk. Let the CPU know. 614 translation->finish(timingFault, req, tc, mode); 615 } 616 return true; 617 } 618 } else if (pkt->wasNacked()) { 619 DPRINTF(PageTableWalker, "Request was nacked. 
Entering retry state\n"); 620 pkt->reinitNacked(); 621 if (!walker->sendTiming(this, pkt)) { 622 inflight--; 623 retrying = true; 624 if (pkt->isWrite()) { 625 writes.push_back(pkt); 626 } else { 627 assert(!read); 628 read = pkt; 629 } 630 } 631 } 632 return false; 633} 634 635void 636Walker::WalkerState::sendPackets() 637{ 638 //If we're already waiting for the port to become available, just return. 639 if (retrying) 640 return; 641 642 //Reads always have priority 643 if (read) { 644 PacketPtr pkt = read; 645 read = NULL; 646 inflight++; 647 if (!walker->sendTiming(this, pkt)) { 648 retrying = true; 649 read = pkt; 650 inflight--; 651 return; 652 } 653 } 654 //Send off as many of the writes as we can. 655 while (writes.size()) { 656 PacketPtr write = writes.back(); 657 writes.pop_back(); 658 inflight++; 659 if (!walker->sendTiming(this, write)) { 660 retrying = true; 661 writes.push_back(write); 662 inflight--; 663 return; 664 } 665 } 666} 667 668bool 669Walker::WalkerState::isRetrying() 670{ 671 return retrying; 672} 673 674bool 675Walker::WalkerState::isTiming() 676{ 677 return timing; 678} 679 680bool 681Walker::WalkerState::wasStarted() 682{ 683 return started; 684} 685 686void 687Walker::WalkerState::retry() 688{ 689 retrying = false; 690 sendPackets(); 691} 692 693Fault 694Walker::WalkerState::pageFault(bool present) 695{ 696 DPRINTF(PageTableWalker, "Raising page fault.\n"); 697 HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); 698 if (mode == BaseTLB::Execute && !enableNX) 699 mode = BaseTLB::Read; 700 return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false); 701} 702 703/* end namespace X86ISA */ } 704 705X86ISA::Walker * 706X86PagetableWalkerParams::create() 707{ 708 return new X86ISA::Walker(this); 709} 710