1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder. You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include "arch/x86/pagetable.hh"
53#include "arch/x86/pagetable_walker.hh"
54#include "arch/x86/tlb.hh"
55#include "arch/x86/vtophys.hh"
56#include "base/bitfield.hh"
57#include "base/trie.hh"
58#include "cpu/base.hh"
59#include "cpu/thread_context.hh"
60#include "debug/PageTableWalker.hh"
61#include "mem/packet_access.hh"
62#include "mem/request.hh"
63
64namespace X86ISA {
65
66// Unfortunately, the placement of the base field in a page table entry is
67// very erratic and would make a mess here. It might be moved here at some
68// point in the future.
// Flag bits shared by the page table entries the walker reads at every
// paging level. The physical base address field is deliberately absent
// (see the note above); the walker masks it out of the raw value itself.
BitUnion64(PageTableEntry)
    // No-execute; the walker faults on it for Execute accesses when NX is
    // enabled.
    Bitfield<63> nx;
    // Bits 11:9; not read by the walker (presumably software-available).
    Bitfield<11, 9> avl;
    // Copied into the TLB entry's "global" attribute on leaf entries.
    Bitfield<8> g;
    // Page size; on a page directory entry selects a large page over a
    // further table level.
    Bitfield<7> ps;
    // Bit 6; not read by the walker (presumably the dirty bit).
    Bitfield<6> d;
    // Accessed; set by the walker and written back when it was clear.
    Bitfield<5> a;
    // Cache disable; makes the next read / the mapping uncacheable.
    Bitfield<4> pcd;
    // Bit 3; not read by the walker (presumably page write-through).
    Bitfield<3> pwt;
    // User accessible; feeds the TLB entry's "user" attribute.
    Bitfield<2> u;
    // Writable; feeds the TLB entry's "writable" attribute.
    Bitfield<1> w;
    // Present; a clear bit ends the walk with a page fault.
    Bitfield<0> p;
EndBitUnion(PageTableEntry)
82
83Fault
84Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
85 RequestPtr _req, BaseTLB::Mode _mode)
86{
87 // TODO: in timing mode, instead of blocking when there are other
88 // outstanding requests, see if this request can be coalesced with
89 // another one (i.e. either coalesce or start walk)
90 WalkerState * newState = new WalkerState(this, _translation, _req);
91 newState->initState(_tc, _mode, sys->isTimingMode());
92 if (currStates.size()) {
93 assert(newState->isTiming());
94 DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
95 currStates.push_back(newState);
96 return NoFault;
97 } else {
98 currStates.push_back(newState);
99 Fault fault = newState->startWalk();
100 if (!newState->isTiming()) {
101 currStates.pop_front();
102 delete newState;
103 }
104 return fault;
105 }
106}
107
108Fault
109Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
110 BaseTLB::Mode _mode)
111{
112 funcState.initState(_tc, _mode);
113 return funcState.startFunctional(addr, logBytes);
114}
115
116bool
117Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
118{
119 return walker->recvTimingResp(pkt);
120}
121
122bool
123Walker::recvTimingResp(PacketPtr pkt)
124{
125 WalkerSenderState * senderState =
126 dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
127 WalkerState * senderWalk = senderState->senderWalk;
128 bool walkComplete = senderWalk->recvPacket(pkt);
129 delete senderState;
130 if (walkComplete) {
131 std::list<WalkerState *>::iterator iter;
132 for (iter = currStates.begin(); iter != currStates.end(); iter++) {
133 WalkerState * walkerState = *(iter);
134 if (walkerState == senderWalk) {
135 iter = currStates.erase(iter);
136 break;
137 }
138 }
139 delete senderWalk;
140 // Since we block requests when another is outstanding, we
141 // need to check if there is a waiting request to be serviced
142 if (currStates.size()) {
143 WalkerState * newState = currStates.front();
144 if (!newState->wasStarted())
145 newState->startWalk();
146 }
142 if (currStates.size())
143 startWalkWrapper();
144 }
145 return true;
146}
147
148void
149Walker::WalkerPort::recvRetry()
150{
151 walker->recvRetry();
152}
153
154void
155Walker::recvRetry()
156{
157 std::list<WalkerState *>::iterator iter;
158 for (iter = currStates.begin(); iter != currStates.end(); iter++) {
159 WalkerState * walkerState = *(iter);
160 if (walkerState->isRetrying()) {
161 walkerState->retry();
162 }
163 }
164}
165
166bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
167{
168 pkt->pushSenderState(new WalkerSenderState(sendingState));
169 return port.sendTimingReq(pkt);
170}
171
172BaseMasterPort &
173Walker::getMasterPort(const std::string &if_name, PortID idx)
174{
175 if (if_name == "port")
176 return port;
177 else
178 return MemObject::getMasterPort(if_name, idx);
179}
180
181void
182Walker::WalkerState::initState(ThreadContext * _tc,
183 BaseTLB::Mode _mode, bool _isTiming)
184{
185 assert(state == Ready);
186 started = false;
187 tc = _tc;
188 mode = _mode;
189 timing = _isTiming;
190}
191
192void
193Walker::startWalkWrapper()
194{
195 unsigned num_squashed = 0;
196 WalkerState *currState = currStates.front();
197 while ((num_squashed < numSquashable) && currState &&
198 currState->translation->squashed()) {
199 currStates.pop_front();
200 num_squashed++;
201
202 DPRINTF(PageTableWalker, "Squashing table walk for address %#x\n",
203 currState->req->getVaddr());
204
205 // finish the translation which will delete the translation object
206 currState->translation->finish(new UnimpFault("Squashed Inst"),
207 currState->req, currState->tc, currState->mode);
208
209 // delete the current request
210 delete currState;
211
212 // check the next translation request, if it exists
213 if (currStates.size())
214 currState = currStates.front();
215 else
216 currState = NULL;
217 }
218 if (currState && !currState->wasStarted())
219 currState->startWalk();
220}
221
222Fault
223Walker::WalkerState::startWalk()
224{
225 Fault fault = NoFault;
226 assert(started == false);
227 started = true;
228 setupWalk(req->getVaddr());
229 if (timing) {
230 nextState = state;
231 state = Waiting;
232 timingFault = NoFault;
233 sendPackets();
234 } else {
235 do {
236 walker->port.sendAtomic(read);
237 PacketPtr write = NULL;
238 fault = stepWalk(write);
239 assert(fault == NoFault || read == NULL);
240 state = nextState;
241 nextState = Ready;
242 if (write)
243 walker->port.sendAtomic(write);
244 } while(read);
245 state = Ready;
246 nextState = Waiting;
247 }
248 return fault;
249}
250
251Fault
252Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
253{
254 Fault fault = NoFault;
255 assert(started == false);
256 started = true;
257 setupWalk(addr);
258
259 do {
260 walker->port.sendFunctional(read);
261 // On a functional access (page table lookup), writes should
262 // not happen so this pointer is ignored after stepWalk
263 PacketPtr write = NULL;
264 fault = stepWalk(write);
265 assert(fault == NoFault || read == NULL);
266 state = nextState;
267 nextState = Ready;
268 } while(read);
269 logBytes = entry.logBytes;
270 addr = entry.paddr;
271
272 return fault;
273}
274
275Fault
276Walker::WalkerState::stepWalk(PacketPtr &write)
277{
278 assert(state != Ready && state != Waiting);
279 Fault fault = NoFault;
280 write = NULL;
281 PageTableEntry pte;
282 if (dataSize == 8)
283 pte = read->get<uint64_t>();
284 else
285 pte = read->get<uint32_t>();
286 VAddr vaddr = entry.vaddr;
287 bool uncacheable = pte.pcd;
288 Addr nextRead = 0;
289 bool doWrite = false;
290 bool doTLBInsert = false;
291 bool doEndWalk = false;
292 bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
293 switch(state) {
294 case LongPML4:
295 DPRINTF(PageTableWalker,
296 "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
297 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
298 doWrite = !pte.a;
299 pte.a = 1;
300 entry.writable = pte.w;
301 entry.user = pte.u;
302 if (badNX || !pte.p) {
303 doEndWalk = true;
304 fault = pageFault(pte.p);
305 break;
306 }
307 entry.noExec = pte.nx;
308 nextState = LongPDP;
309 break;
310 case LongPDP:
311 DPRINTF(PageTableWalker,
312 "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
313 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
314 doWrite = !pte.a;
315 pte.a = 1;
316 entry.writable = entry.writable && pte.w;
317 entry.user = entry.user && pte.u;
318 if (badNX || !pte.p) {
319 doEndWalk = true;
320 fault = pageFault(pte.p);
321 break;
322 }
323 nextState = LongPD;
324 break;
325 case LongPD:
326 DPRINTF(PageTableWalker,
327 "Got long mode PD entry %#016x.\n", (uint64_t)pte);
328 doWrite = !pte.a;
329 pte.a = 1;
330 entry.writable = entry.writable && pte.w;
331 entry.user = entry.user && pte.u;
332 if (badNX || !pte.p) {
333 doEndWalk = true;
334 fault = pageFault(pte.p);
335 break;
336 }
337 if (!pte.ps) {
338 // 4 KB page
339 entry.logBytes = 12;
340 nextRead =
341 ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
342 nextState = LongPTE;
343 break;
344 } else {
345 // 2 MB page
346 entry.logBytes = 21;
347 entry.paddr = (uint64_t)pte & (mask(31) << 21);
348 entry.uncacheable = uncacheable;
349 entry.global = pte.g;
350 entry.patBit = bits(pte, 12);
351 entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
352 doTLBInsert = true;
353 doEndWalk = true;
354 break;
355 }
356 case LongPTE:
357 DPRINTF(PageTableWalker,
358 "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
359 doWrite = !pte.a;
360 pte.a = 1;
361 entry.writable = entry.writable && pte.w;
362 entry.user = entry.user && pte.u;
363 if (badNX || !pte.p) {
364 doEndWalk = true;
365 fault = pageFault(pte.p);
366 break;
367 }
368 entry.paddr = (uint64_t)pte & (mask(40) << 12);
369 entry.uncacheable = uncacheable;
370 entry.global = pte.g;
371 entry.patBit = bits(pte, 12);
372 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
373 doTLBInsert = true;
374 doEndWalk = true;
375 break;
376 case PAEPDP:
377 DPRINTF(PageTableWalker,
378 "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
379 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
380 if (!pte.p) {
381 doEndWalk = true;
382 fault = pageFault(pte.p);
383 break;
384 }
385 nextState = PAEPD;
386 break;
387 case PAEPD:
388 DPRINTF(PageTableWalker,
389 "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
390 doWrite = !pte.a;
391 pte.a = 1;
392 entry.writable = pte.w;
393 entry.user = pte.u;
394 if (badNX || !pte.p) {
395 doEndWalk = true;
396 fault = pageFault(pte.p);
397 break;
398 }
399 if (!pte.ps) {
400 // 4 KB page
401 entry.logBytes = 12;
402 nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
403 nextState = PAEPTE;
404 break;
405 } else {
406 // 2 MB page
407 entry.logBytes = 21;
408 entry.paddr = (uint64_t)pte & (mask(31) << 21);
409 entry.uncacheable = uncacheable;
410 entry.global = pte.g;
411 entry.patBit = bits(pte, 12);
412 entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
413 doTLBInsert = true;
414 doEndWalk = true;
415 break;
416 }
417 case PAEPTE:
418 DPRINTF(PageTableWalker,
419 "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
420 doWrite = !pte.a;
421 pte.a = 1;
422 entry.writable = entry.writable && pte.w;
423 entry.user = entry.user && pte.u;
424 if (badNX || !pte.p) {
425 doEndWalk = true;
426 fault = pageFault(pte.p);
427 break;
428 }
429 entry.paddr = (uint64_t)pte & (mask(40) << 12);
430 entry.uncacheable = uncacheable;
431 entry.global = pte.g;
432 entry.patBit = bits(pte, 7);
433 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
434 doTLBInsert = true;
435 doEndWalk = true;
436 break;
437 case PSEPD:
438 DPRINTF(PageTableWalker,
439 "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
440 doWrite = !pte.a;
441 pte.a = 1;
442 entry.writable = pte.w;
443 entry.user = pte.u;
444 if (!pte.p) {
445 doEndWalk = true;
446 fault = pageFault(pte.p);
447 break;
448 }
449 if (!pte.ps) {
450 // 4 KB page
451 entry.logBytes = 12;
452 nextRead =
453 ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
454 nextState = PTE;
455 break;
456 } else {
457 // 4 MB page
458 entry.logBytes = 21;
459 entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
460 entry.uncacheable = uncacheable;
461 entry.global = pte.g;
462 entry.patBit = bits(pte, 12);
463 entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
464 doTLBInsert = true;
465 doEndWalk = true;
466 break;
467 }
468 case PD:
469 DPRINTF(PageTableWalker,
470 "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
471 doWrite = !pte.a;
472 pte.a = 1;
473 entry.writable = pte.w;
474 entry.user = pte.u;
475 if (!pte.p) {
476 doEndWalk = true;
477 fault = pageFault(pte.p);
478 break;
479 }
480 // 4 KB page
481 entry.logBytes = 12;
482 nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
483 nextState = PTE;
484 break;
485 case PTE:
486 DPRINTF(PageTableWalker,
487 "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
488 doWrite = !pte.a;
489 pte.a = 1;
490 entry.writable = pte.w;
491 entry.user = pte.u;
492 if (!pte.p) {
493 doEndWalk = true;
494 fault = pageFault(pte.p);
495 break;
496 }
497 entry.paddr = (uint64_t)pte & (mask(20) << 12);
498 entry.uncacheable = uncacheable;
499 entry.global = pte.g;
500 entry.patBit = bits(pte, 7);
501 entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
502 doTLBInsert = true;
503 doEndWalk = true;
504 break;
505 default:
506 panic("Unknown page table walker state %d!\n");
507 }
508 if (doEndWalk) {
509 if (doTLBInsert)
510 if (!functional)
511 walker->tlb->insert(entry.vaddr, entry);
512 endWalk();
513 } else {
514 PacketPtr oldRead = read;
515 //If we didn't return, we're setting up another read.
516 Request::Flags flags = oldRead->req->getFlags();
517 flags.set(Request::UNCACHEABLE, uncacheable);
518 RequestPtr request =
519 new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
520 read = new Packet(request, MemCmd::ReadReq);
521 read->allocate();
522 // If we need to write, adjust the read packet to write the modified
523 // value back to memory.
524 if (doWrite) {
525 write = oldRead;
526 write->set<uint64_t>(pte);
527 write->cmd = MemCmd::WriteReq;
528 write->clearDest();
529 } else {
530 write = NULL;
531 delete oldRead->req;
532 delete oldRead;
533 }
534 }
535 return fault;
536}
537
538void
539Walker::WalkerState::endWalk()
540{
541 nextState = Ready;
542 delete read->req;
543 delete read;
544 read = NULL;
545}
546
547void
548Walker::WalkerState::setupWalk(Addr vaddr)
549{
550 VAddr addr = vaddr;
551 CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
552 // Check if we're in long mode or not
553 Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
554 dataSize = 8;
555 Addr topAddr;
556 if (efer.lma) {
557 // Do long mode.
558 state = LongPML4;
559 topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
560 enableNX = efer.nxe;
561 } else {
562 // We're in some flavor of legacy mode.
563 CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
564 if (cr4.pae) {
565 // Do legacy PAE.
566 state = PAEPDP;
567 topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
568 enableNX = efer.nxe;
569 } else {
570 dataSize = 4;
571 topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
572 if (cr4.pse) {
573 // Do legacy PSE.
574 state = PSEPD;
575 } else {
576 // Do legacy non PSE.
577 state = PD;
578 }
579 enableNX = false;
580 }
581 }
582
583 nextState = Ready;
584 entry.vaddr = vaddr;
585
586 Request::Flags flags = Request::PHYSICAL;
587 if (cr3.pcd)
588 flags.set(Request::UNCACHEABLE);
589 RequestPtr request = new Request(topAddr, dataSize, flags,
590 walker->masterId);
591 read = new Packet(request, MemCmd::ReadReq);
592 read->allocate();
593}
594
595bool
596Walker::WalkerState::recvPacket(PacketPtr pkt)
597{
598 assert(pkt->isResponse());
599 assert(inflight);
600 assert(state == Waiting);
601 assert(!read);
602 inflight--;
603 if (pkt->isRead()) {
604 // @todo someone should pay for this
605 pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
606
607 state = nextState;
608 nextState = Ready;
609 PacketPtr write = NULL;
610 read = pkt;
611 timingFault = stepWalk(write);
612 state = Waiting;
613 assert(timingFault == NoFault || read == NULL);
614 if (write) {
615 writes.push_back(write);
616 }
617 sendPackets();
618 } else {
619 sendPackets();
620 }
621 if (inflight == 0 && read == NULL && writes.size() == 0) {
622 state = Ready;
623 nextState = Waiting;
624 if (timingFault == NoFault) {
625 /*
626 * Finish the translation. Now that we now the right entry is
627 * in the TLB, this should work with no memory accesses.
628 * There could be new faults unrelated to the table walk like
629 * permissions violations, so we'll need the return value as
630 * well.
631 */
632 bool delayedResponse;
633 Fault fault = walker->tlb->translate(req, tc, NULL, mode,
634 delayedResponse, true);
635 assert(!delayedResponse);
636 // Let the CPU continue.
637 translation->finish(fault, req, tc, mode);
638 } else {
639 // There was a fault during the walk. Let the CPU know.
640 translation->finish(timingFault, req, tc, mode);
641 }
642 return true;
643 }
644
645 return false;
646}
647
648void
649Walker::WalkerState::sendPackets()
650{
651 //If we're already waiting for the port to become available, just return.
652 if (retrying)
653 return;
654
655 //Reads always have priority
656 if (read) {
657 PacketPtr pkt = read;
658 read = NULL;
659 inflight++;
660 if (!walker->sendTiming(this, pkt)) {
661 retrying = true;
662 read = pkt;
663 inflight--;
664 return;
665 }
666 }
667 //Send off as many of the writes as we can.
668 while (writes.size()) {
669 PacketPtr write = writes.back();
670 writes.pop_back();
671 inflight++;
672 if (!walker->sendTiming(this, write)) {
673 retrying = true;
674 writes.push_back(write);
675 inflight--;
676 return;
677 }
678 }
679}
680
681bool
682Walker::WalkerState::isRetrying()
683{
684 return retrying;
685}
686
687bool
688Walker::WalkerState::isTiming()
689{
690 return timing;
691}
692
693bool
694Walker::WalkerState::wasStarted()
695{
696 return started;
697}
698
699void
700Walker::WalkerState::retry()
701{
702 retrying = false;
703 sendPackets();
704}
705
706Fault
707Walker::WalkerState::pageFault(bool present)
708{
709 DPRINTF(PageTableWalker, "Raising page fault.\n");
710 HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
711 if (mode == BaseTLB::Execute && !enableNX)
712 mode = BaseTLB::Read;
713 return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
714}
715
716/* end namespace X86ISA */ }
717
718X86ISA::Walker *
719X86PagetableWalkerParams::create()
720{
721 return new X86ISA::Walker(this);
722}