pagetable_walker.cc revision 8922
1/*
2 * Copyright (c) 2007 The Hewlett-Packard Development Company
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#include "arch/x86/pagetable.hh"
41#include "arch/x86/pagetable_walker.hh"
42#include "arch/x86/tlb.hh"
43#include "arch/x86/vtophys.hh"
44#include "base/bitfield.hh"
45#include "cpu/base.hh"
46#include "cpu/thread_context.hh"
47#include "debug/PageTableWalker.hh"
48#include "mem/packet_access.hh"
49#include "mem/request.hh"
50#include "sim/system.hh"
51
52namespace X86ISA {
53
54// Unfortunately, the placement of the base field in a page table entry is
55// very erratic and would make a mess here. It might be moved here at some
56// point in the future.
57BitUnion64(PageTableEntry)
58    Bitfield<63> nx;
59    Bitfield<11, 9> avl;
60    Bitfield<8> g;
61    Bitfield<7> ps;
62    Bitfield<6> d;
63    Bitfield<5> a;
64    Bitfield<4> pcd;
65    Bitfield<3> pwt;
66    Bitfield<2> u;
67    Bitfield<1> w;
68    Bitfield<0> p;
69EndBitUnion(PageTableEntry)
70
71Fault
72Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
73              RequestPtr _req, BaseTLB::Mode _mode)
74{
75    // TODO: in timing mode, instead of blocking when there are other
76    // outstanding requests, see if this request can be coalesced with
77    // another one (i.e. either coalesce or start walk)
78    WalkerState * newState = new WalkerState(this, _translation, _req);
79    newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing);
80    if (currStates.size()) {
81        assert(newState->isTiming());
82        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
83        currStates.push_back(newState);
84        return NoFault;
85    } else {
86        currStates.push_back(newState);
87        Fault fault = newState->startWalk();
88        if (!newState->isTiming()) {
89            currStates.pop_front();
90            delete newState;
91        }
92        return fault;
93    }
94}
95
96Fault
97Walker::startFunctional(ThreadContext * _tc, Addr &addr, Addr &pageSize,
98              BaseTLB::Mode _mode)
99{
100    funcState.initState(_tc, _mode);
101    return funcState.startFunctional(addr, pageSize);
102}
103
104bool
105Walker::WalkerPort::recvTiming(PacketPtr pkt)
106{
107    return walker->recvTiming(pkt);
108}
109
110bool
111Walker::recvTiming(PacketPtr pkt)
112{
113    if (pkt->isResponse() || pkt->wasNacked()) {
114        WalkerSenderState * senderState =
115                dynamic_cast<WalkerSenderState *>(pkt->senderState);
116        pkt->senderState = senderState->saved;
117        WalkerState * senderWalk = senderState->senderWalk;
118        bool walkComplete = senderWalk->recvPacket(pkt);
119        delete senderState;
120        if (walkComplete) {
121            std::list<WalkerState *>::iterator iter;
122            for (iter = currStates.begin(); iter != currStates.end(); iter++) {
123                WalkerState * walkerState = *(iter);
124                if (walkerState == senderWalk) {
125                    iter = currStates.erase(iter);
126                    break;
127                }
128            }
129            delete senderWalk;
130            // Since we block requests when another is outstanding, we
131            // need to check if there is a waiting request to be serviced
132            if (currStates.size()) {
133                WalkerState * newState = currStates.front();
134                if (!newState->wasStarted())
135                    newState->startWalk();
136            }
137        }
138    } else {
139        DPRINTF(PageTableWalker, "Received strange packet\n");
140    }
141    return true;
142}
143
144Tick
145Walker::WalkerPort::recvAtomic(PacketPtr pkt)
146{
147    return 0;
148}
149
150void
151Walker::WalkerPort::recvFunctional(PacketPtr pkt)
152{
153    return;
154}
155
156void
157Walker::WalkerPort::recvRetry()
158{
159    walker->recvRetry();
160}
161
162void
163Walker::recvRetry()
164{
165    std::list<WalkerState *>::iterator iter;
166    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
167        WalkerState * walkerState = *(iter);
168        if (walkerState->isRetrying()) {
169            walkerState->retry();
170        }
171    }
172}
173
174bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
175{
176    pkt->senderState = new WalkerSenderState(sendingState, pkt->senderState);
177    return port.sendTiming(pkt);
178}
179
180MasterPort &
181Walker::getMasterPort(const std::string &if_name, int idx)
182{
183    if (if_name == "port")
184        return port;
185    else
186        return MemObject::getMasterPort(if_name, idx);
187}
188
189void
190Walker::WalkerState::initState(ThreadContext * _tc,
191        BaseTLB::Mode _mode, bool _isTiming)
192{
193    assert(state == Ready);
194    started = false;
195    tc = _tc;
196    mode = _mode;
197    timing = _isTiming;
198}
199
200Fault
201Walker::WalkerState::startWalk()
202{
203    Fault fault = NoFault;
204    assert(started == false);
205    started = true;
206    setupWalk(req->getVaddr());
207    if (timing) {
208        nextState = state;
209        state = Waiting;
210        timingFault = NoFault;
211        sendPackets();
212    } else {
213        do {
214            walker->port.sendAtomic(read);
215            PacketPtr write = NULL;
216            fault = stepWalk(write);
217            assert(fault == NoFault || read == NULL);
218            state = nextState;
219            nextState = Ready;
220            if (write)
221                walker->port.sendAtomic(write);
222        } while(read);
223        state = Ready;
224        nextState = Waiting;
225    }
226    return fault;
227}
228
229Fault
230Walker::WalkerState::startFunctional(Addr &addr, Addr &pageSize)
231{
232    Fault fault = NoFault;
233    assert(started == false);
234    started = true;
235    setupWalk(addr);
236
237    do {
238        walker->port.sendFunctional(read);
239        // On a functional access (page table lookup), writes should
240        // not happen so this pointer is ignored after stepWalk
241        PacketPtr write = NULL;
242        fault = stepWalk(write);
243        assert(fault == NoFault || read == NULL);
244        state = nextState;
245        nextState = Ready;
246    } while(read);
247    pageSize = entry.size;
248    addr = entry.paddr;
249
250    return fault;
251}
252
253Fault
254Walker::WalkerState::stepWalk(PacketPtr &write)
255{
256    assert(state != Ready && state != Waiting);
257    Fault fault = NoFault;
258    write = NULL;
259    PageTableEntry pte;
260    if (dataSize == 8)
261        pte = read->get<uint64_t>();
262    else
263        pte = read->get<uint32_t>();
264    VAddr vaddr = entry.vaddr;
265    bool uncacheable = pte.pcd;
266    Addr nextRead = 0;
267    bool doWrite = false;
268    bool doTLBInsert = false;
269    bool doEndWalk = false;
270    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
271    switch(state) {
272      case LongPML4:
273        DPRINTF(PageTableWalker,
274                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
275        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
276        doWrite = !pte.a;
277        pte.a = 1;
278        entry.writable = pte.w;
279        entry.user = pte.u;
280        if (badNX || !pte.p) {
281            doEndWalk = true;
282            fault = pageFault(pte.p);
283            break;
284        }
285        entry.noExec = pte.nx;
286        nextState = LongPDP;
287        break;
288      case LongPDP:
289        DPRINTF(PageTableWalker,
290                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
291        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
292        doWrite = !pte.a;
293        pte.a = 1;
294        entry.writable = entry.writable && pte.w;
295        entry.user = entry.user && pte.u;
296        if (badNX || !pte.p) {
297            doEndWalk = true;
298            fault = pageFault(pte.p);
299            break;
300        }
301        nextState = LongPD;
302        break;
303      case LongPD:
304        DPRINTF(PageTableWalker,
305                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
306        doWrite = !pte.a;
307        pte.a = 1;
308        entry.writable = entry.writable && pte.w;
309        entry.user = entry.user && pte.u;
310        if (badNX || !pte.p) {
311            doEndWalk = true;
312            fault = pageFault(pte.p);
313            break;
314        }
315        if (!pte.ps) {
316            // 4 KB page
317            entry.size = 4 * (1 << 10);
318            nextRead =
319                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
320            nextState = LongPTE;
321            break;
322        } else {
323            // 2 MB page
324            entry.size = 2 * (1 << 20);
325            entry.paddr = (uint64_t)pte & (mask(31) << 21);
326            entry.uncacheable = uncacheable;
327            entry.global = pte.g;
328            entry.patBit = bits(pte, 12);
329            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
330            doTLBInsert = true;
331            doEndWalk = true;
332            break;
333        }
334      case LongPTE:
335        DPRINTF(PageTableWalker,
336                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
337        doWrite = !pte.a;
338        pte.a = 1;
339        entry.writable = entry.writable && pte.w;
340        entry.user = entry.user && pte.u;
341        if (badNX || !pte.p) {
342            doEndWalk = true;
343            fault = pageFault(pte.p);
344            break;
345        }
346        entry.paddr = (uint64_t)pte & (mask(40) << 12);
347        entry.uncacheable = uncacheable;
348        entry.global = pte.g;
349        entry.patBit = bits(pte, 12);
350        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
351        doTLBInsert = true;
352        doEndWalk = true;
353        break;
354      case PAEPDP:
355        DPRINTF(PageTableWalker,
356                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
357        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
358        if (!pte.p) {
359            doEndWalk = true;
360            fault = pageFault(pte.p);
361            break;
362        }
363        nextState = PAEPD;
364        break;
365      case PAEPD:
366        DPRINTF(PageTableWalker,
367                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
368        doWrite = !pte.a;
369        pte.a = 1;
370        entry.writable = pte.w;
371        entry.user = pte.u;
372        if (badNX || !pte.p) {
373            doEndWalk = true;
374            fault = pageFault(pte.p);
375            break;
376        }
377        if (!pte.ps) {
378            // 4 KB page
379            entry.size = 4 * (1 << 10);
380            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
381            nextState = PAEPTE;
382            break;
383        } else {
384            // 2 MB page
385            entry.size = 2 * (1 << 20);
386            entry.paddr = (uint64_t)pte & (mask(31) << 21);
387            entry.uncacheable = uncacheable;
388            entry.global = pte.g;
389            entry.patBit = bits(pte, 12);
390            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
391            doTLBInsert = true;
392            doEndWalk = true;
393            break;
394        }
395      case PAEPTE:
396        DPRINTF(PageTableWalker,
397                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
398        doWrite = !pte.a;
399        pte.a = 1;
400        entry.writable = entry.writable && pte.w;
401        entry.user = entry.user && pte.u;
402        if (badNX || !pte.p) {
403            doEndWalk = true;
404            fault = pageFault(pte.p);
405            break;
406        }
407        entry.paddr = (uint64_t)pte & (mask(40) << 12);
408        entry.uncacheable = uncacheable;
409        entry.global = pte.g;
410        entry.patBit = bits(pte, 7);
411        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
412        doTLBInsert = true;
413        doEndWalk = true;
414        break;
415      case PSEPD:
416        DPRINTF(PageTableWalker,
417                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
418        doWrite = !pte.a;
419        pte.a = 1;
420        entry.writable = pte.w;
421        entry.user = pte.u;
422        if (!pte.p) {
423            doEndWalk = true;
424            fault = pageFault(pte.p);
425            break;
426        }
427        if (!pte.ps) {
428            // 4 KB page
429            entry.size = 4 * (1 << 10);
430            nextRead =
431                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
432            nextState = PTE;
433            break;
434        } else {
435            // 4 MB page
436            entry.size = 4 * (1 << 20);
437            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
438            entry.uncacheable = uncacheable;
439            entry.global = pte.g;
440            entry.patBit = bits(pte, 12);
441            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
442            doTLBInsert = true;
443            doEndWalk = true;
444            break;
445        }
446      case PD:
447        DPRINTF(PageTableWalker,
448                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
449        doWrite = !pte.a;
450        pte.a = 1;
451        entry.writable = pte.w;
452        entry.user = pte.u;
453        if (!pte.p) {
454            doEndWalk = true;
455            fault = pageFault(pte.p);
456            break;
457        }
458        // 4 KB page
459        entry.size = 4 * (1 << 10);
460        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
461        nextState = PTE;
462        break;
463      case PTE:
464        DPRINTF(PageTableWalker,
465                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
466        doWrite = !pte.a;
467        pte.a = 1;
468        entry.writable = pte.w;
469        entry.user = pte.u;
470        if (!pte.p) {
471            doEndWalk = true;
472            fault = pageFault(pte.p);
473            break;
474        }
475        entry.paddr = (uint64_t)pte & (mask(20) << 12);
476        entry.uncacheable = uncacheable;
477        entry.global = pte.g;
478        entry.patBit = bits(pte, 7);
479        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
480        doTLBInsert = true;
481        doEndWalk = true;
482        break;
483      default:
484        panic("Unknown page table walker state %d!\n");
485    }
486    if (doEndWalk) {
487        if (doTLBInsert)
488            if (!functional)
489                walker->tlb->insert(entry.vaddr, entry);
490        endWalk();
491    } else {
492        PacketPtr oldRead = read;
493        //If we didn't return, we're setting up another read.
494        Request::Flags flags = oldRead->req->getFlags();
495        flags.set(Request::UNCACHEABLE, uncacheable);
496        RequestPtr request =
497            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
498        read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast);
499        read->allocate();
500        // If we need to write, adjust the read packet to write the modified
501        // value back to memory.
502        if (doWrite) {
503            write = oldRead;
504            write->set<uint64_t>(pte);
505            write->cmd = MemCmd::WriteReq;
506            write->setDest(Packet::Broadcast);
507        } else {
508            write = NULL;
509            delete oldRead->req;
510            delete oldRead;
511        }
512    }
513    return fault;
514}
515
516void
517Walker::WalkerState::endWalk()
518{
519    nextState = Ready;
520    delete read->req;
521    delete read;
522    read = NULL;
523}
524
525void
526Walker::WalkerState::setupWalk(Addr vaddr)
527{
528    VAddr addr = vaddr;
529    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
530    // Check if we're in long mode or not
531    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
532    dataSize = 8;
533    Addr topAddr;
534    if (efer.lma) {
535        // Do long mode.
536        state = LongPML4;
537        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
538        enableNX = efer.nxe;
539    } else {
540        // We're in some flavor of legacy mode.
541        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
542        if (cr4.pae) {
543            // Do legacy PAE.
544            state = PAEPDP;
545            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
546            enableNX = efer.nxe;
547        } else {
548            dataSize = 4;
549            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
550            if (cr4.pse) {
551                // Do legacy PSE.
552                state = PSEPD;
553            } else {
554                // Do legacy non PSE.
555                state = PD;
556            }
557            enableNX = false;
558        }
559    }
560
561    nextState = Ready;
562    entry.vaddr = vaddr;
563
564    Request::Flags flags = Request::PHYSICAL;
565    if (cr3.pcd)
566        flags.set(Request::UNCACHEABLE);
567    RequestPtr request = new Request(topAddr, dataSize, flags, walker->masterId);
568    read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast);
569    read->allocate();
570}
571
572bool
573Walker::WalkerState::recvPacket(PacketPtr pkt)
574{
575    if (pkt->isResponse() && !pkt->wasNacked()) {
576        assert(inflight);
577        assert(state == Waiting);
578        assert(!read);
579        inflight--;
580        if (pkt->isRead()) {
581            state = nextState;
582            nextState = Ready;
583            PacketPtr write = NULL;
584            read = pkt;
585            timingFault = stepWalk(write);
586            state = Waiting;
587            assert(timingFault == NoFault || read == NULL);
588            if (write) {
589                writes.push_back(write);
590            }
591            sendPackets();
592        } else {
593            sendPackets();
594        }
595        if (inflight == 0 && read == NULL && writes.size() == 0) {
596            state = Ready;
597            nextState = Waiting;
598            if (timingFault == NoFault) {
599                /*
600                 * Finish the translation. Now that we now the right entry is
601                 * in the TLB, this should work with no memory accesses.
602                 * There could be new faults unrelated to the table walk like
603                 * permissions violations, so we'll need the return value as
604                 * well.
605                 */
606                bool delayedResponse;
607                Fault fault = walker->tlb->translate(req, tc, NULL, mode,
608                        delayedResponse, true);
609                assert(!delayedResponse);
610                // Let the CPU continue.
611                translation->finish(fault, req, tc, mode);
612            } else {
613                // There was a fault during the walk. Let the CPU know.
614                translation->finish(timingFault, req, tc, mode);
615            }
616            return true;
617        }
618    } else if (pkt->wasNacked()) {
619        DPRINTF(PageTableWalker, "Request was nacked. Entering retry state\n");
620        pkt->reinitNacked();
621        if (!walker->sendTiming(this, pkt)) {
622            inflight--;
623            retrying = true;
624            if (pkt->isWrite()) {
625                writes.push_back(pkt);
626            } else {
627                assert(!read);
628                read = pkt;
629            }
630        }
631    }
632    return false;
633}
634
635void
636Walker::WalkerState::sendPackets()
637{
638    //If we're already waiting for the port to become available, just return.
639    if (retrying)
640        return;
641
642    //Reads always have priority
643    if (read) {
644        PacketPtr pkt = read;
645        read = NULL;
646        inflight++;
647        if (!walker->sendTiming(this, pkt)) {
648            retrying = true;
649            read = pkt;
650            inflight--;
651            return;
652        }
653    }
654    //Send off as many of the writes as we can.
655    while (writes.size()) {
656        PacketPtr write = writes.back();
657        writes.pop_back();
658        inflight++;
659        if (!walker->sendTiming(this, write)) {
660            retrying = true;
661            writes.push_back(write);
662            inflight--;
663            return;
664        }
665    }
666}
667
668bool
669Walker::WalkerState::isRetrying()
670{
671    return retrying;
672}
673
674bool
675Walker::WalkerState::isTiming()
676{
677    return timing;
678}
679
680bool
681Walker::WalkerState::wasStarted()
682{
683    return started;
684}
685
686void
687Walker::WalkerState::retry()
688{
689    retrying = false;
690    sendPackets();
691}
692
693Fault
694Walker::WalkerState::pageFault(bool present)
695{
696    DPRINTF(PageTableWalker, "Raising page fault.\n");
697    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
698    if (mode == BaseTLB::Execute && !enableNX)
699        mode = BaseTLB::Read;
700    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
701}
702
703/* end namespace X86ISA */ }
704
705X86ISA::Walker *
706X86PagetableWalkerParams::create()
707{
708    return new X86ISA::Walker(this);
709}
710