pagetable_walker.cc revision 9542:683991c46ac8
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include "arch/x86/pagetable.hh"
53#include "arch/x86/pagetable_walker.hh"
54#include "arch/x86/tlb.hh"
55#include "arch/x86/vtophys.hh"
56#include "base/bitfield.hh"
57#include "base/trie.hh"
58#include "cpu/base.hh"
59#include "cpu/thread_context.hh"
60#include "debug/PageTableWalker.hh"
61#include "mem/packet_access.hh"
62#include "mem/request.hh"
63
64namespace X86ISA {
65
66// Unfortunately, the placement of the base field in a page table entry is
67// very erratic and would make a mess here. It might be moved here at some
68// point in the future.
69BitUnion64(PageTableEntry)
70    Bitfield<63> nx;
71    Bitfield<11, 9> avl;
72    Bitfield<8> g;
73    Bitfield<7> ps;
74    Bitfield<6> d;
75    Bitfield<5> a;
76    Bitfield<4> pcd;
77    Bitfield<3> pwt;
78    Bitfield<2> u;
79    Bitfield<1> w;
80    Bitfield<0> p;
81EndBitUnion(PageTableEntry)
82
83Fault
84Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
85              RequestPtr _req, BaseTLB::Mode _mode)
86{
87    // TODO: in timing mode, instead of blocking when there are other
88    // outstanding requests, see if this request can be coalesced with
89    // another one (i.e. either coalesce or start walk)
90    WalkerState * newState = new WalkerState(this, _translation, _req);
91    newState->initState(_tc, _mode, sys->isTimingMode());
92    if (currStates.size()) {
93        assert(newState->isTiming());
94        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
95        currStates.push_back(newState);
96        return NoFault;
97    } else {
98        currStates.push_back(newState);
99        Fault fault = newState->startWalk();
100        if (!newState->isTiming()) {
101            currStates.pop_front();
102            delete newState;
103        }
104        return fault;
105    }
106}
107
108Fault
109Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
110              BaseTLB::Mode _mode)
111{
112    funcState.initState(_tc, _mode);
113    return funcState.startFunctional(addr, logBytes);
114}
115
116bool
117Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
118{
119    return walker->recvTimingResp(pkt);
120}
121
122bool
123Walker::recvTimingResp(PacketPtr pkt)
124{
125    WalkerSenderState * senderState =
126        dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
127    WalkerState * senderWalk = senderState->senderWalk;
128    bool walkComplete = senderWalk->recvPacket(pkt);
129    delete senderState;
130    if (walkComplete) {
131        std::list<WalkerState *>::iterator iter;
132        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
133            WalkerState * walkerState = *(iter);
134            if (walkerState == senderWalk) {
135                iter = currStates.erase(iter);
136                break;
137            }
138        }
139        delete senderWalk;
140        // Since we block requests when another is outstanding, we
141        // need to check if there is a waiting request to be serviced
142        if (currStates.size()) {
143            WalkerState * newState = currStates.front();
144            if (!newState->wasStarted())
145                newState->startWalk();
146        }
147    }
148    return true;
149}
150
151void
152Walker::WalkerPort::recvRetry()
153{
154    walker->recvRetry();
155}
156
157void
158Walker::recvRetry()
159{
160    std::list<WalkerState *>::iterator iter;
161    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
162        WalkerState * walkerState = *(iter);
163        if (walkerState->isRetrying()) {
164            walkerState->retry();
165        }
166    }
167}
168
169bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
170{
171    pkt->pushSenderState(new WalkerSenderState(sendingState));
172    return port.sendTimingReq(pkt);
173}
174
175BaseMasterPort &
176Walker::getMasterPort(const std::string &if_name, PortID idx)
177{
178    if (if_name == "port")
179        return port;
180    else
181        return MemObject::getMasterPort(if_name, idx);
182}
183
184void
185Walker::WalkerState::initState(ThreadContext * _tc,
186        BaseTLB::Mode _mode, bool _isTiming)
187{
188    assert(state == Ready);
189    started = false;
190    tc = _tc;
191    mode = _mode;
192    timing = _isTiming;
193}
194
195Fault
196Walker::WalkerState::startWalk()
197{
198    Fault fault = NoFault;
199    assert(started == false);
200    started = true;
201    setupWalk(req->getVaddr());
202    if (timing) {
203        nextState = state;
204        state = Waiting;
205        timingFault = NoFault;
206        sendPackets();
207    } else {
208        do {
209            walker->port.sendAtomic(read);
210            PacketPtr write = NULL;
211            fault = stepWalk(write);
212            assert(fault == NoFault || read == NULL);
213            state = nextState;
214            nextState = Ready;
215            if (write)
216                walker->port.sendAtomic(write);
217        } while(read);
218        state = Ready;
219        nextState = Waiting;
220    }
221    return fault;
222}
223
224Fault
225Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
226{
227    Fault fault = NoFault;
228    assert(started == false);
229    started = true;
230    setupWalk(addr);
231
232    do {
233        walker->port.sendFunctional(read);
234        // On a functional access (page table lookup), writes should
235        // not happen so this pointer is ignored after stepWalk
236        PacketPtr write = NULL;
237        fault = stepWalk(write);
238        assert(fault == NoFault || read == NULL);
239        state = nextState;
240        nextState = Ready;
241    } while(read);
242    logBytes = entry.logBytes;
243    addr = entry.paddr;
244
245    return fault;
246}
247
248Fault
249Walker::WalkerState::stepWalk(PacketPtr &write)
250{
251    assert(state != Ready && state != Waiting);
252    Fault fault = NoFault;
253    write = NULL;
254    PageTableEntry pte;
255    if (dataSize == 8)
256        pte = read->get<uint64_t>();
257    else
258        pte = read->get<uint32_t>();
259    VAddr vaddr = entry.vaddr;
260    bool uncacheable = pte.pcd;
261    Addr nextRead = 0;
262    bool doWrite = false;
263    bool doTLBInsert = false;
264    bool doEndWalk = false;
265    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
266    switch(state) {
267      case LongPML4:
268        DPRINTF(PageTableWalker,
269                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
270        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
271        doWrite = !pte.a;
272        pte.a = 1;
273        entry.writable = pte.w;
274        entry.user = pte.u;
275        if (badNX || !pte.p) {
276            doEndWalk = true;
277            fault = pageFault(pte.p);
278            break;
279        }
280        entry.noExec = pte.nx;
281        nextState = LongPDP;
282        break;
283      case LongPDP:
284        DPRINTF(PageTableWalker,
285                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
286        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
287        doWrite = !pte.a;
288        pte.a = 1;
289        entry.writable = entry.writable && pte.w;
290        entry.user = entry.user && pte.u;
291        if (badNX || !pte.p) {
292            doEndWalk = true;
293            fault = pageFault(pte.p);
294            break;
295        }
296        nextState = LongPD;
297        break;
298      case LongPD:
299        DPRINTF(PageTableWalker,
300                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
301        doWrite = !pte.a;
302        pte.a = 1;
303        entry.writable = entry.writable && pte.w;
304        entry.user = entry.user && pte.u;
305        if (badNX || !pte.p) {
306            doEndWalk = true;
307            fault = pageFault(pte.p);
308            break;
309        }
310        if (!pte.ps) {
311            // 4 KB page
312            entry.logBytes = 12;
313            nextRead =
314                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
315            nextState = LongPTE;
316            break;
317        } else {
318            // 2 MB page
319            entry.logBytes = 21;
320            entry.paddr = (uint64_t)pte & (mask(31) << 21);
321            entry.uncacheable = uncacheable;
322            entry.global = pte.g;
323            entry.patBit = bits(pte, 12);
324            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
325            doTLBInsert = true;
326            doEndWalk = true;
327            break;
328        }
329      case LongPTE:
330        DPRINTF(PageTableWalker,
331                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
332        doWrite = !pte.a;
333        pte.a = 1;
334        entry.writable = entry.writable && pte.w;
335        entry.user = entry.user && pte.u;
336        if (badNX || !pte.p) {
337            doEndWalk = true;
338            fault = pageFault(pte.p);
339            break;
340        }
341        entry.paddr = (uint64_t)pte & (mask(40) << 12);
342        entry.uncacheable = uncacheable;
343        entry.global = pte.g;
344        entry.patBit = bits(pte, 12);
345        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
346        doTLBInsert = true;
347        doEndWalk = true;
348        break;
349      case PAEPDP:
350        DPRINTF(PageTableWalker,
351                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
352        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
353        if (!pte.p) {
354            doEndWalk = true;
355            fault = pageFault(pte.p);
356            break;
357        }
358        nextState = PAEPD;
359        break;
360      case PAEPD:
361        DPRINTF(PageTableWalker,
362                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
363        doWrite = !pte.a;
364        pte.a = 1;
365        entry.writable = pte.w;
366        entry.user = pte.u;
367        if (badNX || !pte.p) {
368            doEndWalk = true;
369            fault = pageFault(pte.p);
370            break;
371        }
372        if (!pte.ps) {
373            // 4 KB page
374            entry.logBytes = 12;
375            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
376            nextState = PAEPTE;
377            break;
378        } else {
379            // 2 MB page
380            entry.logBytes = 21;
381            entry.paddr = (uint64_t)pte & (mask(31) << 21);
382            entry.uncacheable = uncacheable;
383            entry.global = pte.g;
384            entry.patBit = bits(pte, 12);
385            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
386            doTLBInsert = true;
387            doEndWalk = true;
388            break;
389        }
390      case PAEPTE:
391        DPRINTF(PageTableWalker,
392                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
393        doWrite = !pte.a;
394        pte.a = 1;
395        entry.writable = entry.writable && pte.w;
396        entry.user = entry.user && pte.u;
397        if (badNX || !pte.p) {
398            doEndWalk = true;
399            fault = pageFault(pte.p);
400            break;
401        }
402        entry.paddr = (uint64_t)pte & (mask(40) << 12);
403        entry.uncacheable = uncacheable;
404        entry.global = pte.g;
405        entry.patBit = bits(pte, 7);
406        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
407        doTLBInsert = true;
408        doEndWalk = true;
409        break;
410      case PSEPD:
411        DPRINTF(PageTableWalker,
412                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
413        doWrite = !pte.a;
414        pte.a = 1;
415        entry.writable = pte.w;
416        entry.user = pte.u;
417        if (!pte.p) {
418            doEndWalk = true;
419            fault = pageFault(pte.p);
420            break;
421        }
422        if (!pte.ps) {
423            // 4 KB page
424            entry.logBytes = 12;
425            nextRead =
426                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
427            nextState = PTE;
428            break;
429        } else {
430            // 4 MB page
431            entry.logBytes = 21;
432            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
433            entry.uncacheable = uncacheable;
434            entry.global = pte.g;
435            entry.patBit = bits(pte, 12);
436            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
437            doTLBInsert = true;
438            doEndWalk = true;
439            break;
440        }
441      case PD:
442        DPRINTF(PageTableWalker,
443                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
444        doWrite = !pte.a;
445        pte.a = 1;
446        entry.writable = pte.w;
447        entry.user = pte.u;
448        if (!pte.p) {
449            doEndWalk = true;
450            fault = pageFault(pte.p);
451            break;
452        }
453        // 4 KB page
454        entry.logBytes = 12;
455        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
456        nextState = PTE;
457        break;
458      case PTE:
459        DPRINTF(PageTableWalker,
460                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
461        doWrite = !pte.a;
462        pte.a = 1;
463        entry.writable = pte.w;
464        entry.user = pte.u;
465        if (!pte.p) {
466            doEndWalk = true;
467            fault = pageFault(pte.p);
468            break;
469        }
470        entry.paddr = (uint64_t)pte & (mask(20) << 12);
471        entry.uncacheable = uncacheable;
472        entry.global = pte.g;
473        entry.patBit = bits(pte, 7);
474        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
475        doTLBInsert = true;
476        doEndWalk = true;
477        break;
478      default:
479        panic("Unknown page table walker state %d!\n");
480    }
481    if (doEndWalk) {
482        if (doTLBInsert)
483            if (!functional)
484                walker->tlb->insert(entry.vaddr, entry);
485        endWalk();
486    } else {
487        PacketPtr oldRead = read;
488        //If we didn't return, we're setting up another read.
489        Request::Flags flags = oldRead->req->getFlags();
490        flags.set(Request::UNCACHEABLE, uncacheable);
491        RequestPtr request =
492            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
493        read = new Packet(request, MemCmd::ReadReq);
494        read->allocate();
495        // If we need to write, adjust the read packet to write the modified
496        // value back to memory.
497        if (doWrite) {
498            write = oldRead;
499            write->set<uint64_t>(pte);
500            write->cmd = MemCmd::WriteReq;
501            write->clearDest();
502        } else {
503            write = NULL;
504            delete oldRead->req;
505            delete oldRead;
506        }
507    }
508    return fault;
509}
510
511void
512Walker::WalkerState::endWalk()
513{
514    nextState = Ready;
515    delete read->req;
516    delete read;
517    read = NULL;
518}
519
520void
521Walker::WalkerState::setupWalk(Addr vaddr)
522{
523    VAddr addr = vaddr;
524    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
525    // Check if we're in long mode or not
526    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
527    dataSize = 8;
528    Addr topAddr;
529    if (efer.lma) {
530        // Do long mode.
531        state = LongPML4;
532        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
533        enableNX = efer.nxe;
534    } else {
535        // We're in some flavor of legacy mode.
536        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
537        if (cr4.pae) {
538            // Do legacy PAE.
539            state = PAEPDP;
540            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
541            enableNX = efer.nxe;
542        } else {
543            dataSize = 4;
544            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
545            if (cr4.pse) {
546                // Do legacy PSE.
547                state = PSEPD;
548            } else {
549                // Do legacy non PSE.
550                state = PD;
551            }
552            enableNX = false;
553        }
554    }
555
556    nextState = Ready;
557    entry.vaddr = vaddr;
558
559    Request::Flags flags = Request::PHYSICAL;
560    if (cr3.pcd)
561        flags.set(Request::UNCACHEABLE);
562    RequestPtr request = new Request(topAddr, dataSize, flags,
563                                     walker->masterId);
564    read = new Packet(request, MemCmd::ReadReq);
565    read->allocate();
566}
567
568bool
569Walker::WalkerState::recvPacket(PacketPtr pkt)
570{
571    assert(pkt->isResponse());
572    assert(inflight);
573    assert(state == Waiting);
574    assert(!read);
575    inflight--;
576    if (pkt->isRead()) {
577        state = nextState;
578        nextState = Ready;
579        PacketPtr write = NULL;
580        read = pkt;
581        timingFault = stepWalk(write);
582        state = Waiting;
583        assert(timingFault == NoFault || read == NULL);
584        if (write) {
585            writes.push_back(write);
586        }
587        sendPackets();
588    } else {
589        sendPackets();
590    }
591    if (inflight == 0 && read == NULL && writes.size() == 0) {
592        state = Ready;
593        nextState = Waiting;
594        if (timingFault == NoFault) {
595            /*
596             * Finish the translation. Now that we now the right entry is
597             * in the TLB, this should work with no memory accesses.
598             * There could be new faults unrelated to the table walk like
599             * permissions violations, so we'll need the return value as
600             * well.
601             */
602            bool delayedResponse;
603            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
604                                                 delayedResponse, true);
605            assert(!delayedResponse);
606            // Let the CPU continue.
607            translation->finish(fault, req, tc, mode);
608        } else {
609            // There was a fault during the walk. Let the CPU know.
610            translation->finish(timingFault, req, tc, mode);
611        }
612        return true;
613    }
614
615    return false;
616}
617
618void
619Walker::WalkerState::sendPackets()
620{
621    //If we're already waiting for the port to become available, just return.
622    if (retrying)
623        return;
624
625    //Reads always have priority
626    if (read) {
627        PacketPtr pkt = read;
628        read = NULL;
629        inflight++;
630        if (!walker->sendTiming(this, pkt)) {
631            retrying = true;
632            read = pkt;
633            inflight--;
634            return;
635        }
636    }
637    //Send off as many of the writes as we can.
638    while (writes.size()) {
639        PacketPtr write = writes.back();
640        writes.pop_back();
641        inflight++;
642        if (!walker->sendTiming(this, write)) {
643            retrying = true;
644            writes.push_back(write);
645            inflight--;
646            return;
647        }
648    }
649}
650
651bool
652Walker::WalkerState::isRetrying()
653{
654    return retrying;
655}
656
657bool
658Walker::WalkerState::isTiming()
659{
660    return timing;
661}
662
663bool
664Walker::WalkerState::wasStarted()
665{
666    return started;
667}
668
669void
670Walker::WalkerState::retry()
671{
672    retrying = false;
673    sendPackets();
674}
675
676Fault
677Walker::WalkerState::pageFault(bool present)
678{
679    DPRINTF(PageTableWalker, "Raising page fault.\n");
680    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
681    if (mode == BaseTLB::Execute && !enableNX)
682        mode = BaseTLB::Read;
683    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
684}
685
686/* end namespace X86ISA */ }
687
688X86ISA::Walker *
689X86PagetableWalkerParams::create()
690{
691    return new X86ISA::Walker(this);
692}
693