pagetable_walker.cc revision 9165
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include "arch/x86/pagetable.hh"
53#include "arch/x86/pagetable_walker.hh"
54#include "arch/x86/tlb.hh"
55#include "arch/x86/vtophys.hh"
56#include "base/bitfield.hh"
57#include "base/trie.hh"
58#include "cpu/base.hh"
59#include "cpu/thread_context.hh"
60#include "debug/PageTableWalker.hh"
61#include "mem/packet_access.hh"
62#include "mem/request.hh"
63
64namespace X86ISA {
65
66// Unfortunately, the placement of the base field in a page table entry is
67// very erratic and would make a mess here. It might be moved here at some
68// point in the future.
69BitUnion64(PageTableEntry)
70    Bitfield<63> nx;
71    Bitfield<11, 9> avl;
72    Bitfield<8> g;
73    Bitfield<7> ps;
74    Bitfield<6> d;
75    Bitfield<5> a;
76    Bitfield<4> pcd;
77    Bitfield<3> pwt;
78    Bitfield<2> u;
79    Bitfield<1> w;
80    Bitfield<0> p;
81EndBitUnion(PageTableEntry)
82
83Fault
84Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
85              RequestPtr _req, BaseTLB::Mode _mode)
86{
87    // TODO: in timing mode, instead of blocking when there are other
88    // outstanding requests, see if this request can be coalesced with
89    // another one (i.e. either coalesce or start walk)
90    WalkerState * newState = new WalkerState(this, _translation, _req);
91    newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing);
92    if (currStates.size()) {
93        assert(newState->isTiming());
94        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
95        currStates.push_back(newState);
96        return NoFault;
97    } else {
98        currStates.push_back(newState);
99        Fault fault = newState->startWalk();
100        if (!newState->isTiming()) {
101            currStates.pop_front();
102            delete newState;
103        }
104        return fault;
105    }
106}
107
108Fault
109Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
110              BaseTLB::Mode _mode)
111{
112    funcState.initState(_tc, _mode);
113    return funcState.startFunctional(addr, logBytes);
114}
115
116bool
117Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
118{
119    return walker->recvTimingResp(pkt);
120}
121
122bool
123Walker::recvTimingResp(PacketPtr pkt)
124{
125    WalkerSenderState * senderState =
126        dynamic_cast<WalkerSenderState *>(pkt->senderState);
127    pkt->senderState = senderState->saved;
128    WalkerState * senderWalk = senderState->senderWalk;
129    bool walkComplete = senderWalk->recvPacket(pkt);
130    delete senderState;
131    if (walkComplete) {
132        std::list<WalkerState *>::iterator iter;
133        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
134            WalkerState * walkerState = *(iter);
135            if (walkerState == senderWalk) {
136                iter = currStates.erase(iter);
137                break;
138            }
139        }
140        delete senderWalk;
141        // Since we block requests when another is outstanding, we
142        // need to check if there is a waiting request to be serviced
143        if (currStates.size()) {
144            WalkerState * newState = currStates.front();
145            if (!newState->wasStarted())
146                newState->startWalk();
147        }
148    }
149    return true;
150}
151
152void
153Walker::WalkerPort::recvRetry()
154{
155    walker->recvRetry();
156}
157
158void
159Walker::recvRetry()
160{
161    std::list<WalkerState *>::iterator iter;
162    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
163        WalkerState * walkerState = *(iter);
164        if (walkerState->isRetrying()) {
165            walkerState->retry();
166        }
167    }
168}
169
170bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
171{
172    pkt->senderState = new WalkerSenderState(sendingState, pkt->senderState);
173    return port.sendTimingReq(pkt);
174}
175
176MasterPort &
177Walker::getMasterPort(const std::string &if_name, int idx)
178{
179    if (if_name == "port")
180        return port;
181    else
182        return MemObject::getMasterPort(if_name, idx);
183}
184
185void
186Walker::WalkerState::initState(ThreadContext * _tc,
187        BaseTLB::Mode _mode, bool _isTiming)
188{
189    assert(state == Ready);
190    started = false;
191    tc = _tc;
192    mode = _mode;
193    timing = _isTiming;
194}
195
196Fault
197Walker::WalkerState::startWalk()
198{
199    Fault fault = NoFault;
200    assert(started == false);
201    started = true;
202    setupWalk(req->getVaddr());
203    if (timing) {
204        nextState = state;
205        state = Waiting;
206        timingFault = NoFault;
207        sendPackets();
208    } else {
209        do {
210            walker->port.sendAtomic(read);
211            PacketPtr write = NULL;
212            fault = stepWalk(write);
213            assert(fault == NoFault || read == NULL);
214            state = nextState;
215            nextState = Ready;
216            if (write)
217                walker->port.sendAtomic(write);
218        } while(read);
219        state = Ready;
220        nextState = Waiting;
221    }
222    return fault;
223}
224
225Fault
226Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
227{
228    Fault fault = NoFault;
229    assert(started == false);
230    started = true;
231    setupWalk(addr);
232
233    do {
234        walker->port.sendFunctional(read);
235        // On a functional access (page table lookup), writes should
236        // not happen so this pointer is ignored after stepWalk
237        PacketPtr write = NULL;
238        fault = stepWalk(write);
239        assert(fault == NoFault || read == NULL);
240        state = nextState;
241        nextState = Ready;
242    } while(read);
243    logBytes = entry.logBytes;
244    addr = entry.paddr;
245
246    return fault;
247}
248
249Fault
250Walker::WalkerState::stepWalk(PacketPtr &write)
251{
252    assert(state != Ready && state != Waiting);
253    Fault fault = NoFault;
254    write = NULL;
255    PageTableEntry pte;
256    if (dataSize == 8)
257        pte = read->get<uint64_t>();
258    else
259        pte = read->get<uint32_t>();
260    VAddr vaddr = entry.vaddr;
261    bool uncacheable = pte.pcd;
262    Addr nextRead = 0;
263    bool doWrite = false;
264    bool doTLBInsert = false;
265    bool doEndWalk = false;
266    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
267    switch(state) {
268      case LongPML4:
269        DPRINTF(PageTableWalker,
270                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
271        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
272        doWrite = !pte.a;
273        pte.a = 1;
274        entry.writable = pte.w;
275        entry.user = pte.u;
276        if (badNX || !pte.p) {
277            doEndWalk = true;
278            fault = pageFault(pte.p);
279            break;
280        }
281        entry.noExec = pte.nx;
282        nextState = LongPDP;
283        break;
284      case LongPDP:
285        DPRINTF(PageTableWalker,
286                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
287        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
288        doWrite = !pte.a;
289        pte.a = 1;
290        entry.writable = entry.writable && pte.w;
291        entry.user = entry.user && pte.u;
292        if (badNX || !pte.p) {
293            doEndWalk = true;
294            fault = pageFault(pte.p);
295            break;
296        }
297        nextState = LongPD;
298        break;
299      case LongPD:
300        DPRINTF(PageTableWalker,
301                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
302        doWrite = !pte.a;
303        pte.a = 1;
304        entry.writable = entry.writable && pte.w;
305        entry.user = entry.user && pte.u;
306        if (badNX || !pte.p) {
307            doEndWalk = true;
308            fault = pageFault(pte.p);
309            break;
310        }
311        if (!pte.ps) {
312            // 4 KB page
313            entry.logBytes = 12;
314            nextRead =
315                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
316            nextState = LongPTE;
317            break;
318        } else {
319            // 2 MB page
320            entry.logBytes = 21;
321            entry.paddr = (uint64_t)pte & (mask(31) << 21);
322            entry.uncacheable = uncacheable;
323            entry.global = pte.g;
324            entry.patBit = bits(pte, 12);
325            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
326            doTLBInsert = true;
327            doEndWalk = true;
328            break;
329        }
330      case LongPTE:
331        DPRINTF(PageTableWalker,
332                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
333        doWrite = !pte.a;
334        pte.a = 1;
335        entry.writable = entry.writable && pte.w;
336        entry.user = entry.user && pte.u;
337        if (badNX || !pte.p) {
338            doEndWalk = true;
339            fault = pageFault(pte.p);
340            break;
341        }
342        entry.paddr = (uint64_t)pte & (mask(40) << 12);
343        entry.uncacheable = uncacheable;
344        entry.global = pte.g;
345        entry.patBit = bits(pte, 12);
346        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
347        doTLBInsert = true;
348        doEndWalk = true;
349        break;
350      case PAEPDP:
351        DPRINTF(PageTableWalker,
352                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
353        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
354        if (!pte.p) {
355            doEndWalk = true;
356            fault = pageFault(pte.p);
357            break;
358        }
359        nextState = PAEPD;
360        break;
361      case PAEPD:
362        DPRINTF(PageTableWalker,
363                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
364        doWrite = !pte.a;
365        pte.a = 1;
366        entry.writable = pte.w;
367        entry.user = pte.u;
368        if (badNX || !pte.p) {
369            doEndWalk = true;
370            fault = pageFault(pte.p);
371            break;
372        }
373        if (!pte.ps) {
374            // 4 KB page
375            entry.logBytes = 12;
376            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
377            nextState = PAEPTE;
378            break;
379        } else {
380            // 2 MB page
381            entry.logBytes = 21;
382            entry.paddr = (uint64_t)pte & (mask(31) << 21);
383            entry.uncacheable = uncacheable;
384            entry.global = pte.g;
385            entry.patBit = bits(pte, 12);
386            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
387            doTLBInsert = true;
388            doEndWalk = true;
389            break;
390        }
391      case PAEPTE:
392        DPRINTF(PageTableWalker,
393                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
394        doWrite = !pte.a;
395        pte.a = 1;
396        entry.writable = entry.writable && pte.w;
397        entry.user = entry.user && pte.u;
398        if (badNX || !pte.p) {
399            doEndWalk = true;
400            fault = pageFault(pte.p);
401            break;
402        }
403        entry.paddr = (uint64_t)pte & (mask(40) << 12);
404        entry.uncacheable = uncacheable;
405        entry.global = pte.g;
406        entry.patBit = bits(pte, 7);
407        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
408        doTLBInsert = true;
409        doEndWalk = true;
410        break;
411      case PSEPD:
412        DPRINTF(PageTableWalker,
413                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
414        doWrite = !pte.a;
415        pte.a = 1;
416        entry.writable = pte.w;
417        entry.user = pte.u;
418        if (!pte.p) {
419            doEndWalk = true;
420            fault = pageFault(pte.p);
421            break;
422        }
423        if (!pte.ps) {
424            // 4 KB page
425            entry.logBytes = 12;
426            nextRead =
427                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
428            nextState = PTE;
429            break;
430        } else {
431            // 4 MB page
432            entry.logBytes = 21;
433            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
434            entry.uncacheable = uncacheable;
435            entry.global = pte.g;
436            entry.patBit = bits(pte, 12);
437            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
438            doTLBInsert = true;
439            doEndWalk = true;
440            break;
441        }
442      case PD:
443        DPRINTF(PageTableWalker,
444                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
445        doWrite = !pte.a;
446        pte.a = 1;
447        entry.writable = pte.w;
448        entry.user = pte.u;
449        if (!pte.p) {
450            doEndWalk = true;
451            fault = pageFault(pte.p);
452            break;
453        }
454        // 4 KB page
455        entry.logBytes = 12;
456        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
457        nextState = PTE;
458        break;
459      case PTE:
460        DPRINTF(PageTableWalker,
461                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
462        doWrite = !pte.a;
463        pte.a = 1;
464        entry.writable = pte.w;
465        entry.user = pte.u;
466        if (!pte.p) {
467            doEndWalk = true;
468            fault = pageFault(pte.p);
469            break;
470        }
471        entry.paddr = (uint64_t)pte & (mask(20) << 12);
472        entry.uncacheable = uncacheable;
473        entry.global = pte.g;
474        entry.patBit = bits(pte, 7);
475        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
476        doTLBInsert = true;
477        doEndWalk = true;
478        break;
479      default:
480        panic("Unknown page table walker state %d!\n");
481    }
482    if (doEndWalk) {
483        if (doTLBInsert)
484            if (!functional)
485                walker->tlb->insert(entry.vaddr, entry);
486        endWalk();
487    } else {
488        PacketPtr oldRead = read;
489        //If we didn't return, we're setting up another read.
490        Request::Flags flags = oldRead->req->getFlags();
491        flags.set(Request::UNCACHEABLE, uncacheable);
492        RequestPtr request =
493            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
494        read = new Packet(request, MemCmd::ReadReq);
495        read->allocate();
496        // If we need to write, adjust the read packet to write the modified
497        // value back to memory.
498        if (doWrite) {
499            write = oldRead;
500            write->set<uint64_t>(pte);
501            write->cmd = MemCmd::WriteReq;
502            write->clearDest();
503        } else {
504            write = NULL;
505            delete oldRead->req;
506            delete oldRead;
507        }
508    }
509    return fault;
510}
511
512void
513Walker::WalkerState::endWalk()
514{
515    nextState = Ready;
516    delete read->req;
517    delete read;
518    read = NULL;
519}
520
521void
522Walker::WalkerState::setupWalk(Addr vaddr)
523{
524    VAddr addr = vaddr;
525    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
526    // Check if we're in long mode or not
527    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
528    dataSize = 8;
529    Addr topAddr;
530    if (efer.lma) {
531        // Do long mode.
532        state = LongPML4;
533        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
534        enableNX = efer.nxe;
535    } else {
536        // We're in some flavor of legacy mode.
537        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
538        if (cr4.pae) {
539            // Do legacy PAE.
540            state = PAEPDP;
541            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
542            enableNX = efer.nxe;
543        } else {
544            dataSize = 4;
545            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
546            if (cr4.pse) {
547                // Do legacy PSE.
548                state = PSEPD;
549            } else {
550                // Do legacy non PSE.
551                state = PD;
552            }
553            enableNX = false;
554        }
555    }
556
557    nextState = Ready;
558    entry.vaddr = vaddr;
559
560    Request::Flags flags = Request::PHYSICAL;
561    if (cr3.pcd)
562        flags.set(Request::UNCACHEABLE);
563    RequestPtr request = new Request(topAddr, dataSize, flags,
564                                     walker->masterId);
565    read = new Packet(request, MemCmd::ReadReq);
566    read->allocate();
567}
568
569bool
570Walker::WalkerState::recvPacket(PacketPtr pkt)
571{
572    assert(pkt->isResponse());
573    assert(inflight);
574    assert(state == Waiting);
575    assert(!read);
576    inflight--;
577    if (pkt->isRead()) {
578        state = nextState;
579        nextState = Ready;
580        PacketPtr write = NULL;
581        read = pkt;
582        timingFault = stepWalk(write);
583        state = Waiting;
584        assert(timingFault == NoFault || read == NULL);
585        if (write) {
586            writes.push_back(write);
587        }
588        sendPackets();
589    } else {
590        sendPackets();
591    }
592    if (inflight == 0 && read == NULL && writes.size() == 0) {
593        state = Ready;
594        nextState = Waiting;
595        if (timingFault == NoFault) {
596            /*
597             * Finish the translation. Now that we now the right entry is
598             * in the TLB, this should work with no memory accesses.
599             * There could be new faults unrelated to the table walk like
600             * permissions violations, so we'll need the return value as
601             * well.
602             */
603            bool delayedResponse;
604            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
605                                                 delayedResponse, true);
606            assert(!delayedResponse);
607            // Let the CPU continue.
608            translation->finish(fault, req, tc, mode);
609        } else {
610            // There was a fault during the walk. Let the CPU know.
611            translation->finish(timingFault, req, tc, mode);
612        }
613        return true;
614    }
615
616    return false;
617}
618
619void
620Walker::WalkerState::sendPackets()
621{
622    //If we're already waiting for the port to become available, just return.
623    if (retrying)
624        return;
625
626    //Reads always have priority
627    if (read) {
628        PacketPtr pkt = read;
629        read = NULL;
630        inflight++;
631        if (!walker->sendTiming(this, pkt)) {
632            retrying = true;
633            read = pkt;
634            inflight--;
635            return;
636        }
637    }
638    //Send off as many of the writes as we can.
639    while (writes.size()) {
640        PacketPtr write = writes.back();
641        writes.pop_back();
642        inflight++;
643        if (!walker->sendTiming(this, write)) {
644            retrying = true;
645            writes.push_back(write);
646            inflight--;
647            return;
648        }
649    }
650}
651
652bool
653Walker::WalkerState::isRetrying()
654{
655    return retrying;
656}
657
658bool
659Walker::WalkerState::isTiming()
660{
661    return timing;
662}
663
664bool
665Walker::WalkerState::wasStarted()
666{
667    return started;
668}
669
670void
671Walker::WalkerState::retry()
672{
673    retrying = false;
674    sendPackets();
675}
676
677Fault
678Walker::WalkerState::pageFault(bool present)
679{
680    DPRINTF(PageTableWalker, "Raising page fault.\n");
681    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
682    if (mode == BaseTLB::Execute && !enableNX)
683        mode = BaseTLB::Read;
684    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
685}
686
687/* end namespace X86ISA */ }
688
689X86ISA::Walker *
690X86PagetableWalkerParams::create()
691{
692    return new X86ISA::Walker(this);
693}
694