pagetable_walker.cc revision 8948:e95ee70f876c
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include "arch/x86/pagetable.hh"
53#include "arch/x86/pagetable_walker.hh"
54#include "arch/x86/tlb.hh"
55#include "arch/x86/vtophys.hh"
56#include "base/bitfield.hh"
57#include "cpu/base.hh"
58#include "cpu/thread_context.hh"
59#include "debug/PageTableWalker.hh"
60#include "mem/packet_access.hh"
61#include "mem/request.hh"
62#include "sim/system.hh"
63
64namespace X86ISA {
65
66// Unfortunately, the placement of the base field in a page table entry is
67// very erratic and would make a mess here. It might be moved here at some
68// point in the future.
BitUnion64(PageTableEntry)
    // Bit 63. stepWalk() raises a page fault when this is set on an
    // Execute access while NX is enabled.
    Bitfield<63> nx;
    // Bits 11:9. Not referenced by the walker in this file (named after the
    // x86 "available to software" bits -- presumably unused by hardware).
    Bitfield<11, 9> avl;
    // Copied into the TLB entry's "global" flag for leaf entries.
    Bitfield<8> g;
    // In page directory entries, selects a large page (set) versus a
    // pointer to the next level table (clear).
    Bitfield<7> ps;
    // Not referenced by the walker in this file (x86 "dirty" bit position).
    Bitfield<6> d;
    // Accessed bit. The walker sets it and schedules a write back when it
    // was previously clear.
    Bitfield<5> a;
    // When set, the mapping / the next table read is marked uncacheable.
    Bitfield<4> pcd;
    // Not referenced by the walker in this file (x86 "write-through" bit
    // position).
    Bitfield<3> pwt;
    // User-accessible flag, combined into the TLB entry's "user" flag.
    Bitfield<2> u;
    // Writable flag, combined into the TLB entry's "writable" flag.
    Bitfield<1> w;
    // Present flag. A clear bit ends the walk with a page fault.
    Bitfield<0> p;
EndBitUnion(PageTableEntry)
82
83Fault
84Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
85              RequestPtr _req, BaseTLB::Mode _mode)
86{
87    // TODO: in timing mode, instead of blocking when there are other
88    // outstanding requests, see if this request can be coalesced with
89    // another one (i.e. either coalesce or start walk)
90    WalkerState * newState = new WalkerState(this, _translation, _req);
91    newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing);
92    if (currStates.size()) {
93        assert(newState->isTiming());
94        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
95        currStates.push_back(newState);
96        return NoFault;
97    } else {
98        currStates.push_back(newState);
99        Fault fault = newState->startWalk();
100        if (!newState->isTiming()) {
101            currStates.pop_front();
102            delete newState;
103        }
104        return fault;
105    }
106}
107
108Fault
109Walker::startFunctional(ThreadContext * _tc, Addr &addr, Addr &pageSize,
110              BaseTLB::Mode _mode)
111{
112    funcState.initState(_tc, _mode);
113    return funcState.startFunctional(addr, pageSize);
114}
115
116bool
117Walker::WalkerPort::recvTiming(PacketPtr pkt)
118{
119    return walker->recvTiming(pkt);
120}
121
122bool
123Walker::recvTiming(PacketPtr pkt)
124{
125    assert(pkt->isResponse());
126    WalkerSenderState * senderState =
127        dynamic_cast<WalkerSenderState *>(pkt->senderState);
128    pkt->senderState = senderState->saved;
129    WalkerState * senderWalk = senderState->senderWalk;
130    bool walkComplete = senderWalk->recvPacket(pkt);
131    delete senderState;
132    if (walkComplete) {
133        std::list<WalkerState *>::iterator iter;
134        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
135            WalkerState * walkerState = *(iter);
136            if (walkerState == senderWalk) {
137                iter = currStates.erase(iter);
138                break;
139            }
140        }
141        delete senderWalk;
142        // Since we block requests when another is outstanding, we
143        // need to check if there is a waiting request to be serviced
144        if (currStates.size()) {
145            WalkerState * newState = currStates.front();
146            if (!newState->wasStarted())
147                newState->startWalk();
148        }
149    }
150    return true;
151}
152
153void
154Walker::WalkerPort::recvRetry()
155{
156    walker->recvRetry();
157}
158
159void
160Walker::recvRetry()
161{
162    std::list<WalkerState *>::iterator iter;
163    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
164        WalkerState * walkerState = *(iter);
165        if (walkerState->isRetrying()) {
166            walkerState->retry();
167        }
168    }
169}
170
171bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
172{
173    pkt->senderState = new WalkerSenderState(sendingState, pkt->senderState);
174    return port.sendTiming(pkt);
175}
176
177MasterPort &
178Walker::getMasterPort(const std::string &if_name, int idx)
179{
180    if (if_name == "port")
181        return port;
182    else
183        return MemObject::getMasterPort(if_name, idx);
184}
185
186void
187Walker::WalkerState::initState(ThreadContext * _tc,
188        BaseTLB::Mode _mode, bool _isTiming)
189{
190    assert(state == Ready);
191    started = false;
192    tc = _tc;
193    mode = _mode;
194    timing = _isTiming;
195}
196
197Fault
198Walker::WalkerState::startWalk()
199{
200    Fault fault = NoFault;
201    assert(started == false);
202    started = true;
203    setupWalk(req->getVaddr());
204    if (timing) {
205        nextState = state;
206        state = Waiting;
207        timingFault = NoFault;
208        sendPackets();
209    } else {
210        do {
211            walker->port.sendAtomic(read);
212            PacketPtr write = NULL;
213            fault = stepWalk(write);
214            assert(fault == NoFault || read == NULL);
215            state = nextState;
216            nextState = Ready;
217            if (write)
218                walker->port.sendAtomic(write);
219        } while(read);
220        state = Ready;
221        nextState = Waiting;
222    }
223    return fault;
224}
225
226Fault
227Walker::WalkerState::startFunctional(Addr &addr, Addr &pageSize)
228{
229    Fault fault = NoFault;
230    assert(started == false);
231    started = true;
232    setupWalk(addr);
233
234    do {
235        walker->port.sendFunctional(read);
236        // On a functional access (page table lookup), writes should
237        // not happen so this pointer is ignored after stepWalk
238        PacketPtr write = NULL;
239        fault = stepWalk(write);
240        assert(fault == NoFault || read == NULL);
241        state = nextState;
242        nextState = Ready;
243    } while(read);
244    pageSize = entry.size;
245    addr = entry.paddr;
246
247    return fault;
248}
249
250Fault
251Walker::WalkerState::stepWalk(PacketPtr &write)
252{
253    assert(state != Ready && state != Waiting);
254    Fault fault = NoFault;
255    write = NULL;
256    PageTableEntry pte;
257    if (dataSize == 8)
258        pte = read->get<uint64_t>();
259    else
260        pte = read->get<uint32_t>();
261    VAddr vaddr = entry.vaddr;
262    bool uncacheable = pte.pcd;
263    Addr nextRead = 0;
264    bool doWrite = false;
265    bool doTLBInsert = false;
266    bool doEndWalk = false;
267    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
268    switch(state) {
269      case LongPML4:
270        DPRINTF(PageTableWalker,
271                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
272        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
273        doWrite = !pte.a;
274        pte.a = 1;
275        entry.writable = pte.w;
276        entry.user = pte.u;
277        if (badNX || !pte.p) {
278            doEndWalk = true;
279            fault = pageFault(pte.p);
280            break;
281        }
282        entry.noExec = pte.nx;
283        nextState = LongPDP;
284        break;
285      case LongPDP:
286        DPRINTF(PageTableWalker,
287                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
288        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
289        doWrite = !pte.a;
290        pte.a = 1;
291        entry.writable = entry.writable && pte.w;
292        entry.user = entry.user && pte.u;
293        if (badNX || !pte.p) {
294            doEndWalk = true;
295            fault = pageFault(pte.p);
296            break;
297        }
298        nextState = LongPD;
299        break;
300      case LongPD:
301        DPRINTF(PageTableWalker,
302                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
303        doWrite = !pte.a;
304        pte.a = 1;
305        entry.writable = entry.writable && pte.w;
306        entry.user = entry.user && pte.u;
307        if (badNX || !pte.p) {
308            doEndWalk = true;
309            fault = pageFault(pte.p);
310            break;
311        }
312        if (!pte.ps) {
313            // 4 KB page
314            entry.size = 4 * (1 << 10);
315            nextRead =
316                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
317            nextState = LongPTE;
318            break;
319        } else {
320            // 2 MB page
321            entry.size = 2 * (1 << 20);
322            entry.paddr = (uint64_t)pte & (mask(31) << 21);
323            entry.uncacheable = uncacheable;
324            entry.global = pte.g;
325            entry.patBit = bits(pte, 12);
326            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
327            doTLBInsert = true;
328            doEndWalk = true;
329            break;
330        }
331      case LongPTE:
332        DPRINTF(PageTableWalker,
333                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
334        doWrite = !pte.a;
335        pte.a = 1;
336        entry.writable = entry.writable && pte.w;
337        entry.user = entry.user && pte.u;
338        if (badNX || !pte.p) {
339            doEndWalk = true;
340            fault = pageFault(pte.p);
341            break;
342        }
343        entry.paddr = (uint64_t)pte & (mask(40) << 12);
344        entry.uncacheable = uncacheable;
345        entry.global = pte.g;
346        entry.patBit = bits(pte, 12);
347        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
348        doTLBInsert = true;
349        doEndWalk = true;
350        break;
351      case PAEPDP:
352        DPRINTF(PageTableWalker,
353                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
354        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
355        if (!pte.p) {
356            doEndWalk = true;
357            fault = pageFault(pte.p);
358            break;
359        }
360        nextState = PAEPD;
361        break;
362      case PAEPD:
363        DPRINTF(PageTableWalker,
364                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
365        doWrite = !pte.a;
366        pte.a = 1;
367        entry.writable = pte.w;
368        entry.user = pte.u;
369        if (badNX || !pte.p) {
370            doEndWalk = true;
371            fault = pageFault(pte.p);
372            break;
373        }
374        if (!pte.ps) {
375            // 4 KB page
376            entry.size = 4 * (1 << 10);
377            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
378            nextState = PAEPTE;
379            break;
380        } else {
381            // 2 MB page
382            entry.size = 2 * (1 << 20);
383            entry.paddr = (uint64_t)pte & (mask(31) << 21);
384            entry.uncacheable = uncacheable;
385            entry.global = pte.g;
386            entry.patBit = bits(pte, 12);
387            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
388            doTLBInsert = true;
389            doEndWalk = true;
390            break;
391        }
392      case PAEPTE:
393        DPRINTF(PageTableWalker,
394                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
395        doWrite = !pte.a;
396        pte.a = 1;
397        entry.writable = entry.writable && pte.w;
398        entry.user = entry.user && pte.u;
399        if (badNX || !pte.p) {
400            doEndWalk = true;
401            fault = pageFault(pte.p);
402            break;
403        }
404        entry.paddr = (uint64_t)pte & (mask(40) << 12);
405        entry.uncacheable = uncacheable;
406        entry.global = pte.g;
407        entry.patBit = bits(pte, 7);
408        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
409        doTLBInsert = true;
410        doEndWalk = true;
411        break;
412      case PSEPD:
413        DPRINTF(PageTableWalker,
414                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
415        doWrite = !pte.a;
416        pte.a = 1;
417        entry.writable = pte.w;
418        entry.user = pte.u;
419        if (!pte.p) {
420            doEndWalk = true;
421            fault = pageFault(pte.p);
422            break;
423        }
424        if (!pte.ps) {
425            // 4 KB page
426            entry.size = 4 * (1 << 10);
427            nextRead =
428                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
429            nextState = PTE;
430            break;
431        } else {
432            // 4 MB page
433            entry.size = 4 * (1 << 20);
434            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
435            entry.uncacheable = uncacheable;
436            entry.global = pte.g;
437            entry.patBit = bits(pte, 12);
438            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
439            doTLBInsert = true;
440            doEndWalk = true;
441            break;
442        }
443      case PD:
444        DPRINTF(PageTableWalker,
445                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
446        doWrite = !pte.a;
447        pte.a = 1;
448        entry.writable = pte.w;
449        entry.user = pte.u;
450        if (!pte.p) {
451            doEndWalk = true;
452            fault = pageFault(pte.p);
453            break;
454        }
455        // 4 KB page
456        entry.size = 4 * (1 << 10);
457        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
458        nextState = PTE;
459        break;
460      case PTE:
461        DPRINTF(PageTableWalker,
462                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
463        doWrite = !pte.a;
464        pte.a = 1;
465        entry.writable = pte.w;
466        entry.user = pte.u;
467        if (!pte.p) {
468            doEndWalk = true;
469            fault = pageFault(pte.p);
470            break;
471        }
472        entry.paddr = (uint64_t)pte & (mask(20) << 12);
473        entry.uncacheable = uncacheable;
474        entry.global = pte.g;
475        entry.patBit = bits(pte, 7);
476        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
477        doTLBInsert = true;
478        doEndWalk = true;
479        break;
480      default:
481        panic("Unknown page table walker state %d!\n");
482    }
483    if (doEndWalk) {
484        if (doTLBInsert)
485            if (!functional)
486                walker->tlb->insert(entry.vaddr, entry);
487        endWalk();
488    } else {
489        PacketPtr oldRead = read;
490        //If we didn't return, we're setting up another read.
491        Request::Flags flags = oldRead->req->getFlags();
492        flags.set(Request::UNCACHEABLE, uncacheable);
493        RequestPtr request =
494            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
495        read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast);
496        read->allocate();
497        // If we need to write, adjust the read packet to write the modified
498        // value back to memory.
499        if (doWrite) {
500            write = oldRead;
501            write->set<uint64_t>(pte);
502            write->cmd = MemCmd::WriteReq;
503            write->setDest(Packet::Broadcast);
504        } else {
505            write = NULL;
506            delete oldRead->req;
507            delete oldRead;
508        }
509    }
510    return fault;
511}
512
513void
514Walker::WalkerState::endWalk()
515{
516    nextState = Ready;
517    delete read->req;
518    delete read;
519    read = NULL;
520}
521
522void
523Walker::WalkerState::setupWalk(Addr vaddr)
524{
525    VAddr addr = vaddr;
526    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
527    // Check if we're in long mode or not
528    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
529    dataSize = 8;
530    Addr topAddr;
531    if (efer.lma) {
532        // Do long mode.
533        state = LongPML4;
534        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
535        enableNX = efer.nxe;
536    } else {
537        // We're in some flavor of legacy mode.
538        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
539        if (cr4.pae) {
540            // Do legacy PAE.
541            state = PAEPDP;
542            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
543            enableNX = efer.nxe;
544        } else {
545            dataSize = 4;
546            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
547            if (cr4.pse) {
548                // Do legacy PSE.
549                state = PSEPD;
550            } else {
551                // Do legacy non PSE.
552                state = PD;
553            }
554            enableNX = false;
555        }
556    }
557
558    nextState = Ready;
559    entry.vaddr = vaddr;
560
561    Request::Flags flags = Request::PHYSICAL;
562    if (cr3.pcd)
563        flags.set(Request::UNCACHEABLE);
564    RequestPtr request = new Request(topAddr, dataSize, flags, walker->masterId);
565    read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast);
566    read->allocate();
567}
568
569bool
570Walker::WalkerState::recvPacket(PacketPtr pkt)
571{
572    assert(pkt->isResponse());
573    if (!pkt->wasNacked()) {
574        assert(inflight);
575        assert(state == Waiting);
576        assert(!read);
577        inflight--;
578        if (pkt->isRead()) {
579            state = nextState;
580            nextState = Ready;
581            PacketPtr write = NULL;
582            read = pkt;
583            timingFault = stepWalk(write);
584            state = Waiting;
585            assert(timingFault == NoFault || read == NULL);
586            if (write) {
587                writes.push_back(write);
588            }
589            sendPackets();
590        } else {
591            sendPackets();
592        }
593        if (inflight == 0 && read == NULL && writes.size() == 0) {
594            state = Ready;
595            nextState = Waiting;
596            if (timingFault == NoFault) {
597                /*
598                 * Finish the translation. Now that we now the right entry is
599                 * in the TLB, this should work with no memory accesses.
600                 * There could be new faults unrelated to the table walk like
601                 * permissions violations, so we'll need the return value as
602                 * well.
603                 */
604                bool delayedResponse;
605                Fault fault = walker->tlb->translate(req, tc, NULL, mode,
606                        delayedResponse, true);
607                assert(!delayedResponse);
608                // Let the CPU continue.
609                translation->finish(fault, req, tc, mode);
610            } else {
611                // There was a fault during the walk. Let the CPU know.
612                translation->finish(timingFault, req, tc, mode);
613            }
614            return true;
615        }
616    } else {
617        DPRINTF(PageTableWalker, "Request was nacked. Entering retry state\n");
618        pkt->reinitNacked();
619        if (!walker->sendTiming(this, pkt)) {
620            inflight--;
621            retrying = true;
622            if (pkt->isWrite()) {
623                writes.push_back(pkt);
624            } else {
625                assert(!read);
626                read = pkt;
627            }
628        }
629    }
630    return false;
631}
632
633void
634Walker::WalkerState::sendPackets()
635{
636    //If we're already waiting for the port to become available, just return.
637    if (retrying)
638        return;
639
640    //Reads always have priority
641    if (read) {
642        PacketPtr pkt = read;
643        read = NULL;
644        inflight++;
645        if (!walker->sendTiming(this, pkt)) {
646            retrying = true;
647            read = pkt;
648            inflight--;
649            return;
650        }
651    }
652    //Send off as many of the writes as we can.
653    while (writes.size()) {
654        PacketPtr write = writes.back();
655        writes.pop_back();
656        inflight++;
657        if (!walker->sendTiming(this, write)) {
658            retrying = true;
659            writes.push_back(write);
660            inflight--;
661            return;
662        }
663    }
664}
665
666bool
667Walker::WalkerState::isRetrying()
668{
669    return retrying;
670}
671
672bool
673Walker::WalkerState::isTiming()
674{
675    return timing;
676}
677
678bool
679Walker::WalkerState::wasStarted()
680{
681    return started;
682}
683
684void
685Walker::WalkerState::retry()
686{
687    retrying = false;
688    sendPackets();
689}
690
691Fault
692Walker::WalkerState::pageFault(bool present)
693{
694    DPRINTF(PageTableWalker, "Raising page fault.\n");
695    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
696    if (mode == BaseTLB::Execute && !enableNX)
697        mode = BaseTLB::Read;
698    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
699}
700
701/* end namespace X86ISA */ }
702
703X86ISA::Walker *
704X86PagetableWalkerParams::create()
705{
706    return new X86ISA::Walker(this);
707}
708