pagetable_walker.cc revision 7912:a9f05ab40763
1/*
2 * Copyright (c) 2007 The Hewlett-Packard Development Company
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#include "arch/x86/pagetable.hh"
41#include "arch/x86/pagetable_walker.hh"
42#include "arch/x86/tlb.hh"
43#include "arch/x86/vtophys.hh"
44#include "base/bitfield.hh"
45#include "cpu/thread_context.hh"
46#include "cpu/base.hh"
47#include "mem/packet_access.hh"
48#include "mem/request.hh"
49#include "sim/system.hh"
50
51namespace X86ISA {
52
53// Unfortunately, the placement of the base field in a page table entry is
54// very erratic and would make a mess here. It might be moved here at some
55// point in the future.
56BitUnion64(PageTableEntry)
57    Bitfield<63> nx;
58    Bitfield<11, 9> avl;
59    Bitfield<8> g;
60    Bitfield<7> ps;
61    Bitfield<6> d;
62    Bitfield<5> a;
63    Bitfield<4> pcd;
64    Bitfield<3> pwt;
65    Bitfield<2> u;
66    Bitfield<1> w;
67    Bitfield<0> p;
68EndBitUnion(PageTableEntry)
69
70Fault
71Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
72              RequestPtr _req, BaseTLB::Mode _mode)
73{
74    // TODO: in timing mode, instead of blocking when there are other
75    // outstanding requests, see if this request can be coalesced with
76    // another one (i.e. either coalesce or start walk)
77    WalkerState * newState = new WalkerState(this, _translation, _req);
78    newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing);
79    if (currStates.size()) {
80        assert(newState->isTiming());
81        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
82        currStates.push_back(newState);
83        return NoFault;
84    } else {
85        currStates.push_back(newState);
86        Fault fault = newState->startWalk();
87        if (!newState->isTiming()) {
88            currStates.pop_front();
89            delete newState;
90        }
91        return fault;
92    }
93}
94
95Fault
96Walker::startFunctional(ThreadContext * _tc, Addr &addr, Addr &pageSize,
97              BaseTLB::Mode _mode)
98{
99    funcState.initState(_tc, _mode);
100    return funcState.startFunctional(addr, pageSize);
101}
102
103bool
104Walker::WalkerPort::recvTiming(PacketPtr pkt)
105{
106    return walker->recvTiming(pkt);
107}
108
109bool
110Walker::recvTiming(PacketPtr pkt)
111{
112    if (pkt->isResponse() || pkt->wasNacked()) {
113        WalkerSenderState * senderState =
114                dynamic_cast<WalkerSenderState *>(pkt->senderState);
115        pkt->senderState = senderState->saved;
116        WalkerState * senderWalk = senderState->senderWalk;
117        bool walkComplete = senderWalk->recvPacket(pkt);
118        delete senderState;
119        if (walkComplete) {
120            std::list<WalkerState *>::iterator iter;
121            for (iter = currStates.begin(); iter != currStates.end(); iter++) {
122                WalkerState * walkerState = *(iter);
123                if (walkerState == senderWalk) {
124                    iter = currStates.erase(iter);
125                    break;
126                }
127            }
128            delete senderWalk;
129            // Since we block requests when another is outstanding, we
130            // need to check if there is a waiting request to be serviced
131            if (currStates.size()) {
132                WalkerState * newState = currStates.front();
133                if (!newState->wasStarted())
134                    newState->startWalk();
135            }
136        }
137    } else {
138        DPRINTF(PageTableWalker, "Received strange packet\n");
139    }
140    return true;
141}
142
143Tick
144Walker::WalkerPort::recvAtomic(PacketPtr pkt)
145{
146    return 0;
147}
148
149void
150Walker::WalkerPort::recvFunctional(PacketPtr pkt)
151{
152    return;
153}
154
155void
156Walker::WalkerPort::recvStatusChange(Status status)
157{
158    if (status == RangeChange) {
159        if (!snoopRangeSent) {
160            snoopRangeSent = true;
161            sendStatusChange(Port::RangeChange);
162        }
163        return;
164    }
165
166    panic("Unexpected recvStatusChange.\n");
167}
168
169void
170Walker::WalkerPort::recvRetry()
171{
172    walker->recvRetry();
173}
174
175void
176Walker::recvRetry()
177{
178    std::list<WalkerState *>::iterator iter;
179    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
180        WalkerState * walkerState = *(iter);
181        if (walkerState->isRetrying()) {
182            walkerState->retry();
183        }
184    }
185}
186
187bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
188{
189    pkt->senderState = new WalkerSenderState(sendingState, pkt->senderState);
190    return port.sendTiming(pkt);
191}
192
193Port *
194Walker::getPort(const std::string &if_name, int idx)
195{
196    if (if_name == "port")
197        return &port;
198    else
199        panic("No page table walker port named %s!\n", if_name);
200}
201
202void
203Walker::WalkerState::initState(ThreadContext * _tc,
204        BaseTLB::Mode _mode, bool _isTiming)
205{
206    assert(state == Ready);
207    started = false;
208    tc = _tc;
209    mode = _mode;
210    timing = _isTiming;
211}
212
213Fault
214Walker::WalkerState::startWalk()
215{
216    Fault fault = NoFault;
217    assert(started == false);
218    started = true;
219    setupWalk(req->getVaddr());
220    if (timing) {
221        nextState = state;
222        state = Waiting;
223        timingFault = NoFault;
224        sendPackets();
225    } else {
226        do {
227            walker->port.sendAtomic(read);
228            PacketPtr write = NULL;
229            fault = stepWalk(write);
230            assert(fault == NoFault || read == NULL);
231            state = nextState;
232            nextState = Ready;
233            if (write)
234                walker->port.sendAtomic(write);
235        } while(read);
236        state = Ready;
237        nextState = Waiting;
238    }
239    return fault;
240}
241
242Fault
243Walker::WalkerState::startFunctional(Addr &addr, Addr &pageSize)
244{
245    Fault fault = NoFault;
246    assert(started == false);
247    started = true;
248    setupWalk(addr);
249
250    do {
251        walker->port.sendFunctional(read);
252        // On a functional access (page table lookup), writes should
253        // not happen so this pointer is ignored after stepWalk
254        PacketPtr write = NULL;
255        fault = stepWalk(write);
256        assert(fault == NoFault || read == NULL);
257        state = nextState;
258        nextState = Ready;
259    } while(read);
260    pageSize = entry.size;
261    addr = entry.paddr;
262
263    return fault;
264}
265
266Fault
267Walker::WalkerState::stepWalk(PacketPtr &write)
268{
269    assert(state != Ready && state != Waiting);
270    Fault fault = NoFault;
271    write = NULL;
272    PageTableEntry pte;
273    if (dataSize == 8)
274        pte = read->get<uint64_t>();
275    else
276        pte = read->get<uint32_t>();
277    VAddr vaddr = entry.vaddr;
278    bool uncacheable = pte.pcd;
279    Addr nextRead = 0;
280    bool doWrite = false;
281    bool doTLBInsert = false;
282    bool doEndWalk = false;
283    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
284    switch(state) {
285      case LongPML4:
286        DPRINTF(PageTableWalker,
287                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
288        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
289        doWrite = !pte.a;
290        pte.a = 1;
291        entry.writable = pte.w;
292        entry.user = pte.u;
293        if (badNX || !pte.p) {
294            doEndWalk = true;
295            fault = pageFault(pte.p);
296            break;
297        }
298        entry.noExec = pte.nx;
299        nextState = LongPDP;
300        break;
301      case LongPDP:
302        DPRINTF(PageTableWalker,
303                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
304        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
305        doWrite = !pte.a;
306        pte.a = 1;
307        entry.writable = entry.writable && pte.w;
308        entry.user = entry.user && pte.u;
309        if (badNX || !pte.p) {
310            doEndWalk = true;
311            fault = pageFault(pte.p);
312            break;
313        }
314        nextState = LongPD;
315        break;
316      case LongPD:
317        DPRINTF(PageTableWalker,
318                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
319        doWrite = !pte.a;
320        pte.a = 1;
321        entry.writable = entry.writable && pte.w;
322        entry.user = entry.user && pte.u;
323        if (badNX || !pte.p) {
324            doEndWalk = true;
325            fault = pageFault(pte.p);
326            break;
327        }
328        if (!pte.ps) {
329            // 4 KB page
330            entry.size = 4 * (1 << 10);
331            nextRead =
332                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
333            nextState = LongPTE;
334            break;
335        } else {
336            // 2 MB page
337            entry.size = 2 * (1 << 20);
338            entry.paddr = (uint64_t)pte & (mask(31) << 21);
339            entry.uncacheable = uncacheable;
340            entry.global = pte.g;
341            entry.patBit = bits(pte, 12);
342            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
343            doTLBInsert = true;
344            doEndWalk = true;
345            break;
346        }
347      case LongPTE:
348        DPRINTF(PageTableWalker,
349                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
350        doWrite = !pte.a;
351        pte.a = 1;
352        entry.writable = entry.writable && pte.w;
353        entry.user = entry.user && pte.u;
354        if (badNX || !pte.p) {
355            doEndWalk = true;
356            fault = pageFault(pte.p);
357            break;
358        }
359        entry.paddr = (uint64_t)pte & (mask(40) << 12);
360        entry.uncacheable = uncacheable;
361        entry.global = pte.g;
362        entry.patBit = bits(pte, 12);
363        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
364        doTLBInsert = true;
365        doEndWalk = true;
366        break;
367      case PAEPDP:
368        DPRINTF(PageTableWalker,
369                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
370        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
371        if (!pte.p) {
372            doEndWalk = true;
373            fault = pageFault(pte.p);
374            break;
375        }
376        nextState = PAEPD;
377        break;
378      case PAEPD:
379        DPRINTF(PageTableWalker,
380                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
381        doWrite = !pte.a;
382        pte.a = 1;
383        entry.writable = pte.w;
384        entry.user = pte.u;
385        if (badNX || !pte.p) {
386            doEndWalk = true;
387            fault = pageFault(pte.p);
388            break;
389        }
390        if (!pte.ps) {
391            // 4 KB page
392            entry.size = 4 * (1 << 10);
393            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
394            nextState = PAEPTE;
395            break;
396        } else {
397            // 2 MB page
398            entry.size = 2 * (1 << 20);
399            entry.paddr = (uint64_t)pte & (mask(31) << 21);
400            entry.uncacheable = uncacheable;
401            entry.global = pte.g;
402            entry.patBit = bits(pte, 12);
403            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
404            doTLBInsert = true;
405            doEndWalk = true;
406            break;
407        }
408      case PAEPTE:
409        DPRINTF(PageTableWalker,
410                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
411        doWrite = !pte.a;
412        pte.a = 1;
413        entry.writable = entry.writable && pte.w;
414        entry.user = entry.user && pte.u;
415        if (badNX || !pte.p) {
416            doEndWalk = true;
417            fault = pageFault(pte.p);
418            break;
419        }
420        entry.paddr = (uint64_t)pte & (mask(40) << 12);
421        entry.uncacheable = uncacheable;
422        entry.global = pte.g;
423        entry.patBit = bits(pte, 7);
424        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
425        doTLBInsert = true;
426        doEndWalk = true;
427        break;
428      case PSEPD:
429        DPRINTF(PageTableWalker,
430                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
431        doWrite = !pte.a;
432        pte.a = 1;
433        entry.writable = pte.w;
434        entry.user = pte.u;
435        if (!pte.p) {
436            doEndWalk = true;
437            fault = pageFault(pte.p);
438            break;
439        }
440        if (!pte.ps) {
441            // 4 KB page
442            entry.size = 4 * (1 << 10);
443            nextRead =
444                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
445            nextState = PTE;
446            break;
447        } else {
448            // 4 MB page
449            entry.size = 4 * (1 << 20);
450            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
451            entry.uncacheable = uncacheable;
452            entry.global = pte.g;
453            entry.patBit = bits(pte, 12);
454            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
455            doTLBInsert = true;
456            doEndWalk = true;
457            break;
458        }
459      case PD:
460        DPRINTF(PageTableWalker,
461                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
462        doWrite = !pte.a;
463        pte.a = 1;
464        entry.writable = pte.w;
465        entry.user = pte.u;
466        if (!pte.p) {
467            doEndWalk = true;
468            fault = pageFault(pte.p);
469            break;
470        }
471        // 4 KB page
472        entry.size = 4 * (1 << 10);
473        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
474        nextState = PTE;
475        break;
476      case PTE:
477        DPRINTF(PageTableWalker,
478                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
479        doWrite = !pte.a;
480        pte.a = 1;
481        entry.writable = pte.w;
482        entry.user = pte.u;
483        if (!pte.p) {
484            doEndWalk = true;
485            fault = pageFault(pte.p);
486            break;
487        }
488        entry.paddr = (uint64_t)pte & (mask(20) << 12);
489        entry.uncacheable = uncacheable;
490        entry.global = pte.g;
491        entry.patBit = bits(pte, 7);
492        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
493        doTLBInsert = true;
494        doEndWalk = true;
495        break;
496      default:
497        panic("Unknown page table walker state %d!\n");
498    }
499    if (doEndWalk) {
500        if (doTLBInsert)
501            if (!functional)
502                walker->tlb->insert(entry.vaddr, entry);
503        endWalk();
504    } else {
505        PacketPtr oldRead = read;
506        //If we didn't return, we're setting up another read.
507        Request::Flags flags = oldRead->req->getFlags();
508        flags.set(Request::UNCACHEABLE, uncacheable);
509        RequestPtr request =
510            new Request(nextRead, oldRead->getSize(), flags);
511        read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast);
512        read->allocate();
513        // If we need to write, adjust the read packet to write the modified
514        // value back to memory.
515        if (doWrite) {
516            write = oldRead;
517            write->set<uint64_t>(pte);
518            write->cmd = MemCmd::WriteReq;
519            write->setDest(Packet::Broadcast);
520        } else {
521            write = NULL;
522            delete oldRead->req;
523            delete oldRead;
524        }
525    }
526    return fault;
527}
528
529void
530Walker::WalkerState::endWalk()
531{
532    nextState = Ready;
533    delete read->req;
534    delete read;
535    read = NULL;
536}
537
538void
539Walker::WalkerState::setupWalk(Addr vaddr)
540{
541    VAddr addr = vaddr;
542    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
543    // Check if we're in long mode or not
544    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
545    dataSize = 8;
546    Addr topAddr;
547    if (efer.lma) {
548        // Do long mode.
549        state = LongPML4;
550        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
551        enableNX = efer.nxe;
552    } else {
553        // We're in some flavor of legacy mode.
554        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
555        if (cr4.pae) {
556            // Do legacy PAE.
557            state = PAEPDP;
558            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
559            enableNX = efer.nxe;
560        } else {
561            dataSize = 4;
562            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
563            if (cr4.pse) {
564                // Do legacy PSE.
565                state = PSEPD;
566            } else {
567                // Do legacy non PSE.
568                state = PD;
569            }
570            enableNX = false;
571        }
572    }
573
574    nextState = Ready;
575    entry.vaddr = vaddr;
576
577    Request::Flags flags = Request::PHYSICAL;
578    if (cr3.pcd)
579        flags.set(Request::UNCACHEABLE);
580    RequestPtr request = new Request(topAddr, dataSize, flags);
581    read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast);
582    read->allocate();
583}
584
585bool
586Walker::WalkerState::recvPacket(PacketPtr pkt)
587{
588    if (pkt->isResponse() && !pkt->wasNacked()) {
589        assert(inflight);
590        assert(state == Waiting);
591        assert(!read);
592        inflight--;
593        if (pkt->isRead()) {
594            state = nextState;
595            nextState = Ready;
596            PacketPtr write = NULL;
597            read = pkt;
598            timingFault = stepWalk(write);
599            state = Waiting;
600            assert(timingFault == NoFault || read == NULL);
601            if (write) {
602                writes.push_back(write);
603            }
604            sendPackets();
605        } else {
606            sendPackets();
607        }
608        if (inflight == 0 && read == NULL && writes.size() == 0) {
609            state = Ready;
610            nextState = Waiting;
611            if (timingFault == NoFault) {
612                /*
613                 * Finish the translation. Now that we now the right entry is
614                 * in the TLB, this should work with no memory accesses.
615                 * There could be new faults unrelated to the table walk like
616                 * permissions violations, so we'll need the return value as
617                 * well.
618                 */
619                bool delayedResponse;
620                Fault fault = walker->tlb->translate(req, tc, NULL, mode,
621                        delayedResponse, true);
622                assert(!delayedResponse);
623                // Let the CPU continue.
624                translation->finish(fault, req, tc, mode);
625            } else {
626                // There was a fault during the walk. Let the CPU know.
627                translation->finish(timingFault, req, tc, mode);
628            }
629            return true;
630        }
631    } else if (pkt->wasNacked()) {
632        DPRINTF(PageTableWalker, "Request was nacked. Entering retry state\n");
633        pkt->reinitNacked();
634        if (!walker->sendTiming(this, pkt)) {
635            inflight--;
636            retrying = true;
637            if (pkt->isWrite()) {
638                writes.push_back(pkt);
639            } else {
640                assert(!read);
641                read = pkt;
642            }
643        }
644    }
645    return false;
646}
647
648void
649Walker::WalkerState::sendPackets()
650{
651    //If we're already waiting for the port to become available, just return.
652    if (retrying)
653        return;
654
655    //Reads always have priority
656    if (read) {
657        PacketPtr pkt = read;
658        read = NULL;
659        inflight++;
660        if (!walker->sendTiming(this, pkt)) {
661            retrying = true;
662            read = pkt;
663            inflight--;
664            return;
665        }
666    }
667    //Send off as many of the writes as we can.
668    while (writes.size()) {
669        PacketPtr write = writes.back();
670        writes.pop_back();
671        inflight++;
672        if (!walker->sendTiming(this, write)) {
673            retrying = true;
674            writes.push_back(write);
675            inflight--;
676            return;
677        }
678    }
679}
680
681bool
682Walker::WalkerState::isRetrying()
683{
684    return retrying;
685}
686
687bool
688Walker::WalkerState::isTiming()
689{
690    return timing;
691}
692
693bool
694Walker::WalkerState::wasStarted()
695{
696    return started;
697}
698
699void
700Walker::WalkerState::retry()
701{
702    retrying = false;
703    sendPackets();
704}
705
706Fault
707Walker::WalkerState::pageFault(bool present)
708{
709    DPRINTF(PageTableWalker, "Raising page fault.\n");
710    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
711    if (mode == BaseTLB::Execute && !enableNX)
712        mode = BaseTLB::Read;
713    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
714}
715
716/* end namespace X86ISA */ }
717
718X86ISA::Walker *
719X86PagetableWalkerParams::create()
720{
721    return new X86ISA::Walker(this);
722}
723