pagetable_walker.cc revision 8232
1/*
2 * Copyright (c) 2007 The Hewlett-Packard Development Company
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#include "arch/x86/pagetable.hh"
41#include "arch/x86/pagetable_walker.hh"
42#include "arch/x86/tlb.hh"
43#include "arch/x86/vtophys.hh"
44#include "base/bitfield.hh"
45#include "cpu/base.hh"
46#include "cpu/thread_context.hh"
47#include "debug/PageTableWalker.hh"
48#include "mem/packet_access.hh"
49#include "mem/request.hh"
50#include "sim/system.hh"
51
52namespace X86ISA {
53
54// Unfortunately, the placement of the base field in a page table entry is
55// very erratic and would make a mess here. It might be moved here at some
56// point in the future.
57BitUnion64(PageTableEntry)
58    Bitfield<63> nx;
59    Bitfield<11, 9> avl;
60    Bitfield<8> g;
61    Bitfield<7> ps;
62    Bitfield<6> d;
63    Bitfield<5> a;
64    Bitfield<4> pcd;
65    Bitfield<3> pwt;
66    Bitfield<2> u;
67    Bitfield<1> w;
68    Bitfield<0> p;
69EndBitUnion(PageTableEntry)
70
71Fault
72Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
73              RequestPtr _req, BaseTLB::Mode _mode)
74{
75    // TODO: in timing mode, instead of blocking when there are other
76    // outstanding requests, see if this request can be coalesced with
77    // another one (i.e. either coalesce or start walk)
78    WalkerState * newState = new WalkerState(this, _translation, _req);
79    newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing);
80    if (currStates.size()) {
81        assert(newState->isTiming());
82        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
83        currStates.push_back(newState);
84        return NoFault;
85    } else {
86        currStates.push_back(newState);
87        Fault fault = newState->startWalk();
88        if (!newState->isTiming()) {
89            currStates.pop_front();
90            delete newState;
91        }
92        return fault;
93    }
94}
95
96Fault
97Walker::startFunctional(ThreadContext * _tc, Addr &addr, Addr &pageSize,
98              BaseTLB::Mode _mode)
99{
100    funcState.initState(_tc, _mode);
101    return funcState.startFunctional(addr, pageSize);
102}
103
104bool
105Walker::WalkerPort::recvTiming(PacketPtr pkt)
106{
107    return walker->recvTiming(pkt);
108}
109
110bool
111Walker::recvTiming(PacketPtr pkt)
112{
113    if (pkt->isResponse() || pkt->wasNacked()) {
114        WalkerSenderState * senderState =
115                dynamic_cast<WalkerSenderState *>(pkt->senderState);
116        pkt->senderState = senderState->saved;
117        WalkerState * senderWalk = senderState->senderWalk;
118        bool walkComplete = senderWalk->recvPacket(pkt);
119        delete senderState;
120        if (walkComplete) {
121            std::list<WalkerState *>::iterator iter;
122            for (iter = currStates.begin(); iter != currStates.end(); iter++) {
123                WalkerState * walkerState = *(iter);
124                if (walkerState == senderWalk) {
125                    iter = currStates.erase(iter);
126                    break;
127                }
128            }
129            delete senderWalk;
130            // Since we block requests when another is outstanding, we
131            // need to check if there is a waiting request to be serviced
132            if (currStates.size()) {
133                WalkerState * newState = currStates.front();
134                if (!newState->wasStarted())
135                    newState->startWalk();
136            }
137        }
138    } else {
139        DPRINTF(PageTableWalker, "Received strange packet\n");
140    }
141    return true;
142}
143
144Tick
145Walker::WalkerPort::recvAtomic(PacketPtr pkt)
146{
147    return 0;
148}
149
150void
151Walker::WalkerPort::recvFunctional(PacketPtr pkt)
152{
153    return;
154}
155
156void
157Walker::WalkerPort::recvStatusChange(Status status)
158{
159    if (status == RangeChange) {
160        if (!snoopRangeSent) {
161            snoopRangeSent = true;
162            sendStatusChange(Port::RangeChange);
163        }
164        return;
165    }
166
167    panic("Unexpected recvStatusChange.\n");
168}
169
170void
171Walker::WalkerPort::recvRetry()
172{
173    walker->recvRetry();
174}
175
176void
177Walker::recvRetry()
178{
179    std::list<WalkerState *>::iterator iter;
180    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
181        WalkerState * walkerState = *(iter);
182        if (walkerState->isRetrying()) {
183            walkerState->retry();
184        }
185    }
186}
187
188bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
189{
190    pkt->senderState = new WalkerSenderState(sendingState, pkt->senderState);
191    return port.sendTiming(pkt);
192}
193
194Port *
195Walker::getPort(const std::string &if_name, int idx)
196{
197    if (if_name == "port")
198        return &port;
199    else
200        panic("No page table walker port named %s!\n", if_name);
201}
202
203void
204Walker::WalkerState::initState(ThreadContext * _tc,
205        BaseTLB::Mode _mode, bool _isTiming)
206{
207    assert(state == Ready);
208    started = false;
209    tc = _tc;
210    mode = _mode;
211    timing = _isTiming;
212}
213
214Fault
215Walker::WalkerState::startWalk()
216{
217    Fault fault = NoFault;
218    assert(started == false);
219    started = true;
220    setupWalk(req->getVaddr());
221    if (timing) {
222        nextState = state;
223        state = Waiting;
224        timingFault = NoFault;
225        sendPackets();
226    } else {
227        do {
228            walker->port.sendAtomic(read);
229            PacketPtr write = NULL;
230            fault = stepWalk(write);
231            assert(fault == NoFault || read == NULL);
232            state = nextState;
233            nextState = Ready;
234            if (write)
235                walker->port.sendAtomic(write);
236        } while(read);
237        state = Ready;
238        nextState = Waiting;
239    }
240    return fault;
241}
242
243Fault
244Walker::WalkerState::startFunctional(Addr &addr, Addr &pageSize)
245{
246    Fault fault = NoFault;
247    assert(started == false);
248    started = true;
249    setupWalk(addr);
250
251    do {
252        walker->port.sendFunctional(read);
253        // On a functional access (page table lookup), writes should
254        // not happen so this pointer is ignored after stepWalk
255        PacketPtr write = NULL;
256        fault = stepWalk(write);
257        assert(fault == NoFault || read == NULL);
258        state = nextState;
259        nextState = Ready;
260    } while(read);
261    pageSize = entry.size;
262    addr = entry.paddr;
263
264    return fault;
265}
266
267Fault
268Walker::WalkerState::stepWalk(PacketPtr &write)
269{
270    assert(state != Ready && state != Waiting);
271    Fault fault = NoFault;
272    write = NULL;
273    PageTableEntry pte;
274    if (dataSize == 8)
275        pte = read->get<uint64_t>();
276    else
277        pte = read->get<uint32_t>();
278    VAddr vaddr = entry.vaddr;
279    bool uncacheable = pte.pcd;
280    Addr nextRead = 0;
281    bool doWrite = false;
282    bool doTLBInsert = false;
283    bool doEndWalk = false;
284    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
285    switch(state) {
286      case LongPML4:
287        DPRINTF(PageTableWalker,
288                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
289        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
290        doWrite = !pte.a;
291        pte.a = 1;
292        entry.writable = pte.w;
293        entry.user = pte.u;
294        if (badNX || !pte.p) {
295            doEndWalk = true;
296            fault = pageFault(pte.p);
297            break;
298        }
299        entry.noExec = pte.nx;
300        nextState = LongPDP;
301        break;
302      case LongPDP:
303        DPRINTF(PageTableWalker,
304                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
305        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
306        doWrite = !pte.a;
307        pte.a = 1;
308        entry.writable = entry.writable && pte.w;
309        entry.user = entry.user && pte.u;
310        if (badNX || !pte.p) {
311            doEndWalk = true;
312            fault = pageFault(pte.p);
313            break;
314        }
315        nextState = LongPD;
316        break;
317      case LongPD:
318        DPRINTF(PageTableWalker,
319                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
320        doWrite = !pte.a;
321        pte.a = 1;
322        entry.writable = entry.writable && pte.w;
323        entry.user = entry.user && pte.u;
324        if (badNX || !pte.p) {
325            doEndWalk = true;
326            fault = pageFault(pte.p);
327            break;
328        }
329        if (!pte.ps) {
330            // 4 KB page
331            entry.size = 4 * (1 << 10);
332            nextRead =
333                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
334            nextState = LongPTE;
335            break;
336        } else {
337            // 2 MB page
338            entry.size = 2 * (1 << 20);
339            entry.paddr = (uint64_t)pte & (mask(31) << 21);
340            entry.uncacheable = uncacheable;
341            entry.global = pte.g;
342            entry.patBit = bits(pte, 12);
343            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
344            doTLBInsert = true;
345            doEndWalk = true;
346            break;
347        }
348      case LongPTE:
349        DPRINTF(PageTableWalker,
350                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
351        doWrite = !pte.a;
352        pte.a = 1;
353        entry.writable = entry.writable && pte.w;
354        entry.user = entry.user && pte.u;
355        if (badNX || !pte.p) {
356            doEndWalk = true;
357            fault = pageFault(pte.p);
358            break;
359        }
360        entry.paddr = (uint64_t)pte & (mask(40) << 12);
361        entry.uncacheable = uncacheable;
362        entry.global = pte.g;
363        entry.patBit = bits(pte, 12);
364        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
365        doTLBInsert = true;
366        doEndWalk = true;
367        break;
368      case PAEPDP:
369        DPRINTF(PageTableWalker,
370                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
371        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
372        if (!pte.p) {
373            doEndWalk = true;
374            fault = pageFault(pte.p);
375            break;
376        }
377        nextState = PAEPD;
378        break;
379      case PAEPD:
380        DPRINTF(PageTableWalker,
381                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
382        doWrite = !pte.a;
383        pte.a = 1;
384        entry.writable = pte.w;
385        entry.user = pte.u;
386        if (badNX || !pte.p) {
387            doEndWalk = true;
388            fault = pageFault(pte.p);
389            break;
390        }
391        if (!pte.ps) {
392            // 4 KB page
393            entry.size = 4 * (1 << 10);
394            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
395            nextState = PAEPTE;
396            break;
397        } else {
398            // 2 MB page
399            entry.size = 2 * (1 << 20);
400            entry.paddr = (uint64_t)pte & (mask(31) << 21);
401            entry.uncacheable = uncacheable;
402            entry.global = pte.g;
403            entry.patBit = bits(pte, 12);
404            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
405            doTLBInsert = true;
406            doEndWalk = true;
407            break;
408        }
409      case PAEPTE:
410        DPRINTF(PageTableWalker,
411                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
412        doWrite = !pte.a;
413        pte.a = 1;
414        entry.writable = entry.writable && pte.w;
415        entry.user = entry.user && pte.u;
416        if (badNX || !pte.p) {
417            doEndWalk = true;
418            fault = pageFault(pte.p);
419            break;
420        }
421        entry.paddr = (uint64_t)pte & (mask(40) << 12);
422        entry.uncacheable = uncacheable;
423        entry.global = pte.g;
424        entry.patBit = bits(pte, 7);
425        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
426        doTLBInsert = true;
427        doEndWalk = true;
428        break;
429      case PSEPD:
430        DPRINTF(PageTableWalker,
431                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
432        doWrite = !pte.a;
433        pte.a = 1;
434        entry.writable = pte.w;
435        entry.user = pte.u;
436        if (!pte.p) {
437            doEndWalk = true;
438            fault = pageFault(pte.p);
439            break;
440        }
441        if (!pte.ps) {
442            // 4 KB page
443            entry.size = 4 * (1 << 10);
444            nextRead =
445                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
446            nextState = PTE;
447            break;
448        } else {
449            // 4 MB page
450            entry.size = 4 * (1 << 20);
451            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
452            entry.uncacheable = uncacheable;
453            entry.global = pte.g;
454            entry.patBit = bits(pte, 12);
455            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
456            doTLBInsert = true;
457            doEndWalk = true;
458            break;
459        }
460      case PD:
461        DPRINTF(PageTableWalker,
462                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
463        doWrite = !pte.a;
464        pte.a = 1;
465        entry.writable = pte.w;
466        entry.user = pte.u;
467        if (!pte.p) {
468            doEndWalk = true;
469            fault = pageFault(pte.p);
470            break;
471        }
472        // 4 KB page
473        entry.size = 4 * (1 << 10);
474        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
475        nextState = PTE;
476        break;
477      case PTE:
478        DPRINTF(PageTableWalker,
479                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
480        doWrite = !pte.a;
481        pte.a = 1;
482        entry.writable = pte.w;
483        entry.user = pte.u;
484        if (!pte.p) {
485            doEndWalk = true;
486            fault = pageFault(pte.p);
487            break;
488        }
489        entry.paddr = (uint64_t)pte & (mask(20) << 12);
490        entry.uncacheable = uncacheable;
491        entry.global = pte.g;
492        entry.patBit = bits(pte, 7);
493        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
494        doTLBInsert = true;
495        doEndWalk = true;
496        break;
497      default:
498        panic("Unknown page table walker state %d!\n");
499    }
500    if (doEndWalk) {
501        if (doTLBInsert)
502            if (!functional)
503                walker->tlb->insert(entry.vaddr, entry);
504        endWalk();
505    } else {
506        PacketPtr oldRead = read;
507        //If we didn't return, we're setting up another read.
508        Request::Flags flags = oldRead->req->getFlags();
509        flags.set(Request::UNCACHEABLE, uncacheable);
510        RequestPtr request =
511            new Request(nextRead, oldRead->getSize(), flags);
512        read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast);
513        read->allocate();
514        // If we need to write, adjust the read packet to write the modified
515        // value back to memory.
516        if (doWrite) {
517            write = oldRead;
518            write->set<uint64_t>(pte);
519            write->cmd = MemCmd::WriteReq;
520            write->setDest(Packet::Broadcast);
521        } else {
522            write = NULL;
523            delete oldRead->req;
524            delete oldRead;
525        }
526    }
527    return fault;
528}
529
530void
531Walker::WalkerState::endWalk()
532{
533    nextState = Ready;
534    delete read->req;
535    delete read;
536    read = NULL;
537}
538
539void
540Walker::WalkerState::setupWalk(Addr vaddr)
541{
542    VAddr addr = vaddr;
543    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
544    // Check if we're in long mode or not
545    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
546    dataSize = 8;
547    Addr topAddr;
548    if (efer.lma) {
549        // Do long mode.
550        state = LongPML4;
551        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
552        enableNX = efer.nxe;
553    } else {
554        // We're in some flavor of legacy mode.
555        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
556        if (cr4.pae) {
557            // Do legacy PAE.
558            state = PAEPDP;
559            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
560            enableNX = efer.nxe;
561        } else {
562            dataSize = 4;
563            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
564            if (cr4.pse) {
565                // Do legacy PSE.
566                state = PSEPD;
567            } else {
568                // Do legacy non PSE.
569                state = PD;
570            }
571            enableNX = false;
572        }
573    }
574
575    nextState = Ready;
576    entry.vaddr = vaddr;
577
578    Request::Flags flags = Request::PHYSICAL;
579    if (cr3.pcd)
580        flags.set(Request::UNCACHEABLE);
581    RequestPtr request = new Request(topAddr, dataSize, flags);
582    read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast);
583    read->allocate();
584}
585
586bool
587Walker::WalkerState::recvPacket(PacketPtr pkt)
588{
589    if (pkt->isResponse() && !pkt->wasNacked()) {
590        assert(inflight);
591        assert(state == Waiting);
592        assert(!read);
593        inflight--;
594        if (pkt->isRead()) {
595            state = nextState;
596            nextState = Ready;
597            PacketPtr write = NULL;
598            read = pkt;
599            timingFault = stepWalk(write);
600            state = Waiting;
601            assert(timingFault == NoFault || read == NULL);
602            if (write) {
603                writes.push_back(write);
604            }
605            sendPackets();
606        } else {
607            sendPackets();
608        }
609        if (inflight == 0 && read == NULL && writes.size() == 0) {
610            state = Ready;
611            nextState = Waiting;
612            if (timingFault == NoFault) {
613                /*
614                 * Finish the translation. Now that we now the right entry is
615                 * in the TLB, this should work with no memory accesses.
616                 * There could be new faults unrelated to the table walk like
617                 * permissions violations, so we'll need the return value as
618                 * well.
619                 */
620                bool delayedResponse;
621                Fault fault = walker->tlb->translate(req, tc, NULL, mode,
622                        delayedResponse, true);
623                assert(!delayedResponse);
624                // Let the CPU continue.
625                translation->finish(fault, req, tc, mode);
626            } else {
627                // There was a fault during the walk. Let the CPU know.
628                translation->finish(timingFault, req, tc, mode);
629            }
630            return true;
631        }
632    } else if (pkt->wasNacked()) {
633        DPRINTF(PageTableWalker, "Request was nacked. Entering retry state\n");
634        pkt->reinitNacked();
635        if (!walker->sendTiming(this, pkt)) {
636            inflight--;
637            retrying = true;
638            if (pkt->isWrite()) {
639                writes.push_back(pkt);
640            } else {
641                assert(!read);
642                read = pkt;
643            }
644        }
645    }
646    return false;
647}
648
649void
650Walker::WalkerState::sendPackets()
651{
652    //If we're already waiting for the port to become available, just return.
653    if (retrying)
654        return;
655
656    //Reads always have priority
657    if (read) {
658        PacketPtr pkt = read;
659        read = NULL;
660        inflight++;
661        if (!walker->sendTiming(this, pkt)) {
662            retrying = true;
663            read = pkt;
664            inflight--;
665            return;
666        }
667    }
668    //Send off as many of the writes as we can.
669    while (writes.size()) {
670        PacketPtr write = writes.back();
671        writes.pop_back();
672        inflight++;
673        if (!walker->sendTiming(this, write)) {
674            retrying = true;
675            writes.push_back(write);
676            inflight--;
677            return;
678        }
679    }
680}
681
682bool
683Walker::WalkerState::isRetrying()
684{
685    return retrying;
686}
687
688bool
689Walker::WalkerState::isTiming()
690{
691    return timing;
692}
693
694bool
695Walker::WalkerState::wasStarted()
696{
697    return started;
698}
699
700void
701Walker::WalkerState::retry()
702{
703    retrying = false;
704    sendPackets();
705}
706
707Fault
708Walker::WalkerState::pageFault(bool present)
709{
710    DPRINTF(PageTableWalker, "Raising page fault.\n");
711    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
712    if (mode == BaseTLB::Execute && !enableNX)
713        mode = BaseTLB::Read;
714    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
715}
716
717/* end namespace X86ISA */ }
718
719X86ISA::Walker *
720X86PagetableWalkerParams::create()
721{
722    return new X86ISA::Walker(this);
723}
724