pagetable_walker.cc revision 9701:f02f3b6562d5
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include "arch/x86/pagetable.hh"
53#include "arch/x86/pagetable_walker.hh"
54#include "arch/x86/tlb.hh"
55#include "arch/x86/vtophys.hh"
56#include "base/bitfield.hh"
57#include "base/trie.hh"
58#include "cpu/base.hh"
59#include "cpu/thread_context.hh"
60#include "debug/PageTableWalker.hh"
61#include "mem/packet_access.hh"
62#include "mem/request.hh"
63
64namespace X86ISA {
65
66// Unfortunately, the placement of the base field in a page table entry is
67// very erratic and would make a mess here. It might be moved here at some
68// point in the future.
69BitUnion64(PageTableEntry)
70    Bitfield<63> nx;
71    Bitfield<11, 9> avl;
72    Bitfield<8> g;
73    Bitfield<7> ps;
74    Bitfield<6> d;
75    Bitfield<5> a;
76    Bitfield<4> pcd;
77    Bitfield<3> pwt;
78    Bitfield<2> u;
79    Bitfield<1> w;
80    Bitfield<0> p;
81EndBitUnion(PageTableEntry)
82
83Fault
84Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
85              RequestPtr _req, BaseTLB::Mode _mode)
86{
87    // TODO: in timing mode, instead of blocking when there are other
88    // outstanding requests, see if this request can be coalesced with
89    // another one (i.e. either coalesce or start walk)
90    WalkerState * newState = new WalkerState(this, _translation, _req);
91    newState->initState(_tc, _mode, sys->isTimingMode());
92    if (currStates.size()) {
93        assert(newState->isTiming());
94        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
95        currStates.push_back(newState);
96        return NoFault;
97    } else {
98        currStates.push_back(newState);
99        Fault fault = newState->startWalk();
100        if (!newState->isTiming()) {
101            currStates.pop_front();
102            delete newState;
103        }
104        return fault;
105    }
106}
107
108Fault
109Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
110              BaseTLB::Mode _mode)
111{
112    funcState.initState(_tc, _mode);
113    return funcState.startFunctional(addr, logBytes);
114}
115
116bool
117Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
118{
119    return walker->recvTimingResp(pkt);
120}
121
122bool
123Walker::recvTimingResp(PacketPtr pkt)
124{
125    WalkerSenderState * senderState =
126        dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
127    WalkerState * senderWalk = senderState->senderWalk;
128    bool walkComplete = senderWalk->recvPacket(pkt);
129    delete senderState;
130    if (walkComplete) {
131        std::list<WalkerState *>::iterator iter;
132        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
133            WalkerState * walkerState = *(iter);
134            if (walkerState == senderWalk) {
135                iter = currStates.erase(iter);
136                break;
137            }
138        }
139        delete senderWalk;
140        // Since we block requests when another is outstanding, we
141        // need to check if there is a waiting request to be serviced
142        if (currStates.size())
143            startWalkWrapper();
144    }
145    return true;
146}
147
148void
149Walker::WalkerPort::recvRetry()
150{
151    walker->recvRetry();
152}
153
154void
155Walker::recvRetry()
156{
157    std::list<WalkerState *>::iterator iter;
158    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
159        WalkerState * walkerState = *(iter);
160        if (walkerState->isRetrying()) {
161            walkerState->retry();
162        }
163    }
164}
165
166bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
167{
168    pkt->pushSenderState(new WalkerSenderState(sendingState));
169    return port.sendTimingReq(pkt);
170}
171
172BaseMasterPort &
173Walker::getMasterPort(const std::string &if_name, PortID idx)
174{
175    if (if_name == "port")
176        return port;
177    else
178        return MemObject::getMasterPort(if_name, idx);
179}
180
181void
182Walker::WalkerState::initState(ThreadContext * _tc,
183        BaseTLB::Mode _mode, bool _isTiming)
184{
185    assert(state == Ready);
186    started = false;
187    tc = _tc;
188    mode = _mode;
189    timing = _isTiming;
190}
191
192void
193Walker::startWalkWrapper()
194{
195    unsigned num_squashed = 0;
196    WalkerState *currState = currStates.front();
197    while ((num_squashed < numSquashable) && currState &&
198        currState->translation->squashed()) {
199        currStates.pop_front();
200        num_squashed++;
201
202        DPRINTF(PageTableWalker, "Squashing table walk for address %#x\n",
203            currState->req->getVaddr());
204
205        // finish the translation which will delete the translation object
206        currState->translation->finish(new UnimpFault("Squashed Inst"),
207                currState->req, currState->tc, currState->mode);
208
209        // delete the current request
210        delete currState;
211
212        // check the next translation request, if it exists
213        if (currStates.size())
214            currState = currStates.front();
215        else
216            currState = NULL;
217    }
218    if (currState && !currState->wasStarted())
219        currState->startWalk();
220}
221
222Fault
223Walker::WalkerState::startWalk()
224{
225    Fault fault = NoFault;
226    assert(started == false);
227    started = true;
228    setupWalk(req->getVaddr());
229    if (timing) {
230        nextState = state;
231        state = Waiting;
232        timingFault = NoFault;
233        sendPackets();
234    } else {
235        do {
236            walker->port.sendAtomic(read);
237            PacketPtr write = NULL;
238            fault = stepWalk(write);
239            assert(fault == NoFault || read == NULL);
240            state = nextState;
241            nextState = Ready;
242            if (write)
243                walker->port.sendAtomic(write);
244        } while(read);
245        state = Ready;
246        nextState = Waiting;
247    }
248    return fault;
249}
250
251Fault
252Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
253{
254    Fault fault = NoFault;
255    assert(started == false);
256    started = true;
257    setupWalk(addr);
258
259    do {
260        walker->port.sendFunctional(read);
261        // On a functional access (page table lookup), writes should
262        // not happen so this pointer is ignored after stepWalk
263        PacketPtr write = NULL;
264        fault = stepWalk(write);
265        assert(fault == NoFault || read == NULL);
266        state = nextState;
267        nextState = Ready;
268    } while(read);
269    logBytes = entry.logBytes;
270    addr = entry.paddr;
271
272    return fault;
273}
274
275Fault
276Walker::WalkerState::stepWalk(PacketPtr &write)
277{
278    assert(state != Ready && state != Waiting);
279    Fault fault = NoFault;
280    write = NULL;
281    PageTableEntry pte;
282    if (dataSize == 8)
283        pte = read->get<uint64_t>();
284    else
285        pte = read->get<uint32_t>();
286    VAddr vaddr = entry.vaddr;
287    bool uncacheable = pte.pcd;
288    Addr nextRead = 0;
289    bool doWrite = false;
290    bool doTLBInsert = false;
291    bool doEndWalk = false;
292    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
293    switch(state) {
294      case LongPML4:
295        DPRINTF(PageTableWalker,
296                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
297        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
298        doWrite = !pte.a;
299        pte.a = 1;
300        entry.writable = pte.w;
301        entry.user = pte.u;
302        if (badNX || !pte.p) {
303            doEndWalk = true;
304            fault = pageFault(pte.p);
305            break;
306        }
307        entry.noExec = pte.nx;
308        nextState = LongPDP;
309        break;
310      case LongPDP:
311        DPRINTF(PageTableWalker,
312                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
313        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
314        doWrite = !pte.a;
315        pte.a = 1;
316        entry.writable = entry.writable && pte.w;
317        entry.user = entry.user && pte.u;
318        if (badNX || !pte.p) {
319            doEndWalk = true;
320            fault = pageFault(pte.p);
321            break;
322        }
323        nextState = LongPD;
324        break;
325      case LongPD:
326        DPRINTF(PageTableWalker,
327                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
328        doWrite = !pte.a;
329        pte.a = 1;
330        entry.writable = entry.writable && pte.w;
331        entry.user = entry.user && pte.u;
332        if (badNX || !pte.p) {
333            doEndWalk = true;
334            fault = pageFault(pte.p);
335            break;
336        }
337        if (!pte.ps) {
338            // 4 KB page
339            entry.logBytes = 12;
340            nextRead =
341                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
342            nextState = LongPTE;
343            break;
344        } else {
345            // 2 MB page
346            entry.logBytes = 21;
347            entry.paddr = (uint64_t)pte & (mask(31) << 21);
348            entry.uncacheable = uncacheable;
349            entry.global = pte.g;
350            entry.patBit = bits(pte, 12);
351            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
352            doTLBInsert = true;
353            doEndWalk = true;
354            break;
355        }
356      case LongPTE:
357        DPRINTF(PageTableWalker,
358                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
359        doWrite = !pte.a;
360        pte.a = 1;
361        entry.writable = entry.writable && pte.w;
362        entry.user = entry.user && pte.u;
363        if (badNX || !pte.p) {
364            doEndWalk = true;
365            fault = pageFault(pte.p);
366            break;
367        }
368        entry.paddr = (uint64_t)pte & (mask(40) << 12);
369        entry.uncacheable = uncacheable;
370        entry.global = pte.g;
371        entry.patBit = bits(pte, 12);
372        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
373        doTLBInsert = true;
374        doEndWalk = true;
375        break;
376      case PAEPDP:
377        DPRINTF(PageTableWalker,
378                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
379        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
380        if (!pte.p) {
381            doEndWalk = true;
382            fault = pageFault(pte.p);
383            break;
384        }
385        nextState = PAEPD;
386        break;
387      case PAEPD:
388        DPRINTF(PageTableWalker,
389                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
390        doWrite = !pte.a;
391        pte.a = 1;
392        entry.writable = pte.w;
393        entry.user = pte.u;
394        if (badNX || !pte.p) {
395            doEndWalk = true;
396            fault = pageFault(pte.p);
397            break;
398        }
399        if (!pte.ps) {
400            // 4 KB page
401            entry.logBytes = 12;
402            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
403            nextState = PAEPTE;
404            break;
405        } else {
406            // 2 MB page
407            entry.logBytes = 21;
408            entry.paddr = (uint64_t)pte & (mask(31) << 21);
409            entry.uncacheable = uncacheable;
410            entry.global = pte.g;
411            entry.patBit = bits(pte, 12);
412            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
413            doTLBInsert = true;
414            doEndWalk = true;
415            break;
416        }
417      case PAEPTE:
418        DPRINTF(PageTableWalker,
419                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
420        doWrite = !pte.a;
421        pte.a = 1;
422        entry.writable = entry.writable && pte.w;
423        entry.user = entry.user && pte.u;
424        if (badNX || !pte.p) {
425            doEndWalk = true;
426            fault = pageFault(pte.p);
427            break;
428        }
429        entry.paddr = (uint64_t)pte & (mask(40) << 12);
430        entry.uncacheable = uncacheable;
431        entry.global = pte.g;
432        entry.patBit = bits(pte, 7);
433        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
434        doTLBInsert = true;
435        doEndWalk = true;
436        break;
437      case PSEPD:
438        DPRINTF(PageTableWalker,
439                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
440        doWrite = !pte.a;
441        pte.a = 1;
442        entry.writable = pte.w;
443        entry.user = pte.u;
444        if (!pte.p) {
445            doEndWalk = true;
446            fault = pageFault(pte.p);
447            break;
448        }
449        if (!pte.ps) {
450            // 4 KB page
451            entry.logBytes = 12;
452            nextRead =
453                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
454            nextState = PTE;
455            break;
456        } else {
457            // 4 MB page
458            entry.logBytes = 21;
459            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
460            entry.uncacheable = uncacheable;
461            entry.global = pte.g;
462            entry.patBit = bits(pte, 12);
463            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
464            doTLBInsert = true;
465            doEndWalk = true;
466            break;
467        }
468      case PD:
469        DPRINTF(PageTableWalker,
470                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
471        doWrite = !pte.a;
472        pte.a = 1;
473        entry.writable = pte.w;
474        entry.user = pte.u;
475        if (!pte.p) {
476            doEndWalk = true;
477            fault = pageFault(pte.p);
478            break;
479        }
480        // 4 KB page
481        entry.logBytes = 12;
482        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
483        nextState = PTE;
484        break;
485      case PTE:
486        DPRINTF(PageTableWalker,
487                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
488        doWrite = !pte.a;
489        pte.a = 1;
490        entry.writable = pte.w;
491        entry.user = pte.u;
492        if (!pte.p) {
493            doEndWalk = true;
494            fault = pageFault(pte.p);
495            break;
496        }
497        entry.paddr = (uint64_t)pte & (mask(20) << 12);
498        entry.uncacheable = uncacheable;
499        entry.global = pte.g;
500        entry.patBit = bits(pte, 7);
501        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
502        doTLBInsert = true;
503        doEndWalk = true;
504        break;
505      default:
506        panic("Unknown page table walker state %d!\n");
507    }
508    if (doEndWalk) {
509        if (doTLBInsert)
510            if (!functional)
511                walker->tlb->insert(entry.vaddr, entry);
512        endWalk();
513    } else {
514        PacketPtr oldRead = read;
515        //If we didn't return, we're setting up another read.
516        Request::Flags flags = oldRead->req->getFlags();
517        flags.set(Request::UNCACHEABLE, uncacheable);
518        RequestPtr request =
519            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
520        read = new Packet(request, MemCmd::ReadReq);
521        read->allocate();
522        // If we need to write, adjust the read packet to write the modified
523        // value back to memory.
524        if (doWrite) {
525            write = oldRead;
526            write->set<uint64_t>(pte);
527            write->cmd = MemCmd::WriteReq;
528            write->clearDest();
529        } else {
530            write = NULL;
531            delete oldRead->req;
532            delete oldRead;
533        }
534    }
535    return fault;
536}
537
538void
539Walker::WalkerState::endWalk()
540{
541    nextState = Ready;
542    delete read->req;
543    delete read;
544    read = NULL;
545}
546
547void
548Walker::WalkerState::setupWalk(Addr vaddr)
549{
550    VAddr addr = vaddr;
551    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
552    // Check if we're in long mode or not
553    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
554    dataSize = 8;
555    Addr topAddr;
556    if (efer.lma) {
557        // Do long mode.
558        state = LongPML4;
559        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
560        enableNX = efer.nxe;
561    } else {
562        // We're in some flavor of legacy mode.
563        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
564        if (cr4.pae) {
565            // Do legacy PAE.
566            state = PAEPDP;
567            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
568            enableNX = efer.nxe;
569        } else {
570            dataSize = 4;
571            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
572            if (cr4.pse) {
573                // Do legacy PSE.
574                state = PSEPD;
575            } else {
576                // Do legacy non PSE.
577                state = PD;
578            }
579            enableNX = false;
580        }
581    }
582
583    nextState = Ready;
584    entry.vaddr = vaddr;
585
586    Request::Flags flags = Request::PHYSICAL;
587    if (cr3.pcd)
588        flags.set(Request::UNCACHEABLE);
589    RequestPtr request = new Request(topAddr, dataSize, flags,
590                                     walker->masterId);
591    read = new Packet(request, MemCmd::ReadReq);
592    read->allocate();
593}
594
595bool
596Walker::WalkerState::recvPacket(PacketPtr pkt)
597{
598    assert(pkt->isResponse());
599    assert(inflight);
600    assert(state == Waiting);
601    assert(!read);
602    inflight--;
603    if (pkt->isRead()) {
604        // @todo someone should pay for this
605        pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
606
607        state = nextState;
608        nextState = Ready;
609        PacketPtr write = NULL;
610        read = pkt;
611        timingFault = stepWalk(write);
612        state = Waiting;
613        assert(timingFault == NoFault || read == NULL);
614        if (write) {
615            writes.push_back(write);
616        }
617        sendPackets();
618    } else {
619        sendPackets();
620    }
621    if (inflight == 0 && read == NULL && writes.size() == 0) {
622        state = Ready;
623        nextState = Waiting;
624        if (timingFault == NoFault) {
625            /*
626             * Finish the translation. Now that we now the right entry is
627             * in the TLB, this should work with no memory accesses.
628             * There could be new faults unrelated to the table walk like
629             * permissions violations, so we'll need the return value as
630             * well.
631             */
632            bool delayedResponse;
633            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
634                                                 delayedResponse, true);
635            assert(!delayedResponse);
636            // Let the CPU continue.
637            translation->finish(fault, req, tc, mode);
638        } else {
639            // There was a fault during the walk. Let the CPU know.
640            translation->finish(timingFault, req, tc, mode);
641        }
642        return true;
643    }
644
645    return false;
646}
647
648void
649Walker::WalkerState::sendPackets()
650{
651    //If we're already waiting for the port to become available, just return.
652    if (retrying)
653        return;
654
655    //Reads always have priority
656    if (read) {
657        PacketPtr pkt = read;
658        read = NULL;
659        inflight++;
660        if (!walker->sendTiming(this, pkt)) {
661            retrying = true;
662            read = pkt;
663            inflight--;
664            return;
665        }
666    }
667    //Send off as many of the writes as we can.
668    while (writes.size()) {
669        PacketPtr write = writes.back();
670        writes.pop_back();
671        inflight++;
672        if (!walker->sendTiming(this, write)) {
673            retrying = true;
674            writes.push_back(write);
675            inflight--;
676            return;
677        }
678    }
679}
680
681bool
682Walker::WalkerState::isRetrying()
683{
684    return retrying;
685}
686
687bool
688Walker::WalkerState::isTiming()
689{
690    return timing;
691}
692
693bool
694Walker::WalkerState::wasStarted()
695{
696    return started;
697}
698
699void
700Walker::WalkerState::retry()
701{
702    retrying = false;
703    sendPackets();
704}
705
706Fault
707Walker::WalkerState::pageFault(bool present)
708{
709    DPRINTF(PageTableWalker, "Raising page fault.\n");
710    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
711    if (mode == BaseTLB::Execute && !enableNX)
712        mode = BaseTLB::Read;
713    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
714}
715
716/* end namespace X86ISA */ }
717
718X86ISA::Walker *
719X86PagetableWalkerParams::create()
720{
721    return new X86ISA::Walker(this);
722}
723