pagetable_walker.cc revision 13784
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include "arch/x86/pagetable_walker.hh"
53
54#include <memory>
55
56#include "arch/x86/pagetable.hh"
57#include "arch/x86/tlb.hh"
58#include "arch/x86/vtophys.hh"
59#include "base/bitfield.hh"
60#include "base/trie.hh"
61#include "cpu/base.hh"
62#include "cpu/thread_context.hh"
63#include "debug/PageTableWalker.hh"
64#include "mem/packet_access.hh"
65#include "mem/request.hh"
66
67namespace X86ISA {
68
69Fault
70Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
71              const RequestPtr &_req, BaseTLB::Mode _mode)
72{
73    // TODO: in timing mode, instead of blocking when there are other
74    // outstanding requests, see if this request can be coalesced with
75    // another one (i.e. either coalesce or start walk)
76    WalkerState * newState = new WalkerState(this, _translation, _req);
77    newState->initState(_tc, _mode, sys->isTimingMode());
78    if (currStates.size()) {
79        assert(newState->isTiming());
80        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
81        currStates.push_back(newState);
82        return NoFault;
83    } else {
84        currStates.push_back(newState);
85        Fault fault = newState->startWalk();
86        if (!newState->isTiming()) {
87            currStates.pop_front();
88            delete newState;
89        }
90        return fault;
91    }
92}
93
94Fault
95Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
96              BaseTLB::Mode _mode)
97{
98    funcState.initState(_tc, _mode);
99    return funcState.startFunctional(addr, logBytes);
100}
101
102bool
103Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
104{
105    return walker->recvTimingResp(pkt);
106}
107
108bool
109Walker::recvTimingResp(PacketPtr pkt)
110{
111    WalkerSenderState * senderState =
112        dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
113    WalkerState * senderWalk = senderState->senderWalk;
114    bool walkComplete = senderWalk->recvPacket(pkt);
115    delete senderState;
116    if (walkComplete) {
117        std::list<WalkerState *>::iterator iter;
118        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
119            WalkerState * walkerState = *(iter);
120            if (walkerState == senderWalk) {
121                iter = currStates.erase(iter);
122                break;
123            }
124        }
125        delete senderWalk;
126        // Since we block requests when another is outstanding, we
127        // need to check if there is a waiting request to be serviced
128        if (currStates.size() && !startWalkWrapperEvent.scheduled())
129            // delay sending any new requests until we are finished
130            // with the responses
131            schedule(startWalkWrapperEvent, clockEdge());
132    }
133    return true;
134}
135
136void
137Walker::WalkerPort::recvReqRetry()
138{
139    walker->recvReqRetry();
140}
141
142void
143Walker::recvReqRetry()
144{
145    std::list<WalkerState *>::iterator iter;
146    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
147        WalkerState * walkerState = *(iter);
148        if (walkerState->isRetrying()) {
149            walkerState->retry();
150        }
151    }
152}
153
154bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
155{
156    WalkerSenderState* walker_state = new WalkerSenderState(sendingState);
157    pkt->pushSenderState(walker_state);
158    if (port.sendTimingReq(pkt)) {
159        return true;
160    } else {
161        // undo the adding of the sender state and delete it, as we
162        // will do it again the next time we attempt to send it
163        pkt->popSenderState();
164        delete walker_state;
165        return false;
166    }
167
168}
169
170Port &
171Walker::getPort(const std::string &if_name, PortID idx)
172{
173    if (if_name == "port")
174        return port;
175    else
176        return MemObject::getPort(if_name, idx);
177}
178
179void
180Walker::WalkerState::initState(ThreadContext * _tc,
181        BaseTLB::Mode _mode, bool _isTiming)
182{
183    assert(state == Ready);
184    started = false;
185    tc = _tc;
186    mode = _mode;
187    timing = _isTiming;
188}
189
190void
191Walker::startWalkWrapper()
192{
193    unsigned num_squashed = 0;
194    WalkerState *currState = currStates.front();
195    while ((num_squashed < numSquashable) && currState &&
196        currState->translation->squashed()) {
197        currStates.pop_front();
198        num_squashed++;
199
200        DPRINTF(PageTableWalker, "Squashing table walk for address %#x\n",
201            currState->req->getVaddr());
202
203        // finish the translation which will delete the translation object
204        currState->translation->finish(
205            std::make_shared<UnimpFault>("Squashed Inst"),
206            currState->req, currState->tc, currState->mode);
207
208        // delete the current request
209        delete currState;
210
211        // check the next translation request, if it exists
212        if (currStates.size())
213            currState = currStates.front();
214        else
215            currState = NULL;
216    }
217    if (currState && !currState->wasStarted())
218        currState->startWalk();
219}
220
221Fault
222Walker::WalkerState::startWalk()
223{
224    Fault fault = NoFault;
225    assert(!started);
226    started = true;
227    setupWalk(req->getVaddr());
228    if (timing) {
229        nextState = state;
230        state = Waiting;
231        timingFault = NoFault;
232        sendPackets();
233    } else {
234        do {
235            walker->port.sendAtomic(read);
236            PacketPtr write = NULL;
237            fault = stepWalk(write);
238            assert(fault == NoFault || read == NULL);
239            state = nextState;
240            nextState = Ready;
241            if (write)
242                walker->port.sendAtomic(write);
243        } while (read);
244        state = Ready;
245        nextState = Waiting;
246    }
247    return fault;
248}
249
250Fault
251Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
252{
253    Fault fault = NoFault;
254    assert(!started);
255    started = true;
256    setupWalk(addr);
257
258    do {
259        walker->port.sendFunctional(read);
260        // On a functional access (page table lookup), writes should
261        // not happen so this pointer is ignored after stepWalk
262        PacketPtr write = NULL;
263        fault = stepWalk(write);
264        assert(fault == NoFault || read == NULL);
265        state = nextState;
266        nextState = Ready;
267    } while (read);
268    logBytes = entry.logBytes;
269    addr = entry.paddr;
270
271    return fault;
272}
273
274Fault
275Walker::WalkerState::stepWalk(PacketPtr &write)
276{
277    assert(state != Ready && state != Waiting);
278    Fault fault = NoFault;
279    write = NULL;
280    PageTableEntry pte;
281    if (dataSize == 8)
282        pte = read->getLE<uint64_t>();
283    else
284        pte = read->getLE<uint32_t>();
285    VAddr vaddr = entry.vaddr;
286    bool uncacheable = pte.pcd;
287    Addr nextRead = 0;
288    bool doWrite = false;
289    bool doTLBInsert = false;
290    bool doEndWalk = false;
291    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
292    switch(state) {
293      case LongPML4:
294        DPRINTF(PageTableWalker,
295                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
296        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
297        doWrite = !pte.a;
298        pte.a = 1;
299        entry.writable = pte.w;
300        entry.user = pte.u;
301        if (badNX || !pte.p) {
302            doEndWalk = true;
303            fault = pageFault(pte.p);
304            break;
305        }
306        entry.noExec = pte.nx;
307        nextState = LongPDP;
308        break;
309      case LongPDP:
310        DPRINTF(PageTableWalker,
311                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
312        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
313        doWrite = !pte.a;
314        pte.a = 1;
315        entry.writable = entry.writable && pte.w;
316        entry.user = entry.user && pte.u;
317        if (badNX || !pte.p) {
318            doEndWalk = true;
319            fault = pageFault(pte.p);
320            break;
321        }
322        nextState = LongPD;
323        break;
324      case LongPD:
325        DPRINTF(PageTableWalker,
326                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
327        doWrite = !pte.a;
328        pte.a = 1;
329        entry.writable = entry.writable && pte.w;
330        entry.user = entry.user && pte.u;
331        if (badNX || !pte.p) {
332            doEndWalk = true;
333            fault = pageFault(pte.p);
334            break;
335        }
336        if (!pte.ps) {
337            // 4 KB page
338            entry.logBytes = 12;
339            nextRead =
340                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
341            nextState = LongPTE;
342            break;
343        } else {
344            // 2 MB page
345            entry.logBytes = 21;
346            entry.paddr = (uint64_t)pte & (mask(31) << 21);
347            entry.uncacheable = uncacheable;
348            entry.global = pte.g;
349            entry.patBit = bits(pte, 12);
350            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
351            doTLBInsert = true;
352            doEndWalk = true;
353            break;
354        }
355      case LongPTE:
356        DPRINTF(PageTableWalker,
357                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
358        doWrite = !pte.a;
359        pte.a = 1;
360        entry.writable = entry.writable && pte.w;
361        entry.user = entry.user && pte.u;
362        if (badNX || !pte.p) {
363            doEndWalk = true;
364            fault = pageFault(pte.p);
365            break;
366        }
367        entry.paddr = (uint64_t)pte & (mask(40) << 12);
368        entry.uncacheable = uncacheable;
369        entry.global = pte.g;
370        entry.patBit = bits(pte, 12);
371        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
372        doTLBInsert = true;
373        doEndWalk = true;
374        break;
375      case PAEPDP:
376        DPRINTF(PageTableWalker,
377                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
378        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
379        if (!pte.p) {
380            doEndWalk = true;
381            fault = pageFault(pte.p);
382            break;
383        }
384        nextState = PAEPD;
385        break;
386      case PAEPD:
387        DPRINTF(PageTableWalker,
388                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
389        doWrite = !pte.a;
390        pte.a = 1;
391        entry.writable = pte.w;
392        entry.user = pte.u;
393        if (badNX || !pte.p) {
394            doEndWalk = true;
395            fault = pageFault(pte.p);
396            break;
397        }
398        if (!pte.ps) {
399            // 4 KB page
400            entry.logBytes = 12;
401            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
402            nextState = PAEPTE;
403            break;
404        } else {
405            // 2 MB page
406            entry.logBytes = 21;
407            entry.paddr = (uint64_t)pte & (mask(31) << 21);
408            entry.uncacheable = uncacheable;
409            entry.global = pte.g;
410            entry.patBit = bits(pte, 12);
411            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
412            doTLBInsert = true;
413            doEndWalk = true;
414            break;
415        }
416      case PAEPTE:
417        DPRINTF(PageTableWalker,
418                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
419        doWrite = !pte.a;
420        pte.a = 1;
421        entry.writable = entry.writable && pte.w;
422        entry.user = entry.user && pte.u;
423        if (badNX || !pte.p) {
424            doEndWalk = true;
425            fault = pageFault(pte.p);
426            break;
427        }
428        entry.paddr = (uint64_t)pte & (mask(40) << 12);
429        entry.uncacheable = uncacheable;
430        entry.global = pte.g;
431        entry.patBit = bits(pte, 7);
432        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
433        doTLBInsert = true;
434        doEndWalk = true;
435        break;
436      case PSEPD:
437        DPRINTF(PageTableWalker,
438                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
439        doWrite = !pte.a;
440        pte.a = 1;
441        entry.writable = pte.w;
442        entry.user = pte.u;
443        if (!pte.p) {
444            doEndWalk = true;
445            fault = pageFault(pte.p);
446            break;
447        }
448        if (!pte.ps) {
449            // 4 KB page
450            entry.logBytes = 12;
451            nextRead =
452                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
453            nextState = PTE;
454            break;
455        } else {
456            // 4 MB page
457            entry.logBytes = 21;
458            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
459            entry.uncacheable = uncacheable;
460            entry.global = pte.g;
461            entry.patBit = bits(pte, 12);
462            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
463            doTLBInsert = true;
464            doEndWalk = true;
465            break;
466        }
467      case PD:
468        DPRINTF(PageTableWalker,
469                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
470        doWrite = !pte.a;
471        pte.a = 1;
472        entry.writable = pte.w;
473        entry.user = pte.u;
474        if (!pte.p) {
475            doEndWalk = true;
476            fault = pageFault(pte.p);
477            break;
478        }
479        // 4 KB page
480        entry.logBytes = 12;
481        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
482        nextState = PTE;
483        break;
484      case PTE:
485        DPRINTF(PageTableWalker,
486                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
487        doWrite = !pte.a;
488        pte.a = 1;
489        entry.writable = pte.w;
490        entry.user = pte.u;
491        if (!pte.p) {
492            doEndWalk = true;
493            fault = pageFault(pte.p);
494            break;
495        }
496        entry.paddr = (uint64_t)pte & (mask(20) << 12);
497        entry.uncacheable = uncacheable;
498        entry.global = pte.g;
499        entry.patBit = bits(pte, 7);
500        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
501        doTLBInsert = true;
502        doEndWalk = true;
503        break;
504      default:
505        panic("Unknown page table walker state %d!\n");
506    }
507    if (doEndWalk) {
508        if (doTLBInsert)
509            if (!functional)
510                walker->tlb->insert(entry.vaddr, entry);
511        endWalk();
512    } else {
513        PacketPtr oldRead = read;
514        //If we didn't return, we're setting up another read.
515        Request::Flags flags = oldRead->req->getFlags();
516        flags.set(Request::UNCACHEABLE, uncacheable);
517        RequestPtr request = std::make_shared<Request>(
518            nextRead, oldRead->getSize(), flags, walker->masterId);
519        read = new Packet(request, MemCmd::ReadReq);
520        read->allocate();
521        // If we need to write, adjust the read packet to write the modified
522        // value back to memory.
523        if (doWrite) {
524            write = oldRead;
525            write->setLE<uint64_t>(pte);
526            write->cmd = MemCmd::WriteReq;
527        } else {
528            write = NULL;
529            delete oldRead;
530        }
531    }
532    return fault;
533}
534
535void
536Walker::WalkerState::endWalk()
537{
538    nextState = Ready;
539    delete read;
540    read = NULL;
541}
542
543void
544Walker::WalkerState::setupWalk(Addr vaddr)
545{
546    VAddr addr = vaddr;
547    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
548    // Check if we're in long mode or not
549    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
550    dataSize = 8;
551    Addr topAddr;
552    if (efer.lma) {
553        // Do long mode.
554        state = LongPML4;
555        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
556        enableNX = efer.nxe;
557    } else {
558        // We're in some flavor of legacy mode.
559        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
560        if (cr4.pae) {
561            // Do legacy PAE.
562            state = PAEPDP;
563            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
564            enableNX = efer.nxe;
565        } else {
566            dataSize = 4;
567            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
568            if (cr4.pse) {
569                // Do legacy PSE.
570                state = PSEPD;
571            } else {
572                // Do legacy non PSE.
573                state = PD;
574            }
575            enableNX = false;
576        }
577    }
578
579    nextState = Ready;
580    entry.vaddr = vaddr;
581
582    Request::Flags flags = Request::PHYSICAL;
583    if (cr3.pcd)
584        flags.set(Request::UNCACHEABLE);
585
586    RequestPtr request = std::make_shared<Request>(
587        topAddr, dataSize, flags, walker->masterId);
588
589    read = new Packet(request, MemCmd::ReadReq);
590    read->allocate();
591}
592
593bool
594Walker::WalkerState::recvPacket(PacketPtr pkt)
595{
596    assert(pkt->isResponse());
597    assert(inflight);
598    assert(state == Waiting);
599    inflight--;
600    if (pkt->isRead()) {
601        // should not have a pending read it we also had one outstanding
602        assert(!read);
603
604        // @todo someone should pay for this
605        pkt->headerDelay = pkt->payloadDelay = 0;
606
607        state = nextState;
608        nextState = Ready;
609        PacketPtr write = NULL;
610        read = pkt;
611        timingFault = stepWalk(write);
612        state = Waiting;
613        assert(timingFault == NoFault || read == NULL);
614        if (write) {
615            writes.push_back(write);
616        }
617        sendPackets();
618    } else {
619        sendPackets();
620    }
621    if (inflight == 0 && read == NULL && writes.size() == 0) {
622        state = Ready;
623        nextState = Waiting;
624        if (timingFault == NoFault) {
625            /*
626             * Finish the translation. Now that we know the right entry is
627             * in the TLB, this should work with no memory accesses.
628             * There could be new faults unrelated to the table walk like
629             * permissions violations, so we'll need the return value as
630             * well.
631             */
632            bool delayedResponse;
633            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
634                                                 delayedResponse, true);
635            assert(!delayedResponse);
636            // Let the CPU continue.
637            translation->finish(fault, req, tc, mode);
638        } else {
639            // There was a fault during the walk. Let the CPU know.
640            translation->finish(timingFault, req, tc, mode);
641        }
642        return true;
643    }
644
645    return false;
646}
647
648void
649Walker::WalkerState::sendPackets()
650{
651    //If we're already waiting for the port to become available, just return.
652    if (retrying)
653        return;
654
655    //Reads always have priority
656    if (read) {
657        PacketPtr pkt = read;
658        read = NULL;
659        inflight++;
660        if (!walker->sendTiming(this, pkt)) {
661            retrying = true;
662            read = pkt;
663            inflight--;
664            return;
665        }
666    }
667    //Send off as many of the writes as we can.
668    while (writes.size()) {
669        PacketPtr write = writes.back();
670        writes.pop_back();
671        inflight++;
672        if (!walker->sendTiming(this, write)) {
673            retrying = true;
674            writes.push_back(write);
675            inflight--;
676            return;
677        }
678    }
679}
680
681bool
682Walker::WalkerState::isRetrying()
683{
684    return retrying;
685}
686
687bool
688Walker::WalkerState::isTiming()
689{
690    return timing;
691}
692
693bool
694Walker::WalkerState::wasStarted()
695{
696    return started;
697}
698
699void
700Walker::WalkerState::retry()
701{
702    retrying = false;
703    sendPackets();
704}
705
706Fault
707Walker::WalkerState::pageFault(bool present)
708{
709    DPRINTF(PageTableWalker, "Raising page fault.\n");
710    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
711    if (mode == BaseTLB::Execute && !enableNX)
712        mode = BaseTLB::Read;
713    return std::make_shared<PageFault>(entry.vaddr, present, mode,
714                                       m5reg.cpl == 3, false);
715}
716
717/* end namespace X86ISA */ }
718
719X86ISA::Walker *
720X86PagetableWalkerParams::create()
721{
722    return new X86ISA::Walker(this);
723}
724