pagetable_walker.cc revision 11321:02e930db812d
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include <memory>
53
54#include "arch/x86/pagetable.hh"
55#include "arch/x86/pagetable_walker.hh"
56#include "arch/x86/tlb.hh"
57#include "arch/x86/vtophys.hh"
58#include "base/bitfield.hh"
59#include "base/trie.hh"
60#include "cpu/base.hh"
61#include "cpu/thread_context.hh"
62#include "debug/PageTableWalker.hh"
63#include "mem/packet_access.hh"
64#include "mem/request.hh"
65
66namespace X86ISA {
67
68Fault
69Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
70              RequestPtr _req, BaseTLB::Mode _mode)
71{
72    // TODO: in timing mode, instead of blocking when there are other
73    // outstanding requests, see if this request can be coalesced with
74    // another one (i.e. either coalesce or start walk)
75    WalkerState * newState = new WalkerState(this, _translation, _req);
76    newState->initState(_tc, _mode, sys->isTimingMode());
77    if (currStates.size()) {
78        assert(newState->isTiming());
79        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
80        currStates.push_back(newState);
81        return NoFault;
82    } else {
83        currStates.push_back(newState);
84        Fault fault = newState->startWalk();
85        if (!newState->isTiming()) {
86            currStates.pop_front();
87            delete newState;
88        }
89        return fault;
90    }
91}
92
93Fault
94Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
95              BaseTLB::Mode _mode)
96{
97    funcState.initState(_tc, _mode);
98    return funcState.startFunctional(addr, logBytes);
99}
100
101bool
102Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
103{
104    return walker->recvTimingResp(pkt);
105}
106
107bool
108Walker::recvTimingResp(PacketPtr pkt)
109{
110    WalkerSenderState * senderState =
111        dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
112    WalkerState * senderWalk = senderState->senderWalk;
113    bool walkComplete = senderWalk->recvPacket(pkt);
114    delete senderState;
115    if (walkComplete) {
116        std::list<WalkerState *>::iterator iter;
117        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
118            WalkerState * walkerState = *(iter);
119            if (walkerState == senderWalk) {
120                iter = currStates.erase(iter);
121                break;
122            }
123        }
124        delete senderWalk;
125        // Since we block requests when another is outstanding, we
126        // need to check if there is a waiting request to be serviced
127        if (currStates.size() && !startWalkWrapperEvent.scheduled())
128            // delay sending any new requests until we are finished
129            // with the responses
130            schedule(startWalkWrapperEvent, clockEdge());
131    }
132    return true;
133}
134
135void
136Walker::WalkerPort::recvReqRetry()
137{
138    walker->recvReqRetry();
139}
140
141void
142Walker::recvReqRetry()
143{
144    std::list<WalkerState *>::iterator iter;
145    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
146        WalkerState * walkerState = *(iter);
147        if (walkerState->isRetrying()) {
148            walkerState->retry();
149        }
150    }
151}
152
153bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
154{
155    WalkerSenderState* walker_state = new WalkerSenderState(sendingState);
156    pkt->pushSenderState(walker_state);
157    if (port.sendTimingReq(pkt)) {
158        return true;
159    } else {
160        // undo the adding of the sender state and delete it, as we
161        // will do it again the next time we attempt to send it
162        pkt->popSenderState();
163        delete walker_state;
164        return false;
165    }
166
167}
168
169BaseMasterPort &
170Walker::getMasterPort(const std::string &if_name, PortID idx)
171{
172    if (if_name == "port")
173        return port;
174    else
175        return MemObject::getMasterPort(if_name, idx);
176}
177
178void
179Walker::WalkerState::initState(ThreadContext * _tc,
180        BaseTLB::Mode _mode, bool _isTiming)
181{
182    assert(state == Ready);
183    started = false;
184    tc = _tc;
185    mode = _mode;
186    timing = _isTiming;
187}
188
189void
190Walker::startWalkWrapper()
191{
192    unsigned num_squashed = 0;
193    WalkerState *currState = currStates.front();
194    while ((num_squashed < numSquashable) && currState &&
195        currState->translation->squashed()) {
196        currStates.pop_front();
197        num_squashed++;
198
199        DPRINTF(PageTableWalker, "Squashing table walk for address %#x\n",
200            currState->req->getVaddr());
201
202        // finish the translation which will delete the translation object
203        currState->translation->finish(
204            std::make_shared<UnimpFault>("Squashed Inst"),
205            currState->req, currState->tc, currState->mode);
206
207        // delete the current request
208        delete currState;
209
210        // check the next translation request, if it exists
211        if (currStates.size())
212            currState = currStates.front();
213        else
214            currState = NULL;
215    }
216    if (currState && !currState->wasStarted())
217        currState->startWalk();
218}
219
220Fault
221Walker::WalkerState::startWalk()
222{
223    Fault fault = NoFault;
224    assert(!started);
225    started = true;
226    setupWalk(req->getVaddr());
227    if (timing) {
228        nextState = state;
229        state = Waiting;
230        timingFault = NoFault;
231        sendPackets();
232    } else {
233        do {
234            walker->port.sendAtomic(read);
235            PacketPtr write = NULL;
236            fault = stepWalk(write);
237            assert(fault == NoFault || read == NULL);
238            state = nextState;
239            nextState = Ready;
240            if (write)
241                walker->port.sendAtomic(write);
242        } while (read);
243        state = Ready;
244        nextState = Waiting;
245    }
246    return fault;
247}
248
249Fault
250Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
251{
252    Fault fault = NoFault;
253    assert(!started);
254    started = true;
255    setupWalk(addr);
256
257    do {
258        walker->port.sendFunctional(read);
259        // On a functional access (page table lookup), writes should
260        // not happen so this pointer is ignored after stepWalk
261        PacketPtr write = NULL;
262        fault = stepWalk(write);
263        assert(fault == NoFault || read == NULL);
264        state = nextState;
265        nextState = Ready;
266    } while (read);
267    logBytes = entry.logBytes;
268    addr = entry.paddr;
269
270    return fault;
271}
272
273Fault
274Walker::WalkerState::stepWalk(PacketPtr &write)
275{
276    assert(state != Ready && state != Waiting);
277    Fault fault = NoFault;
278    write = NULL;
279    PageTableEntry pte;
280    if (dataSize == 8)
281        pte = read->get<uint64_t>();
282    else
283        pte = read->get<uint32_t>();
284    VAddr vaddr = entry.vaddr;
285    bool uncacheable = pte.pcd;
286    Addr nextRead = 0;
287    bool doWrite = false;
288    bool doTLBInsert = false;
289    bool doEndWalk = false;
290    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
291    switch(state) {
292      case LongPML4:
293        DPRINTF(PageTableWalker,
294                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
295        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
296        doWrite = !pte.a;
297        pte.a = 1;
298        entry.writable = pte.w;
299        entry.user = pte.u;
300        if (badNX || !pte.p) {
301            doEndWalk = true;
302            fault = pageFault(pte.p);
303            break;
304        }
305        entry.noExec = pte.nx;
306        nextState = LongPDP;
307        break;
308      case LongPDP:
309        DPRINTF(PageTableWalker,
310                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
311        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
312        doWrite = !pte.a;
313        pte.a = 1;
314        entry.writable = entry.writable && pte.w;
315        entry.user = entry.user && pte.u;
316        if (badNX || !pte.p) {
317            doEndWalk = true;
318            fault = pageFault(pte.p);
319            break;
320        }
321        nextState = LongPD;
322        break;
323      case LongPD:
324        DPRINTF(PageTableWalker,
325                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
326        doWrite = !pte.a;
327        pte.a = 1;
328        entry.writable = entry.writable && pte.w;
329        entry.user = entry.user && pte.u;
330        if (badNX || !pte.p) {
331            doEndWalk = true;
332            fault = pageFault(pte.p);
333            break;
334        }
335        if (!pte.ps) {
336            // 4 KB page
337            entry.logBytes = 12;
338            nextRead =
339                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
340            nextState = LongPTE;
341            break;
342        } else {
343            // 2 MB page
344            entry.logBytes = 21;
345            entry.paddr = (uint64_t)pte & (mask(31) << 21);
346            entry.uncacheable = uncacheable;
347            entry.global = pte.g;
348            entry.patBit = bits(pte, 12);
349            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
350            doTLBInsert = true;
351            doEndWalk = true;
352            break;
353        }
354      case LongPTE:
355        DPRINTF(PageTableWalker,
356                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
357        doWrite = !pte.a;
358        pte.a = 1;
359        entry.writable = entry.writable && pte.w;
360        entry.user = entry.user && pte.u;
361        if (badNX || !pte.p) {
362            doEndWalk = true;
363            fault = pageFault(pte.p);
364            break;
365        }
366        entry.paddr = (uint64_t)pte & (mask(40) << 12);
367        entry.uncacheable = uncacheable;
368        entry.global = pte.g;
369        entry.patBit = bits(pte, 12);
370        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
371        doTLBInsert = true;
372        doEndWalk = true;
373        break;
374      case PAEPDP:
375        DPRINTF(PageTableWalker,
376                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
377        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
378        if (!pte.p) {
379            doEndWalk = true;
380            fault = pageFault(pte.p);
381            break;
382        }
383        nextState = PAEPD;
384        break;
385      case PAEPD:
386        DPRINTF(PageTableWalker,
387                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
388        doWrite = !pte.a;
389        pte.a = 1;
390        entry.writable = pte.w;
391        entry.user = pte.u;
392        if (badNX || !pte.p) {
393            doEndWalk = true;
394            fault = pageFault(pte.p);
395            break;
396        }
397        if (!pte.ps) {
398            // 4 KB page
399            entry.logBytes = 12;
400            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
401            nextState = PAEPTE;
402            break;
403        } else {
404            // 2 MB page
405            entry.logBytes = 21;
406            entry.paddr = (uint64_t)pte & (mask(31) << 21);
407            entry.uncacheable = uncacheable;
408            entry.global = pte.g;
409            entry.patBit = bits(pte, 12);
410            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
411            doTLBInsert = true;
412            doEndWalk = true;
413            break;
414        }
415      case PAEPTE:
416        DPRINTF(PageTableWalker,
417                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
418        doWrite = !pte.a;
419        pte.a = 1;
420        entry.writable = entry.writable && pte.w;
421        entry.user = entry.user && pte.u;
422        if (badNX || !pte.p) {
423            doEndWalk = true;
424            fault = pageFault(pte.p);
425            break;
426        }
427        entry.paddr = (uint64_t)pte & (mask(40) << 12);
428        entry.uncacheable = uncacheable;
429        entry.global = pte.g;
430        entry.patBit = bits(pte, 7);
431        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
432        doTLBInsert = true;
433        doEndWalk = true;
434        break;
435      case PSEPD:
436        DPRINTF(PageTableWalker,
437                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
438        doWrite = !pte.a;
439        pte.a = 1;
440        entry.writable = pte.w;
441        entry.user = pte.u;
442        if (!pte.p) {
443            doEndWalk = true;
444            fault = pageFault(pte.p);
445            break;
446        }
447        if (!pte.ps) {
448            // 4 KB page
449            entry.logBytes = 12;
450            nextRead =
451                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
452            nextState = PTE;
453            break;
454        } else {
455            // 4 MB page
456            entry.logBytes = 21;
457            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
458            entry.uncacheable = uncacheable;
459            entry.global = pte.g;
460            entry.patBit = bits(pte, 12);
461            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
462            doTLBInsert = true;
463            doEndWalk = true;
464            break;
465        }
466      case PD:
467        DPRINTF(PageTableWalker,
468                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
469        doWrite = !pte.a;
470        pte.a = 1;
471        entry.writable = pte.w;
472        entry.user = pte.u;
473        if (!pte.p) {
474            doEndWalk = true;
475            fault = pageFault(pte.p);
476            break;
477        }
478        // 4 KB page
479        entry.logBytes = 12;
480        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
481        nextState = PTE;
482        break;
483      case PTE:
484        DPRINTF(PageTableWalker,
485                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
486        doWrite = !pte.a;
487        pte.a = 1;
488        entry.writable = pte.w;
489        entry.user = pte.u;
490        if (!pte.p) {
491            doEndWalk = true;
492            fault = pageFault(pte.p);
493            break;
494        }
495        entry.paddr = (uint64_t)pte & (mask(20) << 12);
496        entry.uncacheable = uncacheable;
497        entry.global = pte.g;
498        entry.patBit = bits(pte, 7);
499        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
500        doTLBInsert = true;
501        doEndWalk = true;
502        break;
503      default:
504        panic("Unknown page table walker state %d!\n");
505    }
506    if (doEndWalk) {
507        if (doTLBInsert)
508            if (!functional)
509                walker->tlb->insert(entry.vaddr, entry);
510        endWalk();
511    } else {
512        PacketPtr oldRead = read;
513        //If we didn't return, we're setting up another read.
514        Request::Flags flags = oldRead->req->getFlags();
515        flags.set(Request::UNCACHEABLE, uncacheable);
516        RequestPtr request =
517            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
518        read = new Packet(request, MemCmd::ReadReq);
519        read->allocate();
520        // If we need to write, adjust the read packet to write the modified
521        // value back to memory.
522        if (doWrite) {
523            write = oldRead;
524            write->set<uint64_t>(pte);
525            write->cmd = MemCmd::WriteReq;
526        } else {
527            write = NULL;
528            delete oldRead->req;
529            delete oldRead;
530        }
531    }
532    return fault;
533}
534
535void
536Walker::WalkerState::endWalk()
537{
538    nextState = Ready;
539    delete read->req;
540    delete read;
541    read = NULL;
542}
543
544void
545Walker::WalkerState::setupWalk(Addr vaddr)
546{
547    VAddr addr = vaddr;
548    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
549    // Check if we're in long mode or not
550    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
551    dataSize = 8;
552    Addr topAddr;
553    if (efer.lma) {
554        // Do long mode.
555        state = LongPML4;
556        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
557        enableNX = efer.nxe;
558    } else {
559        // We're in some flavor of legacy mode.
560        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
561        if (cr4.pae) {
562            // Do legacy PAE.
563            state = PAEPDP;
564            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
565            enableNX = efer.nxe;
566        } else {
567            dataSize = 4;
568            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
569            if (cr4.pse) {
570                // Do legacy PSE.
571                state = PSEPD;
572            } else {
573                // Do legacy non PSE.
574                state = PD;
575            }
576            enableNX = false;
577        }
578    }
579
580    nextState = Ready;
581    entry.vaddr = vaddr;
582
583    Request::Flags flags = Request::PHYSICAL;
584    if (cr3.pcd)
585        flags.set(Request::UNCACHEABLE);
586    RequestPtr request = new Request(topAddr, dataSize, flags,
587                                     walker->masterId);
588    read = new Packet(request, MemCmd::ReadReq);
589    read->allocate();
590}
591
592bool
593Walker::WalkerState::recvPacket(PacketPtr pkt)
594{
595    assert(pkt->isResponse());
596    assert(inflight);
597    assert(state == Waiting);
598    inflight--;
599    if (pkt->isRead()) {
600        // should not have a pending read it we also had one outstanding
601        assert(!read);
602
603        // @todo someone should pay for this
604        pkt->headerDelay = pkt->payloadDelay = 0;
605
606        state = nextState;
607        nextState = Ready;
608        PacketPtr write = NULL;
609        read = pkt;
610        timingFault = stepWalk(write);
611        state = Waiting;
612        assert(timingFault == NoFault || read == NULL);
613        if (write) {
614            writes.push_back(write);
615        }
616        sendPackets();
617    } else {
618        sendPackets();
619    }
620    if (inflight == 0 && read == NULL && writes.size() == 0) {
621        state = Ready;
622        nextState = Waiting;
623        if (timingFault == NoFault) {
624            /*
625             * Finish the translation. Now that we know the right entry is
626             * in the TLB, this should work with no memory accesses.
627             * There could be new faults unrelated to the table walk like
628             * permissions violations, so we'll need the return value as
629             * well.
630             */
631            bool delayedResponse;
632            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
633                                                 delayedResponse, true);
634            assert(!delayedResponse);
635            // Let the CPU continue.
636            translation->finish(fault, req, tc, mode);
637        } else {
638            // There was a fault during the walk. Let the CPU know.
639            translation->finish(timingFault, req, tc, mode);
640        }
641        return true;
642    }
643
644    return false;
645}
646
647void
648Walker::WalkerState::sendPackets()
649{
650    //If we're already waiting for the port to become available, just return.
651    if (retrying)
652        return;
653
654    //Reads always have priority
655    if (read) {
656        PacketPtr pkt = read;
657        read = NULL;
658        inflight++;
659        if (!walker->sendTiming(this, pkt)) {
660            retrying = true;
661            read = pkt;
662            inflight--;
663            return;
664        }
665    }
666    //Send off as many of the writes as we can.
667    while (writes.size()) {
668        PacketPtr write = writes.back();
669        writes.pop_back();
670        inflight++;
671        if (!walker->sendTiming(this, write)) {
672            retrying = true;
673            writes.push_back(write);
674            inflight--;
675            return;
676        }
677    }
678}
679
680bool
681Walker::WalkerState::isRetrying()
682{
683    return retrying;
684}
685
686bool
687Walker::WalkerState::isTiming()
688{
689    return timing;
690}
691
692bool
693Walker::WalkerState::wasStarted()
694{
695    return started;
696}
697
698void
699Walker::WalkerState::retry()
700{
701    retrying = false;
702    sendPackets();
703}
704
705Fault
706Walker::WalkerState::pageFault(bool present)
707{
708    DPRINTF(PageTableWalker, "Raising page fault.\n");
709    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
710    if (mode == BaseTLB::Execute && !enableNX)
711        mode = BaseTLB::Read;
712    return std::make_shared<PageFault>(entry.vaddr, present, mode,
713                                       m5reg.cpl == 3, false);
714}
715
716/* end namespace X86ISA */ }
717
718X86ISA::Walker *
719X86PagetableWalkerParams::create()
720{
721    return new X86ISA::Walker(this);
722}
723