pagetable_walker.cc revision 10654:e49bf4884c59
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include <memory>
53
54#include "arch/x86/pagetable.hh"
55#include "arch/x86/pagetable_walker.hh"
56#include "arch/x86/tlb.hh"
57#include "arch/x86/vtophys.hh"
58#include "base/bitfield.hh"
59#include "base/trie.hh"
60#include "cpu/base.hh"
61#include "cpu/thread_context.hh"
62#include "debug/PageTableWalker.hh"
63#include "mem/packet_access.hh"
64#include "mem/request.hh"
65
66namespace X86ISA {
67
68Fault
69Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
70              RequestPtr _req, BaseTLB::Mode _mode)
71{
72    // TODO: in timing mode, instead of blocking when there are other
73    // outstanding requests, see if this request can be coalesced with
74    // another one (i.e. either coalesce or start walk)
75    WalkerState * newState = new WalkerState(this, _translation, _req);
76    newState->initState(_tc, _mode, sys->isTimingMode());
77    if (currStates.size()) {
78        assert(newState->isTiming());
79        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
80        currStates.push_back(newState);
81        return NoFault;
82    } else {
83        currStates.push_back(newState);
84        Fault fault = newState->startWalk();
85        if (!newState->isTiming()) {
86            currStates.pop_front();
87            delete newState;
88        }
89        return fault;
90    }
91}
92
93Fault
94Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
95              BaseTLB::Mode _mode)
96{
97    funcState.initState(_tc, _mode);
98    return funcState.startFunctional(addr, logBytes);
99}
100
101bool
102Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
103{
104    return walker->recvTimingResp(pkt);
105}
106
107bool
108Walker::recvTimingResp(PacketPtr pkt)
109{
110    WalkerSenderState * senderState =
111        dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
112    WalkerState * senderWalk = senderState->senderWalk;
113    bool walkComplete = senderWalk->recvPacket(pkt);
114    delete senderState;
115    if (walkComplete) {
116        std::list<WalkerState *>::iterator iter;
117        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
118            WalkerState * walkerState = *(iter);
119            if (walkerState == senderWalk) {
120                iter = currStates.erase(iter);
121                break;
122            }
123        }
124        delete senderWalk;
125        // Since we block requests when another is outstanding, we
126        // need to check if there is a waiting request to be serviced
127        if (currStates.size() && !startWalkWrapperEvent.scheduled())
128            // delay sending any new requests until we are finished
129            // with the responses
130            schedule(startWalkWrapperEvent, clockEdge());
131    }
132    return true;
133}
134
135void
136Walker::WalkerPort::recvRetry()
137{
138    walker->recvRetry();
139}
140
141void
142Walker::recvRetry()
143{
144    std::list<WalkerState *>::iterator iter;
145    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
146        WalkerState * walkerState = *(iter);
147        if (walkerState->isRetrying()) {
148            walkerState->retry();
149        }
150    }
151}
152
153bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
154{
155    WalkerSenderState* walker_state = new WalkerSenderState(sendingState);
156    pkt->pushSenderState(walker_state);
157    if (port.sendTimingReq(pkt)) {
158        return true;
159    } else {
160        // undo the adding of the sender state and delete it, as we
161        // will do it again the next time we attempt to send it
162        pkt->popSenderState();
163        delete walker_state;
164        return false;
165    }
166
167}
168
169BaseMasterPort &
170Walker::getMasterPort(const std::string &if_name, PortID idx)
171{
172    if (if_name == "port")
173        return port;
174    else
175        return MemObject::getMasterPort(if_name, idx);
176}
177
178void
179Walker::WalkerState::initState(ThreadContext * _tc,
180        BaseTLB::Mode _mode, bool _isTiming)
181{
182    assert(state == Ready);
183    started = false;
184    tc = _tc;
185    mode = _mode;
186    timing = _isTiming;
187}
188
189void
190Walker::startWalkWrapper()
191{
192    unsigned num_squashed = 0;
193    WalkerState *currState = currStates.front();
194    while ((num_squashed < numSquashable) && currState &&
195        currState->translation->squashed()) {
196        currStates.pop_front();
197        num_squashed++;
198
199        DPRINTF(PageTableWalker, "Squashing table walk for address %#x\n",
200            currState->req->getVaddr());
201
202        // finish the translation which will delete the translation object
203        currState->translation->finish(
204            std::make_shared<UnimpFault>("Squashed Inst"),
205            currState->req, currState->tc, currState->mode);
206
207        // delete the current request
208        delete currState;
209
210        // check the next translation request, if it exists
211        if (currStates.size())
212            currState = currStates.front();
213        else
214            currState = NULL;
215    }
216    if (currState && !currState->wasStarted())
217        currState->startWalk();
218}
219
220Fault
221Walker::WalkerState::startWalk()
222{
223    Fault fault = NoFault;
224    assert(!started);
225    started = true;
226    setupWalk(req->getVaddr());
227    if (timing) {
228        nextState = state;
229        state = Waiting;
230        timingFault = NoFault;
231        sendPackets();
232    } else {
233        do {
234            walker->port.sendAtomic(read);
235            PacketPtr write = NULL;
236            fault = stepWalk(write);
237            assert(fault == NoFault || read == NULL);
238            state = nextState;
239            nextState = Ready;
240            if (write)
241                walker->port.sendAtomic(write);
242        } while(read);
243        state = Ready;
244        nextState = Waiting;
245    }
246    return fault;
247}
248
249Fault
250Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
251{
252    Fault fault = NoFault;
253    assert(!started);
254    started = true;
255    setupWalk(addr);
256
257    do {
258        walker->port.sendFunctional(read);
259        // On a functional access (page table lookup), writes should
260        // not happen so this pointer is ignored after stepWalk
261        PacketPtr write = NULL;
262        fault = stepWalk(write);
263        assert(fault == NoFault || read == NULL);
264        state = nextState;
265        nextState = Ready;
266    } while(read);
267    logBytes = entry.logBytes;
268    addr = entry.paddr;
269
270    return fault;
271}
272
273Fault
274Walker::WalkerState::stepWalk(PacketPtr &write)
275{
276    assert(state != Ready && state != Waiting);
277    Fault fault = NoFault;
278    write = NULL;
279    PageTableEntry pte;
280    if (dataSize == 8)
281        pte = read->get<uint64_t>();
282    else
283        pte = read->get<uint32_t>();
284    VAddr vaddr = entry.vaddr;
285    bool uncacheable = pte.pcd;
286    Addr nextRead = 0;
287    bool doWrite = false;
288    bool doTLBInsert = false;
289    bool doEndWalk = false;
290    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
291    switch(state) {
292      case LongPML4:
293        DPRINTF(PageTableWalker,
294                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
295        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
296        doWrite = !pte.a;
297        pte.a = 1;
298        entry.writable = pte.w;
299        entry.user = pte.u;
300        if (badNX || !pte.p) {
301            doEndWalk = true;
302            fault = pageFault(pte.p);
303            break;
304        }
305        entry.noExec = pte.nx;
306        nextState = LongPDP;
307        break;
308      case LongPDP:
309        DPRINTF(PageTableWalker,
310                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
311        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
312        doWrite = !pte.a;
313        pte.a = 1;
314        entry.writable = entry.writable && pte.w;
315        entry.user = entry.user && pte.u;
316        if (badNX || !pte.p) {
317            doEndWalk = true;
318            fault = pageFault(pte.p);
319            break;
320        }
321        nextState = LongPD;
322        break;
323      case LongPD:
324        DPRINTF(PageTableWalker,
325                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
326        doWrite = !pte.a;
327        pte.a = 1;
328        entry.writable = entry.writable && pte.w;
329        entry.user = entry.user && pte.u;
330        if (badNX || !pte.p) {
331            doEndWalk = true;
332            fault = pageFault(pte.p);
333            break;
334        }
335        if (!pte.ps) {
336            // 4 KB page
337            entry.logBytes = 12;
338            nextRead =
339                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
340            nextState = LongPTE;
341            break;
342        } else {
343            // 2 MB page
344            entry.logBytes = 21;
345            entry.paddr = (uint64_t)pte & (mask(31) << 21);
346            entry.uncacheable = uncacheable;
347            entry.global = pte.g;
348            entry.patBit = bits(pte, 12);
349            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
350            doTLBInsert = true;
351            doEndWalk = true;
352            break;
353        }
354      case LongPTE:
355        DPRINTF(PageTableWalker,
356                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
357        doWrite = !pte.a;
358        pte.a = 1;
359        entry.writable = entry.writable && pte.w;
360        entry.user = entry.user && pte.u;
361        if (badNX || !pte.p) {
362            doEndWalk = true;
363            fault = pageFault(pte.p);
364            break;
365        }
366        entry.paddr = (uint64_t)pte & (mask(40) << 12);
367        entry.uncacheable = uncacheable;
368        entry.global = pte.g;
369        entry.patBit = bits(pte, 12);
370        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
371        doTLBInsert = true;
372        doEndWalk = true;
373        break;
374      case PAEPDP:
375        DPRINTF(PageTableWalker,
376                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
377        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
378        if (!pte.p) {
379            doEndWalk = true;
380            fault = pageFault(pte.p);
381            break;
382        }
383        nextState = PAEPD;
384        break;
385      case PAEPD:
386        DPRINTF(PageTableWalker,
387                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
388        doWrite = !pte.a;
389        pte.a = 1;
390        entry.writable = pte.w;
391        entry.user = pte.u;
392        if (badNX || !pte.p) {
393            doEndWalk = true;
394            fault = pageFault(pte.p);
395            break;
396        }
397        if (!pte.ps) {
398            // 4 KB page
399            entry.logBytes = 12;
400            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
401            nextState = PAEPTE;
402            break;
403        } else {
404            // 2 MB page
405            entry.logBytes = 21;
406            entry.paddr = (uint64_t)pte & (mask(31) << 21);
407            entry.uncacheable = uncacheable;
408            entry.global = pte.g;
409            entry.patBit = bits(pte, 12);
410            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
411            doTLBInsert = true;
412            doEndWalk = true;
413            break;
414        }
415      case PAEPTE:
416        DPRINTF(PageTableWalker,
417                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
418        doWrite = !pte.a;
419        pte.a = 1;
420        entry.writable = entry.writable && pte.w;
421        entry.user = entry.user && pte.u;
422        if (badNX || !pte.p) {
423            doEndWalk = true;
424            fault = pageFault(pte.p);
425            break;
426        }
427        entry.paddr = (uint64_t)pte & (mask(40) << 12);
428        entry.uncacheable = uncacheable;
429        entry.global = pte.g;
430        entry.patBit = bits(pte, 7);
431        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
432        doTLBInsert = true;
433        doEndWalk = true;
434        break;
435      case PSEPD:
436        DPRINTF(PageTableWalker,
437                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
438        doWrite = !pte.a;
439        pte.a = 1;
440        entry.writable = pte.w;
441        entry.user = pte.u;
442        if (!pte.p) {
443            doEndWalk = true;
444            fault = pageFault(pte.p);
445            break;
446        }
447        if (!pte.ps) {
448            // 4 KB page
449            entry.logBytes = 12;
450            nextRead =
451                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
452            nextState = PTE;
453            break;
454        } else {
455            // 4 MB page
456            entry.logBytes = 21;
457            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
458            entry.uncacheable = uncacheable;
459            entry.global = pte.g;
460            entry.patBit = bits(pte, 12);
461            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
462            doTLBInsert = true;
463            doEndWalk = true;
464            break;
465        }
466      case PD:
467        DPRINTF(PageTableWalker,
468                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
469        doWrite = !pte.a;
470        pte.a = 1;
471        entry.writable = pte.w;
472        entry.user = pte.u;
473        if (!pte.p) {
474            doEndWalk = true;
475            fault = pageFault(pte.p);
476            break;
477        }
478        // 4 KB page
479        entry.logBytes = 12;
480        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
481        nextState = PTE;
482        break;
483      case PTE:
484        DPRINTF(PageTableWalker,
485                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
486        doWrite = !pte.a;
487        pte.a = 1;
488        entry.writable = pte.w;
489        entry.user = pte.u;
490        if (!pte.p) {
491            doEndWalk = true;
492            fault = pageFault(pte.p);
493            break;
494        }
495        entry.paddr = (uint64_t)pte & (mask(20) << 12);
496        entry.uncacheable = uncacheable;
497        entry.global = pte.g;
498        entry.patBit = bits(pte, 7);
499        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
500        doTLBInsert = true;
501        doEndWalk = true;
502        break;
503      default:
504        panic("Unknown page table walker state %d!\n");
505    }
506    if (doEndWalk) {
507        if (doTLBInsert)
508            if (!functional)
509                walker->tlb->insert(entry.vaddr, entry);
510        endWalk();
511    } else {
512        PacketPtr oldRead = read;
513        //If we didn't return, we're setting up another read.
514        Request::Flags flags = oldRead->req->getFlags();
515        flags.set(Request::UNCACHEABLE, uncacheable);
516        RequestPtr request =
517            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
518        read = new Packet(request, MemCmd::ReadReq);
519        read->allocate();
520        // If we need to write, adjust the read packet to write the modified
521        // value back to memory.
522        if (doWrite) {
523            write = oldRead;
524            write->set<uint64_t>(pte);
525            write->cmd = MemCmd::WriteReq;
526            write->clearDest();
527        } else {
528            write = NULL;
529            delete oldRead->req;
530            delete oldRead;
531        }
532    }
533    return fault;
534}
535
536void
537Walker::WalkerState::endWalk()
538{
539    nextState = Ready;
540    delete read->req;
541    delete read;
542    read = NULL;
543}
544
545void
546Walker::WalkerState::setupWalk(Addr vaddr)
547{
548    VAddr addr = vaddr;
549    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
550    // Check if we're in long mode or not
551    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
552    dataSize = 8;
553    Addr topAddr;
554    if (efer.lma) {
555        // Do long mode.
556        state = LongPML4;
557        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
558        enableNX = efer.nxe;
559    } else {
560        // We're in some flavor of legacy mode.
561        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
562        if (cr4.pae) {
563            // Do legacy PAE.
564            state = PAEPDP;
565            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
566            enableNX = efer.nxe;
567        } else {
568            dataSize = 4;
569            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
570            if (cr4.pse) {
571                // Do legacy PSE.
572                state = PSEPD;
573            } else {
574                // Do legacy non PSE.
575                state = PD;
576            }
577            enableNX = false;
578        }
579    }
580
581    nextState = Ready;
582    entry.vaddr = vaddr;
583
584    Request::Flags flags = Request::PHYSICAL;
585    if (cr3.pcd)
586        flags.set(Request::UNCACHEABLE);
587    RequestPtr request = new Request(topAddr, dataSize, flags,
588                                     walker->masterId);
589    read = new Packet(request, MemCmd::ReadReq);
590    read->allocate();
591}
592
593bool
594Walker::WalkerState::recvPacket(PacketPtr pkt)
595{
596    assert(pkt->isResponse());
597    assert(inflight);
598    assert(state == Waiting);
599    inflight--;
600    if (pkt->isRead()) {
601        // should not have a pending read it we also had one outstanding
602        assert(!read);
603
604        // @todo someone should pay for this
605        pkt->firstWordDelay = pkt->lastWordDelay = 0;
606
607        state = nextState;
608        nextState = Ready;
609        PacketPtr write = NULL;
610        read = pkt;
611        timingFault = stepWalk(write);
612        state = Waiting;
613        assert(timingFault == NoFault || read == NULL);
614        if (write) {
615            writes.push_back(write);
616        }
617        sendPackets();
618    } else {
619        sendPackets();
620    }
621    if (inflight == 0 && read == NULL && writes.size() == 0) {
622        state = Ready;
623        nextState = Waiting;
624        if (timingFault == NoFault) {
625            /*
626             * Finish the translation. Now that we now the right entry is
627             * in the TLB, this should work with no memory accesses.
628             * There could be new faults unrelated to the table walk like
629             * permissions violations, so we'll need the return value as
630             * well.
631             */
632            bool delayedResponse;
633            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
634                                                 delayedResponse, true);
635            assert(!delayedResponse);
636            // Let the CPU continue.
637            translation->finish(fault, req, tc, mode);
638        } else {
639            // There was a fault during the walk. Let the CPU know.
640            translation->finish(timingFault, req, tc, mode);
641        }
642        return true;
643    }
644
645    return false;
646}
647
648void
649Walker::WalkerState::sendPackets()
650{
651    //If we're already waiting for the port to become available, just return.
652    if (retrying)
653        return;
654
655    //Reads always have priority
656    if (read) {
657        PacketPtr pkt = read;
658        read = NULL;
659        inflight++;
660        if (!walker->sendTiming(this, pkt)) {
661            retrying = true;
662            read = pkt;
663            inflight--;
664            return;
665        }
666    }
667    //Send off as many of the writes as we can.
668    while (writes.size()) {
669        PacketPtr write = writes.back();
670        writes.pop_back();
671        inflight++;
672        if (!walker->sendTiming(this, write)) {
673            retrying = true;
674            writes.push_back(write);
675            inflight--;
676            return;
677        }
678    }
679}
680
681bool
682Walker::WalkerState::isRetrying()
683{
684    return retrying;
685}
686
687bool
688Walker::WalkerState::isTiming()
689{
690    return timing;
691}
692
693bool
694Walker::WalkerState::wasStarted()
695{
696    return started;
697}
698
699void
700Walker::WalkerState::retry()
701{
702    retrying = false;
703    sendPackets();
704}
705
706Fault
707Walker::WalkerState::pageFault(bool present)
708{
709    DPRINTF(PageTableWalker, "Raising page fault.\n");
710    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
711    if (mode == BaseTLB::Execute && !enableNX)
712        mode = BaseTLB::Read;
713    return std::make_shared<PageFault>(entry.vaddr, present, mode,
714                                       m5reg.cpl == 3, false);
715}
716
717/* end namespace X86ISA */ }
718
719X86ISA::Walker *
720X86PagetableWalkerParams::create()
721{
722    return new X86ISA::Walker(this);
723}
724