pagetable_walker.cc revision 10474:799c8ee4ecba
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include <memory>
53
54#include "arch/x86/pagetable.hh"
55#include "arch/x86/pagetable_walker.hh"
56#include "arch/x86/tlb.hh"
57#include "arch/x86/vtophys.hh"
58#include "base/bitfield.hh"
59#include "base/trie.hh"
60#include "cpu/base.hh"
61#include "cpu/thread_context.hh"
62#include "debug/PageTableWalker.hh"
63#include "mem/packet_access.hh"
64#include "mem/request.hh"
65
66namespace X86ISA {
67
68Fault
69Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
70              RequestPtr _req, BaseTLB::Mode _mode)
71{
72    // TODO: in timing mode, instead of blocking when there are other
73    // outstanding requests, see if this request can be coalesced with
74    // another one (i.e. either coalesce or start walk)
75    WalkerState * newState = new WalkerState(this, _translation, _req);
76    newState->initState(_tc, _mode, sys->isTimingMode());
77    if (currStates.size()) {
78        assert(newState->isTiming());
79        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
80        currStates.push_back(newState);
81        return NoFault;
82    } else {
83        currStates.push_back(newState);
84        Fault fault = newState->startWalk();
85        if (!newState->isTiming()) {
86            currStates.pop_front();
87            delete newState;
88        }
89        return fault;
90    }
91}
92
93Fault
94Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
95              BaseTLB::Mode _mode)
96{
97    funcState.initState(_tc, _mode);
98    return funcState.startFunctional(addr, logBytes);
99}
100
101bool
102Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
103{
104    return walker->recvTimingResp(pkt);
105}
106
107bool
108Walker::recvTimingResp(PacketPtr pkt)
109{
110    WalkerSenderState * senderState =
111        dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
112    WalkerState * senderWalk = senderState->senderWalk;
113    bool walkComplete = senderWalk->recvPacket(pkt);
114    delete senderState;
115    if (walkComplete) {
116        std::list<WalkerState *>::iterator iter;
117        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
118            WalkerState * walkerState = *(iter);
119            if (walkerState == senderWalk) {
120                iter = currStates.erase(iter);
121                break;
122            }
123        }
124        delete senderWalk;
125        // Since we block requests when another is outstanding, we
126        // need to check if there is a waiting request to be serviced
127        if (currStates.size())
128            startWalkWrapper();
129    }
130    return true;
131}
132
133void
134Walker::WalkerPort::recvRetry()
135{
136    walker->recvRetry();
137}
138
139void
140Walker::recvRetry()
141{
142    std::list<WalkerState *>::iterator iter;
143    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
144        WalkerState * walkerState = *(iter);
145        if (walkerState->isRetrying()) {
146            walkerState->retry();
147        }
148    }
149}
150
151bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
152{
153    WalkerSenderState* walker_state = new WalkerSenderState(sendingState);
154    pkt->pushSenderState(walker_state);
155    if (port.sendTimingReq(pkt)) {
156        return true;
157    } else {
158        // undo the adding of the sender state and delete it, as we
159        // will do it again the next time we attempt to send it
160        pkt->popSenderState();
161        delete walker_state;
162        return false;
163    }
164
165}
166
167BaseMasterPort &
168Walker::getMasterPort(const std::string &if_name, PortID idx)
169{
170    if (if_name == "port")
171        return port;
172    else
173        return MemObject::getMasterPort(if_name, idx);
174}
175
176void
177Walker::WalkerState::initState(ThreadContext * _tc,
178        BaseTLB::Mode _mode, bool _isTiming)
179{
180    assert(state == Ready);
181    started = false;
182    tc = _tc;
183    mode = _mode;
184    timing = _isTiming;
185}
186
187void
188Walker::startWalkWrapper()
189{
190    unsigned num_squashed = 0;
191    WalkerState *currState = currStates.front();
192    while ((num_squashed < numSquashable) && currState &&
193        currState->translation->squashed()) {
194        currStates.pop_front();
195        num_squashed++;
196
197        DPRINTF(PageTableWalker, "Squashing table walk for address %#x\n",
198            currState->req->getVaddr());
199
200        // finish the translation which will delete the translation object
201        currState->translation->finish(
202            std::make_shared<UnimpFault>("Squashed Inst"),
203            currState->req, currState->tc, currState->mode);
204
205        // delete the current request
206        delete currState;
207
208        // check the next translation request, if it exists
209        if (currStates.size())
210            currState = currStates.front();
211        else
212            currState = NULL;
213    }
214    if (currState && !currState->wasStarted())
215        currState->startWalk();
216}
217
218Fault
219Walker::WalkerState::startWalk()
220{
221    Fault fault = NoFault;
222    assert(!started);
223    started = true;
224    setupWalk(req->getVaddr());
225    if (timing) {
226        nextState = state;
227        state = Waiting;
228        timingFault = NoFault;
229        sendPackets();
230    } else {
231        do {
232            walker->port.sendAtomic(read);
233            PacketPtr write = NULL;
234            fault = stepWalk(write);
235            assert(fault == NoFault || read == NULL);
236            state = nextState;
237            nextState = Ready;
238            if (write)
239                walker->port.sendAtomic(write);
240        } while(read);
241        state = Ready;
242        nextState = Waiting;
243    }
244    return fault;
245}
246
247Fault
248Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
249{
250    Fault fault = NoFault;
251    assert(!started);
252    started = true;
253    setupWalk(addr);
254
255    do {
256        walker->port.sendFunctional(read);
257        // On a functional access (page table lookup), writes should
258        // not happen so this pointer is ignored after stepWalk
259        PacketPtr write = NULL;
260        fault = stepWalk(write);
261        assert(fault == NoFault || read == NULL);
262        state = nextState;
263        nextState = Ready;
264    } while(read);
265    logBytes = entry.logBytes;
266    addr = entry.paddr;
267
268    return fault;
269}
270
271Fault
272Walker::WalkerState::stepWalk(PacketPtr &write)
273{
274    assert(state != Ready && state != Waiting);
275    Fault fault = NoFault;
276    write = NULL;
277    PageTableEntry pte;
278    if (dataSize == 8)
279        pte = read->get<uint64_t>();
280    else
281        pte = read->get<uint32_t>();
282    VAddr vaddr = entry.vaddr;
283    bool uncacheable = pte.pcd;
284    Addr nextRead = 0;
285    bool doWrite = false;
286    bool doTLBInsert = false;
287    bool doEndWalk = false;
288    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
289    switch(state) {
290      case LongPML4:
291        DPRINTF(PageTableWalker,
292                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
293        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
294        doWrite = !pte.a;
295        pte.a = 1;
296        entry.writable = pte.w;
297        entry.user = pte.u;
298        if (badNX || !pte.p) {
299            doEndWalk = true;
300            fault = pageFault(pte.p);
301            break;
302        }
303        entry.noExec = pte.nx;
304        nextState = LongPDP;
305        break;
306      case LongPDP:
307        DPRINTF(PageTableWalker,
308                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
309        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
310        doWrite = !pte.a;
311        pte.a = 1;
312        entry.writable = entry.writable && pte.w;
313        entry.user = entry.user && pte.u;
314        if (badNX || !pte.p) {
315            doEndWalk = true;
316            fault = pageFault(pte.p);
317            break;
318        }
319        nextState = LongPD;
320        break;
321      case LongPD:
322        DPRINTF(PageTableWalker,
323                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
324        doWrite = !pte.a;
325        pte.a = 1;
326        entry.writable = entry.writable && pte.w;
327        entry.user = entry.user && pte.u;
328        if (badNX || !pte.p) {
329            doEndWalk = true;
330            fault = pageFault(pte.p);
331            break;
332        }
333        if (!pte.ps) {
334            // 4 KB page
335            entry.logBytes = 12;
336            nextRead =
337                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
338            nextState = LongPTE;
339            break;
340        } else {
341            // 2 MB page
342            entry.logBytes = 21;
343            entry.paddr = (uint64_t)pte & (mask(31) << 21);
344            entry.uncacheable = uncacheable;
345            entry.global = pte.g;
346            entry.patBit = bits(pte, 12);
347            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
348            doTLBInsert = true;
349            doEndWalk = true;
350            break;
351        }
352      case LongPTE:
353        DPRINTF(PageTableWalker,
354                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
355        doWrite = !pte.a;
356        pte.a = 1;
357        entry.writable = entry.writable && pte.w;
358        entry.user = entry.user && pte.u;
359        if (badNX || !pte.p) {
360            doEndWalk = true;
361            fault = pageFault(pte.p);
362            break;
363        }
364        entry.paddr = (uint64_t)pte & (mask(40) << 12);
365        entry.uncacheable = uncacheable;
366        entry.global = pte.g;
367        entry.patBit = bits(pte, 12);
368        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
369        doTLBInsert = true;
370        doEndWalk = true;
371        break;
372      case PAEPDP:
373        DPRINTF(PageTableWalker,
374                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
375        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
376        if (!pte.p) {
377            doEndWalk = true;
378            fault = pageFault(pte.p);
379            break;
380        }
381        nextState = PAEPD;
382        break;
383      case PAEPD:
384        DPRINTF(PageTableWalker,
385                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
386        doWrite = !pte.a;
387        pte.a = 1;
388        entry.writable = pte.w;
389        entry.user = pte.u;
390        if (badNX || !pte.p) {
391            doEndWalk = true;
392            fault = pageFault(pte.p);
393            break;
394        }
395        if (!pte.ps) {
396            // 4 KB page
397            entry.logBytes = 12;
398            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
399            nextState = PAEPTE;
400            break;
401        } else {
402            // 2 MB page
403            entry.logBytes = 21;
404            entry.paddr = (uint64_t)pte & (mask(31) << 21);
405            entry.uncacheable = uncacheable;
406            entry.global = pte.g;
407            entry.patBit = bits(pte, 12);
408            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
409            doTLBInsert = true;
410            doEndWalk = true;
411            break;
412        }
413      case PAEPTE:
414        DPRINTF(PageTableWalker,
415                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
416        doWrite = !pte.a;
417        pte.a = 1;
418        entry.writable = entry.writable && pte.w;
419        entry.user = entry.user && pte.u;
420        if (badNX || !pte.p) {
421            doEndWalk = true;
422            fault = pageFault(pte.p);
423            break;
424        }
425        entry.paddr = (uint64_t)pte & (mask(40) << 12);
426        entry.uncacheable = uncacheable;
427        entry.global = pte.g;
428        entry.patBit = bits(pte, 7);
429        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
430        doTLBInsert = true;
431        doEndWalk = true;
432        break;
433      case PSEPD:
434        DPRINTF(PageTableWalker,
435                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
436        doWrite = !pte.a;
437        pte.a = 1;
438        entry.writable = pte.w;
439        entry.user = pte.u;
440        if (!pte.p) {
441            doEndWalk = true;
442            fault = pageFault(pte.p);
443            break;
444        }
445        if (!pte.ps) {
446            // 4 KB page
447            entry.logBytes = 12;
448            nextRead =
449                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
450            nextState = PTE;
451            break;
452        } else {
453            // 4 MB page
454            entry.logBytes = 21;
455            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
456            entry.uncacheable = uncacheable;
457            entry.global = pte.g;
458            entry.patBit = bits(pte, 12);
459            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
460            doTLBInsert = true;
461            doEndWalk = true;
462            break;
463        }
464      case PD:
465        DPRINTF(PageTableWalker,
466                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
467        doWrite = !pte.a;
468        pte.a = 1;
469        entry.writable = pte.w;
470        entry.user = pte.u;
471        if (!pte.p) {
472            doEndWalk = true;
473            fault = pageFault(pte.p);
474            break;
475        }
476        // 4 KB page
477        entry.logBytes = 12;
478        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
479        nextState = PTE;
480        break;
481      case PTE:
482        DPRINTF(PageTableWalker,
483                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
484        doWrite = !pte.a;
485        pte.a = 1;
486        entry.writable = pte.w;
487        entry.user = pte.u;
488        if (!pte.p) {
489            doEndWalk = true;
490            fault = pageFault(pte.p);
491            break;
492        }
493        entry.paddr = (uint64_t)pte & (mask(20) << 12);
494        entry.uncacheable = uncacheable;
495        entry.global = pte.g;
496        entry.patBit = bits(pte, 7);
497        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
498        doTLBInsert = true;
499        doEndWalk = true;
500        break;
501      default:
502        panic("Unknown page table walker state %d!\n");
503    }
504    if (doEndWalk) {
505        if (doTLBInsert)
506            if (!functional)
507                walker->tlb->insert(entry.vaddr, entry);
508        endWalk();
509    } else {
510        PacketPtr oldRead = read;
511        //If we didn't return, we're setting up another read.
512        Request::Flags flags = oldRead->req->getFlags();
513        flags.set(Request::UNCACHEABLE, uncacheable);
514        RequestPtr request =
515            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
516        read = new Packet(request, MemCmd::ReadReq);
517        read->allocate();
518        // If we need to write, adjust the read packet to write the modified
519        // value back to memory.
520        if (doWrite) {
521            write = oldRead;
522            write->set<uint64_t>(pte);
523            write->cmd = MemCmd::WriteReq;
524            write->clearDest();
525        } else {
526            write = NULL;
527            delete oldRead->req;
528            delete oldRead;
529        }
530    }
531    return fault;
532}
533
534void
535Walker::WalkerState::endWalk()
536{
537    nextState = Ready;
538    delete read->req;
539    delete read;
540    read = NULL;
541}
542
543void
544Walker::WalkerState::setupWalk(Addr vaddr)
545{
546    VAddr addr = vaddr;
547    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
548    // Check if we're in long mode or not
549    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
550    dataSize = 8;
551    Addr topAddr;
552    if (efer.lma) {
553        // Do long mode.
554        state = LongPML4;
555        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
556        enableNX = efer.nxe;
557    } else {
558        // We're in some flavor of legacy mode.
559        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
560        if (cr4.pae) {
561            // Do legacy PAE.
562            state = PAEPDP;
563            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
564            enableNX = efer.nxe;
565        } else {
566            dataSize = 4;
567            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
568            if (cr4.pse) {
569                // Do legacy PSE.
570                state = PSEPD;
571            } else {
572                // Do legacy non PSE.
573                state = PD;
574            }
575            enableNX = false;
576        }
577    }
578
579    nextState = Ready;
580    entry.vaddr = vaddr;
581
582    Request::Flags flags = Request::PHYSICAL;
583    if (cr3.pcd)
584        flags.set(Request::UNCACHEABLE);
585    RequestPtr request = new Request(topAddr, dataSize, flags,
586                                     walker->masterId);
587    read = new Packet(request, MemCmd::ReadReq);
588    read->allocate();
589}
590
591bool
592Walker::WalkerState::recvPacket(PacketPtr pkt)
593{
594    assert(pkt->isResponse());
595    assert(inflight);
596    assert(state == Waiting);
597    inflight--;
598    if (pkt->isRead()) {
599        // should not have a pending read it we also had one outstanding
600        assert(!read);
601
602        // @todo someone should pay for this
603        pkt->firstWordDelay = pkt->lastWordDelay = 0;
604
605        state = nextState;
606        nextState = Ready;
607        PacketPtr write = NULL;
608        read = pkt;
609        timingFault = stepWalk(write);
610        state = Waiting;
611        assert(timingFault == NoFault || read == NULL);
612        if (write) {
613            writes.push_back(write);
614        }
615        sendPackets();
616    } else {
617        sendPackets();
618    }
619    if (inflight == 0 && read == NULL && writes.size() == 0) {
620        state = Ready;
621        nextState = Waiting;
622        if (timingFault == NoFault) {
623            /*
624             * Finish the translation. Now that we now the right entry is
625             * in the TLB, this should work with no memory accesses.
626             * There could be new faults unrelated to the table walk like
627             * permissions violations, so we'll need the return value as
628             * well.
629             */
630            bool delayedResponse;
631            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
632                                                 delayedResponse, true);
633            assert(!delayedResponse);
634            // Let the CPU continue.
635            translation->finish(fault, req, tc, mode);
636        } else {
637            // There was a fault during the walk. Let the CPU know.
638            translation->finish(timingFault, req, tc, mode);
639        }
640        return true;
641    }
642
643    return false;
644}
645
646void
647Walker::WalkerState::sendPackets()
648{
649    //If we're already waiting for the port to become available, just return.
650    if (retrying)
651        return;
652
653    //Reads always have priority
654    if (read) {
655        PacketPtr pkt = read;
656        read = NULL;
657        inflight++;
658        if (!walker->sendTiming(this, pkt)) {
659            retrying = true;
660            read = pkt;
661            inflight--;
662            return;
663        }
664    }
665    //Send off as many of the writes as we can.
666    while (writes.size()) {
667        PacketPtr write = writes.back();
668        writes.pop_back();
669        inflight++;
670        if (!walker->sendTiming(this, write)) {
671            retrying = true;
672            writes.push_back(write);
673            inflight--;
674            return;
675        }
676    }
677}
678
679bool
680Walker::WalkerState::isRetrying()
681{
682    return retrying;
683}
684
685bool
686Walker::WalkerState::isTiming()
687{
688    return timing;
689}
690
691bool
692Walker::WalkerState::wasStarted()
693{
694    return started;
695}
696
697void
698Walker::WalkerState::retry()
699{
700    retrying = false;
701    sendPackets();
702}
703
704Fault
705Walker::WalkerState::pageFault(bool present)
706{
707    DPRINTF(PageTableWalker, "Raising page fault.\n");
708    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
709    if (mode == BaseTLB::Execute && !enableNX)
710        mode = BaseTLB::Read;
711    return std::make_shared<PageFault>(entry.vaddr, present, mode,
712                                       m5reg.cpl == 3, false);
713}
714
715/* end namespace X86ISA */ }
716
717X86ISA::Walker *
718X86PagetableWalkerParams::create()
719{
720    return new X86ISA::Walker(this);
721}
722