pagetable_walker.cc revision 10405:7a618c07e663
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include "arch/x86/pagetable.hh"
53#include "arch/x86/pagetable_walker.hh"
54#include "arch/x86/tlb.hh"
55#include "arch/x86/vtophys.hh"
56#include "base/bitfield.hh"
57#include "base/trie.hh"
58#include "cpu/base.hh"
59#include "cpu/thread_context.hh"
60#include "debug/PageTableWalker.hh"
61#include "mem/packet_access.hh"
62#include "mem/request.hh"
63
64namespace X86ISA {
65
66Fault
67Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
68              RequestPtr _req, BaseTLB::Mode _mode)
69{
70    // TODO: in timing mode, instead of blocking when there are other
71    // outstanding requests, see if this request can be coalesced with
72    // another one (i.e. either coalesce or start walk)
73    WalkerState * newState = new WalkerState(this, _translation, _req);
74    newState->initState(_tc, _mode, sys->isTimingMode());
75    if (currStates.size()) {
76        assert(newState->isTiming());
77        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
78        currStates.push_back(newState);
79        return NoFault;
80    } else {
81        currStates.push_back(newState);
82        Fault fault = newState->startWalk();
83        if (!newState->isTiming()) {
84            currStates.pop_front();
85            delete newState;
86        }
87        return fault;
88    }
89}
90
91Fault
92Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
93              BaseTLB::Mode _mode)
94{
95    funcState.initState(_tc, _mode);
96    return funcState.startFunctional(addr, logBytes);
97}
98
99bool
100Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
101{
102    return walker->recvTimingResp(pkt);
103}
104
105bool
106Walker::recvTimingResp(PacketPtr pkt)
107{
108    WalkerSenderState * senderState =
109        dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
110    WalkerState * senderWalk = senderState->senderWalk;
111    bool walkComplete = senderWalk->recvPacket(pkt);
112    delete senderState;
113    if (walkComplete) {
114        std::list<WalkerState *>::iterator iter;
115        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
116            WalkerState * walkerState = *(iter);
117            if (walkerState == senderWalk) {
118                iter = currStates.erase(iter);
119                break;
120            }
121        }
122        delete senderWalk;
123        // Since we block requests when another is outstanding, we
124        // need to check if there is a waiting request to be serviced
125        if (currStates.size())
126            startWalkWrapper();
127    }
128    return true;
129}
130
131void
132Walker::WalkerPort::recvRetry()
133{
134    walker->recvRetry();
135}
136
137void
138Walker::recvRetry()
139{
140    std::list<WalkerState *>::iterator iter;
141    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
142        WalkerState * walkerState = *(iter);
143        if (walkerState->isRetrying()) {
144            walkerState->retry();
145        }
146    }
147}
148
149bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
150{
151    WalkerSenderState* walker_state = new WalkerSenderState(sendingState);
152    pkt->pushSenderState(walker_state);
153    if (port.sendTimingReq(pkt)) {
154        return true;
155    } else {
156        // undo the adding of the sender state and delete it, as we
157        // will do it again the next time we attempt to send it
158        pkt->popSenderState();
159        delete walker_state;
160        return false;
161    }
162
163}
164
165BaseMasterPort &
166Walker::getMasterPort(const std::string &if_name, PortID idx)
167{
168    if (if_name == "port")
169        return port;
170    else
171        return MemObject::getMasterPort(if_name, idx);
172}
173
174void
175Walker::WalkerState::initState(ThreadContext * _tc,
176        BaseTLB::Mode _mode, bool _isTiming)
177{
178    assert(state == Ready);
179    started = false;
180    tc = _tc;
181    mode = _mode;
182    timing = _isTiming;
183}
184
185void
186Walker::startWalkWrapper()
187{
188    unsigned num_squashed = 0;
189    WalkerState *currState = currStates.front();
190    while ((num_squashed < numSquashable) && currState &&
191        currState->translation->squashed()) {
192        currStates.pop_front();
193        num_squashed++;
194
195        DPRINTF(PageTableWalker, "Squashing table walk for address %#x\n",
196            currState->req->getVaddr());
197
198        // finish the translation which will delete the translation object
199        currState->translation->finish(new UnimpFault("Squashed Inst"),
200                currState->req, currState->tc, currState->mode);
201
202        // delete the current request
203        delete currState;
204
205        // check the next translation request, if it exists
206        if (currStates.size())
207            currState = currStates.front();
208        else
209            currState = NULL;
210    }
211    if (currState && !currState->wasStarted())
212        currState->startWalk();
213}
214
215Fault
216Walker::WalkerState::startWalk()
217{
218    Fault fault = NoFault;
219    assert(!started);
220    started = true;
221    setupWalk(req->getVaddr());
222    if (timing) {
223        nextState = state;
224        state = Waiting;
225        timingFault = NoFault;
226        sendPackets();
227    } else {
228        do {
229            walker->port.sendAtomic(read);
230            PacketPtr write = NULL;
231            fault = stepWalk(write);
232            assert(fault == NoFault || read == NULL);
233            state = nextState;
234            nextState = Ready;
235            if (write)
236                walker->port.sendAtomic(write);
237        } while(read);
238        state = Ready;
239        nextState = Waiting;
240    }
241    return fault;
242}
243
244Fault
245Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
246{
247    Fault fault = NoFault;
248    assert(!started);
249    started = true;
250    setupWalk(addr);
251
252    do {
253        walker->port.sendFunctional(read);
254        // On a functional access (page table lookup), writes should
255        // not happen so this pointer is ignored after stepWalk
256        PacketPtr write = NULL;
257        fault = stepWalk(write);
258        assert(fault == NoFault || read == NULL);
259        state = nextState;
260        nextState = Ready;
261    } while(read);
262    logBytes = entry.logBytes;
263    addr = entry.paddr;
264
265    return fault;
266}
267
268Fault
269Walker::WalkerState::stepWalk(PacketPtr &write)
270{
271    assert(state != Ready && state != Waiting);
272    Fault fault = NoFault;
273    write = NULL;
274    PageTableEntry pte;
275    if (dataSize == 8)
276        pte = read->get<uint64_t>();
277    else
278        pte = read->get<uint32_t>();
279    VAddr vaddr = entry.vaddr;
280    bool uncacheable = pte.pcd;
281    Addr nextRead = 0;
282    bool doWrite = false;
283    bool doTLBInsert = false;
284    bool doEndWalk = false;
285    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
286    switch(state) {
287      case LongPML4:
288        DPRINTF(PageTableWalker,
289                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
290        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
291        doWrite = !pte.a;
292        pte.a = 1;
293        entry.writable = pte.w;
294        entry.user = pte.u;
295        if (badNX || !pte.p) {
296            doEndWalk = true;
297            fault = pageFault(pte.p);
298            break;
299        }
300        entry.noExec = pte.nx;
301        nextState = LongPDP;
302        break;
303      case LongPDP:
304        DPRINTF(PageTableWalker,
305                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
306        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
307        doWrite = !pte.a;
308        pte.a = 1;
309        entry.writable = entry.writable && pte.w;
310        entry.user = entry.user && pte.u;
311        if (badNX || !pte.p) {
312            doEndWalk = true;
313            fault = pageFault(pte.p);
314            break;
315        }
316        nextState = LongPD;
317        break;
318      case LongPD:
319        DPRINTF(PageTableWalker,
320                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
321        doWrite = !pte.a;
322        pte.a = 1;
323        entry.writable = entry.writable && pte.w;
324        entry.user = entry.user && pte.u;
325        if (badNX || !pte.p) {
326            doEndWalk = true;
327            fault = pageFault(pte.p);
328            break;
329        }
330        if (!pte.ps) {
331            // 4 KB page
332            entry.logBytes = 12;
333            nextRead =
334                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
335            nextState = LongPTE;
336            break;
337        } else {
338            // 2 MB page
339            entry.logBytes = 21;
340            entry.paddr = (uint64_t)pte & (mask(31) << 21);
341            entry.uncacheable = uncacheable;
342            entry.global = pte.g;
343            entry.patBit = bits(pte, 12);
344            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
345            doTLBInsert = true;
346            doEndWalk = true;
347            break;
348        }
349      case LongPTE:
350        DPRINTF(PageTableWalker,
351                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
352        doWrite = !pte.a;
353        pte.a = 1;
354        entry.writable = entry.writable && pte.w;
355        entry.user = entry.user && pte.u;
356        if (badNX || !pte.p) {
357            doEndWalk = true;
358            fault = pageFault(pte.p);
359            break;
360        }
361        entry.paddr = (uint64_t)pte & (mask(40) << 12);
362        entry.uncacheable = uncacheable;
363        entry.global = pte.g;
364        entry.patBit = bits(pte, 12);
365        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
366        doTLBInsert = true;
367        doEndWalk = true;
368        break;
369      case PAEPDP:
370        DPRINTF(PageTableWalker,
371                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
372        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
373        if (!pte.p) {
374            doEndWalk = true;
375            fault = pageFault(pte.p);
376            break;
377        }
378        nextState = PAEPD;
379        break;
380      case PAEPD:
381        DPRINTF(PageTableWalker,
382                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
383        doWrite = !pte.a;
384        pte.a = 1;
385        entry.writable = pte.w;
386        entry.user = pte.u;
387        if (badNX || !pte.p) {
388            doEndWalk = true;
389            fault = pageFault(pte.p);
390            break;
391        }
392        if (!pte.ps) {
393            // 4 KB page
394            entry.logBytes = 12;
395            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
396            nextState = PAEPTE;
397            break;
398        } else {
399            // 2 MB page
400            entry.logBytes = 21;
401            entry.paddr = (uint64_t)pte & (mask(31) << 21);
402            entry.uncacheable = uncacheable;
403            entry.global = pte.g;
404            entry.patBit = bits(pte, 12);
405            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
406            doTLBInsert = true;
407            doEndWalk = true;
408            break;
409        }
410      case PAEPTE:
411        DPRINTF(PageTableWalker,
412                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
413        doWrite = !pte.a;
414        pte.a = 1;
415        entry.writable = entry.writable && pte.w;
416        entry.user = entry.user && pte.u;
417        if (badNX || !pte.p) {
418            doEndWalk = true;
419            fault = pageFault(pte.p);
420            break;
421        }
422        entry.paddr = (uint64_t)pte & (mask(40) << 12);
423        entry.uncacheable = uncacheable;
424        entry.global = pte.g;
425        entry.patBit = bits(pte, 7);
426        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
427        doTLBInsert = true;
428        doEndWalk = true;
429        break;
430      case PSEPD:
431        DPRINTF(PageTableWalker,
432                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
433        doWrite = !pte.a;
434        pte.a = 1;
435        entry.writable = pte.w;
436        entry.user = pte.u;
437        if (!pte.p) {
438            doEndWalk = true;
439            fault = pageFault(pte.p);
440            break;
441        }
442        if (!pte.ps) {
443            // 4 KB page
444            entry.logBytes = 12;
445            nextRead =
446                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
447            nextState = PTE;
448            break;
449        } else {
450            // 4 MB page
451            entry.logBytes = 21;
452            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
453            entry.uncacheable = uncacheable;
454            entry.global = pte.g;
455            entry.patBit = bits(pte, 12);
456            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
457            doTLBInsert = true;
458            doEndWalk = true;
459            break;
460        }
461      case PD:
462        DPRINTF(PageTableWalker,
463                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
464        doWrite = !pte.a;
465        pte.a = 1;
466        entry.writable = pte.w;
467        entry.user = pte.u;
468        if (!pte.p) {
469            doEndWalk = true;
470            fault = pageFault(pte.p);
471            break;
472        }
473        // 4 KB page
474        entry.logBytes = 12;
475        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
476        nextState = PTE;
477        break;
478      case PTE:
479        DPRINTF(PageTableWalker,
480                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
481        doWrite = !pte.a;
482        pte.a = 1;
483        entry.writable = pte.w;
484        entry.user = pte.u;
485        if (!pte.p) {
486            doEndWalk = true;
487            fault = pageFault(pte.p);
488            break;
489        }
490        entry.paddr = (uint64_t)pte & (mask(20) << 12);
491        entry.uncacheable = uncacheable;
492        entry.global = pte.g;
493        entry.patBit = bits(pte, 7);
494        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
495        doTLBInsert = true;
496        doEndWalk = true;
497        break;
498      default:
499        panic("Unknown page table walker state %d!\n");
500    }
501    if (doEndWalk) {
502        if (doTLBInsert)
503            if (!functional)
504                walker->tlb->insert(entry.vaddr, entry);
505        endWalk();
506    } else {
507        PacketPtr oldRead = read;
508        //If we didn't return, we're setting up another read.
509        Request::Flags flags = oldRead->req->getFlags();
510        flags.set(Request::UNCACHEABLE, uncacheable);
511        RequestPtr request =
512            new Request(nextRead, oldRead->getSize(), flags, walker->masterId);
513        read = new Packet(request, MemCmd::ReadReq);
514        read->allocate();
515        // If we need to write, adjust the read packet to write the modified
516        // value back to memory.
517        if (doWrite) {
518            write = oldRead;
519            write->set<uint64_t>(pte);
520            write->cmd = MemCmd::WriteReq;
521            write->clearDest();
522        } else {
523            write = NULL;
524            delete oldRead->req;
525            delete oldRead;
526        }
527    }
528    return fault;
529}
530
531void
532Walker::WalkerState::endWalk()
533{
534    nextState = Ready;
535    delete read->req;
536    delete read;
537    read = NULL;
538}
539
540void
541Walker::WalkerState::setupWalk(Addr vaddr)
542{
543    VAddr addr = vaddr;
544    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
545    // Check if we're in long mode or not
546    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
547    dataSize = 8;
548    Addr topAddr;
549    if (efer.lma) {
550        // Do long mode.
551        state = LongPML4;
552        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
553        enableNX = efer.nxe;
554    } else {
555        // We're in some flavor of legacy mode.
556        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
557        if (cr4.pae) {
558            // Do legacy PAE.
559            state = PAEPDP;
560            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
561            enableNX = efer.nxe;
562        } else {
563            dataSize = 4;
564            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
565            if (cr4.pse) {
566                // Do legacy PSE.
567                state = PSEPD;
568            } else {
569                // Do legacy non PSE.
570                state = PD;
571            }
572            enableNX = false;
573        }
574    }
575
576    nextState = Ready;
577    entry.vaddr = vaddr;
578
579    Request::Flags flags = Request::PHYSICAL;
580    if (cr3.pcd)
581        flags.set(Request::UNCACHEABLE);
582    RequestPtr request = new Request(topAddr, dataSize, flags,
583                                     walker->masterId);
584    read = new Packet(request, MemCmd::ReadReq);
585    read->allocate();
586}
587
588bool
589Walker::WalkerState::recvPacket(PacketPtr pkt)
590{
591    assert(pkt->isResponse());
592    assert(inflight);
593    assert(state == Waiting);
594    inflight--;
595    if (pkt->isRead()) {
596        // should not have a pending read it we also had one outstanding
597        assert(!read);
598
599        // @todo someone should pay for this
600        pkt->firstWordDelay = pkt->lastWordDelay = 0;
601
602        state = nextState;
603        nextState = Ready;
604        PacketPtr write = NULL;
605        read = pkt;
606        timingFault = stepWalk(write);
607        state = Waiting;
608        assert(timingFault == NoFault || read == NULL);
609        if (write) {
610            writes.push_back(write);
611        }
612        sendPackets();
613    } else {
614        sendPackets();
615    }
616    if (inflight == 0 && read == NULL && writes.size() == 0) {
617        state = Ready;
618        nextState = Waiting;
619        if (timingFault == NoFault) {
620            /*
621             * Finish the translation. Now that we now the right entry is
622             * in the TLB, this should work with no memory accesses.
623             * There could be new faults unrelated to the table walk like
624             * permissions violations, so we'll need the return value as
625             * well.
626             */
627            bool delayedResponse;
628            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
629                                                 delayedResponse, true);
630            assert(!delayedResponse);
631            // Let the CPU continue.
632            translation->finish(fault, req, tc, mode);
633        } else {
634            // There was a fault during the walk. Let the CPU know.
635            translation->finish(timingFault, req, tc, mode);
636        }
637        return true;
638    }
639
640    return false;
641}
642
643void
644Walker::WalkerState::sendPackets()
645{
646    //If we're already waiting for the port to become available, just return.
647    if (retrying)
648        return;
649
650    //Reads always have priority
651    if (read) {
652        PacketPtr pkt = read;
653        read = NULL;
654        inflight++;
655        if (!walker->sendTiming(this, pkt)) {
656            retrying = true;
657            read = pkt;
658            inflight--;
659            return;
660        }
661    }
662    //Send off as many of the writes as we can.
663    while (writes.size()) {
664        PacketPtr write = writes.back();
665        writes.pop_back();
666        inflight++;
667        if (!walker->sendTiming(this, write)) {
668            retrying = true;
669            writes.push_back(write);
670            inflight--;
671            return;
672        }
673    }
674}
675
676bool
677Walker::WalkerState::isRetrying()
678{
679    return retrying;
680}
681
682bool
683Walker::WalkerState::isTiming()
684{
685    return timing;
686}
687
688bool
689Walker::WalkerState::wasStarted()
690{
691    return started;
692}
693
694void
695Walker::WalkerState::retry()
696{
697    retrying = false;
698    sendPackets();
699}
700
701Fault
702Walker::WalkerState::pageFault(bool present)
703{
704    DPRINTF(PageTableWalker, "Raising page fault.\n");
705    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
706    if (mode == BaseTLB::Execute && !enableNX)
707        mode = BaseTLB::Read;
708    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
709}
710
711/* end namespace X86ISA */ }
712
713X86ISA::Walker *
714X86PagetableWalkerParams::create()
715{
716    return new X86ISA::Walker(this);
717}
718