pagetable_walker.cc revision 8711:c7e14f52c682
/*
 * Copyright (c) 2007 The Hewlett-Packard Development Company
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */
392199SN/A
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/tlb.hh"
#include "arch/x86/vtophys.hh"
#include "base/bitfield.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/PageTableWalker.hh"
#include "mem/packet_access.hh"
#include "mem/request.hh"
#include "sim/system.hh"
512199SN/A
522199SN/Anamespace X86ISA {
532199SN/A
542199SN/A// Unfortunately, the placement of the base field in a page table entry is
552199SN/A// very erratic and would make a mess here. It might be moved here at some
562199SN/A// point in the future.
572199SN/ABitUnion64(PageTableEntry)
582199SN/A    Bitfield<63> nx;
592199SN/A    Bitfield<11, 9> avl;
602199SN/A    Bitfield<8> g;
612199SN/A    Bitfield<7> ps;
622199SN/A    Bitfield<6> d;
632561SN/A    Bitfield<5> a;
642561SN/A    Bitfield<4> pcd;
652561SN/A    Bitfield<3> pwt;
662474SN/A    Bitfield<2> u;
672199SN/A    Bitfield<1> w;
68    Bitfield<0> p;
69EndBitUnion(PageTableEntry)
70
71Fault
72Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
73              RequestPtr _req, BaseTLB::Mode _mode)
74{
75    // TODO: in timing mode, instead of blocking when there are other
76    // outstanding requests, see if this request can be coalesced with
77    // another one (i.e. either coalesce or start walk)
78    WalkerState * newState = new WalkerState(this, _translation, _req);
79    newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing);
80    if (currStates.size()) {
81        assert(newState->isTiming());
82        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
83        currStates.push_back(newState);
84        return NoFault;
85    } else {
86        currStates.push_back(newState);
87        Fault fault = newState->startWalk();
88        if (!newState->isTiming()) {
89            currStates.pop_front();
90            delete newState;
91        }
92        return fault;
93    }
94}
95
96Fault
97Walker::startFunctional(ThreadContext * _tc, Addr &addr, Addr &pageSize,
98              BaseTLB::Mode _mode)
99{
100    funcState.initState(_tc, _mode);
101    return funcState.startFunctional(addr, pageSize);
102}
103
104bool
105Walker::WalkerPort::recvTiming(PacketPtr pkt)
106{
107    return walker->recvTiming(pkt);
108}
109
110bool
111Walker::recvTiming(PacketPtr pkt)
112{
113    if (pkt->isResponse() || pkt->wasNacked()) {
114        WalkerSenderState * senderState =
115                dynamic_cast<WalkerSenderState *>(pkt->senderState);
116        pkt->senderState = senderState->saved;
117        WalkerState * senderWalk = senderState->senderWalk;
118        bool walkComplete = senderWalk->recvPacket(pkt);
119        delete senderState;
120        if (walkComplete) {
121            std::list<WalkerState *>::iterator iter;
122            for (iter = currStates.begin(); iter != currStates.end(); iter++) {
123                WalkerState * walkerState = *(iter);
124                if (walkerState == senderWalk) {
125                    iter = currStates.erase(iter);
126                    break;
127                }
128            }
129            delete senderWalk;
130            // Since we block requests when another is outstanding, we
131            // need to check if there is a waiting request to be serviced
132            if (currStates.size()) {
133                WalkerState * newState = currStates.front();
134                if (!newState->wasStarted())
135                    newState->startWalk();
136            }
137        }
138    } else {
139        DPRINTF(PageTableWalker, "Received strange packet\n");
140    }
141    return true;
142}
143
144Tick
145Walker::WalkerPort::recvAtomic(PacketPtr pkt)
146{
147    return 0;
148}
149
150void
151Walker::WalkerPort::recvFunctional(PacketPtr pkt)
152{
153    return;
154}
155
156void
157Walker::WalkerPort::recvRangeChange()
158{
159}
160
161void
162Walker::WalkerPort::recvRetry()
163{
164    walker->recvRetry();
165}
166
167void
168Walker::recvRetry()
169{
170    std::list<WalkerState *>::iterator iter;
171    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
172        WalkerState * walkerState = *(iter);
173        if (walkerState->isRetrying()) {
174            walkerState->retry();
175        }
176    }
177}
178
179bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
180{
181    pkt->senderState = new WalkerSenderState(sendingState, pkt->senderState);
182    return port.sendTiming(pkt);
183}
184
185Port *
186Walker::getPort(const std::string &if_name, int idx)
187{
188    if (if_name == "port")
189        return &port;
190    else
191        panic("No page table walker port named %s!\n", if_name);
192}
193
194void
195Walker::WalkerState::initState(ThreadContext * _tc,
196        BaseTLB::Mode _mode, bool _isTiming)
197{
198    assert(state == Ready);
199    started = false;
200    tc = _tc;
201    mode = _mode;
202    timing = _isTiming;
203}
204
205Fault
206Walker::WalkerState::startWalk()
207{
208    Fault fault = NoFault;
209    assert(started == false);
210    started = true;
211    setupWalk(req->getVaddr());
212    if (timing) {
213        nextState = state;
214        state = Waiting;
215        timingFault = NoFault;
216        sendPackets();
217    } else {
218        do {
219            walker->port.sendAtomic(read);
220            PacketPtr write = NULL;
221            fault = stepWalk(write);
222            assert(fault == NoFault || read == NULL);
223            state = nextState;
224            nextState = Ready;
225            if (write)
226                walker->port.sendAtomic(write);
227        } while(read);
228        state = Ready;
229        nextState = Waiting;
230    }
231    return fault;
232}
233
234Fault
235Walker::WalkerState::startFunctional(Addr &addr, Addr &pageSize)
236{
237    Fault fault = NoFault;
238    assert(started == false);
239    started = true;
240    setupWalk(addr);
241
242    do {
243        walker->port.sendFunctional(read);
244        // On a functional access (page table lookup), writes should
245        // not happen so this pointer is ignored after stepWalk
246        PacketPtr write = NULL;
247        fault = stepWalk(write);
248        assert(fault == NoFault || read == NULL);
249        state = nextState;
250        nextState = Ready;
251    } while(read);
252    pageSize = entry.size;
253    addr = entry.paddr;
254
255    return fault;
256}
257
258Fault
259Walker::WalkerState::stepWalk(PacketPtr &write)
260{
261    assert(state != Ready && state != Waiting);
262    Fault fault = NoFault;
263    write = NULL;
264    PageTableEntry pte;
265    if (dataSize == 8)
266        pte = read->get<uint64_t>();
267    else
268        pte = read->get<uint32_t>();
269    VAddr vaddr = entry.vaddr;
270    bool uncacheable = pte.pcd;
271    Addr nextRead = 0;
272    bool doWrite = false;
273    bool doTLBInsert = false;
274    bool doEndWalk = false;
275    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
276    switch(state) {
277      case LongPML4:
278        DPRINTF(PageTableWalker,
279                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
280        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
281        doWrite = !pte.a;
282        pte.a = 1;
283        entry.writable = pte.w;
284        entry.user = pte.u;
285        if (badNX || !pte.p) {
286            doEndWalk = true;
287            fault = pageFault(pte.p);
288            break;
289        }
290        entry.noExec = pte.nx;
291        nextState = LongPDP;
292        break;
293      case LongPDP:
294        DPRINTF(PageTableWalker,
295                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
296        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
297        doWrite = !pte.a;
298        pte.a = 1;
299        entry.writable = entry.writable && pte.w;
300        entry.user = entry.user && pte.u;
301        if (badNX || !pte.p) {
302            doEndWalk = true;
303            fault = pageFault(pte.p);
304            break;
305        }
306        nextState = LongPD;
307        break;
308      case LongPD:
309        DPRINTF(PageTableWalker,
310                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
311        doWrite = !pte.a;
312        pte.a = 1;
313        entry.writable = entry.writable && pte.w;
314        entry.user = entry.user && pte.u;
315        if (badNX || !pte.p) {
316            doEndWalk = true;
317            fault = pageFault(pte.p);
318            break;
319        }
320        if (!pte.ps) {
321            // 4 KB page
322            entry.size = 4 * (1 << 10);
323            nextRead =
324                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
325            nextState = LongPTE;
326            break;
327        } else {
328            // 2 MB page
329            entry.size = 2 * (1 << 20);
330            entry.paddr = (uint64_t)pte & (mask(31) << 21);
331            entry.uncacheable = uncacheable;
332            entry.global = pte.g;
333            entry.patBit = bits(pte, 12);
334            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
335            doTLBInsert = true;
336            doEndWalk = true;
337            break;
338        }
339      case LongPTE:
340        DPRINTF(PageTableWalker,
341                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
342        doWrite = !pte.a;
343        pte.a = 1;
344        entry.writable = entry.writable && pte.w;
345        entry.user = entry.user && pte.u;
346        if (badNX || !pte.p) {
347            doEndWalk = true;
348            fault = pageFault(pte.p);
349            break;
350        }
351        entry.paddr = (uint64_t)pte & (mask(40) << 12);
352        entry.uncacheable = uncacheable;
353        entry.global = pte.g;
354        entry.patBit = bits(pte, 12);
355        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
356        doTLBInsert = true;
357        doEndWalk = true;
358        break;
359      case PAEPDP:
360        DPRINTF(PageTableWalker,
361                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
362        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
363        if (!pte.p) {
364            doEndWalk = true;
365            fault = pageFault(pte.p);
366            break;
367        }
368        nextState = PAEPD;
369        break;
370      case PAEPD:
371        DPRINTF(PageTableWalker,
372                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
373        doWrite = !pte.a;
374        pte.a = 1;
375        entry.writable = pte.w;
376        entry.user = pte.u;
377        if (badNX || !pte.p) {
378            doEndWalk = true;
379            fault = pageFault(pte.p);
380            break;
381        }
382        if (!pte.ps) {
383            // 4 KB page
384            entry.size = 4 * (1 << 10);
385            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
386            nextState = PAEPTE;
387            break;
388        } else {
389            // 2 MB page
390            entry.size = 2 * (1 << 20);
391            entry.paddr = (uint64_t)pte & (mask(31) << 21);
392            entry.uncacheable = uncacheable;
393            entry.global = pte.g;
394            entry.patBit = bits(pte, 12);
395            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
396            doTLBInsert = true;
397            doEndWalk = true;
398            break;
399        }
400      case PAEPTE:
401        DPRINTF(PageTableWalker,
402                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
403        doWrite = !pte.a;
404        pte.a = 1;
405        entry.writable = entry.writable && pte.w;
406        entry.user = entry.user && pte.u;
407        if (badNX || !pte.p) {
408            doEndWalk = true;
409            fault = pageFault(pte.p);
410            break;
411        }
412        entry.paddr = (uint64_t)pte & (mask(40) << 12);
413        entry.uncacheable = uncacheable;
414        entry.global = pte.g;
415        entry.patBit = bits(pte, 7);
416        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
417        doTLBInsert = true;
418        doEndWalk = true;
419        break;
420      case PSEPD:
421        DPRINTF(PageTableWalker,
422                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
423        doWrite = !pte.a;
424        pte.a = 1;
425        entry.writable = pte.w;
426        entry.user = pte.u;
427        if (!pte.p) {
428            doEndWalk = true;
429            fault = pageFault(pte.p);
430            break;
431        }
432        if (!pte.ps) {
433            // 4 KB page
434            entry.size = 4 * (1 << 10);
435            nextRead =
436                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
437            nextState = PTE;
438            break;
439        } else {
440            // 4 MB page
441            entry.size = 4 * (1 << 20);
442            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
443            entry.uncacheable = uncacheable;
444            entry.global = pte.g;
445            entry.patBit = bits(pte, 12);
446            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
447            doTLBInsert = true;
448            doEndWalk = true;
449            break;
450        }
451      case PD:
452        DPRINTF(PageTableWalker,
453                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
454        doWrite = !pte.a;
455        pte.a = 1;
456        entry.writable = pte.w;
457        entry.user = pte.u;
458        if (!pte.p) {
459            doEndWalk = true;
460            fault = pageFault(pte.p);
461            break;
462        }
463        // 4 KB page
464        entry.size = 4 * (1 << 10);
465        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
466        nextState = PTE;
467        break;
468      case PTE:
469        DPRINTF(PageTableWalker,
470                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
471        doWrite = !pte.a;
472        pte.a = 1;
473        entry.writable = pte.w;
474        entry.user = pte.u;
475        if (!pte.p) {
476            doEndWalk = true;
477            fault = pageFault(pte.p);
478            break;
479        }
480        entry.paddr = (uint64_t)pte & (mask(20) << 12);
481        entry.uncacheable = uncacheable;
482        entry.global = pte.g;
483        entry.patBit = bits(pte, 7);
484        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
485        doTLBInsert = true;
486        doEndWalk = true;
487        break;
488      default:
489        panic("Unknown page table walker state %d!\n");
490    }
491    if (doEndWalk) {
492        if (doTLBInsert)
493            if (!functional)
494                walker->tlb->insert(entry.vaddr, entry);
495        endWalk();
496    } else {
497        PacketPtr oldRead = read;
498        //If we didn't return, we're setting up another read.
499        Request::Flags flags = oldRead->req->getFlags();
500        flags.set(Request::UNCACHEABLE, uncacheable);
501        RequestPtr request =
502            new Request(nextRead, oldRead->getSize(), flags);
503        read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast);
504        read->allocate();
505        // If we need to write, adjust the read packet to write the modified
506        // value back to memory.
507        if (doWrite) {
508            write = oldRead;
509            write->set<uint64_t>(pte);
510            write->cmd = MemCmd::WriteReq;
511            write->setDest(Packet::Broadcast);
512        } else {
513            write = NULL;
514            delete oldRead->req;
515            delete oldRead;
516        }
517    }
518    return fault;
519}
520
521void
522Walker::WalkerState::endWalk()
523{
524    nextState = Ready;
525    delete read->req;
526    delete read;
527    read = NULL;
528}
529
530void
531Walker::WalkerState::setupWalk(Addr vaddr)
532{
533    VAddr addr = vaddr;
534    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
535    // Check if we're in long mode or not
536    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
537    dataSize = 8;
538    Addr topAddr;
539    if (efer.lma) {
540        // Do long mode.
541        state = LongPML4;
542        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
543        enableNX = efer.nxe;
544    } else {
545        // We're in some flavor of legacy mode.
546        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
547        if (cr4.pae) {
548            // Do legacy PAE.
549            state = PAEPDP;
550            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
551            enableNX = efer.nxe;
552        } else {
553            dataSize = 4;
554            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
555            if (cr4.pse) {
556                // Do legacy PSE.
557                state = PSEPD;
558            } else {
559                // Do legacy non PSE.
560                state = PD;
561            }
562            enableNX = false;
563        }
564    }
565
566    nextState = Ready;
567    entry.vaddr = vaddr;
568
569    Request::Flags flags = Request::PHYSICAL;
570    if (cr3.pcd)
571        flags.set(Request::UNCACHEABLE);
572    RequestPtr request = new Request(topAddr, dataSize, flags);
573    read = new Packet(request, MemCmd::ReadReq, Packet::Broadcast);
574    read->allocate();
575}
576
577bool
578Walker::WalkerState::recvPacket(PacketPtr pkt)
579{
580    if (pkt->isResponse() && !pkt->wasNacked()) {
581        assert(inflight);
582        assert(state == Waiting);
583        assert(!read);
584        inflight--;
585        if (pkt->isRead()) {
586            state = nextState;
587            nextState = Ready;
588            PacketPtr write = NULL;
589            read = pkt;
590            timingFault = stepWalk(write);
591            state = Waiting;
592            assert(timingFault == NoFault || read == NULL);
593            if (write) {
594                writes.push_back(write);
595            }
596            sendPackets();
597        } else {
598            sendPackets();
599        }
600        if (inflight == 0 && read == NULL && writes.size() == 0) {
601            state = Ready;
602            nextState = Waiting;
603            if (timingFault == NoFault) {
604                /*
605                 * Finish the translation. Now that we now the right entry is
606                 * in the TLB, this should work with no memory accesses.
607                 * There could be new faults unrelated to the table walk like
608                 * permissions violations, so we'll need the return value as
609                 * well.
610                 */
611                bool delayedResponse;
612                Fault fault = walker->tlb->translate(req, tc, NULL, mode,
613                        delayedResponse, true);
614                assert(!delayedResponse);
615                // Let the CPU continue.
616                translation->finish(fault, req, tc, mode);
617            } else {
618                // There was a fault during the walk. Let the CPU know.
619                translation->finish(timingFault, req, tc, mode);
620            }
621            return true;
622        }
623    } else if (pkt->wasNacked()) {
624        DPRINTF(PageTableWalker, "Request was nacked. Entering retry state\n");
625        pkt->reinitNacked();
626        if (!walker->sendTiming(this, pkt)) {
627            inflight--;
628            retrying = true;
629            if (pkt->isWrite()) {
630                writes.push_back(pkt);
631            } else {
632                assert(!read);
633                read = pkt;
634            }
635        }
636    }
637    return false;
638}
639
640void
641Walker::WalkerState::sendPackets()
642{
643    //If we're already waiting for the port to become available, just return.
644    if (retrying)
645        return;
646
647    //Reads always have priority
648    if (read) {
649        PacketPtr pkt = read;
650        read = NULL;
651        inflight++;
652        if (!walker->sendTiming(this, pkt)) {
653            retrying = true;
654            read = pkt;
655            inflight--;
656            return;
657        }
658    }
659    //Send off as many of the writes as we can.
660    while (writes.size()) {
661        PacketPtr write = writes.back();
662        writes.pop_back();
663        inflight++;
664        if (!walker->sendTiming(this, write)) {
665            retrying = true;
666            writes.push_back(write);
667            inflight--;
668            return;
669        }
670    }
671}
672
673bool
674Walker::WalkerState::isRetrying()
675{
676    return retrying;
677}
678
679bool
680Walker::WalkerState::isTiming()
681{
682    return timing;
683}
684
685bool
686Walker::WalkerState::wasStarted()
687{
688    return started;
689}
690
691void
692Walker::WalkerState::retry()
693{
694    retrying = false;
695    sendPackets();
696}
697
698Fault
699Walker::WalkerState::pageFault(bool present)
700{
701    DPRINTF(PageTableWalker, "Raising page fault.\n");
702    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
703    if (mode == BaseTLB::Execute && !enableNX)
704        mode = BaseTLB::Read;
705    return new PageFault(entry.vaddr, present, mode, m5reg.cpl == 3, false);
706}
707
708/* end namespace X86ISA */ }
709
710X86ISA::Walker *
711X86PagetableWalkerParams::create()
712{
713    return new X86ISA::Walker(this);
714}
715