pagetable_walker.cc revision 5895
1/*
2 * Copyright (c) 2007 The Hewlett-Packard Development Company
3 * All rights reserved.
4 *
5 * Redistribution and use of this software in source and binary forms,
6 * with or without modification, are permitted provided that the
7 * following conditions are met:
8 *
9 * The software must be used only for Non-Commercial Use which means any
10 * use which is NOT directed to receiving any direct monetary
11 * compensation for, or commercial advantage from such use.  Illustrative
12 * examples of non-commercial use are academic research, personal study,
13 * teaching, education and corporate research & development.
14 * Illustrative examples of commercial use are distributing products for
15 * commercial advantage and providing services using the software for
16 * commercial advantage.
17 *
18 * If you wish to use this software or functionality therein that may be
19 * covered by patents for commercial use, please contact:
20 *     Director of Intellectual Property Licensing
21 *     Office of Strategy and Technology
22 *     Hewlett-Packard Company
23 *     1501 Page Mill Road
24 *     Palo Alto, California  94304
25 *
26 * Redistributions of source code must retain the above copyright notice,
27 * this list of conditions and the following disclaimer.  Redistributions
28 * in binary form must reproduce the above copyright notice, this list of
29 * conditions and the following disclaimer in the documentation and/or
30 * other materials provided with the distribution.  Neither the name of
31 * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
32 * contributors may be used to endorse or promote products derived from
33 * this software without specific prior written permission.  No right of
34 * sublicense is granted herewith.  Derivatives of the software and
35 * output created using the software may be prepared, but only for
36 * Non-Commercial Uses.  Derivatives of the software may be shared with
37 * others provided: (i) the others agree to abide by the list of
38 * conditions herein which includes the Non-Commercial Use restrictions;
39 * and (ii) such Derivatives of the software include the above copyright
40 * notice to acknowledge the contribution from this software where
41 * applicable, this list of conditions and the disclaimer below.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
44 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
45 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
46 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
47 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
53 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 *
55 * Authors: Gabe Black
56 */
57
58#include "arch/x86/pagetable.hh"
59#include "arch/x86/pagetable_walker.hh"
60#include "arch/x86/tlb.hh"
61#include "base/bitfield.hh"
62#include "cpu/thread_context.hh"
63#include "cpu/base.hh"
64#include "mem/packet_access.hh"
65#include "mem/request.hh"
66#include "sim/system.hh"
67
68namespace X86ISA {
69
70// Unfortunately, the placement of the base field in a page table entry is
71// very erratic and would make a mess here. It might be moved here at some
72// point in the future.
73BitUnion64(PageTableEntry)
74    Bitfield<63> nx;
75    Bitfield<11, 9> avl;
76    Bitfield<8> g;
77    Bitfield<7> ps;
78    Bitfield<6> d;
79    Bitfield<5> a;
80    Bitfield<4> pcd;
81    Bitfield<3> pwt;
82    Bitfield<2> u;
83    Bitfield<1> w;
84    Bitfield<0> p;
85EndBitUnion(PageTableEntry)
86
87Fault
88Walker::doNext(PacketPtr &read, PacketPtr &write)
89{
90    assert(state != Ready && state != Waiting);
91    write = NULL;
92    PageTableEntry pte;
93    if (size == 8)
94        pte = read->get<uint64_t>();
95    else
96        pte = read->get<uint32_t>();
97    VAddr vaddr = entry.vaddr;
98    bool uncacheable = pte.pcd;
99    Addr nextRead = 0;
100    bool doWrite = false;
101    bool badNX = pte.nx && (!tlb->allowNX() || !enableNX);
102    switch(state) {
103      case LongPML4:
104        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * size;
105        doWrite = !pte.a;
106        pte.a = 1;
107        entry.writable = pte.w;
108        entry.user = pte.u;
109        if (badNX || !pte.p) {
110            stop();
111            return pageFault(pte.p);
112        }
113        entry.noExec = pte.nx;
114        nextState = LongPDP;
115        break;
116      case LongPDP:
117        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * size;
118        doWrite = !pte.a;
119        pte.a = 1;
120        entry.writable = entry.writable && pte.w;
121        entry.user = entry.user && pte.u;
122        if (badNX || !pte.p) {
123            stop();
124            return pageFault(pte.p);
125        }
126        nextState = LongPD;
127        break;
128      case LongPD:
129        doWrite = !pte.a;
130        pte.a = 1;
131        entry.writable = entry.writable && pte.w;
132        entry.user = entry.user && pte.u;
133        if (badNX || !pte.p) {
134            stop();
135            return pageFault(pte.p);
136        }
137        if (!pte.ps) {
138            // 4 KB page
139            entry.size = 4 * (1 << 10);
140            nextRead =
141                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * size;
142            nextState = LongPTE;
143            break;
144        } else {
145            // 2 MB page
146            entry.size = 2 * (1 << 20);
147            entry.paddr = (uint64_t)pte & (mask(31) << 21);
148            entry.uncacheable = uncacheable;
149            entry.global = pte.g;
150            entry.patBit = bits(pte, 12);
151            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
152            tlb->insert(entry.vaddr, entry);
153            stop();
154            return NoFault;
155        }
156      case LongPTE:
157        doWrite = !pte.a;
158        pte.a = 1;
159        entry.writable = entry.writable && pte.w;
160        entry.user = entry.user && pte.u;
161        if (badNX || !pte.p) {
162            stop();
163            return pageFault(pte.p);
164        }
165        entry.paddr = (uint64_t)pte & (mask(40) << 12);
166        entry.uncacheable = uncacheable;
167        entry.global = pte.g;
168        entry.patBit = bits(pte, 12);
169        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
170        tlb->insert(entry.vaddr, entry);
171        stop();
172        return NoFault;
173      case PAEPDP:
174        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * size;
175        if (!pte.p) {
176            stop();
177            return pageFault(pte.p);
178        }
179        nextState = PAEPD;
180        break;
181      case PAEPD:
182        doWrite = !pte.a;
183        pte.a = 1;
184        entry.writable = pte.w;
185        entry.user = pte.u;
186        if (badNX || !pte.p) {
187            stop();
188            return pageFault(pte.p);
189        }
190        if (!pte.ps) {
191            // 4 KB page
192            entry.size = 4 * (1 << 10);
193            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * size;
194            nextState = PAEPTE;
195            break;
196        } else {
197            // 2 MB page
198            entry.size = 2 * (1 << 20);
199            entry.paddr = (uint64_t)pte & (mask(31) << 21);
200            entry.uncacheable = uncacheable;
201            entry.global = pte.g;
202            entry.patBit = bits(pte, 12);
203            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
204            tlb->insert(entry.vaddr, entry);
205            stop();
206            return NoFault;
207        }
208      case PAEPTE:
209        doWrite = !pte.a;
210        pte.a = 1;
211        entry.writable = entry.writable && pte.w;
212        entry.user = entry.user && pte.u;
213        if (badNX || !pte.p) {
214            stop();
215            return pageFault(pte.p);
216        }
217        entry.paddr = (uint64_t)pte & (mask(40) << 12);
218        entry.uncacheable = uncacheable;
219        entry.global = pte.g;
220        entry.patBit = bits(pte, 7);
221        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
222        tlb->insert(entry.vaddr, entry);
223        stop();
224        return NoFault;
225      case PSEPD:
226        doWrite = !pte.a;
227        pte.a = 1;
228        entry.writable = pte.w;
229        entry.user = pte.u;
230        if (!pte.p) {
231            stop();
232            return pageFault(pte.p);
233        }
234        if (!pte.ps) {
235            // 4 KB page
236            entry.size = 4 * (1 << 10);
237            nextRead =
238                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size;
239            nextState = PTE;
240            break;
241        } else {
242            // 4 MB page
243            entry.size = 4 * (1 << 20);
244            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
245            entry.uncacheable = uncacheable;
246            entry.global = pte.g;
247            entry.patBit = bits(pte, 12);
248            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
249            tlb->insert(entry.vaddr, entry);
250            stop();
251            return NoFault;
252        }
253      case PD:
254        doWrite = !pte.a;
255        pte.a = 1;
256        entry.writable = pte.w;
257        entry.user = pte.u;
258        if (!pte.p) {
259            stop();
260            return pageFault(pte.p);
261        }
262        // 4 KB page
263        entry.size = 4 * (1 << 10);
264        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size;
265        nextState = PTE;
266        break;
267      case PTE:
268        doWrite = !pte.a;
269        pte.a = 1;
270        entry.writable = pte.w;
271        entry.user = pte.u;
272        if (!pte.p) {
273            stop();
274            return pageFault(pte.p);
275        }
276        entry.paddr = (uint64_t)pte & (mask(20) << 12);
277        entry.uncacheable = uncacheable;
278        entry.global = pte.g;
279        entry.patBit = bits(pte, 7);
280        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
281        tlb->insert(entry.vaddr, entry);
282        stop();
283        return NoFault;
284      default:
285        panic("Unknown page table walker state %d!\n");
286    }
287    PacketPtr oldRead = read;
288    //If we didn't return, we're setting up another read.
289    Request::Flags flags = oldRead->req->getFlags();
290    flags.set(Request::UNCACHEABLE, uncacheable);
291    RequestPtr request =
292        new Request(nextRead, oldRead->getSize(), flags);
293    read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast);
294    read->allocate();
295    //If we need to write, adjust the read packet to write the modified value
296    //back to memory.
297    if (doWrite) {
298        write = oldRead;
299        write->set<uint64_t>(pte);
300        write->cmd = MemCmd::WriteReq;
301        write->setDest(Packet::Broadcast);
302    } else {
303        write = NULL;
304        delete oldRead->req;
305        delete oldRead;
306    }
307    return NoFault;
308}
309
310Fault
311Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
312        RequestPtr _req, bool _write, bool _execute)
313{
314    assert(state == Ready);
315    assert(!tc);
316    tc = _tc;
317    req = _req;
318    Addr vaddr = req->getVaddr();
319    execute = _execute;
320    write = _write;
321    translation = _translation;
322
323    VAddr addr = vaddr;
324
325    //Figure out what we're doing.
326    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
327    Addr top = 0;
328    // Check if we're in long mode or not
329    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
330    size = 8;
331    if (efer.lma) {
332        // Do long mode.
333        state = LongPML4;
334        top = (cr3.longPdtb << 12) + addr.longl4 * size;
335        enableNX = efer.nxe;
336    } else {
337        // We're in some flavor of legacy mode.
338        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
339        if (cr4.pae) {
340            // Do legacy PAE.
341            state = PAEPDP;
342            top = (cr3.paePdtb << 5) + addr.pael3 * size;
343            enableNX = efer.nxe;
344        } else {
345            size = 4;
346            top = (cr3.pdtb << 12) + addr.norml2 * size;
347            if (cr4.pse) {
348                // Do legacy PSE.
349                state = PSEPD;
350            } else {
351                // Do legacy non PSE.
352                state = PD;
353            }
354            enableNX = false;
355        }
356    }
357
358    nextState = Ready;
359    entry.vaddr = vaddr;
360
361    Request::Flags flags = Request::PHYSICAL;
362    if (cr3.pcd)
363        flags.set(Request::UNCACHEABLE);
364    RequestPtr request = new Request(top, size, flags);
365    read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast);
366    read->allocate();
367    Enums::MemoryMode memMode = sys->getMemoryMode();
368    if (memMode == Enums::timing) {
369        timingFault = NoFault;
370        port.sendTiming(read);
371    } else if (memMode == Enums::atomic) {
372        Fault fault;
373        do {
374            port.sendAtomic(read);
375            PacketPtr write = NULL;
376            fault = doNext(read, write);
377            assert(fault == NoFault || read == NULL);
378            state = nextState;
379            nextState = Ready;
380            if (write)
381                port.sendAtomic(write);
382        } while(read);
383        tc = NULL;
384        state = Ready;
385        nextState = Waiting;
386        return fault;
387    } else {
388        panic("Unrecognized memory system mode.\n");
389    }
390    return NoFault;
391}
392
393bool
394Walker::WalkerPort::recvTiming(PacketPtr pkt)
395{
396    return walker->recvTiming(pkt);
397}
398
399bool
400Walker::recvTiming(PacketPtr pkt)
401{
402    inflight--;
403    if (pkt->isResponse() && !pkt->wasNacked()) {
404        if (pkt->isRead()) {
405            assert(inflight);
406            assert(state == Waiting);
407            assert(!read);
408            state = nextState;
409            nextState = Ready;
410            PacketPtr write = NULL;
411            timingFault = doNext(pkt, write);
412            state = Waiting;
413            read = pkt;
414            assert(timingFault == NoFault || read == NULL);
415            if (write) {
416                writes.push_back(write);
417            }
418            sendPackets();
419        } else {
420            sendPackets();
421        }
422        if (inflight == 0 && read == NULL && writes.size() == 0) {
423            tc = NULL;
424            state = Ready;
425            nextState = Waiting;
426            if (timingFault == NoFault) {
427                /*
428                 * Finish the translation. Now that we now the right entry is
429                 * in the TLB, this should work with no memory accesses.
430                 * There could be new faults unrelated to the table walk like
431                 * permissions violations, so we'll need the return value as
432                 * well.
433                 */
434                bool delayedResponse;
435                Fault fault = tlb->translate(req, tc, NULL, write, execute,
436                        delayedResponse, true);
437                assert(!delayedResponse);
438                // Let the CPU continue.
439                translation->finish(fault, req, tc, write);
440            } else {
441                // There was a fault during the walk. Let the CPU know.
442                translation->finish(timingFault, req, tc, write);
443            }
444        }
445    } else if (pkt->wasNacked()) {
446        pkt->reinitNacked();
447        if (!port.sendTiming(pkt)) {
448            retrying = true;
449            if (pkt->isWrite()) {
450                writes.push_back(pkt);
451            } else {
452                assert(!read);
453                read = pkt;
454            }
455        } else {
456            inflight++;
457        }
458    }
459    return true;
460}
461
462Tick
463Walker::WalkerPort::recvAtomic(PacketPtr pkt)
464{
465    return 0;
466}
467
468void
469Walker::WalkerPort::recvFunctional(PacketPtr pkt)
470{
471    return;
472}
473
474void
475Walker::WalkerPort::recvStatusChange(Status status)
476{
477    if (status == RangeChange) {
478        if (!snoopRangeSent) {
479            snoopRangeSent = true;
480            sendStatusChange(Port::RangeChange);
481        }
482        return;
483    }
484
485    panic("Unexpected recvStatusChange.\n");
486}
487
488void
489Walker::WalkerPort::recvRetry()
490{
491    walker->recvRetry();
492}
493
494void
495Walker::recvRetry()
496{
497    retrying = false;
498    sendPackets();
499}
500
501void
502Walker::sendPackets()
503{
504    //If we're already waiting for the port to become available, just return.
505    if (retrying)
506        return;
507
508    //Reads always have priority
509    if (read) {
510        if (!port.sendTiming(read)) {
511            retrying = true;
512            return;
513        } else {
514            inflight++;
515            delete read->req;
516            delete read;
517            read = NULL;
518        }
519    }
520    //Send off as many of the writes as we can.
521    while (writes.size()) {
522        PacketPtr write = writes.back();
523        if (!port.sendTiming(write)) {
524            retrying = true;
525            return;
526        } else {
527            inflight++;
528            delete write->req;
529            delete write;
530            writes.pop_back();
531        }
532    }
533}
534
535Port *
536Walker::getPort(const std::string &if_name, int idx)
537{
538    if (if_name == "port")
539        return &port;
540    else
541        panic("No page table walker port named %s!\n", if_name);
542}
543
544Fault
545Walker::pageFault(bool present)
546{
547    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
548    return new PageFault(entry.vaddr, present, write,
549            m5reg.cpl == 3, false, execute && enableNX);
550}
551
552}
553
554X86ISA::Walker *
555X86PagetableWalkerParams::create()
556{
557    return new X86ISA::Walker(this);
558}
559