1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007 The Hewlett-Packard Development Company
15 * All rights reserved.
16 *
17 * The license below extends only to copyright in the software and shall
18 * not be construed as granting a license to any other intellectual
19 * property including but not limited to intellectual property relating
20 * to a hardware implementation of the functionality of the software
21 * licensed hereunder.  You may use the software subject to the license
22 * terms below provided that you ensure that this notice is replicated
23 * unmodified and in its entirety in all distributions of the software,
24 * modified or unmodified, in source code or in binary form.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are
28 * met: redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer;
30 * redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution;
33 * neither the name of the copyright holders nor the names of its
34 * contributors may be used to endorse or promote products derived from
35 * this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 * Authors: Gabe Black
50 */
51
52#include "arch/x86/pagetable_walker.hh"
53
54#include <memory>
55
56#include "arch/x86/pagetable.hh"
57#include "arch/x86/tlb.hh"
58#include "arch/x86/vtophys.hh"
59#include "base/bitfield.hh"
60#include "base/trie.hh"
61#include "cpu/base.hh"
62#include "cpu/thread_context.hh"
63#include "debug/PageTableWalker.hh"
64#include "mem/packet_access.hh"
65#include "mem/request.hh"
66
67namespace X86ISA {
68
69Fault
70Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
71              const RequestPtr &_req, BaseTLB::Mode _mode)
72{
73    // TODO: in timing mode, instead of blocking when there are other
74    // outstanding requests, see if this request can be coalesced with
75    // another one (i.e. either coalesce or start walk)
76    WalkerState * newState = new WalkerState(this, _translation, _req);
77    newState->initState(_tc, _mode, sys->isTimingMode());
78    if (currStates.size()) {
79        assert(newState->isTiming());
80        DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
81        currStates.push_back(newState);
82        return NoFault;
83    } else {
84        currStates.push_back(newState);
85        Fault fault = newState->startWalk();
86        if (!newState->isTiming()) {
87            currStates.pop_front();
88            delete newState;
89        }
90        return fault;
91    }
92}
93
94Fault
95Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes,
96              BaseTLB::Mode _mode)
97{
98    funcState.initState(_tc, _mode);
99    return funcState.startFunctional(addr, logBytes);
100}
101
102bool
103Walker::WalkerPort::recvTimingResp(PacketPtr pkt)
104{
105    return walker->recvTimingResp(pkt);
106}
107
108bool
109Walker::recvTimingResp(PacketPtr pkt)
110{
111    WalkerSenderState * senderState =
112        dynamic_cast<WalkerSenderState *>(pkt->popSenderState());
113    WalkerState * senderWalk = senderState->senderWalk;
114    bool walkComplete = senderWalk->recvPacket(pkt);
115    delete senderState;
116    if (walkComplete) {
117        std::list<WalkerState *>::iterator iter;
118        for (iter = currStates.begin(); iter != currStates.end(); iter++) {
119            WalkerState * walkerState = *(iter);
120            if (walkerState == senderWalk) {
121                iter = currStates.erase(iter);
122                break;
123            }
124        }
125        delete senderWalk;
126        // Since we block requests when another is outstanding, we
127        // need to check if there is a waiting request to be serviced
128        if (currStates.size() && !startWalkWrapperEvent.scheduled())
129            // delay sending any new requests until we are finished
130            // with the responses
131            schedule(startWalkWrapperEvent, clockEdge());
132    }
133    return true;
134}
135
136void
137Walker::WalkerPort::recvReqRetry()
138{
139    walker->recvReqRetry();
140}
141
142void
143Walker::recvReqRetry()
144{
145    std::list<WalkerState *>::iterator iter;
146    for (iter = currStates.begin(); iter != currStates.end(); iter++) {
147        WalkerState * walkerState = *(iter);
148        if (walkerState->isRetrying()) {
149            walkerState->retry();
150        }
151    }
152}
153
154bool Walker::sendTiming(WalkerState* sendingState, PacketPtr pkt)
155{
156    WalkerSenderState* walker_state = new WalkerSenderState(sendingState);
157    pkt->pushSenderState(walker_state);
158    if (port.sendTimingReq(pkt)) {
159        return true;
160    } else {
161        // undo the adding of the sender state and delete it, as we
162        // will do it again the next time we attempt to send it
163        pkt->popSenderState();
164        delete walker_state;
165        return false;
166    }
167
168}
169
170Port &
171Walker::getPort(const std::string &if_name, PortID idx)
172{
173    if (if_name == "port")
174        return port;
175    else
176        return ClockedObject::getPort(if_name, idx);
177}
178
179void
180Walker::WalkerState::initState(ThreadContext * _tc,
181        BaseTLB::Mode _mode, bool _isTiming)
182{
183    assert(state == Ready);
184    started = false;
185    tc = _tc;
186    mode = _mode;
187    timing = _isTiming;
188}
189
190void
191Walker::startWalkWrapper()
192{
193    unsigned num_squashed = 0;
194    WalkerState *currState = currStates.front();
195    while ((num_squashed < numSquashable) && currState &&
196        currState->translation->squashed()) {
197        currStates.pop_front();
198        num_squashed++;
199
200        DPRINTF(PageTableWalker, "Squashing table walk for address %#x\n",
201            currState->req->getVaddr());
202
203        // finish the translation which will delete the translation object
204        currState->translation->finish(
205            std::make_shared<UnimpFault>("Squashed Inst"),
206            currState->req, currState->tc, currState->mode);
207
208        // delete the current request if there are no inflight packets.
209        // if there is something in flight, delete when the packets are
210        // received and inflight is zero.
211        if (currState->numInflight() == 0) {
212            delete currState;
213        } else {
214            currState->squash();
215        }
216
217        // check the next translation request, if it exists
218        if (currStates.size())
219            currState = currStates.front();
220        else
221            currState = NULL;
222    }
223    if (currState && !currState->wasStarted())
224        currState->startWalk();
225}
226
227Fault
228Walker::WalkerState::startWalk()
229{
230    Fault fault = NoFault;
231    assert(!started);
232    started = true;
233    setupWalk(req->getVaddr());
234    if (timing) {
235        nextState = state;
236        state = Waiting;
237        timingFault = NoFault;
238        sendPackets();
239    } else {
240        do {
241            walker->port.sendAtomic(read);
242            PacketPtr write = NULL;
243            fault = stepWalk(write);
244            assert(fault == NoFault || read == NULL);
245            state = nextState;
246            nextState = Ready;
247            if (write)
248                walker->port.sendAtomic(write);
249        } while (read);
250        state = Ready;
251        nextState = Waiting;
252    }
253    return fault;
254}
255
256Fault
257Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes)
258{
259    Fault fault = NoFault;
260    assert(!started);
261    started = true;
262    setupWalk(addr);
263
264    do {
265        walker->port.sendFunctional(read);
266        // On a functional access (page table lookup), writes should
267        // not happen so this pointer is ignored after stepWalk
268        PacketPtr write = NULL;
269        fault = stepWalk(write);
270        assert(fault == NoFault || read == NULL);
271        state = nextState;
272        nextState = Ready;
273    } while (read);
274    logBytes = entry.logBytes;
275    addr = entry.paddr;
276
277    return fault;
278}
279
280Fault
281Walker::WalkerState::stepWalk(PacketPtr &write)
282{
283    assert(state != Ready && state != Waiting);
284    Fault fault = NoFault;
285    write = NULL;
286    PageTableEntry pte;
287    if (dataSize == 8)
288        pte = read->getLE<uint64_t>();
289    else
290        pte = read->getLE<uint32_t>();
291    VAddr vaddr = entry.vaddr;
292    bool uncacheable = pte.pcd;
293    Addr nextRead = 0;
294    bool doWrite = false;
295    bool doTLBInsert = false;
296    bool doEndWalk = false;
297    bool badNX = pte.nx && mode == BaseTLB::Execute && enableNX;
298    switch(state) {
299      case LongPML4:
300        DPRINTF(PageTableWalker,
301                "Got long mode PML4 entry %#016x.\n", (uint64_t)pte);
302        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * dataSize;
303        doWrite = !pte.a;
304        pte.a = 1;
305        entry.writable = pte.w;
306        entry.user = pte.u;
307        if (badNX || !pte.p) {
308            doEndWalk = true;
309            fault = pageFault(pte.p);
310            break;
311        }
312        entry.noExec = pte.nx;
313        nextState = LongPDP;
314        break;
315      case LongPDP:
316        DPRINTF(PageTableWalker,
317                "Got long mode PDP entry %#016x.\n", (uint64_t)pte);
318        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * dataSize;
319        doWrite = !pte.a;
320        pte.a = 1;
321        entry.writable = entry.writable && pte.w;
322        entry.user = entry.user && pte.u;
323        if (badNX || !pte.p) {
324            doEndWalk = true;
325            fault = pageFault(pte.p);
326            break;
327        }
328        nextState = LongPD;
329        break;
330      case LongPD:
331        DPRINTF(PageTableWalker,
332                "Got long mode PD entry %#016x.\n", (uint64_t)pte);
333        doWrite = !pte.a;
334        pte.a = 1;
335        entry.writable = entry.writable && pte.w;
336        entry.user = entry.user && pte.u;
337        if (badNX || !pte.p) {
338            doEndWalk = true;
339            fault = pageFault(pte.p);
340            break;
341        }
342        if (!pte.ps) {
343            // 4 KB page
344            entry.logBytes = 12;
345            nextRead =
346                ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize;
347            nextState = LongPTE;
348            break;
349        } else {
350            // 2 MB page
351            entry.logBytes = 21;
352            entry.paddr = (uint64_t)pte & (mask(31) << 21);
353            entry.uncacheable = uncacheable;
354            entry.global = pte.g;
355            entry.patBit = bits(pte, 12);
356            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
357            doTLBInsert = true;
358            doEndWalk = true;
359            break;
360        }
361      case LongPTE:
362        DPRINTF(PageTableWalker,
363                "Got long mode PTE entry %#016x.\n", (uint64_t)pte);
364        doWrite = !pte.a;
365        pte.a = 1;
366        entry.writable = entry.writable && pte.w;
367        entry.user = entry.user && pte.u;
368        if (badNX || !pte.p) {
369            doEndWalk = true;
370            fault = pageFault(pte.p);
371            break;
372        }
373        entry.paddr = (uint64_t)pte & (mask(40) << 12);
374        entry.uncacheable = uncacheable;
375        entry.global = pte.g;
376        entry.patBit = bits(pte, 12);
377        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
378        doTLBInsert = true;
379        doEndWalk = true;
380        break;
381      case PAEPDP:
382        DPRINTF(PageTableWalker,
383                "Got legacy mode PAE PDP entry %#08x.\n", (uint32_t)pte);
384        nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * dataSize;
385        if (!pte.p) {
386            doEndWalk = true;
387            fault = pageFault(pte.p);
388            break;
389        }
390        nextState = PAEPD;
391        break;
392      case PAEPD:
393        DPRINTF(PageTableWalker,
394                "Got legacy mode PAE PD entry %#08x.\n", (uint32_t)pte);
395        doWrite = !pte.a;
396        pte.a = 1;
397        entry.writable = pte.w;
398        entry.user = pte.u;
399        if (badNX || !pte.p) {
400            doEndWalk = true;
401            fault = pageFault(pte.p);
402            break;
403        }
404        if (!pte.ps) {
405            // 4 KB page
406            entry.logBytes = 12;
407            nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize;
408            nextState = PAEPTE;
409            break;
410        } else {
411            // 2 MB page
412            entry.logBytes = 21;
413            entry.paddr = (uint64_t)pte & (mask(31) << 21);
414            entry.uncacheable = uncacheable;
415            entry.global = pte.g;
416            entry.patBit = bits(pte, 12);
417            entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
418            doTLBInsert = true;
419            doEndWalk = true;
420            break;
421        }
422      case PAEPTE:
423        DPRINTF(PageTableWalker,
424                "Got legacy mode PAE PTE entry %#08x.\n", (uint32_t)pte);
425        doWrite = !pte.a;
426        pte.a = 1;
427        entry.writable = entry.writable && pte.w;
428        entry.user = entry.user && pte.u;
429        if (badNX || !pte.p) {
430            doEndWalk = true;
431            fault = pageFault(pte.p);
432            break;
433        }
434        entry.paddr = (uint64_t)pte & (mask(40) << 12);
435        entry.uncacheable = uncacheable;
436        entry.global = pte.g;
437        entry.patBit = bits(pte, 7);
438        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
439        doTLBInsert = true;
440        doEndWalk = true;
441        break;
442      case PSEPD:
443        DPRINTF(PageTableWalker,
444                "Got legacy mode PSE PD entry %#08x.\n", (uint32_t)pte);
445        doWrite = !pte.a;
446        pte.a = 1;
447        entry.writable = pte.w;
448        entry.user = pte.u;
449        if (!pte.p) {
450            doEndWalk = true;
451            fault = pageFault(pte.p);
452            break;
453        }
454        if (!pte.ps) {
455            // 4 KB page
456            entry.logBytes = 12;
457            nextRead =
458                ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
459            nextState = PTE;
460            break;
461        } else {
462            // 4 MB page
463            entry.logBytes = 21;
464            entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22;
465            entry.uncacheable = uncacheable;
466            entry.global = pte.g;
467            entry.patBit = bits(pte, 12);
468            entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
469            doTLBInsert = true;
470            doEndWalk = true;
471            break;
472        }
473      case PD:
474        DPRINTF(PageTableWalker,
475                "Got legacy mode PD entry %#08x.\n", (uint32_t)pte);
476        doWrite = !pte.a;
477        pte.a = 1;
478        entry.writable = pte.w;
479        entry.user = pte.u;
480        if (!pte.p) {
481            doEndWalk = true;
482            fault = pageFault(pte.p);
483            break;
484        }
485        // 4 KB page
486        entry.logBytes = 12;
487        nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize;
488        nextState = PTE;
489        break;
490      case PTE:
491        DPRINTF(PageTableWalker,
492                "Got legacy mode PTE entry %#08x.\n", (uint32_t)pte);
493        doWrite = !pte.a;
494        pte.a = 1;
495        entry.writable = pte.w;
496        entry.user = pte.u;
497        if (!pte.p) {
498            doEndWalk = true;
499            fault = pageFault(pte.p);
500            break;
501        }
502        entry.paddr = (uint64_t)pte & (mask(20) << 12);
503        entry.uncacheable = uncacheable;
504        entry.global = pte.g;
505        entry.patBit = bits(pte, 7);
506        entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
507        doTLBInsert = true;
508        doEndWalk = true;
509        break;
510      default:
511        panic("Unknown page table walker state %d!\n");
512    }
513    if (doEndWalk) {
514        if (doTLBInsert)
515            if (!functional)
516                walker->tlb->insert(entry.vaddr, entry);
517        endWalk();
518    } else {
519        PacketPtr oldRead = read;
520        //If we didn't return, we're setting up another read.
521        Request::Flags flags = oldRead->req->getFlags();
522        flags.set(Request::UNCACHEABLE, uncacheable);
523        RequestPtr request = std::make_shared<Request>(
524            nextRead, oldRead->getSize(), flags, walker->masterId);
525        read = new Packet(request, MemCmd::ReadReq);
526        read->allocate();
527        // If we need to write, adjust the read packet to write the modified
528        // value back to memory.
529        if (doWrite) {
530            write = oldRead;
531            write->setLE<uint64_t>(pte);
532            write->cmd = MemCmd::WriteReq;
533        } else {
534            write = NULL;
535            delete oldRead;
536        }
537    }
538    return fault;
539}
540
541void
542Walker::WalkerState::endWalk()
543{
544    nextState = Ready;
545    delete read;
546    read = NULL;
547}
548
549void
550Walker::WalkerState::setupWalk(Addr vaddr)
551{
552    VAddr addr = vaddr;
553    CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3);
554    // Check if we're in long mode or not
555    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
556    dataSize = 8;
557    Addr topAddr;
558    if (efer.lma) {
559        // Do long mode.
560        state = LongPML4;
561        topAddr = (cr3.longPdtb << 12) + addr.longl4 * dataSize;
562        enableNX = efer.nxe;
563    } else {
564        // We're in some flavor of legacy mode.
565        CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
566        if (cr4.pae) {
567            // Do legacy PAE.
568            state = PAEPDP;
569            topAddr = (cr3.paePdtb << 5) + addr.pael3 * dataSize;
570            enableNX = efer.nxe;
571        } else {
572            dataSize = 4;
573            topAddr = (cr3.pdtb << 12) + addr.norml2 * dataSize;
574            if (cr4.pse) {
575                // Do legacy PSE.
576                state = PSEPD;
577            } else {
578                // Do legacy non PSE.
579                state = PD;
580            }
581            enableNX = false;
582        }
583    }
584
585    nextState = Ready;
586    entry.vaddr = vaddr;
587
588    Request::Flags flags = Request::PHYSICAL;
589    if (cr3.pcd)
590        flags.set(Request::UNCACHEABLE);
591
592    RequestPtr request = std::make_shared<Request>(
593        topAddr, dataSize, flags, walker->masterId);
594
595    read = new Packet(request, MemCmd::ReadReq);
596    read->allocate();
597}
598
599bool
600Walker::WalkerState::recvPacket(PacketPtr pkt)
601{
602    assert(pkt->isResponse());
603    assert(inflight);
604    assert(state == Waiting);
605    inflight--;
606    if (squashed) {
607        // if were were squashed, return true once inflight is zero and
608        // this WalkerState will be freed there.
609        return (inflight == 0);
610    }
611    if (pkt->isRead()) {
612        // should not have a pending read it we also had one outstanding
613        assert(!read);
614
615        // @todo someone should pay for this
616        pkt->headerDelay = pkt->payloadDelay = 0;
617
618        state = nextState;
619        nextState = Ready;
620        PacketPtr write = NULL;
621        read = pkt;
622        timingFault = stepWalk(write);
623        state = Waiting;
624        assert(timingFault == NoFault || read == NULL);
625        if (write) {
626            writes.push_back(write);
627        }
628        sendPackets();
629    } else {
630        sendPackets();
631    }
632    if (inflight == 0 && read == NULL && writes.size() == 0) {
633        state = Ready;
634        nextState = Waiting;
635        if (timingFault == NoFault) {
636            /*
637             * Finish the translation. Now that we know the right entry is
638             * in the TLB, this should work with no memory accesses.
639             * There could be new faults unrelated to the table walk like
640             * permissions violations, so we'll need the return value as
641             * well.
642             */
643            bool delayedResponse;
644            Fault fault = walker->tlb->translate(req, tc, NULL, mode,
645                                                 delayedResponse, true);
646            assert(!delayedResponse);
647            // Let the CPU continue.
648            translation->finish(fault, req, tc, mode);
649        } else {
650            // There was a fault during the walk. Let the CPU know.
651            translation->finish(timingFault, req, tc, mode);
652        }
653        return true;
654    }
655
656    return false;
657}
658
659void
660Walker::WalkerState::sendPackets()
661{
662    //If we're already waiting for the port to become available, just return.
663    if (retrying)
664        return;
665
666    //Reads always have priority
667    if (read) {
668        PacketPtr pkt = read;
669        read = NULL;
670        inflight++;
671        if (!walker->sendTiming(this, pkt)) {
672            retrying = true;
673            read = pkt;
674            inflight--;
675            return;
676        }
677    }
678    //Send off as many of the writes as we can.
679    while (writes.size()) {
680        PacketPtr write = writes.back();
681        writes.pop_back();
682        inflight++;
683        if (!walker->sendTiming(this, write)) {
684            retrying = true;
685            writes.push_back(write);
686            inflight--;
687            return;
688        }
689    }
690}
691
692unsigned
693Walker::WalkerState::numInflight() const
694{
695    return inflight;
696}
697
698bool
699Walker::WalkerState::isRetrying()
700{
701    return retrying;
702}
703
704bool
705Walker::WalkerState::isTiming()
706{
707    return timing;
708}
709
710bool
711Walker::WalkerState::wasStarted()
712{
713    return started;
714}
715
716void
717Walker::WalkerState::squash()
718{
719    squashed = true;
720}
721
722void
723Walker::WalkerState::retry()
724{
725    retrying = false;
726    sendPackets();
727}
728
729Fault
730Walker::WalkerState::pageFault(bool present)
731{
732    DPRINTF(PageTableWalker, "Raising page fault.\n");
733    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
734    if (mode == BaseTLB::Execute && !enableNX)
735        mode = BaseTLB::Read;
736    return std::make_shared<PageFault>(entry.vaddr, present, mode,
737                                       m5reg.cpl == 3, false);
738}
739
740/* end namespace X86ISA */ }
741
742X86ISA::Walker *
743X86PagetableWalkerParams::create()
744{
745    return new X86ISA::Walker(this);
746}
747