/*
 * Copyright (c) 2013 Andreas Sandberg
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andreas Sandberg
 */

#include <linux/kvm.h>

#include <algorithm>
#include <cerrno>
#include <memory>

#include "arch/x86/regs/msr.hh"
#include "arch/x86/cpuid.hh"
#include "arch/x86/utility.hh"
#include "arch/registers.hh"
#include "cpu/kvm/base.hh"
#include "cpu/kvm/x86_cpu.hh"
#include "debug/Drain.hh"
#include "debug/Kvm.hh"
#include "debug/KvmContext.hh"
#include "debug/KvmIO.hh"
#include "debug/KvmInt.hh"

using namespace X86ISA;

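// MSR index of the IA-32 time-stamp counter (IA32_TIME_STAMP_COUNTER)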
#define MSR_TSC 0x10

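// Port addresses used by the standard x86 PCI configuration space
// access mechanism (CONFIG_ADDRESS and CONFIG_DATA)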
#define IO_PCI_CONF_ADDR 0xCF8
#define IO_PCI_CONF_DATA_BASE 0xCFC

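// The FOREACH_* macros below implement a simple X-macro pattern: the
// caller defines APPLY_IREG/APPLY_SREG/APPLY_DREG/... to map a field
// in the corresponding KVM state structure onto a gem5 register index
// and then invokes the matching FOREACH_* macro to apply that mapping
// to every register in the group.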
#define FOREACH_IREG()                          \
    do {                                        \
        APPLY_IREG(rax, INTREG_RAX);            \
        APPLY_IREG(rbx, INTREG_RBX);            \
        APPLY_IREG(rcx, INTREG_RCX);            \
        APPLY_IREG(rdx, INTREG_RDX);            \
        APPLY_IREG(rsi, INTREG_RSI);            \
        APPLY_IREG(rdi, INTREG_RDI);            \
        APPLY_IREG(rsp, INTREG_RSP);            \
        APPLY_IREG(rbp, INTREG_RBP);            \
        APPLY_IREG(r8, INTREG_R8);              \
        APPLY_IREG(r9, INTREG_R9);              \
        APPLY_IREG(r10, INTREG_R10);            \
        APPLY_IREG(r11, INTREG_R11);            \
        APPLY_IREG(r12, INTREG_R12);            \
        APPLY_IREG(r13, INTREG_R13);            \
        APPLY_IREG(r14, INTREG_R14);            \
        APPLY_IREG(r15, INTREG_R15);            \
    } while(0)

#define FOREACH_SREG()                                  \
    do {                                                \
        APPLY_SREG(cr0, MISCREG_CR0);                   \
        APPLY_SREG(cr2, MISCREG_CR2);                   \
        APPLY_SREG(cr3, MISCREG_CR3);                   \
        APPLY_SREG(cr4, MISCREG_CR4);                   \
        APPLY_SREG(cr8, MISCREG_CR8);                   \
        APPLY_SREG(efer, MISCREG_EFER);                 \
        APPLY_SREG(apic_base, MISCREG_APIC_BASE);       \
    } while(0)

#define FOREACH_DREG()                          \
    do {                                        \
        APPLY_DREG(db[0], MISCREG_DR0);         \
        APPLY_DREG(db[1], MISCREG_DR1);         \
        APPLY_DREG(db[2], MISCREG_DR2);         \
        APPLY_DREG(db[3], MISCREG_DR3);         \
        APPLY_DREG(dr6, MISCREG_DR6);           \
        APPLY_DREG(dr7, MISCREG_DR7);           \
    } while(0)

#define FOREACH_SEGMENT()                                       \
    do {                                                        \
        APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \
    } while(0)

#define FOREACH_DTABLE()                                        \
    do {                                                        \
        APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE);  \
        APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \
    } while(0)

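/* Allocate a KVM state structure that ends in a variable length array
 * (e.g., kvm_msrs or kvm_cpuid2), with room for the requested number
 * of trailing entries. The caller owns the returned memory.
 */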
template<typename STRUCT, typename ENTRY>
static STRUCT *newVarStruct(size_t entries)
{
    return (STRUCT *)operator new(sizeof(STRUCT) + entries * sizeof(ENTRY));
}

static void
dumpKvm(const struct kvm_regs &regs)
{
    inform("KVM register state:\n");

#define APPLY_IREG(kreg, mreg)                  \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    inform("\trip: 0x%llx\n", regs.rip);
    inform("\trflags: 0x%llx\n", regs.rflags);
}

static void
dumpKvm(const char *reg_name, const struct kvm_segment &seg)
{
    inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n"
           "\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n",
           reg_name,
           seg.base, seg.limit, seg.selector, seg.type,
           seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl, seg.unusable);
}

static void
dumpKvm(const char *reg_name, const struct kvm_dtable &dtable)
{
    inform("\t%s: @0x%llx+%x\n",
           reg_name, dtable.base, dtable.limit);
}

static void
dumpKvm(const struct kvm_sregs &sregs)
{
#define APPLY_SREG(kreg, mreg)                          \
    inform("\t" # kreg ": 0x%llx\n", sregs.kreg);
#define APPLY_SEGMENT(kreg, idx)                \
    dumpKvm(# kreg, sregs.kreg);
#define APPLY_DTABLE(kreg, idx)                 \
    dumpKvm(# kreg, sregs.kreg);

    inform("Special registers:\n");
    FOREACH_SEGMENT();
    FOREACH_SREG();
    FOREACH_DTABLE();

    inform("Interrupt Bitmap:");
    for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64)
        inform("  0x%.8x", sregs.interrupt_bitmap[i / 64]);

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

#ifdef KVM_GET_DEBUGREGS
static void
dumpKvm(const struct kvm_debugregs &regs)
{
    inform("KVM debug state:\n");

#define APPLY_DREG(kreg, mreg)                  \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_DREG();

#undef APPLY_DREG

    inform("\tflags: 0x%llx\n", regs.flags);
}
#endif

static void
dumpKvm(const struct kvm_fpu &fpu)
{
    inform("FPU registers:\n");
    inform("\tfcw: 0x%x\n", fpu.fcw);
    inform("\tfsw: 0x%x\n", fpu.fsw);
    inform("\tftwx: 0x%x\n", fpu.ftwx);
    inform("\tlast_opcode: 0x%x\n", fpu.last_opcode);
    inform("\tlast_ip: 0x%x\n", fpu.last_ip);
    inform("\tlast_dp: 0x%x\n", fpu.last_dp);
    inform("\tmxcsr: 0x%x\n", fpu.mxcsr);
    inform("\tFP Stack:\n");
    for (int i = 0; i < 8; ++i) {
        const bool empty(!((fpu.ftwx >> i) & 0x1));
        char hex[33];
        for (int j = 0; j < 16; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]);
        inform("\t\t%i: 0x%s%s\n", i, hex, empty ? " (e)" : "");
    }
    inform("\tXMM registers:\n");
    for (int i = 0; i < 16; ++i) {
        char hex[33];
        for (int j = 0; j < 16; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]);
        inform("\t\t%i: 0x%s\n", i, hex);
    }
}

static void
dumpKvm(const struct kvm_msrs &msrs)
{
    inform("MSRs:\n");

    for (int i = 0; i < msrs.nmsrs; ++i) {
        const struct kvm_msr_entry &e(msrs.entries[i]);

        inform("\t0x%x: 0x%x\n", e.index, e.data);
    }
}

static void
dumpKvm(const struct kvm_xcrs &regs)
{
    inform("KVM XCR registers:\n");

    inform("\tFlags: 0x%x\n", regs.flags);
    for (int i = 0; i < regs.nr_xcrs; ++i) {
        inform("\tXCR[0x%x]: 0x%x\n",
               regs.xcrs[i].xcr,
               regs.xcrs[i].value);
    }
}

static void
dumpKvm(const struct kvm_xsave &xsave)
{
    inform("KVM XSAVE:\n");

    Trace::dump((Tick)-1, "xsave.region",
                xsave.region, sizeof(xsave.region));
}

static void
dumpKvm(const struct kvm_vcpu_events &events)
{
    inform("vCPU events:\n");

    inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n",
           events.exception.injected, events.exception.nr,
           events.exception.has_error_code, events.exception.error_code);

    inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n",
           events.interrupt.injected, events.interrupt.nr,
           events.interrupt.soft);

    inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n",
           events.nmi.injected, events.nmi.pending,
           events.nmi.masked);

    inform("\tSIPI vector: 0x%x\n", events.sipi_vector);
    inform("\tFlags: 0x%x\n", events.flags);
}

static bool
isCanonicalAddress(uint64_t addr)
{
    // x86-64 doesn't currently use the full 64-bit virtual address
    // space; instead, it uses signed 48-bit addresses that are
    // sign-extended to 64 bits.  Such addresses are known as
    // "canonical".
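    // For example, 0x00007fffffffffff and 0xffff800000000000 are
    // canonical, while 0x0000800000000000 is not.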
    uint64_t upper_half(addr & 0xffff800000000000ULL);
    return upper_half == 0 || upper_half == 0xffff800000000000;
}

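/* Sanity check a segment register against (a subset of) the
 * consistency requirements that hardware virtualization imposes on
 * guest state. Violations are only reported with warn(); nothing is
 * modified.
 */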
static void
checkSeg(const char *name, const int idx, const struct kvm_segment &seg,
         struct kvm_sregs sregs)
{
    // Check the register base
    switch (idx) {
      case MISCREG_TSL:
      case MISCREG_TR:
      case MISCREG_FS:
      case MISCREG_GS:
        if (!isCanonicalAddress(seg.base))
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;

      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
        if (seg.unusable)
            break;
      case MISCREG_CS:
        if (seg.base & 0xffffffff00000000ULL)
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;
    }

    // Check the type
    switch (idx) {
      case MISCREG_CS:
        switch (seg.type) {
          case 3:
            if (seg.dpl != 0)
                warn("CS type is 3 but dpl != 0.\n");
            break;
          case 9:
          case 11:
            if (seg.dpl != sregs.ss.dpl)
                warn("CS type is %i but CS DPL != SS DPL\n", seg.type);
            break;
          case 13:
          case 15:
            if (seg.dpl > sregs.ss.dpl)
                warn("CS type is %i but CS DPL > SS DPL\n", seg.type);
            break;
          default:
            warn("Illegal CS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_SS:
        if (seg.unusable)
            break;
        switch (seg.type) {
          case 3:
            if (sregs.cs.type == 3 && seg.dpl != 0)
                warn("CS type is 3, but SS DPL != 0.\n");
            /* FALLTHROUGH */
          case 7:
            if (!(sregs.cr0 & 1) && seg.dpl != 0)
                warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl);
            break;
          default:
            warn("Illegal SS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
        if (!(seg.type & 0x1) ||
            ((seg.type & 0x8) && !(seg.type & 0x2)))
            warn("%s has an illegal type field: %i\n", name, seg.type);
        break;

      case MISCREG_TR:
        // TODO: We should check the CPU mode
        if (seg.type != 3 && seg.type != 11)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
        if (seg.type != 2)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;
    }

    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
      case MISCREG_CS:
        if (!seg.s)
            warn("%s: S flag not set\n", name);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
      case MISCREG_TR:
        if (seg.s)
            warn("%s: S flag is set\n", name);
        break;
    }

    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
      case MISCREG_TSL:
        if (seg.unusable)
            break;
      case MISCREG_TR:
      case MISCREG_CS:
        if (!seg.present)
            warn("%s: P flag not set\n", name);

        if (((seg.limit & 0xFFF) == 0 && seg.g) ||
            ((seg.limit & 0xFFF00000) != 0 && !seg.g)) {
            warn("%s limit (0x%x) and g (%i) combination is illegal.\n",
                 name, seg.limit, seg.g);
        }
        break;
    }

    // TODO: Check CS DB
}

X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params)
    : BaseKvmCPU(params)
{
    Kvm &kvm(vm.kvm);

    if (!kvm.capSetTSSAddress())
        panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n");
    if (!kvm.capExtendedCPUID())
        panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n");
    if (!kvm.capUserNMI())
        warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n");
    if (!kvm.capVCPUEvents())
        warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n");

    haveDebugRegs = kvm.capDebugRegs();
    haveXSave = kvm.capXSave();
    haveXCRs = kvm.capXCRs();
}

X86KvmCPU::~X86KvmCPU()
{
}

void
X86KvmCPU::startup()
{
    BaseKvmCPU::startup();

    updateCPUID();

    io_req.setThreadContext(tc->contextId(), 0);

    // TODO: Do we need to create an identity mapped TSS area? We
    // should call kvm.vm.setTSSAddress() here in that case. It should
    // only be needed for old versions of the virtualization
    // extensions. We should make sure that the identity range is
    // reserved in the e820 memory map in that case.
}

void
X86KvmCPU::dump()
{
    dumpIntRegs();
    dumpFpuRegs();
    dumpSpecRegs();
    dumpDebugRegs();
    dumpXCRs();
    dumpVCpuEvents();
    dumpMSRs();
    dumpXSave();
}

void
X86KvmCPU::dumpFpuRegs() const
{
    struct kvm_fpu fpu;
    getFPUState(fpu);
    dumpKvm(fpu);
}

void
X86KvmCPU::dumpIntRegs() const
{
    struct kvm_regs regs;
    getRegisters(regs);
    dumpKvm(regs);
}

void
X86KvmCPU::dumpSpecRegs() const
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);
    dumpKvm(sregs);
}

void
X86KvmCPU::dumpDebugRegs() const
{
    if (haveDebugRegs) {
#ifdef KVM_GET_DEBUGREGS
        struct kvm_debugregs dregs;
        getDebugRegisters(dregs);
        dumpKvm(dregs);
#endif
    } else {
        inform("Debug registers not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXCRs() const
{
    if (haveXCRs) {
        struct kvm_xcrs xcrs;
        getXCRs(xcrs);
        dumpKvm(xcrs);
    } else {
        inform("XCRs not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXSave() const
{
    if (haveXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);
        dumpKvm(xsave);
    } else {
        inform("XSave not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpVCpuEvents() const
{
    struct kvm_vcpu_events events;
    getVCpuEvents(events);
    dumpKvm(events);
}

void
X86KvmCPU::dumpMSRs() const
{
    const Kvm::MSRIndexVector &supported_msrs(vm.kvm.getSupportedMSRs());
    std::unique_ptr<struct kvm_msrs> msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(
            supported_msrs.size()));

    msrs->nmsrs = supported_msrs.size();
    for (int i = 0; i < supported_msrs.size(); ++i) {
        struct kvm_msr_entry &e(msrs->entries[i]);
        e.index = supported_msrs[i];
        e.reserved = 0;
        e.data = 0;
    }
    getMSRs(*msrs.get());

    dumpKvm(*msrs.get());
}

void
X86KvmCPU::updateKvmState()
{
    updateKvmStateRegs();
    updateKvmStateSRegs();
    updateKvmStateFPU();
    updateKvmStateMSRs();

    DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
    if (DTRACE(KvmContext))
        dump();
}

void
X86KvmCPU::updateKvmStateRegs()
{
    struct kvm_regs regs;

#define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg)
    FOREACH_IREG();
#undef APPLY_IREG

    regs.rip = tc->instAddr();

    /* You might think that setting regs.rflags to the contents of
     * MISCREG_RFLAGS here would suffice. In that case you're
     * mistaken. We need to reconstruct it from a bunch of ucode
     * registers and wave a dead chicken over it (aka mask out and set
     * reserved bits) to get it to work.
     */
    regs.rflags = X86ISA::getRFlags(tc);

    setRegisters(regs);
}

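/* Translate gem5's state for one segment register (the base, limit,
 * selector, and attribute misc regs) into the kvm_segment
 * representation used by KVM_SET_SREGS.
 */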
static inline void
setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg,
                 const int index)
{
    SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index)));

    kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
    kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index));
    kvm_seg.type = attr.type;
    kvm_seg.present = attr.present;
    kvm_seg.dpl = attr.dpl;
    kvm_seg.db = attr.defaultSize;
    kvm_seg.s = attr.system;
    kvm_seg.l = attr.longMode;
    kvm_seg.g = attr.granularity;
    kvm_seg.avl = attr.avl;

    // A segment is unusable when the selector is zero. There is an
    // attr.unusable flag in gem5, but it seems unused.
    //
    // TODO: Are there corner cases where this doesn't work?
    kvm_seg.unusable = (kvm_seg.selector == 0);
}

static inline void
setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable,
                const int index)
{
    kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
}

void
X86KvmCPU::updateKvmStateSRegs()
{
    struct kvm_sregs sregs;

#define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg)
#define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx)

    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE

    // Clear the interrupt bitmap
    memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));

    RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS));
    if (!rflags_nocc.vm) {
        // Do segment verification if the CPU isn't entering virtual
        // 8086 mode.  We currently assume that unrestricted guest
        // mode is available.

#define APPLY_SEGMENT(kreg, idx) \
        checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs)

        FOREACH_SEGMENT();
#undef APPLY_SEGMENT
    }
    setSpecialRegisters(sregs);
}

void
X86KvmCPU::updateKvmStateFPU()
{
    warn_once("X86KvmCPU::updateKvmStateFPU not implemented\n");
}

void
X86KvmCPU::updateKvmStateMSRs()
{
    KvmMSRVector msrs;

    const Kvm::MSRIndexVector &indices(getMsrIntersection());

    for (auto it = indices.cbegin(); it != indices.cend(); ++it) {
        struct kvm_msr_entry e;

        e.index = *it;
        e.reserved = 0;
        e.data = tc->readMiscReg(msrMap.at(*it));
        DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n",
                e.index, e.data);

        msrs.push_back(e);
    }

    setMSRs(msrs);
}

void
X86KvmCPU::updateThreadContext()
{
    DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n");
    if (DTRACE(KvmContext))
        dump();

    updateThreadContextRegs();
    updateThreadContextSRegs();
    updateThreadContextFPU();
    updateThreadContextMSRs();

    // The M5 misc reg caches some values from other
    // registers. Writing to it with side effects causes it to be
    // updated from its source registers.
    tc->setMiscReg(MISCREG_M5_REG, 0);
}

void
X86KvmCPU::updateThreadContextRegs()
{
    struct kvm_regs regs;
    getRegisters(regs);

#define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    tc->pcState(PCState(regs.rip));

    // Flags are spread out across multiple semi-magic registers so we
    // need some special care when updating them.
    X86ISA::setRFlags(tc, regs.rflags);
}


inline void
setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
                  const int index)
{
    SegAttr attr(0);

    attr.type = kvm_seg.type;
    attr.present = kvm_seg.present;
    attr.dpl = kvm_seg.dpl;
    attr.defaultSize = kvm_seg.db;
    attr.system = kvm_seg.s;
    attr.longMode = kvm_seg.l;
    attr.granularity = kvm_seg.g;
    attr.avl = kvm_seg.avl;
    attr.unusable = kvm_seg.unusable;

    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
    tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
    tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
}

inline void
setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
                  const int index)
{
    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
}

void
X86KvmCPU::updateThreadContextSRegs()
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);

    assert(getKvmRunState()->apic_base == sregs.apic_base);
    assert(getKvmRunState()->cr8 == sregs.cr8);

#define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
#define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();
#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

void
X86KvmCPU::updateThreadContextFPU()
{
    warn_once("X86KvmCPU::updateThreadContextFPU not implemented\n");
}

void
X86KvmCPU::updateThreadContextMSRs()
{
    const Kvm::MSRIndexVector &msrs(getMsrIntersection());

    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
    struct kvm_msr_entry *entry;

    // Create a list of MSRs to read
    kvm_msrs->nmsrs = msrs.size();
    entry = &kvm_msrs->entries[0];
    for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
        entry->index = *it;
        entry->reserved = 0;
        entry->data = 0;
    }

    getMSRs(*kvm_msrs.get());

    // Update M5's state
    entry = &kvm_msrs->entries[0];
    for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
        DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
                entry->index, entry->data);

        tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
    }
}

void
X86KvmCPU::deliverInterrupts()
{
    syncThreadContext();

    Fault fault(interrupts->getInterrupt(tc));
    interrupts->updateIntrInfo(tc);

    X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
    if (x86int) {
        struct kvm_interrupt kvm_int;
        kvm_int.irq = x86int->getVector();

        DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
                fault->name(), kvm_int.irq);

        kvmInterrupt(kvm_int);
    } else if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "Delivering NMI\n");
        kvmNonMaskableInterrupt();
    } else {
        panic("KVM: Unknown interrupt type\n");
    }
}

Tick
X86KvmCPU::kvmRun(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (interrupts->checkInterruptsRaw()) {
        if (kvm_run.ready_for_interrupt_injection) {
            // KVM claims that it is ready for an interrupt. It might
            // be lying if we just updated rflags and disabled
            // interrupts (e.g., by doing a CPU handover). Let's sync
            // the thread context and check if there are /really/
            // interrupts that should be delivered now.
            syncThreadContext();
            if (interrupts->checkInterrupts(tc)) {
                DPRINTF(KvmInt,
                        "M5 has pending interrupts, delivering interrupt.\n");

                deliverInterrupts();
            } else {
                DPRINTF(KvmInt,
                        "Interrupt delivery delayed due to KVM confusion.\n");
                kvm_run.request_interrupt_window = 1;
            }
        } else if (!kvm_run.request_interrupt_window) {
            DPRINTF(KvmInt,
                    "M5 has pending interrupts, requesting interrupt "
                    "window.\n");
            kvm_run.request_interrupt_window = 1;
        }
    } else {
        kvm_run.request_interrupt_window = 0;
    }

    return kvmRunWrapper(ticks);
}

Tick
X86KvmCPU::kvmRunDrain()
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (!archIsDrained()) {
        DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");

        // Tell KVM to find a suitable place to deliver interrupts. This
        // should ensure that pending interrupts have been delivered and
        // things are reasonably consistent (i.e., no interrupts pending
        // in the guest).
        kvm_run.request_interrupt_window = 1;

        // Limit the run to 1 millisecond. That is hopefully enough to
        // reach an interrupt window. Otherwise, we'll just try again
        // later.
        return kvmRunWrapper(1 * SimClock::Float::ms);
    } else {
        DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");

        return kvmRunWrapper(0);
    }
}

Tick
X86KvmCPU::kvmRunWrapper(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Synchronize the APIC base and CR8 here since they are present
    // in the kvm_run struct, which makes the synchronization really
    // cheap.
    kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    const Tick run_ticks(BaseKvmCPU::kvmRun(ticks));

    tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    return run_ticks;
}

uint64_t
X86KvmCPU::getHostCycles() const
{
    return getMSR(MSR_TSC);
}

void
X86KvmCPU::handleIOMiscReg32(int miscreg)
{
    struct kvm_run &kvm_run(*getKvmRunState());
    const uint16_t port(kvm_run.io.port);

    assert(kvm_run.exit_reason == KVM_EXIT_IO);

    if (kvm_run.io.size != 4) {
        panic("Unexpected IO size (%u) for address 0x%x.\n",
              kvm_run.io.size, port);
    }

    if (kvm_run.io.count != 1) {
        panic("Unexpected IO count (%u) for address 0x%x.\n",
              kvm_run.io.count, port);
    }

    uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset));
    if (kvm_run.io.direction == KVM_EXIT_IO_OUT)
        tc->setMiscReg(miscreg, *data);
    else
        *data = tc->readMiscRegNoEffect(miscreg);
}

Tick
X86KvmCPU::handleKvmExitIO()
{
    struct kvm_run &kvm_run(*getKvmRunState());
    bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT);
    unsigned char *guestData(getGuestData(kvm_run.io.data_offset));
    Tick delay(0);
    uint16_t port(kvm_run.io.port);
    Addr pAddr;
    const int count(kvm_run.io.count);

    assert(kvm_run.io.direction == KVM_EXIT_IO_IN ||
           kvm_run.io.direction == KVM_EXIT_IO_OUT);

    DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n",
            (isWrite ? "out" : "in"), kvm_run.io.port);

    /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we
     * don't use the TLB component, we need to intercept and handle
     * the PCI configuration space IO ports here.
     *
     * The IO port PCI discovery mechanism uses one address register
     * and one data register. We map the address register to a misc
     * reg and use that to re-route data register accesses to the
     * right location in the PCI configuration space.
     */
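    /* CONFIG_ADDRESS (0xCF8) layout, as used below: bit 31 is the
     * enable bit, bits 23:16 select the bus, 15:11 the device, 10:8
     * the function, and 7:2 the dword-aligned register offset. The
     * low two bits of the data port (0xCFC-0xCFF) select the byte
     * within that dword.
     */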
    if (port == IO_PCI_CONF_ADDR) {
        handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS);
        return 0;
    } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) {
        Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS));
        if (pciConfigAddr & 0x80000000) {
            pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) |
                                                (port & 0x3));
        } else {
            pAddr = X86ISA::x86IOAddress(port);
        }
    } else {
        pAddr = X86ISA::x86IOAddress(port);
    }

    io_req.setPhys(pAddr, kvm_run.io.size, Request::UNCACHEABLE,
                   dataMasterId());

    const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
    for (int i = 0; i < count; ++i) {
        Packet pkt(&io_req, cmd);

        pkt.dataStatic(guestData);
        delay += dataPort.sendAtomic(&pkt);

        guestData += kvm_run.io.size;
    }

    return delay;
}

Tick
X86KvmCPU::handleKvmExitIRQWindowOpen()
{
    // We don't need to do anything here since this is caught the next
    // time we execute kvmRun(). We still overload the exit event to
    // silence the warning about an unhandled exit event.
    return 0;
}

bool
X86KvmCPU::archIsDrained() const
{
    struct kvm_vcpu_events events;

    getVCpuEvents(events);

    // We could probably handle this by re-inserting interrupts
    // that are pending into gem5 on a drain. However, that would
    // probably be tricky to do reliably, so we'll just prevent a
    // drain if there is anything pending in the
    // guest. X86KvmCPU::kvmRunDrain() minimizes the amount of code
    // executed in the guest by requesting an interrupt window if
    // there are pending interrupts.
    const bool pending_events(events.exception.injected ||
                              events.interrupt.injected ||
                              events.nmi.injected || events.nmi.pending);

    if (pending_events) {
        DPRINTF(Drain, "archIsDrained: Pending events: %s %s %s %s\n",
                events.exception.injected ? "exception" : "",
                events.interrupt.injected ? "interrupt" : "",
                events.nmi.injected ? "nmi[i]" : "",
                events.nmi.pending ? "nmi[p]" : "");
    }

    return !pending_events;
}

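/* Convert a gem5 CPUID result for a given function/index pair into
 * the kvm_cpuid_entry2 format consumed by KVM_SET_CPUID2. */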
static struct kvm_cpuid_entry2
makeKvmCpuid(uint32_t function, uint32_t index,
             CpuidResult &result)
{
    struct kvm_cpuid_entry2 e;
    e.function = function;
    e.index = index;
    e.flags = 0;
    e.eax = (uint32_t)result.rax;
    e.ebx = (uint32_t)result.rbx;
    e.ecx = (uint32_t)result.rcx;
    e.edx = (uint32_t)result.rdx;

    return e;
}

void
X86KvmCPU::updateCPUID()
{
    Kvm::CPUIDVector m5_supported;

    /* TODO: We currently don't support any of the functions that
     * iterate through data structures in the CPU using an index. It's
     * not a problem at the moment since M5 doesn't expose any of them.
     */

    /* Basic features */
    CpuidResult func0;
    X86ISA::doCpuid(tc, 0x0, 0, func0);
    for (uint32_t function = 0; function <= func0.rax; ++function) {
        CpuidResult cpuid;
        uint32_t idx(0);

        X86ISA::doCpuid(tc, function, idx, cpuid);
        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
    }

    /* Extended features */
    CpuidResult efunc0;
    X86ISA::doCpuid(tc, 0x80000000, 0, efunc0);
    for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) {
        CpuidResult cpuid;
        uint32_t idx(0);

        X86ISA::doCpuid(tc, function, idx, cpuid);
        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
    }

    setCPUID(m5_supported);
}

void
X86KvmCPU::setCPUID(const struct kvm_cpuid2 &cpuid)
{
    if (ioctl(KVM_SET_CPUID2, (void *)&cpuid) == -1)
        panic("KVM: Failed to set guest CPUID2 (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setCPUID(const Kvm::CPUIDVector &cpuid)
{
    std::unique_ptr<struct kvm_cpuid2> kvm_cpuid(
        newVarStruct<struct kvm_cpuid2, struct kvm_cpuid_entry2>(cpuid.size()));

    kvm_cpuid->nent = cpuid.size();
    std::copy(cpuid.begin(), cpuid.end(), kvm_cpuid->entries);

    setCPUID(*kvm_cpuid);
}

void
X86KvmCPU::setMSRs(const struct kvm_msrs &msrs)
{
    if (ioctl(KVM_SET_MSRS, (void *)&msrs) == -1)
        panic("KVM: Failed to set guest MSRs (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setMSRs(const KvmMSRVector &msrs)
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));

    kvm_msrs->nmsrs = msrs.size();
    std::copy(msrs.begin(), msrs.end(), kvm_msrs->entries);

    setMSRs(*kvm_msrs);
}

void
X86KvmCPU::getMSRs(struct kvm_msrs &msrs) const
{
    if (ioctl(KVM_GET_MSRS, (void *)&msrs) == -1)
        panic("KVM: Failed to get guest MSRs (errno: %i)\n",
              errno);
}


void
X86KvmCPU::setMSR(uint32_t index, uint64_t value)
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
    struct kvm_msr_entry &entry(kvm_msrs->entries[0]);

    kvm_msrs->nmsrs = 1;
    entry.index = index;
    entry.reserved = 0;
    entry.data = value;

    setMSRs(*kvm_msrs.get());
}

uint64_t
X86KvmCPU::getMSR(uint32_t index) const
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
    struct kvm_msr_entry &entry(kvm_msrs->entries[0]);

    kvm_msrs->nmsrs = 1;
    entry.index = index;
    entry.reserved = 0;
    entry.data = 0;

    getMSRs(*kvm_msrs.get());
    return entry.data;
}

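/* Compute, and cache, the set of MSRs that are both reported as
 * supported by KVM and known to gem5's MSR map. Only MSRs in this
 * intersection are synchronized between gem5 and KVM.
 */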
const Kvm::MSRIndexVector &
X86KvmCPU::getMsrIntersection() const
{
    if (cachedMsrIntersection.empty()) {
        const Kvm::MSRIndexVector &kvm_msrs(vm.kvm.getSupportedMSRs());

        DPRINTF(Kvm, "kvm-x86: Updating MSR intersection\n");
        for (auto it = kvm_msrs.cbegin(); it != kvm_msrs.cend(); ++it) {
            if (X86ISA::msrMap.find(*it) != X86ISA::msrMap.end()) {
                cachedMsrIntersection.push_back(*it);
                DPRINTF(Kvm, "kvm-x86: Adding MSR 0x%x\n", *it);
            } else {
                warn("kvm-x86: MSR (0x%x) unsupported by gem5. Skipping.\n",
                     *it);
            }
        }
    }

    return cachedMsrIntersection;
}

void
X86KvmCPU::getDebugRegisters(struct kvm_debugregs &regs) const
{
#ifdef KVM_GET_DEBUGREGS
    if (ioctl(KVM_GET_DEBUGREGS, &regs) == -1)
        panic("KVM: Failed to get guest debug registers\n");
#else
    panic("KVM: Unsupported getDebugRegisters call.\n");
#endif
}

void
X86KvmCPU::setDebugRegisters(const struct kvm_debugregs &regs)
{
#ifdef KVM_SET_DEBUGREGS
    if (ioctl(KVM_SET_DEBUGREGS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest debug registers\n");
#else
    panic("KVM: Unsupported setDebugRegisters call.\n");
#endif
}

void
X86KvmCPU::getXCRs(struct kvm_xcrs &regs) const
{
    if (ioctl(KVM_GET_XCRS, &regs) == -1)
        panic("KVM: Failed to get guest XCRs\n");
}

void
X86KvmCPU::setXCRs(const struct kvm_xcrs &regs)
{
    if (ioctl(KVM_SET_XCRS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest XCRs\n");
}

void
X86KvmCPU::getXSave(struct kvm_xsave &xsave) const
{
    if (ioctl(KVM_GET_XSAVE, &xsave) == -1)
        panic("KVM: Failed to get guest XSAVE state\n");
}

void
X86KvmCPU::setXSave(const struct kvm_xsave &xsave)
{
    if (ioctl(KVM_SET_XSAVE, (void *)&xsave) == -1)
        panic("KVM: Failed to set guest XSAVE state\n");
}


void
X86KvmCPU::getVCpuEvents(struct kvm_vcpu_events &events) const
{
    if (ioctl(KVM_GET_VCPU_EVENTS, &events) == -1)
        panic("KVM: Failed to get guest vCPU events\n");
}

void
X86KvmCPU::setVCpuEvents(const struct kvm_vcpu_events &events)
{
    if (ioctl(KVM_SET_VCPU_EVENTS, (void *)&events) == -1)
        panic("KVM: Failed to set guest vCPU events\n");
}

X86KvmCPU *
X86KvmCPUParams::create()
{
    return new X86KvmCPU(this);
}