x86_cpu.cc revision 10099
1/*
2 * Copyright (c) 2013 Andreas Sandberg
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Andreas Sandberg
29 */
30
31#include <linux/kvm.h>
32
33#include <algorithm>
34#include <cerrno>
35#include <memory>
36
37#include "arch/x86/regs/msr.hh"
38#include "arch/x86/cpuid.hh"
39#include "arch/x86/utility.hh"
40#include "arch/registers.hh"
41#include "cpu/kvm/base.hh"
42#include "cpu/kvm/x86_cpu.hh"
43#include "debug/Drain.hh"
44#include "debug/Kvm.hh"
45#include "debug/KvmContext.hh"
46#include "debug/KvmIO.hh"
47#include "debug/KvmInt.hh"
48
49using namespace X86ISA;
50
// MSR index of the time stamp counter (IA32_TSC)
#define MSR_TSC 0x10

// Legacy x86 IO ports used for PCI configuration space accesses
#define IO_PCI_CONF_ADDR 0xCF8
#define IO_PCI_CONF_DATA_BASE 0xCFC

// Task segment type of an inactive 32-bit or 64-bit task
#define SEG_SYS_TYPE_TSS_AVAILABLE 9
// Task segment type of an active 32-bit or 64-bit task
#define SEG_SYS_TYPE_TSS_BUSY 11

// Non-conforming accessed code segment
#define SEG_CS_TYPE_ACCESSED 9
// Non-conforming accessed code segment that can be read
#define SEG_CS_TYPE_READ_ACCESSED 11

// The lowest bit of the type field for normal segments (code and
// data) is used to indicate that a segment has been accessed.
#define SEG_TYPE_BIT_ACCESSED 1
69
// Memory layout of the x86 FXSAVE area. KVM uses this layout both for
// the legacy KVM_GET/SET_FPU state and for the first 512 bytes of the
// XSave region, so it must match the hardware layout exactly (hence
// the packed attribute and the size assertion below).
struct FXSave
{
    uint16_t fcw;           // x87 control word
    uint16_t fsw;           // x87 status word (TOP lives in bits 13:11)
    uint8_t ftwx;           // abridged x87 tag word, one valid bit per register
    uint8_t pad0;
    uint16_t last_opcode;   // opcode of the last executed x87 instruction
    union {
        // Layout of the last instruction/operand pointers when the
        // state was saved in 32-bit mode (segmented pointers).
        struct {
            uint32_t fpu_ip;
            uint16_t fpu_cs;
            uint16_t pad1;
            uint32_t fpu_dp;
            uint16_t fpu_ds;
            uint16_t pad2;
        } ctrl32;

        // Layout when the state was saved in 64-bit mode (flat
        // 64-bit pointers).
        struct {
            uint64_t fpu_ip;
            uint64_t fpu_dp;
        } ctrl64;
    };
    uint32_t mxcsr;         // SSE control/status register
    uint32_t mxcsr_mask;    // mask of writable MXCSR bits

    uint8_t fpr[8][16];     // x87/MMX registers, 80 bits each plus padding
    uint8_t xmm[16][16];    // XMM0-XMM15, 128 bits each

    uint64_t reserved[12];
} M5_ATTR_PACKED;

static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave");
102
// X-macro helpers: each FOREACH_* macro expands an APPLY_* macro once
// per (KVM struct field, gem5 register index) pair. Callers #define
// the matching APPLY_* macro, expand the FOREACH_* macro, and then
// #undef APPLY_* again.

// General purpose integer registers (kvm_regs fields).
#define FOREACH_IREG()                          \
    do {                                        \
        APPLY_IREG(rax, INTREG_RAX);            \
        APPLY_IREG(rbx, INTREG_RBX);            \
        APPLY_IREG(rcx, INTREG_RCX);            \
        APPLY_IREG(rdx, INTREG_RDX);            \
        APPLY_IREG(rsi, INTREG_RSI);            \
        APPLY_IREG(rdi, INTREG_RDI);            \
        APPLY_IREG(rsp, INTREG_RSP);            \
        APPLY_IREG(rbp, INTREG_RBP);            \
        APPLY_IREG(r8, INTREG_R8);              \
        APPLY_IREG(r9, INTREG_R9);              \
        APPLY_IREG(r10, INTREG_R10);            \
        APPLY_IREG(r11, INTREG_R11);            \
        APPLY_IREG(r12, INTREG_R12);            \
        APPLY_IREG(r13, INTREG_R13);            \
        APPLY_IREG(r14, INTREG_R14);            \
        APPLY_IREG(r15, INTREG_R15);            \
    } while(0)

// Control and other special registers (kvm_sregs scalar fields).
#define FOREACH_SREG()                                  \
    do {                                                \
        APPLY_SREG(cr0, MISCREG_CR0);                   \
        APPLY_SREG(cr2, MISCREG_CR2);                   \
        APPLY_SREG(cr3, MISCREG_CR3);                   \
        APPLY_SREG(cr4, MISCREG_CR4);                   \
        APPLY_SREG(cr8, MISCREG_CR8);                   \
        APPLY_SREG(efer, MISCREG_EFER);                 \
        APPLY_SREG(apic_base, MISCREG_APIC_BASE);       \
    } while(0)

// Debug registers (kvm_debugregs fields).
#define FOREACH_DREG()                          \
    do {                                        \
        APPLY_DREG(db[0], MISCREG_DR0);         \
        APPLY_DREG(db[1], MISCREG_DR1);         \
        APPLY_DREG(db[2], MISCREG_DR2);         \
        APPLY_DREG(db[3], MISCREG_DR3);         \
        APPLY_DREG(dr6, MISCREG_DR6);           \
        APPLY_DREG(dr7, MISCREG_DR7);           \
    } while(0)

// Segment registers (kvm_segment fields of kvm_sregs). The second
// argument is the segment index relative to MISCREG_SEG_SEL_BASE.
#define FOREACH_SEGMENT()                                       \
    do {                                                        \
        APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \
    } while(0)

// Descriptor tables (kvm_dtable fields of kvm_sregs).
#define FOREACH_DTABLE()                                        \
    do {                                                        \
        APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE);  \
        APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \
    } while(0)
161
/**
 * Allocate raw storage for a kernel-style variable-length structure:
 * a fixed header (STRUCT) immediately followed by a trailing array of
 * 'entries' ENTRY elements. No constructors are run; the caller is
 * responsible for initializing the memory and for releasing it.
 */
template<typename STRUCT, typename ENTRY>
static STRUCT *newVarStruct(size_t entries)
{
    const size_t bytes(sizeof(STRUCT) + entries * sizeof(ENTRY));
    return static_cast<STRUCT *>(operator new(bytes));
}
167
// Pretty-print a KVM integer register file (all GPRs plus RIP and
// RFLAGS) using gem5's inform() facility.
static void
dumpKvm(const struct kvm_regs &regs)
{
    inform("KVM register state:\n");

#define APPLY_IREG(kreg, mreg)                  \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    inform("\trip: 0x%llx\n", regs.rip);
    inform("\trflags: 0x%llx\n", regs.rflags);
}
183
// Pretty-print a single KVM segment register (base, limit, selector
// and all attribute bits).
static void
dumpKvm(const char *reg_name, const struct kvm_segment &seg)
{
    inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n"
           "\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n",
           reg_name,
           seg.base, seg.limit, seg.selector, seg.type,
           seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl, seg.unusable);
}
193
// Pretty-print a descriptor table register (GDTR/IDTR base and limit).
static void
dumpKvm(const char *reg_name, const struct kvm_dtable &dtable)
{
    inform("\t%s: @0x%llx+%x\n",
           reg_name, dtable.base, dtable.limit);
}
200
// Pretty-print the KVM special register file: segments, control
// registers, descriptor tables and the pending-interrupt bitmap.
static void
dumpKvm(const struct kvm_sregs &sregs)
{
#define APPLY_SREG(kreg, mreg)                          \
    inform("\t" # kreg ": 0x%llx\n", sregs.kreg);
#define APPLY_SEGMENT(kreg, idx)                \
    dumpKvm(# kreg, sregs.kreg);
#define APPLY_DTABLE(kreg, idx)                 \
    dumpKvm(# kreg, sregs.kreg);

    inform("Special registers:\n");
    FOREACH_SEGMENT();
    FOREACH_SREG();
    FOREACH_DTABLE();

    inform("Interrupt Bitmap:");
    // The bitmap is stored as an array of 64-bit words.
    for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64)
        inform("  0x%.8x", sregs.interrupt_bitmap[i / 64]);

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}
224
// Pretty-print the guest debug registers. Only compiled when the host
// kernel headers are recent enough to define KVM_GET_DEBUGREGS.
#ifdef KVM_GET_DEBUGREGS
static void
dumpKvm(const struct kvm_debugregs &regs)
{
    inform("KVM debug state:\n");

#define APPLY_DREG(kreg, mreg)                  \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_DREG();

#undef APPLY_DREG

    inform("\tflags: 0x%llx\n", regs.flags);
}
#endif
241
// Dump the FXSave-specific parts of the FPU state (the parts that
// differ between the FXSave layout and kvm_fpu). Uses the 64-bit view
// of the last instruction/operand pointers.
static void
dumpFpuSpec(const struct FXSave &xs)
{
    inform("\tlast_ip: 0x%x\n", xs.ctrl64.fpu_ip);
    inform("\tlast_dp: 0x%x\n", xs.ctrl64.fpu_dp);
    inform("\tmxcsr_mask: 0x%x\n", xs.mxcsr_mask);
}
249
// Dump the kvm_fpu-specific parts of the FPU state. kvm_fpu has no
// mxcsr_mask field, so only the last instruction/operand pointers are
// shown here.
static void
dumpFpuSpec(const struct kvm_fpu &fpu)
{
    inform("\tlast_ip: 0x%x\n", fpu.last_ip);
    inform("\tlast_dp: 0x%x\n", fpu.last_dp);
}
256
// Dump the FPU/SIMD state that is common to the kvm_fpu and FXSave
// representations: control/status/tag words, the x87 register stack
// and the XMM registers. Layout-specific fields are delegated to the
// dumpFpuSpec() overloads.
template<typename T>
static void
dumpFpuCommon(const T &fpu)
{
    // The top-of-stack pointer is stored in bits 13:11 of the status word.
    const unsigned top((fpu.fsw >> 11) & 0x7);
    inform("\tfcw: 0x%x\n", fpu.fcw);

    inform("\tfsw: 0x%x (top: %i, "
           "conditions: %s%s%s%s, exceptions: %s%s%s%s%s%s %s%s%s)\n",
           fpu.fsw, top,

           (fpu.fsw & CC0Bit) ? "C0" : "",
           (fpu.fsw & CC1Bit) ? "C1" : "",
           (fpu.fsw & CC2Bit) ? "C2" : "",
           (fpu.fsw & CC3Bit) ? "C3" : "",

           (fpu.fsw & IEBit) ? "I" : "",
           (fpu.fsw & DEBit) ? "D" : "",
           (fpu.fsw & ZEBit) ? "Z" : "",
           (fpu.fsw & OEBit) ? "O" : "",
           (fpu.fsw & UEBit) ? "U" : "",
           (fpu.fsw & PEBit) ? "P" : "",

           (fpu.fsw & StackFaultBit) ? "SF " : "",
           (fpu.fsw & ErrSummaryBit) ? "ES " : "",
           (fpu.fsw & BusyBit) ? "BUSY " : ""
        );
    inform("\tftwx: 0x%x\n", fpu.ftwx);
    inform("\tlast_opcode: 0x%x\n", fpu.last_opcode);
    dumpFpuSpec(fpu);
    inform("\tmxcsr: 0x%x\n", fpu.mxcsr);
    inform("\tFP Stack:\n");
    for (int i = 0; i < 8; ++i) {
        // Map the stack position (ST(i)) to its physical register
        // index; a cleared bit in the abridged tag word marks an
        // empty register.
        const unsigned reg_idx((i + top) & 0x7);
        const bool empty(!((fpu.ftwx >> reg_idx) & 0x1));
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        // 10 significant bytes -> 20 hex digits plus terminator.
        char hex[33];
        for (int j = 0; j < 10; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]);
        inform("\t\tST%i/%i: 0x%s (%f)%s\n", i, reg_idx,
               hex, value, empty ? " (e)" : "");
    }
    inform("\tXMM registers:\n");
    for (int i = 0; i < 16; ++i) {
        // 16 bytes -> 32 hex digits plus terminator.
        char hex[33];
        for (int j = 0; j < 16; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]);
        inform("\t\t%i: 0x%s\n", i, hex);
    }
}
307
// Pretty-print FPU state retrieved through the legacy KVM_GET_FPU
// interface.
static void
dumpKvm(const struct kvm_fpu &fpu)
{
    inform("FPU registers:\n");
    dumpFpuCommon(fpu);
}
314
// Pretty-print FPU state retrieved through KVM_GET_XSAVE. The first
// 512 bytes of the XSave region use the FXSave layout.
static void
dumpKvm(const struct kvm_xsave &xsave)
{
    inform("FPU registers (XSave):\n");
    dumpFpuCommon(*(FXSave *)xsave.region);
}
321
// Pretty-print a set of MSR index/value pairs.
static void
dumpKvm(const struct kvm_msrs &msrs)
{
    inform("MSRs:\n");

    for (int i = 0; i < msrs.nmsrs; ++i) {
        const struct kvm_msr_entry &e(msrs.entries[i]);

        inform("\t0x%x: 0x%x\n", e.index, e.data);
    }
}
333
// Pretty-print the extended control registers (XCRs) exposed by KVM.
static void
dumpKvm(const struct kvm_xcrs &regs)
{
    inform("KVM XCR registers:\n");

    inform("\tFlags: 0x%x\n", regs.flags);
    for (int i = 0; i < regs.nr_xcrs; ++i) {
        inform("\tXCR[0x%x]: 0x%x\n",
               regs.xcrs[i].xcr,
               regs.xcrs[i].value);
    }
}
346
// Pretty-print pending vCPU events (exceptions, interrupts, NMIs and
// the SIPI vector).
static void
dumpKvm(const struct kvm_vcpu_events &events)
{
    inform("vCPU events:\n");

    inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n",
           events.exception.injected, events.exception.nr,
           events.exception.has_error_code, events.exception.error_code);

    inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n",
           events.interrupt.injected, events.interrupt.nr,
           events.interrupt.soft);

    inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n",
           events.nmi.injected, events.nmi.pending,
           events.nmi.masked);

    inform("\tSIPI vector: 0x%x\n", events.sipi_vector);
    inform("\tFlags: 0x%x\n", events.flags);
}
367
/**
 * Is this a canonical x86-64 virtual address?
 *
 * x86-64 doesn't currently use the full 64-bit virtual address space;
 * it uses signed 48-bit addresses that are sign-extended to 64 bits.
 * An address is "canonical" when bits 63:47 are either all zero or
 * all one.
 */
static bool
isCanonicalAddress(uint64_t addr)
{
    const uint64_t sign_mask(0xffff800000000000ULL);
    const uint64_t sign_bits(addr & sign_mask);
    return sign_bits == 0 || sign_bits == sign_mask;
}
378
379static void
380checkSeg(const char *name, const int idx, const struct kvm_segment &seg,
381         struct kvm_sregs sregs)
382{
383    // Check the register base
384    switch (idx) {
385      case MISCREG_TSL:
386      case MISCREG_TR:
387      case MISCREG_FS:
388      case MISCREG_GS:
389        if (!isCanonicalAddress(seg.base))
390            warn("Illegal %s base: 0x%x\n", name, seg.base);
391        break;
392
393      case MISCREG_SS:
394      case MISCREG_DS:
395      case MISCREG_ES:
396        if (seg.unusable)
397            break;
398      case MISCREG_CS:
399        if (seg.base & 0xffffffff00000000ULL)
400            warn("Illegal %s base: 0x%x\n", name, seg.base);
401        break;
402    }
403
404    // Check the type
405    switch (idx) {
406      case MISCREG_CS:
407        switch (seg.type) {
408          case 3:
409            if (seg.dpl != 0)
410                warn("CS type is 3 but dpl != 0.\n");
411            break;
412          case 9:
413          case 11:
414            if (seg.dpl != sregs.ss.dpl)
415                warn("CS type is %i but CS DPL != SS DPL\n", seg.type);
416            break;
417          case 13:
418          case 15:
419            if (seg.dpl > sregs.ss.dpl)
420                warn("CS type is %i but CS DPL > SS DPL\n", seg.type);
421            break;
422          default:
423            warn("Illegal CS type: %i\n", seg.type);
424            break;
425        }
426        break;
427
428      case MISCREG_SS:
429        if (seg.unusable)
430            break;
431        switch (seg.type) {
432          case 3:
433            if (sregs.cs.type == 3 && seg.dpl != 0)
434                warn("CS type is 3, but SS DPL is != 0.\n");
435            /* FALLTHROUGH */
436          case 7:
437            if (!(sregs.cr0 & 1) && seg.dpl != 0)
438                warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl);
439            break;
440          default:
441            warn("Illegal SS type: %i\n", seg.type);
442            break;
443        }
444        break;
445
446      case MISCREG_DS:
447      case MISCREG_ES:
448      case MISCREG_FS:
449      case MISCREG_GS:
450        if (seg.unusable)
451            break;
452        if (!(seg.type & 0x1) ||
453            ((seg.type & 0x8) && !(seg.type & 0x2)))
454            warn("%s has an illegal type field: %i\n", name, seg.type);
455        break;
456
457      case MISCREG_TR:
458        // TODO: We should check the CPU mode
459        if (seg.type != 3 && seg.type != 11)
460            warn("%s: Illegal segment type (%i)\n", name, seg.type);
461        break;
462
463      case MISCREG_TSL:
464        if (seg.unusable)
465            break;
466        if (seg.type != 2)
467            warn("%s: Illegal segment type (%i)\n", name, seg.type);
468        break;
469    }
470
471    switch (idx) {
472      case MISCREG_SS:
473      case MISCREG_DS:
474      case MISCREG_ES:
475      case MISCREG_FS:
476      case MISCREG_GS:
477        if (seg.unusable)
478            break;
479      case MISCREG_CS:
480        if (!seg.s)
481            warn("%s: S flag not set\n", name);
482        break;
483
484      case MISCREG_TSL:
485        if (seg.unusable)
486            break;
487      case MISCREG_TR:
488        if (seg.s)
489            warn("%s: S flag is set\n", name);
490        break;
491    }
492
493    switch (idx) {
494      case MISCREG_SS:
495      case MISCREG_DS:
496      case MISCREG_ES:
497      case MISCREG_FS:
498      case MISCREG_GS:
499      case MISCREG_TSL:
500        if (seg.unusable)
501            break;
502      case MISCREG_TR:
503      case MISCREG_CS:
504        if (!seg.present)
505            warn("%s: P flag not set\n", name);
506
507        if (((seg.limit & 0xFFF) == 0 && seg.g) ||
508            ((seg.limit & 0xFFF00000) != 0 && !seg.g)) {
509            warn("%s limit (0x%x) and g (%i) combination is illegal.\n",
510                 name, seg.limit, seg.g);
511        }
512        break;
513    }
514
515    // TODO: Check CS DB
516}
517
// Construct an x86 KVM-based CPU. Verifies that the host kernel
// provides the KVM capabilities this implementation depends on;
// missing hard requirements are fatal, missing optional ones only
// produce warnings.
X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params)
    : BaseKvmCPU(params),
      useXSave(params->useXSave)
{
    Kvm &kvm(vm.kvm);

    // Hard requirements.
    if (!kvm.capSetTSSAddress())
        panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n");
    if (!kvm.capExtendedCPUID())
        panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n");
    // Optional capabilities.
    if (!kvm.capUserNMI())
        warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n");
    if (!kvm.capVCPUEvents())
        warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n");

    haveDebugRegs = kvm.capDebugRegs();
    haveXSave = kvm.capXSave();
    haveXCRs = kvm.capXCRs();

    // Fall back to the legacy FPU interface if XSave was requested
    // but isn't available on this host.
    if (useXSave && !haveXSave) {
        warn("KVM: XSAVE not supported by host. MXCSR synchronization might be "
             "unreliable due to kernel bugs.\n");
        useXSave = false;
    } else if (!useXSave) {
        warn("KVM: XSave FPU/SIMD synchronization disabled by user.\n");
    }
}
545
// Nothing to clean up; base class handles vCPU teardown.
X86KvmCPU::~X86KvmCPU()
{
}
549
// Late initialization once the thread context is available: set up
// the guest CPUID table and bind the port-IO request to this thread
// context.
void
X86KvmCPU::startup()
{
    BaseKvmCPU::startup();

    updateCPUID();

    io_req.setThreadContext(tc->contextId(), 0);

    // TODO: Do we need to create an identity mapped TSS area? We
    // should call kvm.vm.setTSSAddress() here in that case. It should
    // only be needed for old versions of the virtualization
    // extensions. We should make sure that the identity range is
    // reserved in the e820 memory map in that case.
}
565
// Dump the complete KVM vCPU state in a human readable format. The
// FPU/SIMD state is fetched through either the XSave or the legacy
// interface, matching whichever one is used for synchronization.
void
X86KvmCPU::dump()
{
    dumpIntRegs();
    if (useXSave)
        dumpXSave();
    else
        dumpFpuRegs();
    dumpSpecRegs();
    dumpDebugRegs();
    dumpXCRs();
    dumpVCpuEvents();
    dumpMSRs();
}
580
// Fetch and dump the FPU state via the legacy KVM_GET_FPU interface.
void
X86KvmCPU::dumpFpuRegs() const
{
    struct kvm_fpu fpu;
    getFPUState(fpu);
    dumpKvm(fpu);
}
588
// Fetch and dump the general purpose integer registers.
void
X86KvmCPU::dumpIntRegs() const
{
    struct kvm_regs regs;
    getRegisters(regs);
    dumpKvm(regs);
}
596
// Fetch and dump the special registers (segments, control registers,
// descriptor tables).
void
X86KvmCPU::dumpSpecRegs() const
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);
    dumpKvm(sregs);
}
604
// Fetch and dump the guest debug registers if the host kernel
// supports them (both at compile time, via KVM_GET_DEBUGREGS, and at
// run time, via the capability probed in the constructor).
void
X86KvmCPU::dumpDebugRegs() const
{
    if (haveDebugRegs) {
#ifdef KVM_GET_DEBUGREGS
        struct kvm_debugregs dregs;
        getDebugRegisters(dregs);
        dumpKvm(dregs);
#endif
    } else {
        inform("Debug registers not supported by kernel.\n");
    }
}
618
// Fetch and dump the extended control registers if the host kernel
// supports them.
void
X86KvmCPU::dumpXCRs() const
{
    if (haveXCRs) {
        struct kvm_xcrs xcrs;
        getXCRs(xcrs);
        dumpKvm(xcrs);
    } else {
        inform("XCRs not supported by kernel.\n");
    }
}
630
// Fetch and dump the XSave state if the host kernel supports it.
void
X86KvmCPU::dumpXSave() const
{
    if (haveXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);
        dumpKvm(xsave);
    } else {
        inform("XSave not supported by kernel.\n");
    }
}
642
// Fetch and dump pending vCPU events (exceptions, interrupts, NMIs).
void
X86KvmCPU::dumpVCpuEvents() const
{
    struct kvm_vcpu_events events;
    getVCpuEvents(events);
    dumpKvm(events);
}
650
// Fetch and dump all MSRs the host kernel claims to support. kvm_msrs
// is a variable-length structure, so the storage (header plus
// trailing entry array) is allocated with newVarStruct().
void
X86KvmCPU::dumpMSRs() const
{
    const Kvm::MSRIndexVector &supported_msrs(vm.kvm.getSupportedMSRs());
    std::unique_ptr<struct kvm_msrs> msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(
            supported_msrs.size()));

    // Fill in the MSR indices; getMSRs() populates the values.
    msrs->nmsrs = supported_msrs.size();
    for (int i = 0; i < supported_msrs.size(); ++i) {
        struct kvm_msr_entry &e(msrs->entries[i]);
        e.index = supported_msrs[i];
        e.reserved = 0;
        e.data = 0;
    }
    getMSRs(*msrs.get());

    dumpKvm(*msrs.get());
}
670
// Copy the complete architectural state from gem5's thread context
// into the KVM vCPU before entering the guest.
void
X86KvmCPU::updateKvmState()
{
    updateKvmStateRegs();
    updateKvmStateSRegs();
    updateKvmStateFPU();
    updateKvmStateMSRs();

    DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
    if (DTRACE(KvmContext))
        dump();
}
683
// Copy the general purpose integer registers, RIP and RFLAGS from the
// thread context into the KVM vCPU.
void
X86KvmCPU::updateKvmStateRegs()
{
    struct kvm_regs regs;

#define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg)
    FOREACH_IREG();
#undef APPLY_IREG

    regs.rip = tc->instAddr();

    /* You might think that setting regs.rflags to the contents
     * MISCREG_RFLAGS here would suffice. In that case you're
     * mistaken. We need to reconstruct it from a bunch of ucode
     * registers and wave a dead chicken over it (aka mask out and set
     * reserved bits) to get it to work.
     */
    regs.rflags = X86ISA::getRFlags(tc);

    setRegisters(regs);
}
705
// Populate a KVM segment register from the gem5 segment state (base,
// limit, selector and attribute bits) identified by 'index'.
static inline void
setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg,
                 const int index)
{
    SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index)));

    kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
    kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index));
    kvm_seg.type = attr.type;
    kvm_seg.present = attr.present;
    kvm_seg.dpl = attr.dpl;
    kvm_seg.db = attr.defaultSize;
    kvm_seg.s = attr.system;
    kvm_seg.l = attr.longMode;
    kvm_seg.g = attr.granularity;
    kvm_seg.avl = attr.avl;

    // A segment is normally unusable when the selector is zero. There
    // is a attr.unusable flag in gem5, but it seems unused. qemu
    // seems to set this to 0 all the time, so we just do the same and
    // hope for the best.
    kvm_seg.unusable = 0;
}
730
// Populate a KVM descriptor table register (GDTR/IDTR) from the gem5
// state identified by 'index'.
static inline void
setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable,
                const int index)
{
    kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
}
738
739static void
740forceSegAccessed(struct kvm_segment &seg)
741{
742    // Intel's VMX requires that (some) usable segments are flagged as
743    // 'accessed' (i.e., the lowest bit in the segment type is set)
744    // when entering VMX. This wouldn't necessary be the case even if
745    // gem5 did set the access bits correctly, so we force it to one
746    // in that case.
747    if (!seg.unusable)
748        seg.type |= SEG_TYPE_BIT_ACCESSED;
749}
750
// Copy the special registers (segments, control registers, descriptor
// tables) from the thread context into the KVM vCPU. gem5 doesn't
// always maintain state that satisfies VMX's guest-state checks, so
// several fixups are applied before the state is handed to the
// kernel; the state is also verified with checkSeg() afterwards.
void
X86KvmCPU::updateKvmStateSRegs()
{
    struct kvm_sregs sregs;

#define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg)
#define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx)

    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE

    // Clear the interrupt bitmap
    memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));

    // VMX requires CS, SS, DS, ES, FS, and GS to have the accessed
    // bit in the type field set.
    forceSegAccessed(sregs.cs);
    forceSegAccessed(sregs.ss);
    forceSegAccessed(sregs.ds);
    forceSegAccessed(sregs.es);
    forceSegAccessed(sregs.fs);
    forceSegAccessed(sregs.gs);

    // There are currently some cases where the active task isn't
    // marked as busy. This is illegal in VMX, so we force it to busy.
    if (sregs.tr.type == SEG_SYS_TYPE_TSS_AVAILABLE) {
        hack("tr.type (%i) is not busy. Forcing the busy bit.\n",
             sregs.tr.type);
        sregs.tr.type = SEG_SYS_TYPE_TSS_BUSY;
    }

    // VMX requires the DPL of SS and CS to be the same for
    // non-conforming code segments. It seems like m5 doesn't set the
    // DPL of SS correctly when taking interrupts, so we need to fix
    // that here.
    if ((sregs.cs.type == SEG_CS_TYPE_ACCESSED ||
         sregs.cs.type == SEG_CS_TYPE_READ_ACCESSED) &&
        sregs.cs.dpl != sregs.ss.dpl) {

        hack("CS.DPL (%i) != SS.DPL (%i): Forcing SS.DPL to %i\n",
             sregs.cs.dpl, sregs.ss.dpl, sregs.cs.dpl);
        sregs.ss.dpl = sregs.cs.dpl;
    }

    // Do checks after fixing up the state to avoid getting excessive
    // amounts of warnings.
    RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS));
    if (!rflags_nocc.vm) {
        // Do segment verification if the CPU isn't entering virtual
        // 8086 mode.  We currently assume that unrestricted guest
        // mode is available.

#define APPLY_SEGMENT(kreg, idx) \
        checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs)

        FOREACH_SEGMENT();
#undef APPLY_SEGMENT
    }

    setSpecialRegisters(sregs);
}
818
// Copy the FPU/SIMD state shared by the kvm_fpu and FXSave layouts
// (control/status/tag words, the x87 stack and the XMM registers)
// from the thread context into 'fpu'.
template <typename T>
static void
updateKvmStateFPUCommon(ThreadContext *tc, T &fpu)
{
    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    fpu.mxcsr = tc->readMiscRegNoEffect(MISCREG_MXCSR);
    fpu.fcw = tc->readMiscRegNoEffect(MISCREG_FCW);
    // No need to rebuild from MISCREG_FSW and MISCREG_TOP if we read
    // with effects.
    fpu.fsw = tc->readMiscReg(MISCREG_FSW);

    // Convert gem5's full x87 tag word into the abridged (one bit per
    // register) form used by FXSave.
    uint64_t ftw(tc->readMiscRegNoEffect(MISCREG_FTW));
    fpu.ftwx = X86ISA::convX87TagsToXTags(ftw);

    fpu.last_opcode = tc->readMiscRegNoEffect(MISCREG_FOP);

    // The top-of-stack pointer is in bits 13:11 of the status word;
    // translate stack positions to physical register indices.
    const unsigned top((fpu.fsw >> 11) & 0x7);
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(tc->readFloatReg(FLOATREG_FPR(reg_idx)));
        DPRINTF(KvmContext, "Setting KVM FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        X86ISA::storeFloat80(fpu.fpr[i], value);
    }

    // TODO: We should update the MMX state

    // Each 128-bit XMM register is stored as two 64-bit halves in
    // gem5's float register file.
    for (int i = 0; i < 16; ++i) {
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][0] =
            tc->readFloatRegBits(FLOATREG_XMM_LOW(i));
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][8] =
            tc->readFloatRegBits(FLOATREG_XMM_HIGH(i));
    }
}
855
// Copy the FPU/SIMD state into the vCPU using the legacy KVM_SET_FPU
// interface. kvm_fpu has no space for the FP segment selectors, so
// warn if gem5 has non-zero values that would be dropped.
void
X86KvmCPU::updateKvmStateFPULegacy()
{
    struct kvm_fpu fpu;

    // There is some padding in the FP registers, so we'd better zero
    // the whole struct.
    memset(&fpu, 0, sizeof(fpu));

    updateKvmStateFPUCommon(tc, fpu);

    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    fpu.last_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    fpu.last_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setFPUState(fpu);
}
879
// Copy the FPU/SIMD state into the vCPU using KVM_SET_XSAVE. The
// first 512 bytes of the XSave region follow the FXSave layout, which
// only stores the flat 64-bit instruction/operand pointers, so warn
// if gem5 has non-zero segment selectors that would be dropped.
void
X86KvmCPU::updateKvmStateFPUXSave()
{
    struct kvm_xsave kxsave;
    FXSave &xsave(*(FXSave *)kxsave.region);

    // There is some padding and reserved fields in the structure, so
    // we'd better zero the whole thing.
    memset(&kxsave, 0, sizeof(kxsave));

    updateKvmStateFPUCommon(tc, xsave);

    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    xsave.ctrl64.fpu_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    xsave.ctrl64.fpu_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setXSave(kxsave);
}
904
905void
906X86KvmCPU::updateKvmStateFPU()
907{
908    if (useXSave)
909        updateKvmStateFPUXSave();
910    else
911        updateKvmStateFPULegacy();
912}
913
914void
915X86KvmCPU::updateKvmStateMSRs()
916{
917    KvmMSRVector msrs;
918
919    const Kvm::MSRIndexVector &indices(getMsrIntersection());
920
921    for (auto it = indices.cbegin(); it != indices.cend(); ++it) {
922        struct kvm_msr_entry e;
923
924        e.index = *it;
925        e.reserved = 0;
926        e.data = tc->readMiscReg(msrMap.at(*it));
927        DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n",
928                e.index, e.data);
929
930        msrs.push_back(e);
931    }
932
933    setMSRs(msrs);
934}
935
// Copy the complete architectural state from the KVM vCPU back into
// gem5's thread context after returning from the guest.
void
X86KvmCPU::updateThreadContext()
{
    DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n");
    if (DTRACE(KvmContext))
        dump();

    updateThreadContextRegs();
    updateThreadContextSRegs();
    // Use whichever FPU interface is active for synchronization.
    if (useXSave)
        updateThreadContextXSave();
    else
        updateThreadContextFPU();
    updateThreadContextMSRs();

    // The M5 misc reg caches some values from other
    // registers. Writing to it with side effects causes it to be
    // updated from its source registers.
    tc->setMiscReg(MISCREG_M5_REG, 0);
}
956
// Copy the general purpose integer registers, PC and RFLAGS from the
// KVM vCPU back into the thread context.
void
X86KvmCPU::updateThreadContextRegs()
{
    struct kvm_regs regs;
    getRegisters(regs);

#define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    tc->pcState(PCState(regs.rip));

    // Flags are spread out across multiple semi-magic registers so we
    // need some special care when updating them.
    X86ISA::setRFlags(tc, regs.rflags);
}
975
976
// Copy a KVM segment register back into the gem5 segment state
// identified by 'index' (base, limit, selector and attribute bits).
inline void
setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
                  const int index)
{
    SegAttr attr(0);

    attr.type = kvm_seg.type;
    attr.present = kvm_seg.present;
    attr.dpl = kvm_seg.dpl;
    attr.defaultSize = kvm_seg.db;
    attr.system = kvm_seg.s;
    attr.longMode = kvm_seg.l;
    attr.granularity = kvm_seg.g;
    attr.avl = kvm_seg.avl;
    attr.unusable = kvm_seg.unusable;

    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
    tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
    tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
}
1001
// Copy a KVM descriptor table register (GDTR/IDTR) back into the gem5
// state identified by 'index'.
inline void
setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
                  const int index)
{
    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
}
1012
// Copy the special registers from the KVM vCPU back into the thread
// context. The kvm_run shared area mirrors apic_base and cr8; assert
// that those mirrors agree with the values KVM returns.
void
X86KvmCPU::updateThreadContextSRegs()
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);

    assert(getKvmRunState()->apic_base == sregs.apic_base);
    assert(getKvmRunState()->cr8 == sregs.cr8);

#define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
#define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();
#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}
1032
template<typename T>
static void
updateThreadContextFPUCommon(ThreadContext *tc, const T &fpu)
{
    // Synchronize the FPU/SSE state that is shared between the
    // FXSave (kvm_fpu) and XSave layouts from KVM into gem5.

    // TOP lives in bits 11-13 of the x87 status word.
    const unsigned top((fpu.fsw >> 11) & 0x7);

    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    // The x87 stack is stored relative to TOP on the KVM side, while
    // gem5 uses absolute register numbers, hence the rotation below.
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        DPRINTF(KvmContext, "Setting gem5 FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        tc->setFloatReg(FLOATREG_FPR(reg_idx), value);
    }

    // TODO: We should update the MMX state

    tc->setMiscRegNoEffect(MISCREG_X87_TOP, top);
    tc->setMiscRegNoEffect(MISCREG_MXCSR, fpu.mxcsr);
    tc->setMiscRegNoEffect(MISCREG_FCW, fpu.fcw);
    tc->setMiscRegNoEffect(MISCREG_FSW, fpu.fsw);

    // Expand KVM's abridged tag word (ftwx) into the full tag format
    // used by gem5.
    uint64_t ftw(convX87XTagsToTags(fpu.ftwx));
    // TODO: Are these registers really the same?
    tc->setMiscRegNoEffect(MISCREG_FTW, ftw);
    tc->setMiscRegNoEffect(MISCREG_FTAG, ftw);

    tc->setMiscRegNoEffect(MISCREG_FOP, fpu.last_opcode);

    // Copy each 128-bit XMM register as two 64-bit halves.
    // NOTE(review): the casts below type-pun the xmm byte array;
    // consider memcpy to avoid strict-aliasing concerns.
    for (int i = 0; i < 16; ++i) {
        tc->setFloatRegBits(FLOATREG_XMM_LOW(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][0]);
        tc->setFloatRegBits(FLOATREG_XMM_HIGH(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][8]);
    }
}
1071
1072void
1073X86KvmCPU::updateThreadContextFPU()
1074{
1075    struct kvm_fpu fpu;
1076    getFPUState(fpu);
1077
1078    updateThreadContextFPUCommon(tc, fpu);
1079
1080    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
1081    tc->setMiscRegNoEffect(MISCREG_FIOFF, fpu.last_ip);
1082    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
1083    tc->setMiscRegNoEffect(MISCREG_FOOFF, fpu.last_dp);
1084}
1085
1086void
1087X86KvmCPU::updateThreadContextXSave()
1088{
1089    struct kvm_xsave kxsave;
1090    FXSave &xsave(*(FXSave *)kxsave.region);
1091    getXSave(kxsave);
1092
1093    updateThreadContextFPUCommon(tc, xsave);
1094
1095    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
1096    tc->setMiscRegNoEffect(MISCREG_FIOFF, xsave.ctrl64.fpu_ip);
1097    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
1098    tc->setMiscRegNoEffect(MISCREG_FOOFF, xsave.ctrl64.fpu_dp);
1099}
1100
1101void
1102X86KvmCPU::updateThreadContextMSRs()
1103{
1104    const Kvm::MSRIndexVector &msrs(getMsrIntersection());
1105
1106    std::unique_ptr<struct kvm_msrs> kvm_msrs(
1107        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
1108    struct kvm_msr_entry *entry;
1109
1110    // Create a list of MSRs to read
1111    kvm_msrs->nmsrs = msrs.size();
1112    entry = &kvm_msrs->entries[0];
1113    for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
1114        entry->index = *it;
1115        entry->reserved = 0;
1116        entry->data = 0;
1117    }
1118
1119    getMSRs(*kvm_msrs.get());
1120
1121    // Update M5's state
1122    entry = &kvm_msrs->entries[0];
1123    for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
1124        DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
1125                entry->index, entry->data);
1126
1127        tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
1128    }
1129}
1130
1131void
1132X86KvmCPU::deliverInterrupts()
1133{
1134    syncThreadContext();
1135
1136    Fault fault(interrupts->getInterrupt(tc));
1137    interrupts->updateIntrInfo(tc);
1138
1139    X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
1140    if (x86int) {
1141        struct kvm_interrupt kvm_int;
1142        kvm_int.irq = x86int->getVector();
1143
1144        DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
1145                fault->name(), kvm_int.irq);
1146
1147        kvmInterrupt(kvm_int);
1148    } else if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
1149        DPRINTF(KvmInt, "Delivering NMI\n");
1150        kvmNonMaskableInterrupt();
1151    } else {
1152        panic("KVM: Unknown interrupt type\n");
1153    }
1154
1155}
1156
Tick
X86KvmCPU::kvmRun(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Before entering the guest, check whether gem5 has pending
    // interrupts and either inject one now or ask KVM to exit as
    // soon as the guest can accept an interrupt.
    if (interrupts->checkInterruptsRaw()) {
        if (kvm_run.ready_for_interrupt_injection) {
            // KVM claims that it is ready for an interrupt. It might
            // be lying if we just updated rflags and disabled
            // interrupts (e.g., by doing a CPU handover). Let's sync
            // the thread context and check if there are /really/
            // interrupts that should be delivered now.
            syncThreadContext();
            if (interrupts->checkInterrupts(tc)) {
                DPRINTF(KvmInt,
                        "M5 has pending interrupts, delivering interrupt.\n");

                deliverInterrupts();
            } else {
                // KVM was wrong: fall back to requesting an
                // interrupt window so we get another chance later.
                DPRINTF(KvmInt,
                        "Interrupt delivery delayed due to KVM confusion.\n");
                kvm_run.request_interrupt_window = 1;
            }
        } else if (!kvm_run.request_interrupt_window) {
            DPRINTF(KvmInt,
                    "M5 has pending interrupts, requesting interrupt "
                    "window.\n");
            kvm_run.request_interrupt_window = 1;
        }
    } else {
        // No pending interrupts: cancel any outstanding interrupt
        // window request.
        kvm_run.request_interrupt_window = 0;
    }

    return kvmRunWrapper(ticks);
}
1192
1193Tick
1194X86KvmCPU::kvmRunDrain()
1195{
1196    struct kvm_run &kvm_run(*getKvmRunState());
1197
1198    if (!archIsDrained()) {
1199        DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");
1200
1201        // Tell KVM to find a suitable place to deliver interrupts. This
1202        // should ensure that pending interrupts have been delivered and
1203        // things are reasonably consistent (i.e., no interrupts pending
1204        // in the guest).
1205        kvm_run.request_interrupt_window = 1;
1206
1207        // Limit the run to 1 millisecond. That is hopefully enough to
1208        // reach an interrupt window. Otherwise, we'll just try again
1209        // later.
1210        return kvmRunWrapper(1 * SimClock::Float::ms);
1211    } else {
1212        DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");
1213
1214        return kvmRunWrapper(0);
1215    }
1216}
1217
Tick
X86KvmCPU::kvmRunWrapper(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Synchronize the APIC base and CR8 here since they are present
    // in the kvm_run struct, which makes the synchronization really
    // cheap.
    kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    const Tick run_ticks(BaseKvmCPU::kvmRun(ticks));

    tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base);
    // NOTE(review): unlike the APIC base above, this re-reads CR8
    // from the thread context into kvm_run rather than copying
    // kvm_run.cr8 back to gem5. The asymmetry looks suspicious --
    // confirm it is intentional before changing it.
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    return run_ticks;
}
1236
uint64_t
X86KvmCPU::getHostCycles() const
{
    // Use the TSC MSR read through KVM as the host cycle counter.
    return getMSR(MSR_TSC);
}
1242
1243void
1244X86KvmCPU::handleIOMiscReg32(int miscreg)
1245{
1246    struct kvm_run &kvm_run(*getKvmRunState());
1247    const uint16_t port(kvm_run.io.port);
1248
1249    assert(kvm_run.exit_reason == KVM_EXIT_IO);
1250
1251    if (kvm_run.io.size != 4) {
1252        panic("Unexpected IO size (%u) for address 0x%x.\n",
1253              kvm_run.io.size, port);
1254    }
1255
1256    if (kvm_run.io.count != 1) {
1257        panic("Unexpected IO count (%u) for address 0x%x.\n",
1258              kvm_run.io.count, port);
1259    }
1260
1261    uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset));
1262    if (kvm_run.io.direction == KVM_EXIT_IO_OUT)
1263        tc->setMiscReg(miscreg, *data);
1264    else
1265        *data = tc->readMiscRegNoEffect(miscreg);
1266}
1267
Tick
X86KvmCPU::handleKvmExitIO()
{
    // Handle a KVM_EXIT_IO exit by translating the port access into
    // an uncacheable access in gem5's memory system.
    struct kvm_run &kvm_run(*getKvmRunState());
    bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT);
    unsigned char *guestData(getGuestData(kvm_run.io.data_offset));
    Tick delay(0);
    uint16_t port(kvm_run.io.port);
    Addr pAddr;
    const int count(kvm_run.io.count);

    assert(kvm_run.io.direction == KVM_EXIT_IO_IN ||
           kvm_run.io.direction == KVM_EXIT_IO_OUT);

    DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n",
            (isWrite ? "out" : "in"), kvm_run.io.port);

    /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we
     * don't use the TLB component, we need to intercept and handle
     * the PCI configuration space IO ports here.
     *
     * The IO port PCI discovery mechanism uses one address register
     * and one data register. We map the address register to a misc
     * reg and use that to re-route data register accesses to the
     * right location in the PCI configuration space.
     */
    if (port == IO_PCI_CONF_ADDR) {
        // Accesses to the PCI config address register are emulated
        // entirely via a misc reg; no memory access is issued.
        handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS);
        return 0;
    } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) {
        Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS));
        if (pciConfigAddr & 0x80000000) {
            // Enable bit set: redirect into PCI config space. The
            // low two port bits select the byte offset within the
            // 32-bit config register.
            pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) |
                                                (port & 0x3));
        } else {
            pAddr = X86ISA::x86IOAddress(port);
        }
    } else {
        pAddr = X86ISA::x86IOAddress(port);
    }

    io_req.setPhys(pAddr, kvm_run.io.size, Request::UNCACHEABLE,
                   dataMasterId());

    // Issue one atomic access per repetition (count > 1 for string
    // IO instructions such as rep ins/outs).
    const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
    for (int i = 0; i < count; ++i) {
        Packet pkt(&io_req, cmd);

        pkt.dataStatic(guestData);
        delay += dataPort.sendAtomic(&pkt);

        guestData += kvm_run.io.size;
    }

    return delay;
}
1324
1325Tick
1326X86KvmCPU::handleKvmExitIRQWindowOpen()
1327{
1328    // We don't need to do anything here since this is caught the next
1329    // time we execute kvmRun(). We still overload the exit event to
1330    // silence the warning about an unhandled exit event.
1331    return 0;
1332}
1333
1334bool
1335X86KvmCPU::archIsDrained() const
1336{
1337    struct kvm_vcpu_events events;
1338
1339    getVCpuEvents(events);
1340
1341    // We could probably handle this in a by re-inserting interrupts
1342    // that are pending into gem5 on a drain. However, that would
1343    // probably be tricky to do reliably, so we'll just prevent a
1344    // drain if there is anything pending in the
1345    // guest. X86KvmCPU::kvmRunDrain() minimizes the amount of code
1346    // executed in the guest by requesting an interrupt window if
1347    // there are pending interrupts.
1348    const bool pending_events(events.exception.injected ||
1349                              events.interrupt.injected ||
1350                              events.nmi.injected || events.nmi.pending);
1351
1352    if (pending_events) {
1353        DPRINTF(Drain, "archIsDrained: Pending events: %s %s %s %s\n",
1354                events.exception.injected ? "exception" : "",
1355                events.interrupt.injected ? "interrupt" : "",
1356                events.nmi.injected ? "nmi[i]" : "",
1357                events.nmi.pending ? "nmi[p]" : "");
1358    }
1359
1360    return !pending_events;
1361}
1362
1363static struct kvm_cpuid_entry2
1364makeKvmCpuid(uint32_t function, uint32_t index,
1365             CpuidResult &result)
1366{
1367    struct kvm_cpuid_entry2 e;
1368    e.function = function;
1369    e.index = index;
1370    e.flags = 0;
1371    e.eax = (uint32_t)result.rax;
1372    e.ebx = (uint32_t)result.rbx;
1373    e.ecx = (uint32_t)result.rcx;
1374    e.edx = (uint32_t)result.rdx;
1375
1376    return e;
1377}
1378
1379void
1380X86KvmCPU::updateCPUID()
1381{
1382    Kvm::CPUIDVector m5_supported;
1383
1384    /* TODO: We currently don't support any of the functions that
1385     * iterate through data structures in the CPU using an index. It's
1386     * currently not a problem since M5 doesn't expose any of them at
1387     * the moment.
1388     */
1389
1390    /* Basic features */
1391    CpuidResult func0;
1392    X86ISA::doCpuid(tc, 0x0, 0, func0);
1393    for (uint32_t function = 0; function <= func0.rax; ++function) {
1394        CpuidResult cpuid;
1395        uint32_t idx(0);
1396
1397        X86ISA::doCpuid(tc, function, idx, cpuid);
1398        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
1399    }
1400
1401    /* Extended features */
1402    CpuidResult efunc0;
1403    X86ISA::doCpuid(tc, 0x80000000, 0, efunc0);
1404    for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) {
1405        CpuidResult cpuid;
1406        uint32_t idx(0);
1407
1408        X86ISA::doCpuid(tc, function, idx, cpuid);
1409        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
1410    }
1411
1412    setCPUID(m5_supported);
1413}
1414
1415void
1416X86KvmCPU::setCPUID(const struct kvm_cpuid2 &cpuid)
1417{
1418    if (ioctl(KVM_SET_CPUID2, (void *)&cpuid) == -1)
1419        panic("KVM: Failed to set guest CPUID2 (errno: %i)\n",
1420              errno);
1421}
1422
1423void
1424X86KvmCPU::setCPUID(const Kvm::CPUIDVector &cpuid)
1425{
1426    std::unique_ptr<struct kvm_cpuid2> kvm_cpuid(
1427        newVarStruct<struct kvm_cpuid2, struct kvm_cpuid_entry2>(cpuid.size()));
1428
1429    kvm_cpuid->nent = cpuid.size();
1430    std::copy(cpuid.begin(), cpuid.end(), kvm_cpuid->entries);
1431
1432    setCPUID(*kvm_cpuid);
1433}
1434
1435void
1436X86KvmCPU::setMSRs(const struct kvm_msrs &msrs)
1437{
1438    if (ioctl(KVM_SET_MSRS, (void *)&msrs) == -1)
1439        panic("KVM: Failed to set guest MSRs (errno: %i)\n",
1440              errno);
1441}
1442
1443void
1444X86KvmCPU::setMSRs(const KvmMSRVector &msrs)
1445{
1446    std::unique_ptr<struct kvm_msrs> kvm_msrs(
1447        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
1448
1449    kvm_msrs->nmsrs = msrs.size();
1450    std::copy(msrs.begin(), msrs.end(), kvm_msrs->entries);
1451
1452    setMSRs(*kvm_msrs);
1453}
1454
1455void
1456X86KvmCPU::getMSRs(struct kvm_msrs &msrs) const
1457{
1458    if (ioctl(KVM_GET_MSRS, (void *)&msrs) == -1)
1459        panic("KVM: Failed to get guest MSRs (errno: %i)\n",
1460              errno);
1461}
1462
1463
1464void
1465X86KvmCPU::setMSR(uint32_t index, uint64_t value)
1466{
1467    std::unique_ptr<struct kvm_msrs> kvm_msrs(
1468        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
1469    struct kvm_msr_entry &entry(kvm_msrs->entries[0]);
1470
1471    kvm_msrs->nmsrs = 1;
1472    entry.index = index;
1473    entry.reserved = 0;
1474    entry.data = value;
1475
1476    setMSRs(*kvm_msrs.get());
1477}
1478
1479uint64_t
1480X86KvmCPU::getMSR(uint32_t index) const
1481{
1482    std::unique_ptr<struct kvm_msrs> kvm_msrs(
1483        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
1484    struct kvm_msr_entry &entry(kvm_msrs->entries[0]);
1485
1486    kvm_msrs->nmsrs = 1;
1487    entry.index = index;
1488    entry.reserved = 0;
1489    entry.data = 0;
1490
1491    getMSRs(*kvm_msrs.get());
1492    return entry.data;
1493}
1494
1495const Kvm::MSRIndexVector &
1496X86KvmCPU::getMsrIntersection() const
1497{
1498    if (cachedMsrIntersection.empty()) {
1499        const Kvm::MSRIndexVector &kvm_msrs(vm.kvm.getSupportedMSRs());
1500
1501        DPRINTF(Kvm, "kvm-x86: Updating MSR intersection\n");
1502        for (auto it = kvm_msrs.cbegin(); it != kvm_msrs.cend(); ++it) {
1503            if (X86ISA::msrMap.find(*it) != X86ISA::msrMap.end()) {
1504                cachedMsrIntersection.push_back(*it);
1505                DPRINTF(Kvm, "kvm-x86: Adding MSR 0x%x\n", *it);
1506            } else {
1507                warn("kvm-x86: MSR (0x%x) unsupported by gem5. Skipping.\n",
1508                     *it);
1509            }
1510        }
1511    }
1512
1513    return cachedMsrIntersection;
1514}
1515
void
X86KvmCPU::getDebugRegisters(struct kvm_debugregs &regs) const
{
    // Read the guest's debug registers from KVM. KVM_GET_DEBUGREGS
    // is only defined by sufficiently recent kernel headers, so fail
    // loudly when support wasn't compiled in.
#ifdef KVM_GET_DEBUGREGS
    if (ioctl(KVM_GET_DEBUGREGS, &regs) == -1)
        panic("KVM: Failed to get guest debug registers\n");
#else
    panic("KVM: Unsupported getDebugRegisters call.\n");
#endif
}
1526
void
X86KvmCPU::setDebugRegisters(const struct kvm_debugregs &regs)
{
    // Write the guest's debug registers. KVM_SET_DEBUGREGS is only
    // defined by sufficiently recent kernel headers, so fail loudly
    // when support wasn't compiled in.
#ifdef KVM_SET_DEBUGREGS
    if (ioctl(KVM_SET_DEBUGREGS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest debug registers\n");
#else
    panic("KVM: Unsupported setDebugRegisters call.\n");
#endif
}
1537
1538void
1539X86KvmCPU::getXCRs(struct kvm_xcrs &regs) const
1540{
1541    if (ioctl(KVM_GET_XCRS, &regs) == -1)
1542        panic("KVM: Failed to get guest debug registers\n");
1543}
1544
1545void
1546X86KvmCPU::setXCRs(const struct kvm_xcrs &regs)
1547{
1548    if (ioctl(KVM_SET_XCRS, (void *)&regs) == -1)
1549        panic("KVM: Failed to set guest debug registers\n");
1550}
1551
1552void
1553X86KvmCPU::getXSave(struct kvm_xsave &xsave) const
1554{
1555    if (ioctl(KVM_GET_XSAVE, &xsave) == -1)
1556        panic("KVM: Failed to get guest debug registers\n");
1557}
1558
1559void
1560X86KvmCPU::setXSave(const struct kvm_xsave &xsave)
1561{
1562    if (ioctl(KVM_SET_XSAVE, (void *)&xsave) == -1)
1563        panic("KVM: Failed to set guest debug registers\n");
1564}
1565
1566
1567void
1568X86KvmCPU::getVCpuEvents(struct kvm_vcpu_events &events) const
1569{
1570    if (ioctl(KVM_GET_VCPU_EVENTS, &events) == -1)
1571        panic("KVM: Failed to get guest debug registers\n");
1572}
1573
1574void
1575X86KvmCPU::setVCpuEvents(const struct kvm_vcpu_events &events)
1576{
1577    if (ioctl(KVM_SET_VCPU_EVENTS, (void *)&events) == -1)
1578        panic("KVM: Failed to set guest debug registers\n");
1579}
1580
X86KvmCPU *
X86KvmCPUParams::create()
{
    // Factory hook used by the simulator's configuration system to
    // instantiate the CPU model from its parameter struct.
    return new X86KvmCPU(this);
}
1586