/*
 * Copyright (c) 2013 Andreas Sandberg
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andreas Sandberg
 */

#include "cpu/kvm/x86_cpu.hh"

#include <linux/kvm.h>

#include <algorithm>
#include <cerrno>
#include <memory>

#include "arch/registers.hh"
#include "arch/x86/cpuid.hh"
#include "arch/x86/regs/msr.hh"
#include "arch/x86/utility.hh"
#include "cpu/kvm/base.hh"
#include "debug/Drain.hh"
#include "debug/Kvm.hh"
#include "debug/KvmContext.hh"
#include "debug/KvmIO.hh"
#include "debug/KvmInt.hh"

using namespace X86ISA;

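// MSR index of the time-stamp counter (IA32_TIME_STAMP_COUNTER). It is
// read through KVM_GET_MSRS by getHostCycles() below.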
#define MSR_TSC 0x10

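// Legacy (mechanism #1) PCI configuration ports: a 32-bit address written
// to 0xCF8 selects a bus/device/function/register, and the selected dword
// is then accessed through ports 0xCFC-0xCFF. See handleKvmExitIO() below.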
#define IO_PCI_CONF_ADDR 0xCF8
#define IO_PCI_CONF_DATA_BASE 0xCFC

// Task segment type of an inactive 32-bit or 64-bit task
#define SEG_SYS_TYPE_TSS_AVAILABLE 9
// Task segment type of an active 32-bit or 64-bit task
#define SEG_SYS_TYPE_TSS_BUSY 11

// Non-conforming accessed code segment
#define SEG_CS_TYPE_ACCESSED 9
// Non-conforming accessed code segment that can be read
#define SEG_CS_TYPE_READ_ACCESSED 11

// The lowest bit of the type field for normal segments (code and
// data) is used to indicate that a segment has been accessed.
#define SEG_TYPE_BIT_ACCESSED 1

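// Memory layout of the 512-byte FXSAVE/FXRSTOR area. The same layout is
// used for the legacy region at the start of the XSAVE area, which is how
// this struct is overlaid on kvm_xsave::region below.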
struct FXSave
{
    uint16_t fcw;
    uint16_t fsw;
    uint8_t ftwx;
    uint8_t pad0;
    uint16_t last_opcode;
    union {
        struct {
            uint32_t fpu_ip;
            uint16_t fpu_cs;
            uint16_t pad1;
            uint32_t fpu_dp;
            uint16_t fpu_ds;
            uint16_t pad2;
        } ctrl32;

        struct {
            uint64_t fpu_ip;
            uint64_t fpu_dp;
        } ctrl64;
    };
    uint32_t mxcsr;
    uint32_t mxcsr_mask;

    uint8_t fpr[8][16];
    uint8_t xmm[16][16];

    uint64_t reserved[12];
} M5_ATTR_PACKED;

static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave");

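// X-macro style register lists: each FOREACH_* macro below invokes a
// caller-supplied APPLY_* macro once per (KVM field, gem5 register) pair.
// A caller defines APPLY_* to generate the per-register code it needs,
// e.g. (roughly, as done in updateKvmStateRegs()):
//
//   #define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg)
//   FOREACH_IREG();
//   #undef APPLY_IREG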
#define FOREACH_IREG()                          \
    do {                                        \
        APPLY_IREG(rax, INTREG_RAX);            \
        APPLY_IREG(rbx, INTREG_RBX);            \
        APPLY_IREG(rcx, INTREG_RCX);            \
        APPLY_IREG(rdx, INTREG_RDX);            \
        APPLY_IREG(rsi, INTREG_RSI);            \
        APPLY_IREG(rdi, INTREG_RDI);            \
        APPLY_IREG(rsp, INTREG_RSP);            \
        APPLY_IREG(rbp, INTREG_RBP);            \
        APPLY_IREG(r8, INTREG_R8);              \
        APPLY_IREG(r9, INTREG_R9);              \
        APPLY_IREG(r10, INTREG_R10);            \
        APPLY_IREG(r11, INTREG_R11);            \
        APPLY_IREG(r12, INTREG_R12);            \
        APPLY_IREG(r13, INTREG_R13);            \
        APPLY_IREG(r14, INTREG_R14);            \
        APPLY_IREG(r15, INTREG_R15);            \
    } while (0)

#define FOREACH_SREG()                                  \
    do {                                                \
        APPLY_SREG(cr0, MISCREG_CR0);                   \
        APPLY_SREG(cr2, MISCREG_CR2);                   \
        APPLY_SREG(cr3, MISCREG_CR3);                   \
        APPLY_SREG(cr4, MISCREG_CR4);                   \
        APPLY_SREG(cr8, MISCREG_CR8);                   \
        APPLY_SREG(efer, MISCREG_EFER);                 \
        APPLY_SREG(apic_base, MISCREG_APIC_BASE);       \
    } while (0)

#define FOREACH_DREG()                          \
    do {                                        \
        APPLY_DREG(db[0], MISCREG_DR0);         \
        APPLY_DREG(db[1], MISCREG_DR1);         \
        APPLY_DREG(db[2], MISCREG_DR2);         \
        APPLY_DREG(db[3], MISCREG_DR3);         \
        APPLY_DREG(dr6, MISCREG_DR6);           \
        APPLY_DREG(dr7, MISCREG_DR7);           \
    } while (0)

#define FOREACH_SEGMENT()                                       \
    do {                                                        \
        APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \
    } while (0)

#define FOREACH_DTABLE()                                        \
    do {                                                        \
        APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE);  \
        APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \
    } while (0)

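// Allocate a variable-length KVM struct: a fixed-size header (STRUCT)
// immediately followed by 'entries' array elements of type ENTRY, e.g. a
// kvm_msrs header followed by kvm_msr_entry records. The caller owns the
// returned memory and typically wraps it in a std::unique_ptr:
//
//   std::unique_ptr<struct kvm_msrs> msrs(
//       newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(n));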
template<typename STRUCT, typename ENTRY>
static STRUCT *newVarStruct(size_t entries)
{
    return (STRUCT *)operator new(sizeof(STRUCT) + entries * sizeof(ENTRY));
}

static void
dumpKvm(const struct kvm_regs &regs)
{
    inform("KVM register state:\n");

#define APPLY_IREG(kreg, mreg)                  \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    inform("\trip: 0x%llx\n", regs.rip);
    inform("\trflags: 0x%llx\n", regs.rflags);
}

static void
dumpKvm(const char *reg_name, const struct kvm_segment &seg)
{
    inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n"
           "\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n",
           reg_name,
           seg.base, seg.limit, seg.selector, seg.type,
           seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl, seg.unusable);
}

static void
dumpKvm(const char *reg_name, const struct kvm_dtable &dtable)
{
    inform("\t%s: @0x%llx+%x\n",
           reg_name, dtable.base, dtable.limit);
}

static void
dumpKvm(const struct kvm_sregs &sregs)
{
#define APPLY_SREG(kreg, mreg)                          \
    inform("\t" # kreg ": 0x%llx\n", sregs.kreg);
#define APPLY_SEGMENT(kreg, idx)                \
    dumpKvm(# kreg, sregs.kreg);
#define APPLY_DTABLE(kreg, idx)                 \
    dumpKvm(# kreg, sregs.kreg);

    inform("Special registers:\n");
    FOREACH_SEGMENT();
    FOREACH_SREG();
    FOREACH_DTABLE();

    inform("Interrupt Bitmap:");
    for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64)
        inform("  0x%.8x", sregs.interrupt_bitmap[i / 64]);

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

225
226#ifdef KVM_GET_DEBUGREGS
227static void
228dumpKvm(const struct kvm_debugregs &regs)
229{
230    inform("KVM debug state:\n");
231
232#define APPLY_DREG(kreg, mreg)                  \
233    inform("\t" # kreg ": 0x%llx\n", regs.kreg)
234
235    FOREACH_DREG();
236
237#undef APPLY_DREG
238
239    inform("\tflags: 0x%llx\n", regs.flags);
240}
241#endif
242
243static void
244dumpFpuSpec(const struct FXSave &xs)
245{
246    inform("\tlast_ip: 0x%x\n", xs.ctrl64.fpu_ip);
247    inform("\tlast_dp: 0x%x\n", xs.ctrl64.fpu_dp);
248    inform("\tmxcsr_mask: 0x%x\n", xs.mxcsr_mask);
249}
250
251static void
252dumpFpuSpec(const struct kvm_fpu &fpu)
253{
254    inform("\tlast_ip: 0x%x\n", fpu.last_ip);
255    inform("\tlast_dp: 0x%x\n", fpu.last_dp);
256}
257
258template<typename T>
259static void
260dumpFpuCommon(const T &fpu)
261{
262    const unsigned top((fpu.fsw >> 11) & 0x7);
263    inform("\tfcw: 0x%x\n", fpu.fcw);
264
265    inform("\tfsw: 0x%x (top: %i, "
266           "conditions: %s%s%s%s, exceptions: %s%s%s%s%s%s %s%s%s)\n",
267           fpu.fsw, top,
268
269           (fpu.fsw & CC0Bit) ? "C0" : "",
270           (fpu.fsw & CC1Bit) ? "C1" : "",
271           (fpu.fsw & CC2Bit) ? "C2" : "",
272           (fpu.fsw & CC3Bit) ? "C3" : "",
273
274           (fpu.fsw & IEBit) ? "I" : "",
275           (fpu.fsw & DEBit) ? "D" : "",
276           (fpu.fsw & ZEBit) ? "Z" : "",
277           (fpu.fsw & OEBit) ? "O" : "",
278           (fpu.fsw & UEBit) ? "U" : "",
279           (fpu.fsw & PEBit) ? "P" : "",
280
281           (fpu.fsw & StackFaultBit) ? "SF " : "",
282           (fpu.fsw & ErrSummaryBit) ? "ES " : "",
283           (fpu.fsw & BusyBit) ? "BUSY " : ""
284        );
285    inform("\tftwx: 0x%x\n", fpu.ftwx);
286    inform("\tlast_opcode: 0x%x\n", fpu.last_opcode);
287    dumpFpuSpec(fpu);
288    inform("\tmxcsr: 0x%x\n", fpu.mxcsr);
289    inform("\tFP Stack:\n");
290    for (int i = 0; i < 8; ++i) {
291        const unsigned reg_idx((i + top) & 0x7);
292        const bool empty(!((fpu.ftwx >> reg_idx) & 0x1));
293        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
294        char hex[33];
295        for (int j = 0; j < 10; ++j)
296            snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]);
297        inform("\t\tST%i/%i: 0x%s (%f)%s\n", i, reg_idx,
298               hex, value, empty ? " (e)" : "");
299    }
300    inform("\tXMM registers:\n");
301    for (int i = 0; i < 16; ++i) {
302        char hex[33];
303        for (int j = 0; j < 16; ++j)
304            snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]);
305        inform("\t\t%i: 0x%s\n", i, hex);
306    }
307}
308
309static void
310dumpKvm(const struct kvm_fpu &fpu)
311{
312    inform("FPU registers:\n");
313    dumpFpuCommon(fpu);
314}
315
316static void
317dumpKvm(const struct kvm_xsave &xsave)
318{
319    inform("FPU registers (XSave):\n");
320    dumpFpuCommon(*(FXSave *)xsave.region);
321}
322
323static void
324dumpKvm(const struct kvm_msrs &msrs)
325{
326    inform("MSRs:\n");
327
328    for (int i = 0; i < msrs.nmsrs; ++i) {
329        const struct kvm_msr_entry &e(msrs.entries[i]);
330
331        inform("\t0x%x: 0x%x\n", e.index, e.data);
332    }
333}
334
335static void
336dumpKvm(const struct kvm_xcrs &regs)
337{
338    inform("KVM XCR registers:\n");
339
340    inform("\tFlags: 0x%x\n", regs.flags);
341    for (int i = 0; i < regs.nr_xcrs; ++i) {
342        inform("\tXCR[0x%x]: 0x%x\n",
343               regs.xcrs[i].xcr,
344               regs.xcrs[i].value);
345    }
346}
347
348static void
349dumpKvm(const struct kvm_vcpu_events &events)
350{
351    inform("vCPU events:\n");
352
353    inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n",
354           events.exception.injected, events.exception.nr,
355           events.exception.has_error_code, events.exception.error_code);
356
357    inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n",
358           events.interrupt.injected, events.interrupt.nr,
359           events.interrupt.soft);
360
361    inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n",
362           events.nmi.injected, events.nmi.pending,
363           events.nmi.masked);
364
365    inform("\tSIPI vector: 0x%x\n", events.sipi_vector);
366    inform("\tFlags: 0x%x\n", events.flags);
367}
368
369static bool
370isCanonicalAddress(uint64_t addr)
371{
    // x86-64 doesn't currently use the full 64-bit virtual address
    // space; instead, it uses signed 48-bit addresses that are
    // sign-extended to 64 bits. Such addresses are known as
    // "canonical".
    uint64_t upper_half(addr & 0xffff800000000000ULL);
    return upper_half == 0 || upper_half == 0xffff800000000000;
}

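// Sanity check a segment register against the restrictions VMX places on
// guest segment state (base canonicality, type, S/P flags, and the
// limit/granularity combination). Violations only produce warnings here;
// they help diagnose failed VM entries.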
static void
checkSeg(const char *name, const int idx, const struct kvm_segment &seg,
         struct kvm_sregs sregs)
{
    // Check the register base
    switch (idx) {
      case MISCREG_TSL:
      case MISCREG_TR:
      case MISCREG_FS:
      case MISCREG_GS:
        if (!isCanonicalAddress(seg.base))
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;

      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
        if (seg.unusable)
            break;
        M5_FALLTHROUGH;
      case MISCREG_CS:
        if (seg.base & 0xffffffff00000000ULL)
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;
    }

    // Check the type
    switch (idx) {
      case MISCREG_CS:
        switch (seg.type) {
          case 3:
            if (seg.dpl != 0)
                warn("CS type is 3 but dpl != 0.\n");
            break;
          case 9:
          case 11:
            if (seg.dpl != sregs.ss.dpl)
                warn("CS type is %i but CS DPL != SS DPL\n", seg.type);
            break;
          case 13:
          case 15:
            if (seg.dpl > sregs.ss.dpl)
                warn("CS type is %i but CS DPL > SS DPL\n", seg.type);
            break;
          default:
            warn("Illegal CS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_SS:
        if (seg.unusable)
            break;
        switch (seg.type) {
          case 3:
            if (sregs.cs.type == 3 && seg.dpl != 0)
                warn("CS type is 3, but SS DPL != 0.\n");
            M5_FALLTHROUGH;
          case 7:
            if (!(sregs.cr0 & 1) && seg.dpl != 0)
                warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl);
            break;
          default:
            warn("Illegal SS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
        if (!(seg.type & 0x1) ||
            ((seg.type & 0x8) && !(seg.type & 0x2)))
            warn("%s has an illegal type field: %i\n", name, seg.type);
        break;

      case MISCREG_TR:
        // TODO: We should check the CPU mode
        if (seg.type != 3 && seg.type != 11)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
        if (seg.type != 2)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;
    }

    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
        M5_FALLTHROUGH;
      case MISCREG_CS:
        if (!seg.s)
            warn("%s: S flag not set\n", name);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
        M5_FALLTHROUGH;
      case MISCREG_TR:
        if (seg.s)
            warn("%s: S flag is set\n", name);
        break;
    }

    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
      case MISCREG_TSL:
        if (seg.unusable)
            break;
        M5_FALLTHROUGH;
      case MISCREG_TR:
      case MISCREG_CS:
        if (!seg.present)
            warn("%s: P flag not set\n", name);

        if (((seg.limit & 0xFFF) == 0 && seg.g) ||
            ((seg.limit & 0xFFF00000) != 0 && !seg.g)) {
            warn("%s limit (0x%x) and g (%i) combination is illegal.\n",
                 name, seg.limit, seg.g);
        }
        break;
    }

    // TODO: Check CS DB
}

X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params)
    : BaseKvmCPU(params),
      useXSave(params->useXSave)
{
    Kvm &kvm(*vm.kvm);

    if (!kvm.capSetTSSAddress())
        panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n");
    if (!kvm.capExtendedCPUID())
        panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n");
    if (!kvm.capUserNMI())
        warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n");
    if (!kvm.capVCPUEvents())
        warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n");

    haveDebugRegs = kvm.capDebugRegs();
    haveXSave = kvm.capXSave();
    haveXCRs = kvm.capXCRs();

    if (useXSave && !haveXSave) {
        warn("KVM: XSAVE not supported by host. MXCSR synchronization might be "
             "unreliable due to kernel bugs.\n");
        useXSave = false;
    } else if (!useXSave) {
        warn("KVM: XSave FPU/SIMD synchronization disabled by user.\n");
    }
}

X86KvmCPU::~X86KvmCPU()
{
}

void
X86KvmCPU::startup()
{
    BaseKvmCPU::startup();

    updateCPUID();

    // TODO: Do we need to create an identity mapped TSS area? We
    // should call kvm.vm.setTSSAddress() here in that case. It should
    // only be needed for old versions of the virtualization
    // extensions. We should make sure that the identity range is
    // reserved in the e820 memory map in that case.
}

void
X86KvmCPU::dump() const
{
    dumpIntRegs();
    if (useXSave)
        dumpXSave();
    else
        dumpFpuRegs();
    dumpSpecRegs();
    dumpDebugRegs();
    dumpXCRs();
    dumpVCpuEvents();
    dumpMSRs();
}

void
X86KvmCPU::dumpFpuRegs() const
{
    struct kvm_fpu fpu;
    getFPUState(fpu);
    dumpKvm(fpu);
}

void
X86KvmCPU::dumpIntRegs() const
{
    struct kvm_regs regs;
    getRegisters(regs);
    dumpKvm(regs);
}

void
X86KvmCPU::dumpSpecRegs() const
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);
    dumpKvm(sregs);
}

void
X86KvmCPU::dumpDebugRegs() const
{
    if (haveDebugRegs) {
#ifdef KVM_GET_DEBUGREGS
        struct kvm_debugregs dregs;
        getDebugRegisters(dregs);
        dumpKvm(dregs);
#endif
    } else {
        inform("Debug registers not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXCRs() const
{
    if (haveXCRs) {
        struct kvm_xcrs xcrs;
        getXCRs(xcrs);
        dumpKvm(xcrs);
    } else {
        inform("XCRs not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXSave() const
{
    if (haveXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);
        dumpKvm(xsave);
    } else {
        inform("XSave not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpVCpuEvents() const
{
    struct kvm_vcpu_events events;
    getVCpuEvents(events);
    dumpKvm(events);
}

void
X86KvmCPU::dumpMSRs() const
{
    const Kvm::MSRIndexVector &supported_msrs(vm.kvm->getSupportedMSRs());
    std::unique_ptr<struct kvm_msrs> msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(
            supported_msrs.size()));

    msrs->nmsrs = supported_msrs.size();
    for (int i = 0; i < supported_msrs.size(); ++i) {
        struct kvm_msr_entry &e(msrs->entries[i]);
        e.index = supported_msrs[i];
        e.reserved = 0;
        e.data = 0;
    }
    getMSRs(*msrs.get());

    dumpKvm(*msrs.get());
}

void
X86KvmCPU::updateKvmState()
{
    updateKvmStateRegs();
    updateKvmStateSRegs();
    updateKvmStateFPU();
    updateKvmStateMSRs();

    DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
    if (DTRACE(KvmContext))
        dump();
}

void
X86KvmCPU::updateKvmStateRegs()
{
    struct kvm_regs regs;

#define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg)
    FOREACH_IREG();
#undef APPLY_IREG

    regs.rip = tc->instAddr() - tc->readMiscReg(MISCREG_CS_BASE);

    /* You might think that setting regs.rflags to the contents of
     * MISCREG_RFLAGS here would suffice. In that case you're
     * mistaken. We need to reconstruct it from a bunch of ucode
     * registers and wave a dead chicken over it (aka mask out and set
     * reserved bits) to get it to work.
     */
    regs.rflags = X86ISA::getRFlags(tc);

    setRegisters(regs);
}

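// Translate a gem5 segment descriptor (the base/limit/selector/attribute
// misc registers for segment 'index') into KVM's kvm_segment format.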
static inline void
setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg,
                 const int index)
{
    SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index)));

    kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
    kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index));
    kvm_seg.type = attr.type;
    kvm_seg.present = attr.present;
    kvm_seg.dpl = attr.dpl;
    kvm_seg.db = attr.defaultSize;
    kvm_seg.s = attr.system;
    kvm_seg.l = attr.longMode;
    kvm_seg.g = attr.granularity;
    kvm_seg.avl = attr.avl;

    // A segment is normally unusable when the selector is zero. There
    // is an attr.unusable flag in gem5, but it seems unused. qemu
    // seems to set this to 0 all the time, so we just do the same and
    // hope for the best.
    kvm_seg.unusable = 0;
}

static inline void
setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable,
                const int index)
{
    kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
}

static void
forceSegAccessed(struct kvm_segment &seg)
{
    // Intel's VMX requires that (some) usable segments are flagged as
    // 'accessed' (i.e., the lowest bit in the segment type is set)
    // when entering VMX. This wouldn't necessarily be the case even if
    // gem5 did set the access bits correctly, so we force it to one
    // in that case.
    if (!seg.unusable)
        seg.type |= SEG_TYPE_BIT_ACCESSED;
}

void
X86KvmCPU::updateKvmStateSRegs()
{
    struct kvm_sregs sregs;

#define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg)
#define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx)

    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE

    // Clear the interrupt bitmap
    memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));

    // VMX requires CS, SS, DS, ES, FS, and GS to have the accessed
    // bit in the type field set.
    forceSegAccessed(sregs.cs);
    forceSegAccessed(sregs.ss);
    forceSegAccessed(sregs.ds);
    forceSegAccessed(sregs.es);
    forceSegAccessed(sregs.fs);
    forceSegAccessed(sregs.gs);

    // There are currently some cases where the active task isn't
    // marked as busy. This is illegal in VMX, so we force it to busy.
    if (sregs.tr.type == SEG_SYS_TYPE_TSS_AVAILABLE) {
        hack("tr.type (%i) is not busy. Forcing the busy bit.\n",
             sregs.tr.type);
        sregs.tr.type = SEG_SYS_TYPE_TSS_BUSY;
    }

    // VMX requires the DPL of SS and CS to be the same for
    // non-conforming code segments. It seems like m5 doesn't set the
    // DPL of SS correctly when taking interrupts, so we need to fix
    // that here.
    if ((sregs.cs.type == SEG_CS_TYPE_ACCESSED ||
         sregs.cs.type == SEG_CS_TYPE_READ_ACCESSED) &&
        sregs.cs.dpl != sregs.ss.dpl) {

        hack("CS.DPL (%i) != SS.DPL (%i): Forcing SS.DPL to %i\n",
             sregs.cs.dpl, sregs.ss.dpl, sregs.cs.dpl);
        sregs.ss.dpl = sregs.cs.dpl;
    }

    // Do checks after fixing up the state to avoid getting excessive
    // amounts of warnings.
    RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS));
    if (!rflags_nocc.vm) {
        // Do segment verification if the CPU isn't entering virtual
        // 8086 mode.  We currently assume that unrestricted guest
        // mode is available.

#define APPLY_SEGMENT(kreg, idx) \
        checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs)

        FOREACH_SEGMENT();
#undef APPLY_SEGMENT
    }

    setSpecialRegisters(sregs);
}

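// Copy gem5's x87/SSE state into a kvm_fpu struct or an FXSave image. Note
// that KVM stores the FP stack in ST(0)..ST(7) (stack-relative) order while
// gem5's FLOATREG_FPR registers are indexed by physical register, so the
// entries are rotated by TOP when copying.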
template <typename T>
static void
updateKvmStateFPUCommon(ThreadContext *tc, T &fpu)
{
    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    fpu.mxcsr = tc->readMiscRegNoEffect(MISCREG_MXCSR);
    fpu.fcw = tc->readMiscRegNoEffect(MISCREG_FCW);
    // No need to rebuild from MISCREG_FSW and MISCREG_TOP if we read
    // with effects.
    fpu.fsw = tc->readMiscReg(MISCREG_FSW);

    uint64_t ftw(tc->readMiscRegNoEffect(MISCREG_FTW));
    fpu.ftwx = X86ISA::convX87TagsToXTags(ftw);

    fpu.last_opcode = tc->readMiscRegNoEffect(MISCREG_FOP);

    const unsigned top((fpu.fsw >> 11) & 0x7);
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(tc->readFloatReg(FLOATREG_FPR(reg_idx)));
        DPRINTF(KvmContext, "Setting KVM FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        X86ISA::storeFloat80(fpu.fpr[i], value);
    }

    // TODO: We should update the MMX state

    for (int i = 0; i < 16; ++i) {
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][0] =
            tc->readFloatRegBits(FLOATREG_XMM_LOW(i));
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][8] =
            tc->readFloatRegBits(FLOATREG_XMM_HIGH(i));
    }
}

void
X86KvmCPU::updateKvmStateFPULegacy()
{
    struct kvm_fpu fpu;

    // There is some padding in the FP registers, so we'd better zero
    // the whole struct.
    memset(&fpu, 0, sizeof(fpu));

    updateKvmStateFPUCommon(tc, fpu);

    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    fpu.last_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    fpu.last_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setFPUState(fpu);
}

void
X86KvmCPU::updateKvmStateFPUXSave()
{
    struct kvm_xsave kxsave;
    FXSave &xsave(*(FXSave *)kxsave.region);

    // There are some padding and reserved fields in the structure, so
    // we'd better zero the whole thing.
    memset(&kxsave, 0, sizeof(kxsave));

    updateKvmStateFPUCommon(tc, xsave);

    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    xsave.ctrl64.fpu_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    xsave.ctrl64.fpu_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setXSave(kxsave);
}

void
X86KvmCPU::updateKvmStateFPU()
{
    if (useXSave)
        updateKvmStateFPUXSave();
    else
        updateKvmStateFPULegacy();
}

void
X86KvmCPU::updateKvmStateMSRs()
{
    KvmMSRVector msrs;

    const Kvm::MSRIndexVector &indices(getMsrIntersection());

    for (auto it = indices.cbegin(); it != indices.cend(); ++it) {
        struct kvm_msr_entry e;

        e.index = *it;
        e.reserved = 0;
        e.data = tc->readMiscReg(msrMap.at(*it));
        DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n",
                e.index, e.data);

        msrs.push_back(e);
    }

    setMSRs(msrs);
}

void
X86KvmCPU::updateThreadContext()
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;

    getRegisters(regs);
    getSpecialRegisters(sregs);

    DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n");
    if (DTRACE(KvmContext))
        dump();

    updateThreadContextRegs(regs, sregs);
    updateThreadContextSRegs(sregs);
    if (useXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);

        updateThreadContextXSave(xsave);
    } else {
        struct kvm_fpu fpu;
        getFPUState(fpu);

        updateThreadContextFPU(fpu);
    }
    updateThreadContextMSRs();

    // The M5 misc reg caches some values from other
    // registers. Writing to it with side effects causes it to be
    // updated from its source registers.
    tc->setMiscReg(MISCREG_M5_REG, 0);
}

void
X86KvmCPU::updateThreadContextRegs(const struct kvm_regs &regs,
                                   const struct kvm_sregs &sregs)
{
#define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    tc->pcState(PCState(regs.rip + sregs.cs.base));

    // Flags are spread out across multiple semi-magic registers so we
    // need some special care when updating them.
    X86ISA::setRFlags(tc, regs.rflags);
}


inline void
setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
                  const int index)
{
    SegAttr attr(0);

    attr.type = kvm_seg.type;
    attr.present = kvm_seg.present;
    attr.dpl = kvm_seg.dpl;
    attr.defaultSize = kvm_seg.db;
    attr.system = kvm_seg.s;
    attr.longMode = kvm_seg.l;
    attr.granularity = kvm_seg.g;
    attr.avl = kvm_seg.avl;
    attr.unusable = kvm_seg.unusable;

    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
    tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
    tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
}

inline void
setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
                  const int index)
{
    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
}

void
X86KvmCPU::updateThreadContextSRegs(const struct kvm_sregs &sregs)
{
    assert(getKvmRunState()->apic_base == sregs.apic_base);
    assert(getKvmRunState()->cr8 == sregs.cr8);

#define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
#define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();
#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

template<typename T>
static void
updateThreadContextFPUCommon(ThreadContext *tc, const T &fpu)
{
    const unsigned top((fpu.fsw >> 11) & 0x7);

    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        DPRINTF(KvmContext, "Setting gem5 FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        tc->setFloatReg(FLOATREG_FPR(reg_idx), value);
    }

    // TODO: We should update the MMX state

    tc->setMiscRegNoEffect(MISCREG_X87_TOP, top);
    tc->setMiscRegNoEffect(MISCREG_MXCSR, fpu.mxcsr);
    tc->setMiscRegNoEffect(MISCREG_FCW, fpu.fcw);
    tc->setMiscRegNoEffect(MISCREG_FSW, fpu.fsw);

    uint64_t ftw(convX87XTagsToTags(fpu.ftwx));
    // TODO: Are these registers really the same?
    tc->setMiscRegNoEffect(MISCREG_FTW, ftw);
    tc->setMiscRegNoEffect(MISCREG_FTAG, ftw);

    tc->setMiscRegNoEffect(MISCREG_FOP, fpu.last_opcode);

    for (int i = 0; i < 16; ++i) {
        tc->setFloatRegBits(FLOATREG_XMM_LOW(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][0]);
        tc->setFloatRegBits(FLOATREG_XMM_HIGH(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][8]);
    }
}

void
X86KvmCPU::updateThreadContextFPU(const struct kvm_fpu &fpu)
{
    updateThreadContextFPUCommon(tc, fpu);

    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, fpu.last_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, fpu.last_dp);
}

void
X86KvmCPU::updateThreadContextXSave(const struct kvm_xsave &kxsave)
{
    const FXSave &xsave(*(const FXSave *)kxsave.region);

    updateThreadContextFPUCommon(tc, xsave);

    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, xsave.ctrl64.fpu_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, xsave.ctrl64.fpu_dp);
}

void
X86KvmCPU::updateThreadContextMSRs()
{
    const Kvm::MSRIndexVector &msrs(getMsrIntersection());

    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
    struct kvm_msr_entry *entry;

    // Create a list of MSRs to read
    kvm_msrs->nmsrs = msrs.size();
    entry = &kvm_msrs->entries[0];
    for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
        entry->index = *it;
        entry->reserved = 0;
        entry->data = 0;
    }

    getMSRs(*kvm_msrs.get());

    // Update M5's state
    entry = &kvm_msrs->entries[0];
    for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
        DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
                entry->index, entry->data);

        tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
    }
}

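// Deliver a pending gem5 interrupt to the guest. NMIs are injected through
// kvmNonMaskableInterrupt(), INIT and STARTUP IPIs are handled inside gem5
// (they modify thread state rather than inject a vector), and ordinary
// external interrupts are injected through kvmInterrupt().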
void
X86KvmCPU::deliverInterrupts()
{
    Fault fault;

    syncThreadContext();

    {
        // Migrate to the interrupt controller's thread to get the
        // interrupt. Even though the individual methods are safe to
        // call across threads, we might still lose interrupts unless
        // getInterrupt() and updateIntrInfo() are called atomically.
        EventQueue::ScopedMigration migrate(interrupts[0]->eventQueue());
        fault = interrupts[0]->getInterrupt(tc);
        interrupts[0]->updateIntrInfo(tc);
    }

    X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
    if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "Delivering NMI\n");
        kvmNonMaskableInterrupt();
    } else if (dynamic_cast<InitInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "INIT interrupt\n");
        fault.get()->invoke(tc);
        // Delay the kvm state update since we won't enter KVM on this
        // tick.
        threadContextDirty = true;
        // HACK: gem5 doesn't actually have any BIOS code, which means
        // that we need to halt the thread and wait for a startup
        // interrupt before restarting the thread. The simulated CPUs
        // use the same kind of hack using a microcode routine.
        thread->suspend();
    } else if (dynamic_cast<StartupInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "STARTUP interrupt\n");
        fault.get()->invoke(tc);
        // The kvm state is assumed to have been updated when entering
        // kvmRun(), so we need to update it manually here.
        updateKvmState();
    } else if (x86int) {
        struct kvm_interrupt kvm_int;
        kvm_int.irq = x86int->getVector();

        DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
                fault->name(), kvm_int.irq);

        kvmInterrupt(kvm_int);
    } else {
        panic("KVM: Unknown interrupt type\n");
    }
}

Tick
X86KvmCPU::kvmRun(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (interrupts[0]->checkInterruptsRaw()) {
        if (interrupts[0]->hasPendingUnmaskable()) {
            DPRINTF(KvmInt,
                    "Delivering unmaskable interrupt.\n");
            syncThreadContext();
            deliverInterrupts();
        } else if (kvm_run.ready_for_interrupt_injection) {
            // KVM claims that it is ready for an interrupt. It might
            // be lying if we just updated rflags and disabled
            // interrupts (e.g., by doing a CPU handover). Let's sync
            // the thread context and check if there are /really/
            // interrupts that should be delivered now.
            syncThreadContext();
            if (interrupts[0]->checkInterrupts(tc)) {
                DPRINTF(KvmInt,
                        "M5 has pending interrupts, delivering interrupt.\n");

                deliverInterrupts();
            } else {
                DPRINTF(KvmInt,
                        "Interrupt delivery delayed due to KVM confusion.\n");
                kvm_run.request_interrupt_window = 1;
            }
        } else if (!kvm_run.request_interrupt_window) {
            DPRINTF(KvmInt,
                    "M5 has pending interrupts, requesting interrupt "
                    "window.\n");
            kvm_run.request_interrupt_window = 1;
        }
    } else {
        kvm_run.request_interrupt_window = 0;
    }

    // The CPU might have been suspended as a result of the INIT
    // interrupt delivery hack. In that case, don't enter into KVM.
    if (_status == Idle)
        return 0;
    else
        return kvmRunWrapper(ticks);
}

Tick
X86KvmCPU::kvmRunDrain()
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (!archIsDrained()) {
        DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");

        // Tell KVM to find a suitable place to deliver interrupts. This
        // should ensure that pending interrupts have been delivered and
        // things are reasonably consistent (i.e., no interrupts pending
        // in the guest).
        kvm_run.request_interrupt_window = 1;

        // Limit the run to 1 millisecond. That is hopefully enough to
        // reach an interrupt window. Otherwise, we'll just try again
        // later.
        return kvmRunWrapper(1 * SimClock::Float::ms);
    } else {
        DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");

        return kvmRunWrapper(0);
    }
}

Tick
X86KvmCPU::kvmRunWrapper(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Synchronize the APIC base and CR8 here since they are present
    // in the kvm_run struct, which makes the synchronization really
    // cheap.
    kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    const Tick run_ticks(BaseKvmCPU::kvmRun(ticks));

    tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    return run_ticks;
}

uint64_t
X86KvmCPU::getHostCycles() const
{
    return getMSR(MSR_TSC);
}

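// Handle a 32-bit IN/OUT to an IO port that is backed by a gem5
// miscellaneous register (currently only the PCI config address port):
// OUTs write the guest data to the misc reg, INs read it back.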
void
X86KvmCPU::handleIOMiscReg32(int miscreg)
{
    struct kvm_run &kvm_run(*getKvmRunState());
    const uint16_t port(kvm_run.io.port);

    assert(kvm_run.exit_reason == KVM_EXIT_IO);

    if (kvm_run.io.size != 4) {
        panic("Unexpected IO size (%u) for address 0x%x.\n",
              kvm_run.io.size, port);
    }

    if (kvm_run.io.count != 1) {
        panic("Unexpected IO count (%u) for address 0x%x.\n",
              kvm_run.io.count, port);
    }

    uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset));
    if (kvm_run.io.direction == KVM_EXIT_IO_OUT)
        tc->setMiscReg(miscreg, *data);
    else
        *data = tc->readMiscRegNoEffect(miscreg);
}

Tick
X86KvmCPU::handleKvmExitIO()
{
    struct kvm_run &kvm_run(*getKvmRunState());
    bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT);
    unsigned char *guestData(getGuestData(kvm_run.io.data_offset));
    Tick delay(0);
    uint16_t port(kvm_run.io.port);
    Addr pAddr;
    const int count(kvm_run.io.count);

    assert(kvm_run.io.direction == KVM_EXIT_IO_IN ||
           kvm_run.io.direction == KVM_EXIT_IO_OUT);

    DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n",
            (isWrite ? "out" : "in"), kvm_run.io.port);

    /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we
     * don't use the TLB component, we need to intercept and handle
     * the PCI configuration space IO ports here.
     *
     * The IO port PCI discovery mechanism uses one address register
     * and one data register. We map the address register to a misc
     * reg and use that to re-route data register accesses to the
     * right location in the PCI configuration space.
     */
    if (port == IO_PCI_CONF_ADDR) {
        handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS);
        return 0;
    } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) {
        Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS));
        if (pciConfigAddr & 0x80000000) {
            pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) |
                                                (port & 0x3));
        } else {
            pAddr = X86ISA::x86IOAddress(port);
        }
    } else {
        pAddr = X86ISA::x86IOAddress(port);
    }

    const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
    // Temporarily lock and migrate to the device event queue to
    // prevent races in multi-core mode.
    EventQueue::ScopedMigration migrate(deviceEventQueue());
    for (int i = 0; i < count; ++i) {
        RequestPtr io_req = new Request(pAddr, kvm_run.io.size,
                                        Request::UNCACHEABLE, dataMasterId());
        io_req->setContext(tc->contextId());

        PacketPtr pkt = new Packet(io_req, cmd);

        pkt->dataStatic(guestData);
        delay += dataPort.submitIO(pkt);

        guestData += kvm_run.io.size;
    }

    return delay;
}

Tick
X86KvmCPU::handleKvmExitIRQWindowOpen()
{
    // We don't need to do anything here since this is caught the next
    // time we execute kvmRun(). We still overload the exit event to
    // silence the warning about an unhandled exit event.
    return 0;
}

bool
X86KvmCPU::archIsDrained() const
{
    struct kvm_vcpu_events events;

    getVCpuEvents(events);

    // We could probably handle this by re-inserting interrupts
    // that are pending into gem5 on a drain. However, that would
    // probably be tricky to do reliably, so we'll just prevent a
    // drain if there is anything pending in the
    // guest. X86KvmCPU::kvmRunDrain() minimizes the amount of code
    // executed in the guest by requesting an interrupt window if
    // there are pending interrupts.
    const bool pending_events(events.exception.injected ||
                              events.interrupt.injected ||
                              events.nmi.injected || events.nmi.pending);

    if (pending_events) {
        DPRINTF(Drain, "archIsDrained: Pending events: %s %s %s %s\n",
                events.exception.injected ? "exception" : "",
                events.interrupt.injected ? "interrupt" : "",
                events.nmi.injected ? "nmi[i]" : "",
                events.nmi.pending ? "nmi[p]" : "");
    }

    return !pending_events;
}

static struct kvm_cpuid_entry2
makeKvmCpuid(uint32_t function, uint32_t index,
             CpuidResult &result)
{
    struct kvm_cpuid_entry2 e;
    e.function = function;
    e.index = index;
    e.flags = 0;
    e.eax = (uint32_t)result.rax;
    e.ebx = (uint32_t)result.rbx;
    e.ecx = (uint32_t)result.rcx;
    e.edx = (uint32_t)result.rdx;

    return e;
}

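// Populate the guest CPUID tables by querying gem5's own CPUID
// implementation (X86ISA::doCpuid) for all basic and extended leaves and
// passing the results to KVM_SET_CPUID2.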
void
X86KvmCPU::updateCPUID()
{
    Kvm::CPUIDVector m5_supported;

    /* TODO: We currently don't support any of the functions that
     * iterate through data structures in the CPU using an index. It's
     * currently not a problem since M5 doesn't expose any of them at
     * the moment.
     */

    /* Basic features */
    CpuidResult func0;
    X86ISA::doCpuid(tc, 0x0, 0, func0);
    for (uint32_t function = 0; function <= func0.rax; ++function) {
        CpuidResult cpuid;
        uint32_t idx(0);

        X86ISA::doCpuid(tc, function, idx, cpuid);
        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
    }

    /* Extended features */
    CpuidResult efunc0;
    X86ISA::doCpuid(tc, 0x80000000, 0, efunc0);
    for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) {
        CpuidResult cpuid;
        uint32_t idx(0);

        X86ISA::doCpuid(tc, function, idx, cpuid);
        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
    }

    setCPUID(m5_supported);
}

void
X86KvmCPU::setCPUID(const struct kvm_cpuid2 &cpuid)
{
    if (ioctl(KVM_SET_CPUID2, (void *)&cpuid) == -1)
        panic("KVM: Failed to set guest CPUID2 (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setCPUID(const Kvm::CPUIDVector &cpuid)
{
    std::unique_ptr<struct kvm_cpuid2> kvm_cpuid(
        newVarStruct<struct kvm_cpuid2, struct kvm_cpuid_entry2>(cpuid.size()));

    kvm_cpuid->nent = cpuid.size();
    std::copy(cpuid.begin(), cpuid.end(), kvm_cpuid->entries);

    setCPUID(*kvm_cpuid);
}

void
X86KvmCPU::setMSRs(const struct kvm_msrs &msrs)
{
    if (ioctl(KVM_SET_MSRS, (void *)&msrs) == -1)
        panic("KVM: Failed to set guest MSRs (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setMSRs(const KvmMSRVector &msrs)
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));

    kvm_msrs->nmsrs = msrs.size();
    std::copy(msrs.begin(), msrs.end(), kvm_msrs->entries);

    setMSRs(*kvm_msrs);
}

void
X86KvmCPU::getMSRs(struct kvm_msrs &msrs) const
{
    if (ioctl(KVM_GET_MSRS, (void *)&msrs) == -1)
        panic("KVM: Failed to get guest MSRs (errno: %i)\n",
              errno);
}


void
X86KvmCPU::setMSR(uint32_t index, uint64_t value)
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
    struct kvm_msr_entry &entry(kvm_msrs->entries[0]);

    kvm_msrs->nmsrs = 1;
    entry.index = index;
    entry.reserved = 0;
    entry.data = value;

    setMSRs(*kvm_msrs.get());
}

uint64_t
X86KvmCPU::getMSR(uint32_t index) const
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
    struct kvm_msr_entry &entry(kvm_msrs->entries[0]);

    kvm_msrs->nmsrs = 1;
    entry.index = index;
    entry.reserved = 0;
    entry.data = 0;

    getMSRs(*kvm_msrs.get());
    return entry.data;
}

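// Compute (and cache) the set of MSRs that both the host's KVM module and
// gem5's msrMap know about; only these can be kept in sync between the two.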
const Kvm::MSRIndexVector &
X86KvmCPU::getMsrIntersection() const
{
    if (cachedMsrIntersection.empty()) {
        const Kvm::MSRIndexVector &kvm_msrs(vm.kvm->getSupportedMSRs());

        DPRINTF(Kvm, "kvm-x86: Updating MSR intersection\n");
        for (auto it = kvm_msrs.cbegin(); it != kvm_msrs.cend(); ++it) {
            if (X86ISA::msrMap.find(*it) != X86ISA::msrMap.end()) {
                cachedMsrIntersection.push_back(*it);
                DPRINTF(Kvm, "kvm-x86: Adding MSR 0x%x\n", *it);
            } else {
                warn("kvm-x86: MSR (0x%x) unsupported by gem5. Skipping.\n",
                     *it);
            }
        }
    }

    return cachedMsrIntersection;
}

void
X86KvmCPU::getDebugRegisters(struct kvm_debugregs &regs) const
{
#ifdef KVM_GET_DEBUGREGS
    if (ioctl(KVM_GET_DEBUGREGS, &regs) == -1)
        panic("KVM: Failed to get guest debug registers\n");
#else
    panic("KVM: Unsupported getDebugRegisters call.\n");
#endif
}

void
X86KvmCPU::setDebugRegisters(const struct kvm_debugregs &regs)
{
#ifdef KVM_SET_DEBUGREGS
    if (ioctl(KVM_SET_DEBUGREGS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest debug registers\n");
#else
    panic("KVM: Unsupported setDebugRegisters call.\n");
#endif
}

void
X86KvmCPU::getXCRs(struct kvm_xcrs &regs) const
{
    if (ioctl(KVM_GET_XCRS, &regs) == -1)
        panic("KVM: Failed to get guest XCRs\n");
}

void
X86KvmCPU::setXCRs(const struct kvm_xcrs &regs)
{
    if (ioctl(KVM_SET_XCRS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest XCRs\n");
}

void
X86KvmCPU::getXSave(struct kvm_xsave &xsave) const
{
    if (ioctl(KVM_GET_XSAVE, &xsave) == -1)
        panic("KVM: Failed to get guest XSAVE state\n");
}

void
X86KvmCPU::setXSave(const struct kvm_xsave &xsave)
{
    if (ioctl(KVM_SET_XSAVE, (void *)&xsave) == -1)
        panic("KVM: Failed to set guest XSAVE state\n");
}


void
X86KvmCPU::getVCpuEvents(struct kvm_vcpu_events &events) const
{
    if (ioctl(KVM_GET_VCPU_EVENTS, &events) == -1)
        panic("KVM: Failed to get guest vCPU events\n");
}

void
X86KvmCPU::setVCpuEvents(const struct kvm_vcpu_events &events)
{
    if (ioctl(KVM_SET_VCPU_EVENTS, (void *)&events) == -1)
        panic("KVM: Failed to set guest vCPU events\n");
}

X86KvmCPU *
X86KvmCPUParams::create()
{
    return new X86KvmCPU(this);
}