1/* 2 * Copyright (c) 2013 Andreas Sandberg 3 * All rights reserved 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * Authors: Andreas Sandberg 29 */ 30 31#include "cpu/kvm/x86_cpu.hh" 32 33#include <linux/kvm.h> 34 35#include <algorithm> 36#include <cerrno> 37#include <memory> 38 39#include "arch/registers.hh" 40#include "arch/x86/cpuid.hh" 41#include "arch/x86/regs/msr.hh" 42#include "arch/x86/utility.hh" 43#include "cpu/kvm/base.hh" 44#include "debug/Drain.hh" 45#include "debug/Kvm.hh" 46#include "debug/KvmContext.hh" 47#include "debug/KvmIO.hh" 48#include "debug/KvmInt.hh" 49 50using namespace X86ISA; 51 52#define MSR_TSC 0x10 53 54#define IO_PCI_CONF_ADDR 0xCF8 55#define IO_PCI_CONF_DATA_BASE 0xCFC 56 57// Task segment type of an inactive 32-bit or 64-bit task 58#define SEG_SYS_TYPE_TSS_AVAILABLE 9 59// Task segment type of an active 32-bit or 64-bit task 60#define SEG_SYS_TYPE_TSS_BUSY 11 61 62// Non-conforming accessed code segment 63#define SEG_CS_TYPE_ACCESSED 9 64// Non-conforming accessed code segment that can be read 65#define SEG_CS_TYPE_READ_ACCESSED 11 66 67// The lowest bit of the type field for normal segments (code and 68// data) is used to indicate that a segment has been accessed. 
#define SEG_TYPE_BIT_ACCESSED 1

/**
 * Memory layout of an FXSAVE area as produced by FXSAVE/consumed by
 * FXRSTOR. The same layout forms the legacy region of an XSAVE area,
 * which is why kvm_xsave.region below is reinterpreted as this struct.
 * Must be exactly 512 bytes (checked by the static_assert below).
 */
struct FXSave
{
    uint16_t fcw;
    uint16_t fsw;
    uint8_t ftwx;
    uint8_t pad0;
    uint16_t last_opcode;
    union {
        // 32-bit mode: segmented FPU instruction/data pointers
        struct {
            uint32_t fpu_ip;
            uint16_t fpu_cs;
            uint16_t pad1;
            uint32_t fpu_dp;
            uint16_t fpu_ds;
            uint16_t pad2;
        } ctrl32;

        // 64-bit mode: flat 64-bit FPU instruction/data pointers
        struct {
            uint64_t fpu_ip;
            uint64_t fpu_dp;
        } ctrl64;
    };
    uint32_t mxcsr;
    uint32_t mxcsr_mask;

    uint8_t fpr[8][16];
    uint8_t xmm[16][16];

    uint64_t reserved[12];
} M5_ATTR_PACKED;

static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave");

// The FOREACH_* macros below expand an APPLY_* macro (defined at each
// use site) once per (kvm struct field, gem5 register index) pair.
// They centralize the mapping between kvm_regs/kvm_sregs fields and
// gem5's register indices so the dump/sync code can't get out of sync.
#define FOREACH_IREG()                          \
    do {                                        \
        APPLY_IREG(rax, INTREG_RAX);            \
        APPLY_IREG(rbx, INTREG_RBX);            \
        APPLY_IREG(rcx, INTREG_RCX);            \
        APPLY_IREG(rdx, INTREG_RDX);            \
        APPLY_IREG(rsi, INTREG_RSI);            \
        APPLY_IREG(rdi, INTREG_RDI);            \
        APPLY_IREG(rsp, INTREG_RSP);            \
        APPLY_IREG(rbp, INTREG_RBP);            \
        APPLY_IREG(r8, INTREG_R8);              \
        APPLY_IREG(r9, INTREG_R9);              \
        APPLY_IREG(r10, INTREG_R10);            \
        APPLY_IREG(r11, INTREG_R11);            \
        APPLY_IREG(r12, INTREG_R12);            \
        APPLY_IREG(r13, INTREG_R13);            \
        APPLY_IREG(r14, INTREG_R14);            \
        APPLY_IREG(r15, INTREG_R15);            \
    } while (0)

#define FOREACH_SREG()                                  \
    do {                                                \
        APPLY_SREG(cr0, MISCREG_CR0);                   \
        APPLY_SREG(cr2, MISCREG_CR2);                   \
        APPLY_SREG(cr3, MISCREG_CR3);                   \
        APPLY_SREG(cr4, MISCREG_CR4);                   \
        APPLY_SREG(cr8, MISCREG_CR8);                   \
        APPLY_SREG(efer, MISCREG_EFER);                 \
        APPLY_SREG(apic_base, MISCREG_APIC_BASE);       \
    } while (0)

#define FOREACH_DREG()                          \
    do {                                        \
        APPLY_DREG(db[0], MISCREG_DR0);         \
        APPLY_DREG(db[1], MISCREG_DR1);         \
        APPLY_DREG(db[2], MISCREG_DR2);         \
        APPLY_DREG(db[3], MISCREG_DR3);         \
        APPLY_DREG(dr6, MISCREG_DR6);           \
        APPLY_DREG(dr7, MISCREG_DR7);           \
    } while (0)

#define FOREACH_SEGMENT()                                       \
    do {                                                        \
        APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \
    } while (0)

#define FOREACH_DTABLE()                                        \
    do {                                                        \
        APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE);  \
        APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \
    } while (0)

// Allocate a variable-length kernel struct (e.g., kvm_msrs) that has a
// fixed header of type STRUCT followed by 'entries' trailing elements
// of type ENTRY. Caller owns the returned storage (typically wrapped
// in a unique_ptr whose default deleter matches this operator new).
template<typename STRUCT, typename ENTRY>
static STRUCT *newVarStruct(size_t entries)
{
    return (STRUCT *)operator new(sizeof(STRUCT) + entries * sizeof(ENTRY));
}

// Pretty-print the general-purpose register state returned by
// KVM_GET_REGS.
static void
dumpKvm(const struct kvm_regs &regs)
{
    inform("KVM register state:\n");

#define APPLY_IREG(kreg, mreg) \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    inform("\trip: 0x%llx\n", regs.rip);
    inform("\trflags: 0x%llx\n", regs.rflags);
}

// Pretty-print one segment register (selector, base, limit, and the
// unpacked attribute bits).
static void
dumpKvm(const char *reg_name, const struct kvm_segment &seg)
{
    inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n"
           "\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n",
           reg_name,
           seg.base, seg.limit, seg.selector, seg.type,
           seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl, seg.unusable);
}

// Pretty-print a descriptor table register (GDTR/IDTR) as base+limit.
static void
dumpKvm(const char *reg_name, const struct kvm_dtable &dtable)
{
    inform("\t%s: @0x%llx+%x\n",
           reg_name, dtable.base, dtable.limit);
}

// Pretty-print the special register state returned by KVM_GET_SREGS:
// segments, control registers, descriptor tables, and the pending
// interrupt bitmap.
static void
dumpKvm(const struct kvm_sregs &sregs)
{
#define APPLY_SREG(kreg, mreg) \
    inform("\t" # kreg ": 0x%llx\n", sregs.kreg);
#define APPLY_SEGMENT(kreg, idx) \
    dumpKvm(# kreg, sregs.kreg);
#define APPLY_DTABLE(kreg, idx) \
    dumpKvm(# kreg, sregs.kreg);

    inform("Special registers:\n");
    FOREACH_SEGMENT();
    FOREACH_SREG();
    FOREACH_DTABLE();

    inform("Interrupt Bitmap:");
    // One 64-bit word per 64 interrupt vectors
    for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64)
        inform("  0x%.8x", sregs.interrupt_bitmap[i / 64]);

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

#ifdef KVM_GET_DEBUGREGS
// Pretty-print the debug register state (only compiled in when the
// host kernel headers expose KVM_GET_DEBUGREGS).
static void
dumpKvm(const struct kvm_debugregs &regs)
{
    inform("KVM debug state:\n");

#define APPLY_DREG(kreg, mreg) \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_DREG();

#undef APPLY_DREG

    inform("\tflags: 0x%llx\n", regs.flags);
}
#endif

// FPU-state fields that only exist in the FXSave/XSAVE representation
// (companion overload of the kvm_fpu variant below; both are called
// from dumpFpuCommon via overload resolution).
static void
dumpFpuSpec(const struct FXSave &xs)
{
    inform("\tlast_ip: 0x%x\n", xs.ctrl64.fpu_ip);
    inform("\tlast_dp: 0x%x\n", xs.ctrl64.fpu_dp);
    inform("\tmxcsr_mask: 0x%x\n", xs.mxcsr_mask);
}

// FPU-state fields specific to the legacy kvm_fpu representation.
static void
dumpFpuSpec(const struct kvm_fpu &fpu)
{
    inform("\tlast_ip: 0x%x\n", fpu.last_ip);
    inform("\tlast_dp: 0x%x\n", fpu.last_dp);
}

// Pretty-print the FPU/SIMD state fields shared between kvm_fpu and
// FXSave (T is one of those two types; representation-specific fields
// are handled by the dumpFpuSpec overloads).
template<typename T>
static void
dumpFpuCommon(const T &fpu)
{
    // Top-of-stack pointer lives in FSW bits 13:11
    const unsigned top((fpu.fsw >> 11) & 0x7);

    inform("\tfcw: 0x%x\n", fpu.fcw);

    inform("\tfsw: 0x%x (top: %i, "
           "conditions: %s%s%s%s, exceptions: %s%s%s%s%s%s %s%s%s)\n",
           fpu.fsw, top,

           (fpu.fsw & CC0Bit) ? "C0" : "",
           (fpu.fsw & CC1Bit) ? "C1" : "",
           (fpu.fsw & CC2Bit) ? "C2" : "",
           (fpu.fsw & CC3Bit) ? "C3" : "",

           (fpu.fsw & IEBit) ? "I" : "",
           (fpu.fsw & DEBit) ? "D" : "",
           (fpu.fsw & ZEBit) ? "Z" : "",
           (fpu.fsw & OEBit) ? "O" : "",
           (fpu.fsw & UEBit) ? "U" : "",
           (fpu.fsw & PEBit) ? "P" : "",

           (fpu.fsw & StackFaultBit) ? "SF " : "",
           (fpu.fsw & ErrSummaryBit) ? "ES " : "",
           (fpu.fsw & BusyBit) ? "BUSY " : ""
        );
    inform("\tftwx: 0x%x\n", fpu.ftwx);
    inform("\tlast_opcode: 0x%x\n", fpu.last_opcode);
    dumpFpuSpec(fpu);
    inform("\tmxcsr: 0x%x\n", fpu.mxcsr);
    inform("\tFP Stack:\n");
    for (int i = 0; i < 8; ++i) {
        // fpr[] is stored in physical register order; translate to
        // stack order using TOP. ftwx has one valid bit per physical
        // register (0 => empty).
        const unsigned reg_idx((i + top) & 0x7);
        const bool empty(!((fpu.ftwx >> reg_idx) & 0x1));
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        char hex[33];
        // Only the first 10 bytes of each 16-byte slot hold the
        // 80-bit extended-precision value.
        for (int j = 0; j < 10; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]);
        inform("\t\tST%i/%i: 0x%s (%f)%s\n", i, reg_idx,
               hex, value, empty ? " (e)" : "");
    }
    inform("\tXMM registers:\n");
    for (int i = 0; i < 16; ++i) {
        char hex[33];
        for (int j = 0; j < 16; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]);
        inform("\t\t%i: 0x%s\n", i, hex);
    }
}

// Pretty-print legacy (KVM_GET_FPU) FPU state.
static void
dumpKvm(const struct kvm_fpu &fpu)
{
    inform("FPU registers:\n");
    dumpFpuCommon(fpu);
}

// Pretty-print XSAVE-based FPU state. The legacy region of the XSAVE
// area is an FXSave image, which is all dumpFpuCommon needs.
static void
dumpKvm(const struct kvm_xsave &xsave)
{
    inform("FPU registers (XSave):\n");
    dumpFpuCommon(*(FXSave *)xsave.region);
}

// Pretty-print a set of MSR (index, value) pairs.
static void
dumpKvm(const struct kvm_msrs &msrs)
{
    inform("MSRs:\n");

    for (int i = 0; i < msrs.nmsrs; ++i) {
        const struct kvm_msr_entry &e(msrs.entries[i]);

        inform("\t0x%x: 0x%x\n", e.index, e.data);
    }
}

// Pretty-print extended control registers (XCRs).
static void
dumpKvm(const struct kvm_xcrs &regs)
{
    inform("KVM XCR registers:\n");

    inform("\tFlags: 0x%x\n", regs.flags);
    for (int i = 0; i < regs.nr_xcrs; ++i) {
        inform("\tXCR[0x%x]: 0x%x\n",
               regs.xcrs[i].xcr,
               regs.xcrs[i].value);
    }
}

// Pretty-print pending vCPU events (exceptions, interrupts, NMIs,
// SIPI) as reported by KVM_GET_VCPU_EVENTS.
static void
dumpKvm(const struct kvm_vcpu_events &events)
{
    inform("vCPU events:\n");

    inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n",
           events.exception.injected, events.exception.nr,
           events.exception.has_error_code, events.exception.error_code);

    inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n",
           events.interrupt.injected, events.interrupt.nr,
           events.interrupt.soft);

    inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n",
           events.nmi.injected, events.nmi.pending,
           events.nmi.masked);

    inform("\tSIPI vector: 0x%x\n", events.sipi_vector);
    inform("\tFlags: 0x%x\n", events.flags);
}

static bool
isCanonicalAddress(uint64_t addr)
{
    // x86-64 doesn't currently use the full 64-bit virtual address
    // space, instead it uses signed 48 bit addresses that are
    // sign-extended to 64 bits. Such addresses are known as
    // "canonical".
    uint64_t upper_half(addr & 0xffff800000000000ULL);
    return upper_half == 0 || upper_half == 0xffff800000000000;
}

/**
 * Sanity-check a segment register against the constraints VMX places
 * on guest state before it is handed to KVM_SET_SREGS. Violations are
 * reported with warn() rather than treated as fatal; KVM itself will
 * reject truly illegal state. sregs is passed so cross-segment rules
 * (e.g. CS.DPL vs SS.DPL) can be checked.
 */
static void
checkSeg(const char *name, const int idx, const struct kvm_segment &seg,
         struct kvm_sregs sregs)
{
    // Check the register base
    switch (idx) {
      case MISCREG_TSL:
      case MISCREG_TR:
      case MISCREG_FS:
      case MISCREG_GS:
        if (!isCanonicalAddress(seg.base))
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;

      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
        if (seg.unusable)
            break;
        M5_FALLTHROUGH;
      case MISCREG_CS:
        // These bases must fit in 32 bits
        if (seg.base & 0xffffffff00000000ULL)
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;
    }

    // Check the type
    switch (idx) {
      case MISCREG_CS:
        switch (seg.type) {
          case 3:
            if (seg.dpl != 0)
                warn("CS type is 3 but dpl != 0.\n");
            break;
          case 9:
          case 11:
            // Non-conforming code segment: DPL must match SS.DPL
            if (seg.dpl != sregs.ss.dpl)
                warn("CS type is %i but CS DPL != SS DPL\n", seg.type);
            break;
          case 13:
          case 15:
            // Conforming code segment: DPL must not exceed SS.DPL
            if (seg.dpl > sregs.ss.dpl)
                warn("CS type is %i but CS DPL > SS DPL\n", seg.type);
            break;
          default:
            warn("Illegal CS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_SS:
        if (seg.unusable)
            break;
        switch (seg.type) {
          case 3:
            if (sregs.cs.type == 3 && seg.dpl != 0)
                warn("CS type is 3, but SS DPL is != 0.\n");
            M5_FALLTHROUGH;
          case 7:
            // CR0 bit 0 is PE (protection enable)
            if (!(sregs.cr0 & 1) && seg.dpl != 0)
                warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl);
            break;
          default:
            warn("Illegal SS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
        // Data segments must be accessed, and code segments (bit 3
        // set) must be readable.
        if (!(seg.type & 0x1) ||
            ((seg.type & 0x8) && !(seg.type & 0x2)))
            warn("%s has an illegal type field: %i\n", name, seg.type);
        break;

      case MISCREG_TR:
        // TODO: We should check the CPU mode
        if (seg.type != 3 && seg.type != 11)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
        // LDT descriptors must have system type 2
        if (seg.type != 2)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;
    }

    // Check the S (descriptor type) flag: set for code/data segments,
    // clear for system segments (TR, LDT).
    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
        M5_FALLTHROUGH;
      case MISCREG_CS:
        if (!seg.s)
            warn("%s: S flag not set\n", name);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
        M5_FALLTHROUGH;
      case MISCREG_TR:
        if (seg.s)
            warn("%s: S flag is set\n", name);
        break;
    }

    // Check present flag and limit/granularity consistency.
    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
      case MISCREG_TSL:
        if (seg.unusable)
            break;
        M5_FALLTHROUGH;
      case MISCREG_TR:
      case MISCREG_CS:
        if (!seg.present)
            warn("%s: P flag not set\n", name);

        // With 4K granularity the low 12 limit bits must be all ones;
        // without it the limit must fit in 20 bits.
        if (((seg.limit & 0xFFF) == 0 && seg.g) ||
            ((seg.limit & 0xFFF00000) != 0 && !seg.g)) {
            warn("%s limit (0x%x) and g (%i) combination is illegal.\n",
                 name, seg.limit, seg.g);
        }
        break;
    }

    // TODO: Check CS DB
}

// Construct the KVM-backed CPU. Hard requirements (TSS address, extended
// CPUID) panic if missing; optional capabilities (user NMI, vCPU events,
// debug regs, XSAVE, XCRs) are probed and remembered for later use.
X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params)
    : BaseKvmCPU(params),
      useXSave(params->useXSave)
{
    Kvm &kvm(*vm.kvm);

    if (!kvm.capSetTSSAddress())
        panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n");
    if (!kvm.capExtendedCPUID())
        panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n");
    if (!kvm.capUserNMI())
        warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n");
    if (!kvm.capVCPUEvents())
        warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n");

    haveDebugRegs = kvm.capDebugRegs();
    haveXSave = kvm.capXSave();
    haveXCRs = kvm.capXCRs();

    // Fall back to the legacy FPU interface if XSAVE was requested
    // but isn't available on this host.
    if (useXSave && !haveXSave) {
        warn("KVM: XSAVE not supported by host. MXCSR synchronization might be "
             "unreliable due to kernel bugs.\n");
        useXSave = false;
    } else if (!useXSave) {
        warn("KVM: XSave FPU/SIMD synchronization disabled by user.\n");
    }
}

X86KvmCPU::~X86KvmCPU()
{
}

void
X86KvmCPU::startup()
{
    BaseKvmCPU::startup();

    updateCPUID();

    // TODO: Do we need to create an identity mapped TSS area? We
    // should call kvm.vm.setTSSAddress() here in that case. It should
    // only be needed for old versions of the virtualization
    // extensions. We should make sure that the identity range is
    // reserved in the e820 memory map in that case.
}

// Dump all vCPU state KVM exposes, for debugging. FPU state comes from
// either the XSAVE or the legacy interface depending on configuration.
void
X86KvmCPU::dump() const
{
    dumpIntRegs();
    if (useXSave)
        dumpXSave();
    else
        dumpFpuRegs();
    dumpSpecRegs();
    dumpDebugRegs();
    dumpXCRs();
    dumpVCpuEvents();
    dumpMSRs();
}

void
X86KvmCPU::dumpFpuRegs() const
{
    struct kvm_fpu fpu;
    getFPUState(fpu);
    dumpKvm(fpu);
}

void
X86KvmCPU::dumpIntRegs() const
{
    struct kvm_regs regs;
    getRegisters(regs);
    dumpKvm(regs);
}

void
X86KvmCPU::dumpSpecRegs() const
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);
    dumpKvm(sregs);
}

void
X86KvmCPU::dumpDebugRegs() const
{
    if (haveDebugRegs) {
#ifdef KVM_GET_DEBUGREGS
        struct kvm_debugregs dregs;
        getDebugRegisters(dregs);
        dumpKvm(dregs);
#endif
    } else {
        inform("Debug registers not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXCRs() const
{
    if (haveXCRs) {
        struct kvm_xcrs xcrs;
        getXCRs(xcrs);
        dumpKvm(xcrs);
    } else {
        inform("XCRs not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXSave() const
{
    if (haveXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);
        dumpKvm(xsave);
    } else {
        inform("XSave not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpVCpuEvents() const
{
    struct kvm_vcpu_events events;
    getVCpuEvents(events);
    dumpKvm(events);
}

// Read and dump every MSR the host kernel claims to support. The
// kvm_msrs struct is variable-length, hence the newVarStruct helper.
void
X86KvmCPU::dumpMSRs() const
{
    const Kvm::MSRIndexVector &supported_msrs(vm.kvm->getSupportedMSRs());
    std::unique_ptr<struct kvm_msrs> msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(
            supported_msrs.size()));

    msrs->nmsrs = supported_msrs.size();
    for (int i = 0; i < supported_msrs.size(); ++i) {
        struct kvm_msr_entry &e(msrs->entries[i]);
        e.index = supported_msrs[i];
        e.reserved = 0;
        e.data = 0;
    }
    getMSRs(*msrs.get());

    dumpKvm(*msrs.get());
}

// Push the entire gem5 thread-context state into the KVM vCPU.
void
X86KvmCPU::updateKvmState()
{
    updateKvmStateRegs();
    updateKvmStateSRegs();
    updateKvmStateFPU();
    updateKvmStateMSRs();

    DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
    if (DTRACE(KvmContext))
        dump();
}

// Sync gem5 integer registers, RIP, and RFLAGS into KVM (KVM_SET_REGS).
void
X86KvmCPU::updateKvmStateRegs()
{
    struct kvm_regs regs;

#define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg)
    FOREACH_IREG();
#undef APPLY_IREG

    // gem5's instAddr() is a linear address; KVM wants an offset
    // relative to the CS base.
    regs.rip = tc->instAddr() - tc->readMiscReg(MISCREG_CS_BASE);

    /* You might think that setting regs.rflags to the contents
     * MISCREG_RFLAGS here would suffice. In that case you're
     * mistaken. We need to reconstruct it from a bunch of ucode
     * registers and wave a dead chicken over it (aka mask out and set
     * reserved bits) to get it to work.
     */
    regs.rflags = X86ISA::getRFlags(tc);

    setRegisters(regs);
}

// Fill in a kvm_segment from the gem5 segment registers for segment
// 'index' (base, limit, selector, and unpacked attributes).
static inline void
setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg,
                 const int index)
{
    SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index)));

    kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
    kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index));
    kvm_seg.type = attr.type;
    kvm_seg.present = attr.present;
    kvm_seg.dpl = attr.dpl;
    kvm_seg.db = attr.defaultSize;
    kvm_seg.s = attr.system;
    kvm_seg.l = attr.longMode;
    kvm_seg.g = attr.granularity;
    kvm_seg.avl = attr.avl;

    // A segment is normally unusable when the selector is zero. There
    // is a attr.unusable flag in gem5, but it seems unused. qemu
    // seems to set this to 0 all the time, so we just do the same and
    // hope for the best.
    kvm_seg.unusable = 0;
}

// Fill in a kvm_dtable (GDTR/IDTR image) from gem5 state.
static inline void
setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable,
                const int index)
{
    kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
}

static void
forceSegAccessed(struct kvm_segment &seg)
{
    // Intel's VMX requires that (some) usable segments are flagged as
    // 'accessed' (i.e., the lowest bit in the segment type is set)
    // when entering VMX. This wouldn't necessary be the case even if
    // gem5 did set the access bits correctly, so we force it to one
    // in that case.
    if (!seg.unusable)
        seg.type |= SEG_TYPE_BIT_ACCESSED;
}

// Sync segment/control/table registers into KVM (KVM_SET_SREGS),
// massaging the state to satisfy VMX guest-state requirements first.
void
X86KvmCPU::updateKvmStateSRegs()
{
    struct kvm_sregs sregs;

#define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg)
#define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx)

    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE

    // Clear the interrupt bitmap
    memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));

    // VMX requires CS, SS, DS, ES, FS, and GS to have the accessed
    // bit in the type field set.
    forceSegAccessed(sregs.cs);
    forceSegAccessed(sregs.ss);
    forceSegAccessed(sregs.ds);
    forceSegAccessed(sregs.es);
    forceSegAccessed(sregs.fs);
    forceSegAccessed(sregs.gs);

    // There are currently some cases where the active task isn't
    // marked as busy. This is illegal in VMX, so we force it to busy.
    if (sregs.tr.type == SEG_SYS_TYPE_TSS_AVAILABLE) {
        hack("tr.type (%i) is not busy. Forcing the busy bit.\n",
             sregs.tr.type);
        sregs.tr.type = SEG_SYS_TYPE_TSS_BUSY;
    }

    // VMX requires the DPL of SS and CS to be the same for
    // non-conforming code segments. It seems like m5 doesn't set the
    // DPL of SS correctly when taking interrupts, so we need to fix
    // that here.
    if ((sregs.cs.type == SEG_CS_TYPE_ACCESSED ||
         sregs.cs.type == SEG_CS_TYPE_READ_ACCESSED) &&
        sregs.cs.dpl != sregs.ss.dpl) {

        hack("CS.DPL (%i) != SS.DPL (%i): Forcing SS.DPL to %i\n",
             sregs.cs.dpl, sregs.ss.dpl, sregs.cs.dpl);
        sregs.ss.dpl = sregs.cs.dpl;
    }

    // Do checks after fixing up the state to avoid getting excessive
    // amounts of warnings.
    RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS));
    if (!rflags_nocc.vm) {
        // Do segment verification if the CPU isn't entering virtual
        // 8086 mode.  We currently assume that unrestricted guest
        // mode is available.

#define APPLY_SEGMENT(kreg, idx) \
        checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs)

        FOREACH_SEGMENT();
#undef APPLY_SEGMENT
    }

    setSpecialRegisters(sregs);
}

// Copy the FPU/SIMD state shared between the kvm_fpu and FXSave
// representations from gem5 into 'fpu' (T is one of those two types).
template <typename T>
static void
updateKvmStateFPUCommon(ThreadContext *tc, T &fpu)
{
    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    fpu.mxcsr = tc->readMiscRegNoEffect(MISCREG_MXCSR);
    fpu.fcw = tc->readMiscRegNoEffect(MISCREG_FCW);
    // No need to rebuild from MISCREG_FSW and MISCREG_TOP if we read
    // with effects.
    fpu.fsw = tc->readMiscReg(MISCREG_FSW);

    uint64_t ftw(tc->readMiscRegNoEffect(MISCREG_FTW));
    fpu.ftwx = X86ISA::convX87TagsToXTags(ftw);

    fpu.last_opcode = tc->readMiscRegNoEffect(MISCREG_FOP);

    // Convert from gem5's physical-register order to the TOP-relative
    // order used in the FXSAVE image.
    const unsigned top((fpu.fsw >> 11) & 0x7);
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(tc->readFloatReg(FLOATREG_FPR(reg_idx)));
        DPRINTF(KvmContext, "Setting KVM FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        X86ISA::storeFloat80(fpu.fpr[i], value);
    }

    // TODO: We should update the MMX state

    for (int i = 0; i < 16; ++i) {
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][0] =
            tc->readFloatRegBits(FLOATREG_XMM_LOW(i));
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][8] =
            tc->readFloatRegBits(FLOATREG_XMM_HIGH(i));
    }
}

// Sync FPU/SIMD state into KVM via the legacy KVM_SET_FPU interface.
void
X86KvmCPU::updateKvmStateFPULegacy()
{
    struct kvm_fpu fpu;

    // There is some padding in the FP registers, so we'd better zero
    // the whole struct.
    memset(&fpu, 0, sizeof(fpu));

    updateKvmStateFPUCommon(tc, fpu);

    // Only the flat 64-bit instruction/data pointers are carried
    // over; non-zero segment parts cannot be represented.
    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    fpu.last_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    fpu.last_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setFPUState(fpu);
}

// Sync FPU/SIMD state into KVM via KVM_SET_XSAVE, treating the legacy
// region of the XSAVE area as an FXSave image.
void
X86KvmCPU::updateKvmStateFPUXSave()
{
    struct kvm_xsave kxsave;
    FXSave &xsave(*(FXSave *)kxsave.region);

    // There is some padding and reserved fields in the structure, so
    // we'd better zero the whole thing.
    memset(&kxsave, 0, sizeof(kxsave));

    updateKvmStateFPUCommon(tc, xsave);

    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    xsave.ctrl64.fpu_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    xsave.ctrl64.fpu_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setXSave(kxsave);
}

// Dispatch to the configured FPU synchronization mechanism.
void
X86KvmCPU::updateKvmStateFPU()
{
    if (useXSave)
        updateKvmStateFPUXSave();
    else
        updateKvmStateFPULegacy();
}

// Sync MSR values from gem5 into KVM for every MSR both sides support.
void
X86KvmCPU::updateKvmStateMSRs()
{
    KvmMSRVector msrs;

    const Kvm::MSRIndexVector &indices(getMsrIntersection());

    for (auto it = indices.cbegin(); it != indices.cend(); ++it) {
        struct kvm_msr_entry e;

        e.index = *it;
        e.reserved = 0;
        e.data = tc->readMiscReg(msrMap.at(*it));
        DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n",
                e.index, e.data);

        msrs.push_back(e);
    }

    setMSRs(msrs);
}

// Pull the entire KVM vCPU state back into the gem5 thread context.
// Ordering matters: sregs must be applied before anything that relies
// on up-to-date segmentation state (see setContextSegment).
void
X86KvmCPU::updateThreadContext()
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;

    getRegisters(regs);
    getSpecialRegisters(sregs);

    DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n");
    if (DTRACE(KvmContext))
        dump();

    updateThreadContextRegs(regs, sregs);
    updateThreadContextSRegs(sregs);
    if (useXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);

        updateThreadContextXSave(xsave);
    } else {
        struct kvm_fpu fpu;
        getFPUState(fpu);

        updateThreadContextFPU(fpu);
    }
    updateThreadContextMSRs();

    // The M5 misc reg caches some values from other
    // registers. Writing to it with side effects causes it to be
    // updated from its source registers.
    tc->setMiscReg(MISCREG_M5_REG, 0);
}

// Copy integer registers, PC, and RFLAGS from KVM into gem5. sregs is
// needed to turn KVM's CS-relative rip back into a linear address.
void
X86KvmCPU::updateThreadContextRegs(const struct kvm_regs &regs,
                                   const struct kvm_sregs &sregs)
{
#define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    tc->pcState(PCState(regs.rip + sregs.cs.base));

    // Flags are spread out across multiple semi-magic registers so we
    // need some special care when updating them.
    X86ISA::setRFlags(tc, regs.rflags);
}


// Write one KVM segment register back into gem5's segment state.
inline void
setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
                  const int index)
{
    SegAttr attr(0);

    attr.type = kvm_seg.type;
    attr.present = kvm_seg.present;
    attr.dpl = kvm_seg.dpl;
    attr.defaultSize = kvm_seg.db;
    attr.system = kvm_seg.s;
    attr.longMode = kvm_seg.l;
    attr.granularity = kvm_seg.g;
    attr.avl = kvm_seg.avl;
    attr.unusable = kvm_seg.unusable;

    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
    tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
    tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
}

// Write one KVM descriptor-table register (GDTR/IDTR) back into gem5.
inline void
setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
                  const int index)
{
    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
}

// Copy special registers from KVM into gem5. APIC base and CR8 are
// expected to already match the values mirrored in the kvm_run struct.
void
X86KvmCPU::updateThreadContextSRegs(const struct kvm_sregs &sregs)
{
    assert(getKvmRunState()->apic_base == sregs.apic_base);
    assert(getKvmRunState()->cr8 == sregs.cr8);

#define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
#define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();
#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

// Copy the FPU/SIMD state shared between kvm_fpu and FXSave from 'fpu'
// back into the gem5 thread context.
template<typename T>
static void
updateThreadContextFPUCommon(ThreadContext *tc, const T &fpu)
{
    const unsigned top((fpu.fsw >> 11) & 0x7);

    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    // Translate from TOP-relative FXSAVE order back to gem5's
    // physical-register order.
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        DPRINTF(KvmContext, "Setting gem5 FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        tc->setFloatReg(FLOATREG_FPR(reg_idx), value);
    }

    // TODO: We should update the MMX state

    tc->setMiscRegNoEffect(MISCREG_X87_TOP, top);
    tc->setMiscRegNoEffect(MISCREG_MXCSR, fpu.mxcsr);
    tc->setMiscRegNoEffect(MISCREG_FCW, fpu.fcw);
    tc->setMiscRegNoEffect(MISCREG_FSW, fpu.fsw);

    uint64_t ftw(convX87XTagsToTags(fpu.ftwx));
    // TODO: Are these registers really the same?
    tc->setMiscRegNoEffect(MISCREG_FTW, ftw);
    tc->setMiscRegNoEffect(MISCREG_FTAG, ftw);

    tc->setMiscRegNoEffect(MISCREG_FOP, fpu.last_opcode);

    for (int i = 0; i < 16; ++i) {
        tc->setFloatRegBits(FLOATREG_XMM_LOW(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][0]);
        tc->setFloatRegBits(FLOATREG_XMM_HIGH(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][8]);
    }
}

// Copy legacy (KVM_GET_FPU) FPU state back into gem5. The segment
// parts of the FPU pointers are zeroed since kvm_fpu only carries
// flat pointers.
void
X86KvmCPU::updateThreadContextFPU(const struct kvm_fpu &fpu)
{
    updateThreadContextFPUCommon(tc, fpu);

    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, fpu.last_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, fpu.last_dp);
}

// Copy XSAVE-based FPU state back into gem5 (64-bit pointer variant of
// the FXSAVE image is assumed).
void
X86KvmCPU::updateThreadContextXSave(const struct kvm_xsave &kxsave)
{
    const FXSave &xsave(*(const FXSave *)kxsave.region);

    updateThreadContextFPUCommon(tc, xsave);

    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, xsave.ctrl64.fpu_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, xsave.ctrl64.fpu_dp);
}

// Read back all MSRs supported by both gem5 and KVM and store them in
// the thread context.
void
X86KvmCPU::updateThreadContextMSRs()
{
    const Kvm::MSRIndexVector &msrs(getMsrIntersection());

    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
    struct kvm_msr_entry *entry;

    // Create a list of MSRs to read
    kvm_msrs->nmsrs = msrs.size();
    entry = &kvm_msrs->entries[0];
    for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
        entry->index = *it;
        entry->reserved = 0;
        entry->data = 0;
    }

    getMSRs(*kvm_msrs.get());

    // Update M5's state
    entry = &kvm_msrs->entries[0];
    for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
        DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
                entry->index, entry->data);

        tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
    }
}

// Fetch the next pending interrupt from the simulated interrupt
// controller and deliver it to the KVM vCPU. NMIs, INIT, and STARTUP
// need special handling; everything else is injected as a regular
// external interrupt vector.
void
X86KvmCPU::deliverInterrupts()
{
    Fault fault;

    syncThreadContext();

    {
        // Migrate to the interrupt controller's thread to get the
        // interrupt. Even though the individual methods are safe to
        // call across threads, we might still lose interrupts unless
        // they are getInterrupt() and updateIntrInfo() are called
        // atomically.
        EventQueue::ScopedMigration migrate(interrupts[0]->eventQueue());
        fault = interrupts[0]->getInterrupt(tc);
        interrupts[0]->updateIntrInfo(tc);
    }

    X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
    if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "Delivering NMI\n");
        kvmNonMaskableInterrupt();
    } else if (dynamic_cast<InitInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "INIT interrupt\n");
        fault.get()->invoke(tc);
        // Delay the kvm state update since we won't enter KVM on this
        // tick.
        threadContextDirty = true;
        // HACK: gem5 doesn't actually have any BIOS code, which means
        // that we need to halt the thread and wait for a startup
        // interrupt before restarting the thread. The simulated CPUs
        // use the same kind of hack using a microcode routine.
        thread->suspend();
    } else if (dynamic_cast<StartupInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "STARTUP interrupt\n");
        fault.get()->invoke(tc);
        // The kvm state is assumed to have been updated when entering
        // kvmRun(), so we need to update manually it here.
        updateKvmState();
    } else if (x86int) {
        struct kvm_interrupt kvm_int;
        kvm_int.irq = x86int->getVector();

        DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
                fault->name(), kvm_int.irq);

        kvmInterrupt(kvm_int);
    } else {
        panic("KVM: Unknown interrupt type\n");
    }

}

// Run the vCPU for (approximately) 'ticks' ticks, delivering any
// pending simulated interrupts first. If KVM isn't ready for
// injection, request an interrupt window so KVM exits as soon as the
// guest can take one.
Tick
X86KvmCPU::kvmRun(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (interrupts[0]->checkInterruptsRaw()) {
        if (interrupts[0]->hasPendingUnmaskable()) {
            DPRINTF(KvmInt,
                    "Delivering unmaskable interrupt.\n");
            syncThreadContext();
            deliverInterrupts();
        } else if (kvm_run.ready_for_interrupt_injection) {
            // KVM claims that it is ready for an interrupt. It might
            // be lying if we just updated rflags and disabled
            // interrupts (e.g., by doing a CPU handover). Let's sync
            // the thread context and check if there are /really/
            // interrupts that should be delivered now.
            syncThreadContext();
            if (interrupts[0]->checkInterrupts(tc)) {
                DPRINTF(KvmInt,
                        "M5 has pending interrupts, delivering interrupt.\n");

                deliverInterrupts();
            } else {
                DPRINTF(KvmInt,
                        "Interrupt delivery delayed due to KVM confusion.\n");
                kvm_run.request_interrupt_window = 1;
            }
        } else if (!kvm_run.request_interrupt_window) {
            DPRINTF(KvmInt,
                    "M5 has pending interrupts, requesting interrupt "
                    "window.\n");
            kvm_run.request_interrupt_window = 1;
        }
    } else {
        kvm_run.request_interrupt_window = 0;
    }

    // The CPU might have been suspended as a result of the INIT
    // interrupt delivery hack. In that case, don't enter into KVM.
    if (_status == Idle)
        return 0;
    else
        return kvmRunWrapper(ticks);
}

// Run the vCPU while draining: if architecture state isn't drained
// yet, run with an interrupt window requested (bounded to 1ms) to let
// pending guest interrupts complete; otherwise just flush pending IO.
Tick
X86KvmCPU::kvmRunDrain()
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (!archIsDrained()) {
        DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");

        // Tell KVM to find a suitable place to deliver interrupts. This
        // should ensure that pending interrupts have been delivered and
        // things are reasonably consistent (i.e., no interrupts pending
        // in the guest).
        kvm_run.request_interrupt_window = 1;

        // Limit the run to 1 millisecond. That is hopefully enough to
        // reach an interrupt window. Otherwise, we'll just try again
        // later.
        return kvmRunWrapper(1 * SimClock::Float::ms);
    } else {
        DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");

        return kvmRunWrapper(0);
    }
}

Tick
X86KvmCPU::kvmRunWrapper(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Synchronize the APIC base and CR8 here since they are present
    // in the kvm_run struct, which makes the synchronization really
    // cheap.
1269 kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE); 1270 kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8); 1271 1272 const Tick run_ticks(BaseKvmCPU::kvmRun(ticks)); 1273 1274 tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base); 1275 kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8); 1276 1277 return run_ticks; 1278} 1279 1280uint64_t 1281X86KvmCPU::getHostCycles() const 1282{ 1283 return getMSR(MSR_TSC); 1284} 1285 1286void 1287X86KvmCPU::handleIOMiscReg32(int miscreg) 1288{ 1289 struct kvm_run &kvm_run(*getKvmRunState()); 1290 const uint16_t port(kvm_run.io.port); 1291 1292 assert(kvm_run.exit_reason == KVM_EXIT_IO); 1293 1294 if (kvm_run.io.size != 4) { 1295 panic("Unexpected IO size (%u) for address 0x%x.\n", 1296 kvm_run.io.size, port); 1297 } 1298 1299 if (kvm_run.io.count != 1) { 1300 panic("Unexpected IO count (%u) for address 0x%x.\n", 1301 kvm_run.io.count, port); 1302 } 1303 1304 uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset)); 1305 if (kvm_run.io.direction == KVM_EXIT_IO_OUT) 1306 tc->setMiscReg(miscreg, *data); 1307 else 1308 *data = tc->readMiscRegNoEffect(miscreg); 1309} 1310 1311Tick 1312X86KvmCPU::handleKvmExitIO() 1313{ 1314 struct kvm_run &kvm_run(*getKvmRunState()); 1315 bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT); 1316 unsigned char *guestData(getGuestData(kvm_run.io.data_offset)); 1317 Tick delay(0); 1318 uint16_t port(kvm_run.io.port); 1319 Addr pAddr; 1320 const int count(kvm_run.io.count); 1321 1322 assert(kvm_run.io.direction == KVM_EXIT_IO_IN || 1323 kvm_run.io.direction == KVM_EXIT_IO_OUT); 1324 1325 DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n", 1326 (isWrite ? "out" : "in"), kvm_run.io.port); 1327 1328 /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we 1329 * don't use the TLB component, we need to intercept and handle 1330 * the PCI configuration space IO ports here. 1331 * 1332 * The IO port PCI discovery mechanism uses one address register 1333 * and one data register. 
We map the address register to a misc 1334 * reg and use that to re-route data register accesses to the 1335 * right location in the PCI configuration space. 1336 */ 1337 if (port == IO_PCI_CONF_ADDR) { 1338 handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS); 1339 return 0; 1340 } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) { 1341 Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS)); 1342 if (pciConfigAddr & 0x80000000) { 1343 pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) | 1344 (port & 0x3)); 1345 } else { 1346 pAddr = X86ISA::x86IOAddress(port); 1347 } 1348 } else { 1349 pAddr = X86ISA::x86IOAddress(port); 1350 } 1351 1352 const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq); 1353 // Temporarily lock and migrate to the device event queue to 1354 // prevent races in multi-core mode. 1355 EventQueue::ScopedMigration migrate(deviceEventQueue()); 1356 for (int i = 0; i < count; ++i) {
| 1/* 2 * Copyright (c) 2013 Andreas Sandberg 3 * All rights reserved 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * Authors: Andreas Sandberg 29 */ 30 31#include "cpu/kvm/x86_cpu.hh" 32 33#include <linux/kvm.h> 34 35#include <algorithm> 36#include <cerrno> 37#include <memory> 38 39#include "arch/registers.hh" 40#include "arch/x86/cpuid.hh" 41#include "arch/x86/regs/msr.hh" 42#include "arch/x86/utility.hh" 43#include "cpu/kvm/base.hh" 44#include "debug/Drain.hh" 45#include "debug/Kvm.hh" 46#include "debug/KvmContext.hh" 47#include "debug/KvmIO.hh" 48#include "debug/KvmInt.hh" 49 50using namespace X86ISA; 51 52#define MSR_TSC 0x10 53 54#define IO_PCI_CONF_ADDR 0xCF8 55#define IO_PCI_CONF_DATA_BASE 0xCFC 56 57// Task segment type of an inactive 32-bit or 64-bit task 58#define SEG_SYS_TYPE_TSS_AVAILABLE 9 59// Task segment type of an active 32-bit or 64-bit task 60#define SEG_SYS_TYPE_TSS_BUSY 11 61 62// Non-conforming accessed code segment 63#define SEG_CS_TYPE_ACCESSED 9 64// Non-conforming accessed code segment that can be read 65#define SEG_CS_TYPE_READ_ACCESSED 11 66 67// The lowest bit of the type field for normal segments (code and 68// data) is used to indicate that a segment has been accessed. 
69#define SEG_TYPE_BIT_ACCESSED 1 70 71struct FXSave 72{ 73 uint16_t fcw; 74 uint16_t fsw; 75 uint8_t ftwx; 76 uint8_t pad0; 77 uint16_t last_opcode; 78 union { 79 struct { 80 uint32_t fpu_ip; 81 uint16_t fpu_cs; 82 uint16_t pad1; 83 uint32_t fpu_dp; 84 uint16_t fpu_ds; 85 uint16_t pad2; 86 } ctrl32; 87 88 struct { 89 uint64_t fpu_ip; 90 uint64_t fpu_dp; 91 } ctrl64; 92 }; 93 uint32_t mxcsr; 94 uint32_t mxcsr_mask; 95 96 uint8_t fpr[8][16]; 97 uint8_t xmm[16][16]; 98 99 uint64_t reserved[12]; 100} M5_ATTR_PACKED; 101 102static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave"); 103 104#define FOREACH_IREG() \ 105 do { \ 106 APPLY_IREG(rax, INTREG_RAX); \ 107 APPLY_IREG(rbx, INTREG_RBX); \ 108 APPLY_IREG(rcx, INTREG_RCX); \ 109 APPLY_IREG(rdx, INTREG_RDX); \ 110 APPLY_IREG(rsi, INTREG_RSI); \ 111 APPLY_IREG(rdi, INTREG_RDI); \ 112 APPLY_IREG(rsp, INTREG_RSP); \ 113 APPLY_IREG(rbp, INTREG_RBP); \ 114 APPLY_IREG(r8, INTREG_R8); \ 115 APPLY_IREG(r9, INTREG_R9); \ 116 APPLY_IREG(r10, INTREG_R10); \ 117 APPLY_IREG(r11, INTREG_R11); \ 118 APPLY_IREG(r12, INTREG_R12); \ 119 APPLY_IREG(r13, INTREG_R13); \ 120 APPLY_IREG(r14, INTREG_R14); \ 121 APPLY_IREG(r15, INTREG_R15); \ 122 } while (0) 123 124#define FOREACH_SREG() \ 125 do { \ 126 APPLY_SREG(cr0, MISCREG_CR0); \ 127 APPLY_SREG(cr2, MISCREG_CR2); \ 128 APPLY_SREG(cr3, MISCREG_CR3); \ 129 APPLY_SREG(cr4, MISCREG_CR4); \ 130 APPLY_SREG(cr8, MISCREG_CR8); \ 131 APPLY_SREG(efer, MISCREG_EFER); \ 132 APPLY_SREG(apic_base, MISCREG_APIC_BASE); \ 133 } while (0) 134 135#define FOREACH_DREG() \ 136 do { \ 137 APPLY_DREG(db[0], MISCREG_DR0); \ 138 APPLY_DREG(db[1], MISCREG_DR1); \ 139 APPLY_DREG(db[2], MISCREG_DR2); \ 140 APPLY_DREG(db[3], MISCREG_DR3); \ 141 APPLY_DREG(dr6, MISCREG_DR6); \ 142 APPLY_DREG(dr7, MISCREG_DR7); \ 143 } while (0) 144 145#define FOREACH_SEGMENT() \ 146 do { \ 147 APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE); \ 148 APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE); \ 149 
APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE); \ 150 APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE); \ 151 APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE); \ 152 APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE); \ 153 APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE); \ 154 APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \ 155 } while (0) 156 157#define FOREACH_DTABLE() \ 158 do { \ 159 APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE); \ 160 APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \ 161 } while (0) 162 163template<typename STRUCT, typename ENTRY> 164static STRUCT *newVarStruct(size_t entries) 165{ 166 return (STRUCT *)operator new(sizeof(STRUCT) + entries * sizeof(ENTRY)); 167} 168 169static void 170dumpKvm(const struct kvm_regs ®s) 171{ 172 inform("KVM register state:\n"); 173 174#define APPLY_IREG(kreg, mreg) \ 175 inform("\t" # kreg ": 0x%llx\n", regs.kreg) 176 177 FOREACH_IREG(); 178 179#undef APPLY_IREG 180 181 inform("\trip: 0x%llx\n", regs.rip); 182 inform("\trflags: 0x%llx\n", regs.rflags); 183} 184 185static void 186dumpKvm(const char *reg_name, const struct kvm_segment &seg) 187{ 188 inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n" 189 "\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n", 190 reg_name, 191 seg.base, seg.limit, seg.selector, seg.type, 192 seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl, seg.unusable); 193} 194 195static void 196dumpKvm(const char *reg_name, const struct kvm_dtable &dtable) 197{ 198 inform("\t%s: @0x%llx+%x\n", 199 reg_name, dtable.base, dtable.limit); 200} 201 202static void 203dumpKvm(const struct kvm_sregs &sregs) 204{ 205#define APPLY_SREG(kreg, mreg) \ 206 inform("\t" # kreg ": 0x%llx\n", sregs.kreg); 207#define APPLY_SEGMENT(kreg, idx) \ 208 dumpKvm(# kreg, sregs.kreg); 209#define APPLY_DTABLE(kreg, idx) \ 210 dumpKvm(# kreg, sregs.kreg); 211 212 inform("Special registers:\n"); 213 FOREACH_SEGMENT(); 214 FOREACH_SREG(); 215 
FOREACH_DTABLE(); 216 217 inform("Interrupt Bitmap:"); 218 for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64) 219 inform(" 0x%.8x", sregs.interrupt_bitmap[i / 64]); 220 221#undef APPLY_SREG 222#undef APPLY_SEGMENT 223#undef APPLY_DTABLE 224} 225 226#ifdef KVM_GET_DEBUGREGS 227static void 228dumpKvm(const struct kvm_debugregs ®s) 229{ 230 inform("KVM debug state:\n"); 231 232#define APPLY_DREG(kreg, mreg) \ 233 inform("\t" # kreg ": 0x%llx\n", regs.kreg) 234 235 FOREACH_DREG(); 236 237#undef APPLY_DREG 238 239 inform("\tflags: 0x%llx\n", regs.flags); 240} 241#endif 242 243static void 244dumpFpuSpec(const struct FXSave &xs) 245{ 246 inform("\tlast_ip: 0x%x\n", xs.ctrl64.fpu_ip); 247 inform("\tlast_dp: 0x%x\n", xs.ctrl64.fpu_dp); 248 inform("\tmxcsr_mask: 0x%x\n", xs.mxcsr_mask); 249} 250 251static void 252dumpFpuSpec(const struct kvm_fpu &fpu) 253{ 254 inform("\tlast_ip: 0x%x\n", fpu.last_ip); 255 inform("\tlast_dp: 0x%x\n", fpu.last_dp); 256} 257 258template<typename T> 259static void 260dumpFpuCommon(const T &fpu) 261{ 262 const unsigned top((fpu.fsw >> 11) & 0x7); 263 inform("\tfcw: 0x%x\n", fpu.fcw); 264 265 inform("\tfsw: 0x%x (top: %i, " 266 "conditions: %s%s%s%s, exceptions: %s%s%s%s%s%s %s%s%s)\n", 267 fpu.fsw, top, 268 269 (fpu.fsw & CC0Bit) ? "C0" : "", 270 (fpu.fsw & CC1Bit) ? "C1" : "", 271 (fpu.fsw & CC2Bit) ? "C2" : "", 272 (fpu.fsw & CC3Bit) ? "C3" : "", 273 274 (fpu.fsw & IEBit) ? "I" : "", 275 (fpu.fsw & DEBit) ? "D" : "", 276 (fpu.fsw & ZEBit) ? "Z" : "", 277 (fpu.fsw & OEBit) ? "O" : "", 278 (fpu.fsw & UEBit) ? "U" : "", 279 (fpu.fsw & PEBit) ? "P" : "", 280 281 (fpu.fsw & StackFaultBit) ? "SF " : "", 282 (fpu.fsw & ErrSummaryBit) ? "ES " : "", 283 (fpu.fsw & BusyBit) ? 
"BUSY " : "" 284 ); 285 inform("\tftwx: 0x%x\n", fpu.ftwx); 286 inform("\tlast_opcode: 0x%x\n", fpu.last_opcode); 287 dumpFpuSpec(fpu); 288 inform("\tmxcsr: 0x%x\n", fpu.mxcsr); 289 inform("\tFP Stack:\n"); 290 for (int i = 0; i < 8; ++i) { 291 const unsigned reg_idx((i + top) & 0x7); 292 const bool empty(!((fpu.ftwx >> reg_idx) & 0x1)); 293 const double value(X86ISA::loadFloat80(fpu.fpr[i])); 294 char hex[33]; 295 for (int j = 0; j < 10; ++j) 296 snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]); 297 inform("\t\tST%i/%i: 0x%s (%f)%s\n", i, reg_idx, 298 hex, value, empty ? " (e)" : ""); 299 } 300 inform("\tXMM registers:\n"); 301 for (int i = 0; i < 16; ++i) { 302 char hex[33]; 303 for (int j = 0; j < 16; ++j) 304 snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]); 305 inform("\t\t%i: 0x%s\n", i, hex); 306 } 307} 308 309static void 310dumpKvm(const struct kvm_fpu &fpu) 311{ 312 inform("FPU registers:\n"); 313 dumpFpuCommon(fpu); 314} 315 316static void 317dumpKvm(const struct kvm_xsave &xsave) 318{ 319 inform("FPU registers (XSave):\n"); 320 dumpFpuCommon(*(FXSave *)xsave.region); 321} 322 323static void 324dumpKvm(const struct kvm_msrs &msrs) 325{ 326 inform("MSRs:\n"); 327 328 for (int i = 0; i < msrs.nmsrs; ++i) { 329 const struct kvm_msr_entry &e(msrs.entries[i]); 330 331 inform("\t0x%x: 0x%x\n", e.index, e.data); 332 } 333} 334 335static void 336dumpKvm(const struct kvm_xcrs ®s) 337{ 338 inform("KVM XCR registers:\n"); 339 340 inform("\tFlags: 0x%x\n", regs.flags); 341 for (int i = 0; i < regs.nr_xcrs; ++i) { 342 inform("\tXCR[0x%x]: 0x%x\n", 343 regs.xcrs[i].xcr, 344 regs.xcrs[i].value); 345 } 346} 347 348static void 349dumpKvm(const struct kvm_vcpu_events &events) 350{ 351 inform("vCPU events:\n"); 352 353 inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n", 354 events.exception.injected, events.exception.nr, 355 events.exception.has_error_code, events.exception.error_code); 356 357 inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n", 358 
events.interrupt.injected, events.interrupt.nr, 359 events.interrupt.soft); 360 361 inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n", 362 events.nmi.injected, events.nmi.pending, 363 events.nmi.masked); 364 365 inform("\tSIPI vector: 0x%x\n", events.sipi_vector); 366 inform("\tFlags: 0x%x\n", events.flags); 367} 368 369static bool 370isCanonicalAddress(uint64_t addr) 371{ 372 // x86-64 doesn't currently use the full 64-bit virtual address 373 // space, instead it uses signed 48 bit addresses that are 374 // sign-extended to 64 bits. Such addresses are known as 375 // "canonical". 376 uint64_t upper_half(addr & 0xffff800000000000ULL); 377 return upper_half == 0 || upper_half == 0xffff800000000000; 378} 379 380static void 381checkSeg(const char *name, const int idx, const struct kvm_segment &seg, 382 struct kvm_sregs sregs) 383{ 384 // Check the register base 385 switch (idx) { 386 case MISCREG_TSL: 387 case MISCREG_TR: 388 case MISCREG_FS: 389 case MISCREG_GS: 390 if (!isCanonicalAddress(seg.base)) 391 warn("Illegal %s base: 0x%x\n", name, seg.base); 392 break; 393 394 case MISCREG_SS: 395 case MISCREG_DS: 396 case MISCREG_ES: 397 if (seg.unusable) 398 break; 399 M5_FALLTHROUGH; 400 case MISCREG_CS: 401 if (seg.base & 0xffffffff00000000ULL) 402 warn("Illegal %s base: 0x%x\n", name, seg.base); 403 break; 404 } 405 406 // Check the type 407 switch (idx) { 408 case MISCREG_CS: 409 switch (seg.type) { 410 case 3: 411 if (seg.dpl != 0) 412 warn("CS type is 3 but dpl != 0.\n"); 413 break; 414 case 9: 415 case 11: 416 if (seg.dpl != sregs.ss.dpl) 417 warn("CS type is %i but CS DPL != SS DPL\n", seg.type); 418 break; 419 case 13: 420 case 15: 421 if (seg.dpl > sregs.ss.dpl) 422 warn("CS type is %i but CS DPL > SS DPL\n", seg.type); 423 break; 424 default: 425 warn("Illegal CS type: %i\n", seg.type); 426 break; 427 } 428 break; 429 430 case MISCREG_SS: 431 if (seg.unusable) 432 break; 433 switch (seg.type) { 434 case 3: 435 if (sregs.cs.type == 3 && seg.dpl != 0) 436 
warn("CS type is 3, but SS DPL is != 0.\n"); 437 M5_FALLTHROUGH; 438 case 7: 439 if (!(sregs.cr0 & 1) && seg.dpl != 0) 440 warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl); 441 break; 442 default: 443 warn("Illegal SS type: %i\n", seg.type); 444 break; 445 } 446 break; 447 448 case MISCREG_DS: 449 case MISCREG_ES: 450 case MISCREG_FS: 451 case MISCREG_GS: 452 if (seg.unusable) 453 break; 454 if (!(seg.type & 0x1) || 455 ((seg.type & 0x8) && !(seg.type & 0x2))) 456 warn("%s has an illegal type field: %i\n", name, seg.type); 457 break; 458 459 case MISCREG_TR: 460 // TODO: We should check the CPU mode 461 if (seg.type != 3 && seg.type != 11) 462 warn("%s: Illegal segment type (%i)\n", name, seg.type); 463 break; 464 465 case MISCREG_TSL: 466 if (seg.unusable) 467 break; 468 if (seg.type != 2) 469 warn("%s: Illegal segment type (%i)\n", name, seg.type); 470 break; 471 } 472 473 switch (idx) { 474 case MISCREG_SS: 475 case MISCREG_DS: 476 case MISCREG_ES: 477 case MISCREG_FS: 478 case MISCREG_GS: 479 if (seg.unusable) 480 break; 481 M5_FALLTHROUGH; 482 case MISCREG_CS: 483 if (!seg.s) 484 warn("%s: S flag not set\n", name); 485 break; 486 487 case MISCREG_TSL: 488 if (seg.unusable) 489 break; 490 M5_FALLTHROUGH; 491 case MISCREG_TR: 492 if (seg.s) 493 warn("%s: S flag is set\n", name); 494 break; 495 } 496 497 switch (idx) { 498 case MISCREG_SS: 499 case MISCREG_DS: 500 case MISCREG_ES: 501 case MISCREG_FS: 502 case MISCREG_GS: 503 case MISCREG_TSL: 504 if (seg.unusable) 505 break; 506 M5_FALLTHROUGH; 507 case MISCREG_TR: 508 case MISCREG_CS: 509 if (!seg.present) 510 warn("%s: P flag not set\n", name); 511 512 if (((seg.limit & 0xFFF) == 0 && seg.g) || 513 ((seg.limit & 0xFFF00000) != 0 && !seg.g)) { 514 warn("%s limit (0x%x) and g (%i) combination is illegal.\n", 515 name, seg.limit, seg.g); 516 } 517 break; 518 } 519 520 // TODO: Check CS DB 521} 522 523X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params) 524 : BaseKvmCPU(params), 525 useXSave(params->useXSave) 526{ 527 
Kvm &kvm(*vm.kvm); 528 529 if (!kvm.capSetTSSAddress()) 530 panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n"); 531 if (!kvm.capExtendedCPUID()) 532 panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n"); 533 if (!kvm.capUserNMI()) 534 warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n"); 535 if (!kvm.capVCPUEvents()) 536 warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n"); 537 538 haveDebugRegs = kvm.capDebugRegs(); 539 haveXSave = kvm.capXSave(); 540 haveXCRs = kvm.capXCRs(); 541 542 if (useXSave && !haveXSave) { 543 warn("KVM: XSAVE not supported by host. MXCSR synchronization might be " 544 "unreliable due to kernel bugs.\n"); 545 useXSave = false; 546 } else if (!useXSave) { 547 warn("KVM: XSave FPU/SIMD synchronization disabled by user.\n"); 548 } 549} 550 551X86KvmCPU::~X86KvmCPU() 552{ 553} 554 555void 556X86KvmCPU::startup() 557{ 558 BaseKvmCPU::startup(); 559 560 updateCPUID(); 561 562 // TODO: Do we need to create an identity mapped TSS area? We 563 // should call kvm.vm.setTSSAddress() here in that case. It should 564 // only be needed for old versions of the virtualization 565 // extensions. We should make sure that the identity range is 566 // reserved in the e820 memory map in that case. 
567} 568 569void 570X86KvmCPU::dump() const 571{ 572 dumpIntRegs(); 573 if (useXSave) 574 dumpXSave(); 575 else 576 dumpFpuRegs(); 577 dumpSpecRegs(); 578 dumpDebugRegs(); 579 dumpXCRs(); 580 dumpVCpuEvents(); 581 dumpMSRs(); 582} 583 584void 585X86KvmCPU::dumpFpuRegs() const 586{ 587 struct kvm_fpu fpu; 588 getFPUState(fpu); 589 dumpKvm(fpu); 590} 591 592void 593X86KvmCPU::dumpIntRegs() const 594{ 595 struct kvm_regs regs; 596 getRegisters(regs); 597 dumpKvm(regs); 598} 599 600void 601X86KvmCPU::dumpSpecRegs() const 602{ 603 struct kvm_sregs sregs; 604 getSpecialRegisters(sregs); 605 dumpKvm(sregs); 606} 607 608void 609X86KvmCPU::dumpDebugRegs() const 610{ 611 if (haveDebugRegs) { 612#ifdef KVM_GET_DEBUGREGS 613 struct kvm_debugregs dregs; 614 getDebugRegisters(dregs); 615 dumpKvm(dregs); 616#endif 617 } else { 618 inform("Debug registers not supported by kernel.\n"); 619 } 620} 621 622void 623X86KvmCPU::dumpXCRs() const 624{ 625 if (haveXCRs) { 626 struct kvm_xcrs xcrs; 627 getXCRs(xcrs); 628 dumpKvm(xcrs); 629 } else { 630 inform("XCRs not supported by kernel.\n"); 631 } 632} 633 634void 635X86KvmCPU::dumpXSave() const 636{ 637 if (haveXSave) { 638 struct kvm_xsave xsave; 639 getXSave(xsave); 640 dumpKvm(xsave); 641 } else { 642 inform("XSave not supported by kernel.\n"); 643 } 644} 645 646void 647X86KvmCPU::dumpVCpuEvents() const 648{ 649 struct kvm_vcpu_events events; 650 getVCpuEvents(events); 651 dumpKvm(events); 652} 653 654void 655X86KvmCPU::dumpMSRs() const 656{ 657 const Kvm::MSRIndexVector &supported_msrs(vm.kvm->getSupportedMSRs()); 658 std::unique_ptr<struct kvm_msrs> msrs( 659 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>( 660 supported_msrs.size())); 661 662 msrs->nmsrs = supported_msrs.size(); 663 for (int i = 0; i < supported_msrs.size(); ++i) { 664 struct kvm_msr_entry &e(msrs->entries[i]); 665 e.index = supported_msrs[i]; 666 e.reserved = 0; 667 e.data = 0; 668 } 669 getMSRs(*msrs.get()); 670 671 dumpKvm(*msrs.get()); 672} 673 674void 
675X86KvmCPU::updateKvmState() 676{ 677 updateKvmStateRegs(); 678 updateKvmStateSRegs(); 679 updateKvmStateFPU(); 680 updateKvmStateMSRs(); 681 682 DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n"); 683 if (DTRACE(KvmContext)) 684 dump(); 685} 686 687void 688X86KvmCPU::updateKvmStateRegs() 689{ 690 struct kvm_regs regs; 691 692#define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg) 693 FOREACH_IREG(); 694#undef APPLY_IREG 695 696 regs.rip = tc->instAddr() - tc->readMiscReg(MISCREG_CS_BASE); 697 698 /* You might think that setting regs.rflags to the contents 699 * MISCREG_RFLAGS here would suffice. In that case you're 700 * mistaken. We need to reconstruct it from a bunch of ucode 701 * registers and wave a dead chicken over it (aka mask out and set 702 * reserved bits) to get it to work. 703 */ 704 regs.rflags = X86ISA::getRFlags(tc); 705 706 setRegisters(regs); 707} 708 709static inline void 710setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg, 711 const int index) 712{ 713 SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index))); 714 715 kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index)); 716 kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index)); 717 kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index)); 718 kvm_seg.type = attr.type; 719 kvm_seg.present = attr.present; 720 kvm_seg.dpl = attr.dpl; 721 kvm_seg.db = attr.defaultSize; 722 kvm_seg.s = attr.system; 723 kvm_seg.l = attr.longMode; 724 kvm_seg.g = attr.granularity; 725 kvm_seg.avl = attr.avl; 726 727 // A segment is normally unusable when the selector is zero. There 728 // is a attr.unusable flag in gem5, but it seems unused. qemu 729 // seems to set this to 0 all the time, so we just do the same and 730 // hope for the best. 
731 kvm_seg.unusable = 0; 732} 733 734static inline void 735setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable, 736 const int index) 737{ 738 kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index)); 739 kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index)); 740} 741 742static void 743forceSegAccessed(struct kvm_segment &seg) 744{ 745 // Intel's VMX requires that (some) usable segments are flagged as 746 // 'accessed' (i.e., the lowest bit in the segment type is set) 747 // when entering VMX. This wouldn't necessary be the case even if 748 // gem5 did set the access bits correctly, so we force it to one 749 // in that case. 750 if (!seg.unusable) 751 seg.type |= SEG_TYPE_BIT_ACCESSED; 752} 753 754void 755X86KvmCPU::updateKvmStateSRegs() 756{ 757 struct kvm_sregs sregs; 758 759#define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg) 760#define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx) 761#define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx) 762 763 FOREACH_SREG(); 764 FOREACH_SEGMENT(); 765 FOREACH_DTABLE(); 766 767#undef APPLY_SREG 768#undef APPLY_SEGMENT 769#undef APPLY_DTABLE 770 771 // Clear the interrupt bitmap 772 memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap)); 773 774 // VMX requires CS, SS, DS, ES, FS, and GS to have the accessed 775 // bit in the type field set. 776 forceSegAccessed(sregs.cs); 777 forceSegAccessed(sregs.ss); 778 forceSegAccessed(sregs.ds); 779 forceSegAccessed(sregs.es); 780 forceSegAccessed(sregs.fs); 781 forceSegAccessed(sregs.gs); 782 783 // There are currently some cases where the active task isn't 784 // marked as busy. This is illegal in VMX, so we force it to busy. 785 if (sregs.tr.type == SEG_SYS_TYPE_TSS_AVAILABLE) { 786 hack("tr.type (%i) is not busy. 
Forcing the busy bit.\n", 787 sregs.tr.type); 788 sregs.tr.type = SEG_SYS_TYPE_TSS_BUSY; 789 } 790 791 // VMX requires the DPL of SS and CS to be the same for 792 // non-conforming code segments. It seems like m5 doesn't set the 793 // DPL of SS correctly when taking interrupts, so we need to fix 794 // that here. 795 if ((sregs.cs.type == SEG_CS_TYPE_ACCESSED || 796 sregs.cs.type == SEG_CS_TYPE_READ_ACCESSED) && 797 sregs.cs.dpl != sregs.ss.dpl) { 798 799 hack("CS.DPL (%i) != SS.DPL (%i): Forcing SS.DPL to %i\n", 800 sregs.cs.dpl, sregs.ss.dpl, sregs.cs.dpl); 801 sregs.ss.dpl = sregs.cs.dpl; 802 } 803 804 // Do checks after fixing up the state to avoid getting excessive 805 // amounts of warnings. 806 RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS)); 807 if (!rflags_nocc.vm) { 808 // Do segment verification if the CPU isn't entering virtual 809 // 8086 mode. We currently assume that unrestricted guest 810 // mode is available. 811 812#define APPLY_SEGMENT(kreg, idx) \ 813 checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs) 814 815 FOREACH_SEGMENT(); 816#undef APPLY_SEGMENT 817 } 818 819 setSpecialRegisters(sregs); 820} 821 822template <typename T> 823static void 824updateKvmStateFPUCommon(ThreadContext *tc, T &fpu) 825{ 826 static_assert(sizeof(X86ISA::FloatRegBits) == 8, 827 "Unexpected size of X86ISA::FloatRegBits"); 828 829 fpu.mxcsr = tc->readMiscRegNoEffect(MISCREG_MXCSR); 830 fpu.fcw = tc->readMiscRegNoEffect(MISCREG_FCW); 831 // No need to rebuild from MISCREG_FSW and MISCREG_TOP if we read 832 // with effects. 
833 fpu.fsw = tc->readMiscReg(MISCREG_FSW); 834 835 uint64_t ftw(tc->readMiscRegNoEffect(MISCREG_FTW)); 836 fpu.ftwx = X86ISA::convX87TagsToXTags(ftw); 837 838 fpu.last_opcode = tc->readMiscRegNoEffect(MISCREG_FOP); 839 840 const unsigned top((fpu.fsw >> 11) & 0x7); 841 for (int i = 0; i < 8; ++i) { 842 const unsigned reg_idx((i + top) & 0x7); 843 const double value(tc->readFloatReg(FLOATREG_FPR(reg_idx))); 844 DPRINTF(KvmContext, "Setting KVM FP reg %i (st[%i]) := %f\n", 845 reg_idx, i, value); 846 X86ISA::storeFloat80(fpu.fpr[i], value); 847 } 848 849 // TODO: We should update the MMX state 850 851 for (int i = 0; i < 16; ++i) { 852 *(X86ISA::FloatRegBits *)&fpu.xmm[i][0] = 853 tc->readFloatRegBits(FLOATREG_XMM_LOW(i)); 854 *(X86ISA::FloatRegBits *)&fpu.xmm[i][8] = 855 tc->readFloatRegBits(FLOATREG_XMM_HIGH(i)); 856 } 857} 858 859void 860X86KvmCPU::updateKvmStateFPULegacy() 861{ 862 struct kvm_fpu fpu; 863 864 // There is some padding in the FP registers, so we'd better zero 865 // the whole struct. 866 memset(&fpu, 0, sizeof(fpu)); 867 868 updateKvmStateFPUCommon(tc, fpu); 869 870 if (tc->readMiscRegNoEffect(MISCREG_FISEG)) 871 warn_once("MISCREG_FISEG is non-zero.\n"); 872 873 fpu.last_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF); 874 875 if (tc->readMiscRegNoEffect(MISCREG_FOSEG)) 876 warn_once("MISCREG_FOSEG is non-zero.\n"); 877 878 fpu.last_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF); 879 880 setFPUState(fpu); 881} 882 883void 884X86KvmCPU::updateKvmStateFPUXSave() 885{ 886 struct kvm_xsave kxsave; 887 FXSave &xsave(*(FXSave *)kxsave.region); 888 889 // There is some padding and reserved fields in the structure, so 890 // we'd better zero the whole thing. 
891 memset(&kxsave, 0, sizeof(kxsave)); 892 893 updateKvmStateFPUCommon(tc, xsave); 894 895 if (tc->readMiscRegNoEffect(MISCREG_FISEG)) 896 warn_once("MISCREG_FISEG is non-zero.\n"); 897 898 xsave.ctrl64.fpu_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF); 899 900 if (tc->readMiscRegNoEffect(MISCREG_FOSEG)) 901 warn_once("MISCREG_FOSEG is non-zero.\n"); 902 903 xsave.ctrl64.fpu_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF); 904 905 setXSave(kxsave); 906} 907 908void 909X86KvmCPU::updateKvmStateFPU() 910{ 911 if (useXSave) 912 updateKvmStateFPUXSave(); 913 else 914 updateKvmStateFPULegacy(); 915} 916 917void 918X86KvmCPU::updateKvmStateMSRs() 919{ 920 KvmMSRVector msrs; 921 922 const Kvm::MSRIndexVector &indices(getMsrIntersection()); 923 924 for (auto it = indices.cbegin(); it != indices.cend(); ++it) { 925 struct kvm_msr_entry e; 926 927 e.index = *it; 928 e.reserved = 0; 929 e.data = tc->readMiscReg(msrMap.at(*it)); 930 DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n", 931 e.index, e.data); 932 933 msrs.push_back(e); 934 } 935 936 setMSRs(msrs); 937} 938 939void 940X86KvmCPU::updateThreadContext() 941{ 942 struct kvm_regs regs; 943 struct kvm_sregs sregs; 944 945 getRegisters(regs); 946 getSpecialRegisters(sregs); 947 948 DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n"); 949 if (DTRACE(KvmContext)) 950 dump(); 951 952 updateThreadContextRegs(regs, sregs); 953 updateThreadContextSRegs(sregs); 954 if (useXSave) { 955 struct kvm_xsave xsave; 956 getXSave(xsave); 957 958 updateThreadContextXSave(xsave); 959 } else { 960 struct kvm_fpu fpu; 961 getFPUState(fpu); 962 963 updateThreadContextFPU(fpu); 964 } 965 updateThreadContextMSRs(); 966 967 // The M5 misc reg caches some values from other 968 // registers. Writing to it with side effects causes it to be 969 // updated from its source registers. 
    tc->setMiscReg(MISCREG_M5_REG, 0);
}

/**
 * Update gem5's integer registers and PC from KVM's state.
 *
 * @param regs General-purpose register state from KVM.
 * @param sregs Special register state from KVM (needed for the CS
 *              base when reconstructing the PC).
 */
void
X86KvmCPU::updateThreadContextRegs(const struct kvm_regs &regs,
                                   const struct kvm_sregs &sregs)
{
#define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    // KVM's rip is an offset relative to the CS base; gem5's PC is
    // the linear address, so add the base back in.
    tc->pcState(PCState(regs.rip + sregs.cs.base));

    // Flags are spread out across multiple semi-magic registers so we
    // need some special care when updating them.
    X86ISA::setRFlags(tc, regs.rflags);
}


/**
 * Copy a KVM segment register into gem5's thread context.
 *
 * @param tc Thread context to update.
 * @param kvm_seg Segment state from KVM.
 * @param index gem5 segment register index.
 */
inline void
setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
                  const int index)
{
    SegAttr attr(0);

    // Repack KVM's discrete attribute bits into gem5's SegAttr
    // bitfield representation.
    attr.type = kvm_seg.type;
    attr.present = kvm_seg.present;
    attr.dpl = kvm_seg.dpl;
    attr.defaultSize = kvm_seg.db;
    attr.system = kvm_seg.s;
    attr.longMode = kvm_seg.l;
    attr.granularity = kvm_seg.g;
    attr.avl = kvm_seg.avl;
    attr.unusable = kvm_seg.unusable;

    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
    tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
    tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
}

/**
 * Copy a KVM descriptor table register (e.g., GDT/IDT, which only
 * have a base and a limit) into gem5's thread context.
 *
 * @param tc Thread context to update.
 * @param kvm_dtable Descriptor table state from KVM.
 * @param index gem5 segment register index.
 */
inline void
setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
                  const int index)
{
    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
}

/**
 * Update gem5's special registers (control registers, segment
 * registers, and descriptor tables) from KVM's state.
 *
 * @param sregs Special register state from KVM.
 */
void
X86KvmCPU::updateThreadContextSRegs(const struct kvm_sregs &sregs)
{
    // The APIC base and CR8 are synchronized through the kvm_run
    // structure in kvmRunWrapper(), so they should already agree
    // with the sregs copies here.
    assert(getKvmRunState()->apic_base == sregs.apic_base);
    assert(getKvmRunState()->cr8 == sregs.cr8);

#define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
#define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();
#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

/**
 * Shared FP/SIMD thread-context update for both the legacy (kvm_fpu)
 * and FXSave representations; T supplies the common field layout
 * (fsw, fcw, mxcsr, ftwx, fpr, xmm, last_opcode).
 *
 * @param tc Thread context to update.
 * @param fpu FP state read from KVM.
 */
template<typename T>
static void
updateThreadContextFPUCommon(ThreadContext *tc, const T &fpu)
{
    // TOP lives in bits 11-13 of the FP status word.
    const unsigned top((fpu.fsw >> 11) & 0x7);

    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    // KVM stores the FP registers in stack order (st0..st7); rotate
    // by TOP to recover gem5's physical register order.
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        // Convert KVM's 80-bit extended format back into gem5's
        // double representation.
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        DPRINTF(KvmContext, "Setting gem5 FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        tc->setFloatReg(FLOATREG_FPR(reg_idx), value);
    }

    // TODO: We should update the MMX state

    tc->setMiscRegNoEffect(MISCREG_X87_TOP, top);
    tc->setMiscRegNoEffect(MISCREG_MXCSR, fpu.mxcsr);
    tc->setMiscRegNoEffect(MISCREG_FCW, fpu.fcw);
    tc->setMiscRegNoEffect(MISCREG_FSW, fpu.fsw);

    // Convert the abridged 8-bit tag word back into the full 16-bit
    // x87 format.
    uint64_t ftw(convX87XTagsToTags(fpu.ftwx));
    // TODO: Are these registers really the same?
    tc->setMiscRegNoEffect(MISCREG_FTW, ftw);
    tc->setMiscRegNoEffect(MISCREG_FTAG, ftw);

    tc->setMiscRegNoEffect(MISCREG_FOP, fpu.last_opcode);

    // Copy each 128-bit XMM register as two 64-bit halves.
    for (int i = 0; i < 16; ++i) {
        tc->setFloatRegBits(FLOATREG_XMM_LOW(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][0]);
        tc->setFloatRegBits(FLOATREG_XMM_HIGH(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][8]);
    }
}

/**
 * Update gem5's FP/SIMD state from the legacy KVM_GET_FPU
 * representation.
 *
 * @param fpu FP state read from KVM.
 */
void
X86KvmCPU::updateThreadContextFPU(const struct kvm_fpu &fpu)
{
    updateThreadContextFPUCommon(tc, fpu);

    // kvm_fpu only carries the last FP instruction/data offsets, not
    // the segment selectors, so zero the latter.
    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, fpu.last_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, fpu.last_dp);
}

/**
 * Update gem5's FP/SIMD state from the KVM XSave representation.
 *
 * @param kxsave XSave state read from KVM; the region starts with a
 *               legacy FXSave-compatible block, which is all that is
 *               used here.
 */
void
X86KvmCPU::updateThreadContextXSave(const struct kvm_xsave &kxsave)
{
    const FXSave &xsave(*(const FXSave *)kxsave.region);

    updateThreadContextFPUCommon(tc, xsave);

    // The 64-bit FXSave layout only stores the ip/dp offsets, so
    // zero the segment selectors.
    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, xsave.ctrl64.fpu_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, xsave.ctrl64.fpu_dp);
}

/**
 * Read the MSRs known to both gem5 and the kernel from KVM and
 * update gem5's thread context.
 */
void
X86KvmCPU::updateThreadContextMSRs()
{
    const Kvm::MSRIndexVector &msrs(getMsrIntersection());

    // kvm_msrs is a variable-length structure; allocate it with room
    // for one entry per MSR index.
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
    struct kvm_msr_entry *entry;

    // Create a list of MSRs to read
    kvm_msrs->nmsrs = msrs.size();
    entry = &kvm_msrs->entries[0];
    for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
        entry->index = *it;
        entry->reserved = 0;
        entry->data = 0;
    }

    getMSRs(*kvm_msrs.get());

    // Update M5's state
    entry = &kvm_msrs->entries[0];
    for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
        DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
                entry->index, entry->data);

        tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
    }
}

/**
 * Fetch the next pending interrupt from gem5's interrupt controller
 * and deliver it to the guest, dispatching on the interrupt type:
 * NMIs and ordinary vectored interrupts are injected through KVM,
 * while INIT and STARTUP are handled in gem5 itself.
 */
void
X86KvmCPU::deliverInterrupts()
{
    Fault fault;

    syncThreadContext();

    {
        // Migrate to the interrupt controller's thread to get the
        // interrupt. Even though the individual methods are safe to
        // call across threads, we might still lose interrupts unless
        // getInterrupt() and updateIntrInfo() are called atomically.
        EventQueue::ScopedMigration migrate(interrupts[0]->eventQueue());
        fault = interrupts[0]->getInterrupt(tc);
        interrupts[0]->updateIntrInfo(tc);
    }

    X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
    if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "Delivering NMI\n");
        kvmNonMaskableInterrupt();
    } else if (dynamic_cast<InitInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "INIT interrupt\n");
        fault.get()->invoke(tc);
        // Delay the kvm state update since we won't enter KVM on this
        // tick.
        threadContextDirty = true;
        // HACK: gem5 doesn't actually have any BIOS code, which means
        // that we need to halt the thread and wait for a startup
        // interrupt before restarting the thread. The simulated CPUs
        // use the same kind of hack using a microcode routine.
        thread->suspend();
    } else if (dynamic_cast<StartupInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "STARTUP interrupt\n");
        fault.get()->invoke(tc);
        // The kvm state is assumed to have been updated when entering
        // kvmRun(), so we need to update it manually here.
        updateKvmState();
    } else if (x86int) {
        // Ordinary vectored interrupt: inject it into the guest
        // through KVM.
        struct kvm_interrupt kvm_int;
        kvm_int.irq = x86int->getVector();

        DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
                fault->name(), kvm_int.irq);

        kvmInterrupt(kvm_int);
    } else {
        panic("KVM: Unknown interrupt type\n");
    }

}

/**
 * Run the virtual CPU for (up to) the requested number of ticks,
 * first delivering any pending interrupts or asking KVM for an
 * interrupt window when injection is not currently possible.
 *
 * @param ticks Requested run time.
 * @return Number of ticks executed (0 if the CPU is suspended).
 */
Tick
X86KvmCPU::kvmRun(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (interrupts[0]->checkInterruptsRaw()) {
        if (interrupts[0]->hasPendingUnmaskable()) {
            // Unmaskable interrupts (e.g., NMI/INIT/STARTUP) are
            // always delivered immediately.
            DPRINTF(KvmInt,
                    "Delivering unmaskable interrupt.\n");
            syncThreadContext();
            deliverInterrupts();
        } else if (kvm_run.ready_for_interrupt_injection) {
            // KVM claims that it is ready for an interrupt. It might
            // be lying if we just updated rflags and disabled
            // interrupts (e.g., by doing a CPU handover). Let's sync
            // the thread context and check if there are /really/
            // interrupts that should be delivered now.
            syncThreadContext();
            if (interrupts[0]->checkInterrupts(tc)) {
                DPRINTF(KvmInt,
                        "M5 has pending interrupts, delivering interrupt.\n");

                deliverInterrupts();
            } else {
                DPRINTF(KvmInt,
                        "Interrupt delivery delayed due to KVM confusion.\n");
                kvm_run.request_interrupt_window = 1;
            }
        } else if (!kvm_run.request_interrupt_window) {
            // KVM can't take the interrupt right now; ask it to exit
            // as soon as injection becomes possible.
            DPRINTF(KvmInt,
                    "M5 has pending interrupts, requesting interrupt "
                    "window.\n");
            kvm_run.request_interrupt_window = 1;
        }
    } else {
        kvm_run.request_interrupt_window = 0;
    }

    // The CPU might have been suspended as a result of the INIT
    // interrupt delivery hack. In that case, don't enter into KVM.
    if (_status == Idle)
        return 0;
    else
        return kvmRunWrapper(ticks);
}

/**
 * Run the virtual CPU as part of draining, delivering pending
 * interrupts and IO so the guest reaches a consistent state.
 *
 * @return Number of ticks executed.
 */
Tick
X86KvmCPU::kvmRunDrain()
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (!archIsDrained()) {
        DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");

        // Tell KVM to find a suitable place to deliver interrupts. This
        // should ensure that pending interrupts have been delivered and
        // things are reasonably consistent (i.e., no interrupts pending
        // in the guest).
        kvm_run.request_interrupt_window = 1;

        // Limit the run to 1 millisecond. That is hopefully enough to
        // reach an interrupt window. Otherwise, we'll just try again
        // later.
        return kvmRunWrapper(1 * SimClock::Float::ms);
    } else {
        DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");

        // A zero-length run is used to deliver pending IO without
        // executing further guest code.
        return kvmRunWrapper(0);
    }
}

/**
 * Wrapper around BaseKvmCPU::kvmRun that synchronizes the APIC base
 * and CR8 through the kvm_run structure before and after running.
 *
 * @param ticks Requested run time.
 * @return Number of ticks executed.
 */
Tick
X86KvmCPU::kvmRunWrapper(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Synchronize the APIC base and CR8 here since they are present
    // in the kvm_run struct, which makes the synchronization really
    // cheap.
    kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    const Tick run_ticks(BaseKvmCPU::kvmRun(ticks));

    tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base);
    // NOTE(review): unlike apic_base above, this re-reads CR8 from
    // the thread context into kvm_run instead of writing kvm_run.cr8
    // back to the thread context -- confirm this asymmetry is
    // intentional.
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    return run_ticks;
}

/**
 * Get the host CPU's cycle count by reading its TSC MSR.
 *
 * @return Current value of the host's time-stamp counter.
 */
uint64_t
X86KvmCPU::getHostCycles() const
{
    return getMSR(MSR_TSC);
}

/**
 * Service a KVM IO exit by routing a 32-bit port access to or from a
 * gem5 misc register. Used for ports (e.g., the PCI configuration
 * address port) that are backed by misc registers rather than the
 * memory system.
 *
 * @param miscreg gem5 misc register index backing the port.
 */
void
X86KvmCPU::handleIOMiscReg32(int miscreg)
{
    struct kvm_run &kvm_run(*getKvmRunState());
    const uint16_t port(kvm_run.io.port);

    assert(kvm_run.exit_reason == KVM_EXIT_IO);

    // Only single 32-bit, non-string accesses are supported here.
    if (kvm_run.io.size != 4) {
        panic("Unexpected IO size (%u) for address 0x%x.\n",
              kvm_run.io.size, port);
    }

    if (kvm_run.io.count != 1) {
        panic("Unexpected IO count (%u) for address 0x%x.\n",
              kvm_run.io.count, port);
    }

    // The IO data lives in the kvm_run shared memory area at the
    // offset KVM reported for this exit.
    uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset));
    if (kvm_run.io.direction == KVM_EXIT_IO_OUT)
        tc->setMiscReg(miscreg, *data);
    else
        *data = tc->readMiscRegNoEffect(miscreg);
}

/**
 * Service a KVM_EXIT_IO exit by translating the port access into
 * gem5 memory requests (with special handling for the PCI
 * configuration space ports).
 *
 * @return Delay (in ticks) incurred by the IO accesses.
 */
Tick
X86KvmCPU::handleKvmExitIO()
{
    struct kvm_run &kvm_run(*getKvmRunState());
    bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT);
    unsigned char *guestData(getGuestData(kvm_run.io.data_offset));
    Tick delay(0);
    uint16_t port(kvm_run.io.port);
    Addr pAddr;
    const int count(kvm_run.io.count);

    assert(kvm_run.io.direction == KVM_EXIT_IO_IN ||
           kvm_run.io.direction == KVM_EXIT_IO_OUT);

    DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n",
            (isWrite ? "out" : "in"), kvm_run.io.port);

    /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we
     * don't use the TLB component, we need to intercept and handle
     * the PCI configuration space IO ports here.
     *
     * The IO port PCI discovery mechanism uses one address register
     * and one data register. We map the address register to a misc
     * reg and use that to re-route data register accesses to the
     * right location in the PCI configuration space.
     */
    if (port == IO_PCI_CONF_ADDR) {
        handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS);
        return 0;
    } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) {
        Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS));
        if (pciConfigAddr & 0x80000000) {
            // Bit 31 of the address register enables configuration
            // space accesses; combine the register selector with the
            // byte offset given by the low bits of the data port.
            pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) |
                                                (port & 0x3));
        } else {
            pAddr = X86ISA::x86IOAddress(port);
        }
    } else {
        pAddr = X86ISA::x86IOAddress(port);
    }

    const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
    // Temporarily lock and migrate to the device event queue to
    // prevent races in multi-core mode.
    EventQueue::ScopedMigration migrate(deviceEventQueue());
    for (int i = 0; i < count; ++i) {
|