#include "debug/Drain.hh"
#include "debug/Kvm.hh"
#include "debug/KvmContext.hh"
#include "debug/KvmIO.hh"
#include "debug/KvmInt.hh"

using namespace X86ISA;

// MSR number of the time stamp counter (used to read host cycles)
#define MSR_TSC 0x10

// x86 IO ports used for PCI configuration space discovery
#define IO_PCI_CONF_ADDR 0xCF8
#define IO_PCI_CONF_DATA_BASE 0xCFC

// Task segment type of an inactive 32-bit or 64-bit task
#define SEG_SYS_TYPE_TSS_AVAILABLE 9
// Task segment type of an active 32-bit or 64-bit task
#define SEG_SYS_TYPE_TSS_BUSY 11

// Non-conforming accessed code segment
#define SEG_CS_TYPE_ACCESSED 9
// Non-conforming accessed code segment that can be read
#define SEG_CS_TYPE_READ_ACCESSED 11

// The lowest bit of the type field for normal segments (code and
// data) is used to indicate that a segment has been accessed.
#define SEG_TYPE_BIT_ACCESSED 1

// In-memory layout of the FPU/SSE state area used by the kernel's
// XSAVE interface (kvm_xsave.region starts with this layout). The
// static_assert below pins the expected 512-byte size.
struct FXSave
{
    uint16_t fcw;
    uint16_t fsw;
    uint8_t ftwx;
    uint8_t pad0;
    uint16_t last_opcode;
    union {
        // Segmented FPU instruction/data pointers (32-bit layout)
        struct {
            uint32_t fpu_ip;
            uint16_t fpu_cs;
            uint16_t pad1;
            uint32_t fpu_dp;
            uint16_t fpu_ds;
            uint16_t pad2;
        } ctrl32;

        // Flat 64-bit FPU instruction/data pointers (64-bit layout)
        struct {
            uint64_t fpu_ip;
            uint64_t fpu_dp;
        } ctrl64;
    };
    uint32_t mxcsr;
    uint32_t mxcsr_mask;

    // x87 stack registers (8 x 16 bytes, 80-bit values + padding)
    uint8_t fpr[8][16];
    // XMM registers (16 x 128 bits)
    uint8_t xmm[16][16];

    uint64_t reserved[12];
} M5_ATTR_PACKED;

static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave");

// Apply APPLY_IREG(kvm_field, gem5_reg) to every integer register
// shared between struct kvm_regs and gem5's integer register file.
#define FOREACH_IREG()                          \
    do {                                        \
        APPLY_IREG(rax, INTREG_RAX);            \
        APPLY_IREG(rbx, INTREG_RBX);            \
        APPLY_IREG(rcx, INTREG_RCX);            \
        APPLY_IREG(rdx, INTREG_RDX);            \
        APPLY_IREG(rsi, INTREG_RSI);            \
        APPLY_IREG(rdi, INTREG_RDI);            \
        APPLY_IREG(rsp, INTREG_RSP);            \
        APPLY_IREG(rbp, INTREG_RBP);            \
        APPLY_IREG(r8, INTREG_R8);              \
        APPLY_IREG(r9, INTREG_R9);              \
        APPLY_IREG(r10, INTREG_R10);            \
        APPLY_IREG(r11, INTREG_R11);            \
        APPLY_IREG(r12, INTREG_R12);            \
        APPLY_IREG(r13, INTREG_R13);            \
        APPLY_IREG(r14, INTREG_R14);            \
        APPLY_IREG(r15, INTREG_R15);            \
    } while (0)

// Apply APPLY_SREG(kvm_field, gem5_misc_reg) to every special
// register in struct kvm_sregs that maps directly to a misc register.
#define FOREACH_SREG()                                  \
    do {                                                \
        APPLY_SREG(cr0, MISCREG_CR0);                   \
        APPLY_SREG(cr2, MISCREG_CR2);                   \
        APPLY_SREG(cr3, MISCREG_CR3);                   \
        APPLY_SREG(cr4, MISCREG_CR4);                   \
        APPLY_SREG(cr8, MISCREG_CR8);                   \
        APPLY_SREG(efer, MISCREG_EFER);                 \
        APPLY_SREG(apic_base, MISCREG_APIC_BASE);       \
    } while (0)

// Apply APPLY_DREG(kvm_field, gem5_misc_reg) to every debug register
// in struct kvm_debugregs.
#define FOREACH_DREG()                          \
    do {                                        \
        APPLY_DREG(db[0], MISCREG_DR0);         \
        APPLY_DREG(db[1], MISCREG_DR1);         \
        APPLY_DREG(db[2], MISCREG_DR2);         \
        APPLY_DREG(db[3], MISCREG_DR3);         \
        APPLY_DREG(dr6, MISCREG_DR6);           \
        APPLY_DREG(dr7, MISCREG_DR7);           \
    } while (0)

// Apply APPLY_SEGMENT(kvm_field, seg_index) to every segment
// register; the index is relative to MISCREG_SEG_SEL_BASE.
#define FOREACH_SEGMENT()                                       \
    do {                                                        \
        APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \
    } while (0)

// Apply APPLY_DTABLE(kvm_field, seg_index) to the GDT and IDT
// descriptor table registers.
#define FOREACH_DTABLE()                                        \
    do {                                                        \
        APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE);  \
        APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \
    } while (0)

// Allocate raw storage for a kvm struct (e.g., struct kvm_msrs) that
// ends in a variable-length array of 'entries' ENTRY elements.
// NOTE(review): returns unconstructed memory that callers release via
// std::unique_ptr's plain delete. That is fine for these trivial C
// structs, but worth confirming if the types ever gain non-trivial
// constructors/destructors.
template<typename STRUCT, typename ENTRY>
static STRUCT *newVarStruct(size_t entries)
{
    return (STRUCT *)operator new(sizeof(STRUCT) + entries * sizeof(ENTRY));
}

// Pretty-print the KVM integer register state.
static void
dumpKvm(const struct kvm_regs &regs)
{
    inform("KVM register state:\n");

#define APPLY_IREG(kreg, mreg)                  \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    inform("\trip: 0x%llx\n", regs.rip);
    inform("\trflags: 0x%llx\n", regs.rflags);
}

// Pretty-print a single KVM segment register.
static void
dumpKvm(const char *reg_name, const struct kvm_segment &seg)
{
    inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n"
           "\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n",
           reg_name,
           seg.base, seg.limit, seg.selector, seg.type,
           seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl, seg.unusable);
}

// Pretty-print a KVM descriptor table register (GDT/IDT).
static void
dumpKvm(const char *reg_name, const struct kvm_dtable &dtable)
{
    inform("\t%s: @0x%llx+%x\n",
           reg_name, dtable.base, dtable.limit);
}

// Pretty-print the KVM special register state (control registers,
// segments, descriptor tables, and the interrupt bitmap).
static void
dumpKvm(const struct kvm_sregs &sregs)
{
#define APPLY_SREG(kreg, mreg)                          \
    inform("\t" # kreg ": 0x%llx\n", sregs.kreg);
#define APPLY_SEGMENT(kreg, idx)                \
    dumpKvm(# kreg, sregs.kreg);
#define APPLY_DTABLE(kreg, idx)                 \
    dumpKvm(# kreg, sregs.kreg);

    inform("Special registers:\n");
    FOREACH_SEGMENT();
    FOREACH_SREG();
    FOREACH_DTABLE();

    inform("Interrupt Bitmap:");
    // The bitmap is stored as an array of 64-bit words.
    for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64)
        inform("  0x%.8x", sregs.interrupt_bitmap[i / 64]);

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

#ifdef KVM_GET_DEBUGREGS
// Pretty-print the KVM debug register state.
static void
dumpKvm(const struct kvm_debugregs &regs)
{
    inform("KVM debug state:\n");

#define APPLY_DREG(kreg, mreg)                  \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_DREG();

#undef APPLY_DREG

    inform("\tflags: 0x%llx\n", regs.flags);
}
#endif

// Dump the fields specific to the FXSave (XSAVE-style) FPU layout.
static void
dumpFpuSpec(const struct FXSave &xs)
{
    inform("\tlast_ip: 0x%x\n", xs.ctrl64.fpu_ip);
    inform("\tlast_dp: 0x%x\n", xs.ctrl64.fpu_dp);
    inform("\tmxcsr_mask: 0x%x\n", xs.mxcsr_mask);
}

// Dump the fields specific to the legacy kvm_fpu layout.
static void
dumpFpuSpec(const struct kvm_fpu &fpu)
{
    inform("\tlast_ip: 0x%x\n", fpu.last_ip);
    inform("\tlast_dp: 0x%x\n", fpu.last_dp);
}

// Dump the FPU/SSE state fields shared between struct kvm_fpu and
// FXSave (control/status words, tag word, x87 stack, XMM registers).
template<typename T>
static void
dumpFpuCommon(const T &fpu)
{
    // Top-of-stack pointer lives in bits 13:11 of the status word.
    const unsigned top((fpu.fsw >> 11) & 0x7);
    inform("\tfcw: 0x%x\n", fpu.fcw);

    inform("\tfsw: 0x%x (top: %i, "
           "conditions: %s%s%s%s, exceptions: %s%s%s%s%s%s %s%s%s)\n",
           fpu.fsw, top,

           (fpu.fsw & CC0Bit) ? "C0" : "",
           (fpu.fsw & CC1Bit) ? "C1" : "",
           (fpu.fsw & CC2Bit) ? "C2" : "",
           (fpu.fsw & CC3Bit) ? "C3" : "",

           (fpu.fsw & IEBit) ? "I" : "",
           (fpu.fsw & DEBit) ? "D" : "",
           (fpu.fsw & ZEBit) ? "Z" : "",
           (fpu.fsw & OEBit) ? "O" : "",
           (fpu.fsw & UEBit) ? "U" : "",
           (fpu.fsw & PEBit) ? "P" : "",

           (fpu.fsw & StackFaultBit) ? "SF " : "",
           (fpu.fsw & ErrSummaryBit) ? "ES " : "",
           (fpu.fsw & BusyBit) ? "BUSY " : ""
        );
    inform("\tftwx: 0x%x\n", fpu.ftwx);
    inform("\tlast_opcode: 0x%x\n", fpu.last_opcode);
    dumpFpuSpec(fpu);
    inform("\tmxcsr: 0x%x\n", fpu.mxcsr);
    inform("\tFP Stack:\n");
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        // The simplified (xsave-style) tag word has one valid bit per
        // physical register; a cleared bit means the slot is empty.
        const bool empty(!((fpu.ftwx >> reg_idx) & 0x1));
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        char hex[33];
        // Only the first 10 bytes of each slot hold the 80-bit value.
        for (int j = 0; j < 10; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]);
        inform("\t\tST%i/%i: 0x%s (%f)%s\n", i, reg_idx,
               hex, value, empty ? " (e)" : "");
    }
    inform("\tXMM registers:\n");
    for (int i = 0; i < 16; ++i) {
        char hex[33];
        for (int j = 0; j < 16; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]);
        inform("\t\t%i: 0x%s\n", i, hex);
    }
}

// Pretty-print the legacy KVM FPU state.
static void
dumpKvm(const struct kvm_fpu &fpu)
{
    inform("FPU registers:\n");
    dumpFpuCommon(fpu);
}

// Pretty-print the XSAVE-based KVM FPU state.
static void
dumpKvm(const struct kvm_xsave &xsave)
{
    inform("FPU registers (XSave):\n");
    // The xsave region starts with the standard FXSave layout.
    dumpFpuCommon(*(FXSave *)xsave.region);
}

// Pretty-print a list of MSR values.
static void
dumpKvm(const struct kvm_msrs &msrs)
{
    inform("MSRs:\n");

    for (int i = 0; i < msrs.nmsrs; ++i) {
        const struct kvm_msr_entry &e(msrs.entries[i]);

        inform("\t0x%x: 0x%x\n", e.index, e.data);
    }
}

// Pretty-print the KVM extended control register (XCR) state.
static void
dumpKvm(const struct kvm_xcrs &regs)
{
    inform("KVM XCR registers:\n");

    inform("\tFlags: 0x%x\n", regs.flags);
    for (int i = 0; i < regs.nr_xcrs; ++i) {
        inform("\tXCR[0x%x]: 0x%x\n",
               regs.xcrs[i].xcr,
               regs.xcrs[i].value);
    }
}

// Pretty-print pending vCPU events (exceptions, interrupts, NMIs).
static void
dumpKvm(const struct kvm_vcpu_events &events)
{
    inform("vCPU events:\n");

    inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n",
           events.exception.injected, events.exception.nr,
           events.exception.has_error_code, events.exception.error_code);

    inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n",
           events.interrupt.injected, events.interrupt.nr,
           events.interrupt.soft);

    inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n",
           events.nmi.injected, events.nmi.pending,
           events.nmi.masked);

    inform("\tSIPI vector: 0x%x\n", events.sipi_vector);
    inform("\tFlags: 0x%x\n", events.flags);
}

// Return true if addr is a canonical x86-64 virtual address.
static bool
isCanonicalAddress(uint64_t addr)
{
    // x86-64 doesn't currently use the full 64-bit virtual address
    // space, instead it uses signed 48 bit addresses that are
    // sign-extended to 64 bits. Such addresses are known as
    // "canonical".
    // Bits 63:47 must either be all zero or all one.
    uint64_t upper_half(addr & 0xffff800000000000ULL);
    return upper_half == 0 || upper_half == 0xffff800000000000;
}

// Sanity-check one segment register against the constraints VMX
// places on guest state. This only warns about inconsistencies; it
// never fixes anything up.
static void
checkSeg(const char *name, const int idx, const struct kvm_segment &seg,
         struct kvm_sregs sregs)
{
    // Check the register base
    switch (idx) {
      case MISCREG_TSL:
      case MISCREG_TR:
      case MISCREG_FS:
      case MISCREG_GS:
        if (!isCanonicalAddress(seg.base))
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;

      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
        if (seg.unusable)
            break;
        /* FALLTHROUGH */
      case MISCREG_CS:
        if (seg.base & 0xffffffff00000000ULL)
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;
    }

    // Check the type
    switch (idx) {
      case MISCREG_CS:
        switch (seg.type) {
          case 3:
            if (seg.dpl != 0)
                warn("CS type is 3 but dpl != 0.\n");
            break;
          case 9:
          case 11:
            if (seg.dpl != sregs.ss.dpl)
                warn("CS type is %i but CS DPL != SS DPL\n", seg.type);
            break;
          case 13:
          case 15:
            if (seg.dpl > sregs.ss.dpl)
                warn("CS type is %i but CS DPL > SS DPL\n", seg.type);
            break;
          default:
            warn("Illegal CS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_SS:
        if (seg.unusable)
            break;
        switch (seg.type) {
          case 3:
            if (sregs.cs.type == 3 && seg.dpl != 0)
                warn("CS type is 3, but SS DPL is != 0.\n");
            /* FALLTHROUGH */
          case 7:
            if (!(sregs.cr0 & 1) && seg.dpl != 0)
                warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl);
            break;
          default:
            warn("Illegal SS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
        // Data segments must be accessed; code segments must also be
        // readable.
        if (!(seg.type & 0x1) ||
            ((seg.type & 0x8) && !(seg.type & 0x2)))
            warn("%s has an illegal type field: %i\n", name, seg.type);
        break;

      case MISCREG_TR:
        // TODO: We should check the CPU mode
        if (seg.type != 3 && seg.type != 11)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
        if (seg.type != 2)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;
    }

    // Check the S (descriptor type) flag
    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
        /* FALLTHROUGH */
      case MISCREG_CS:
        if (!seg.s)
            warn("%s: S flag not set\n", name);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
        /* FALLTHROUGH */
      case MISCREG_TR:
        if (seg.s)
            warn("%s: S flag is set\n", name);
        break;
    }

    // Check the present flag and the limit/granularity combination
    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
      case MISCREG_TSL:
        if (seg.unusable)
            break;
        /* FALLTHROUGH */
      case MISCREG_TR:
      case MISCREG_CS:
        if (!seg.present)
            warn("%s: P flag not set\n", name);

        if (((seg.limit & 0xFFF) == 0 && seg.g) ||
            ((seg.limit & 0xFFF00000) != 0 && !seg.g)) {
            warn("%s limit (0x%x) and g (%i) combination is illegal.\n",
                 name, seg.limit, seg.g);
        }
        break;
    }

    // TODO: Check CS DB
}

X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params)
    : BaseKvmCPU(params),
      useXSave(params->useXSave)
{
    Kvm &kvm(*vm.kvm);

    // These capabilities are hard requirements of this CPU model.
    if (!kvm.capSetTSSAddress())
        panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n");
    if (!kvm.capExtendedCPUID())
        panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n");
    if (!kvm.capUserNMI())
        warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n");
    if (!kvm.capVCPUEvents())
        warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n");

    haveDebugRegs = kvm.capDebugRegs();
    haveXSave = kvm.capXSave();
    haveXCRs = kvm.capXCRs();

    // XSave was requested but isn't available; fall back to the
    // legacy FPU interface.
    if (useXSave && !haveXSave) {
        warn("KVM: XSAVE not supported by host. MXCSR synchronization might be "
             "unreliable due to kernel bugs.\n");
        useXSave = false;
    } else if (!useXSave) {
        warn("KVM: XSave FPU/SIMD synchronization disabled by user.\n");
    }
}

X86KvmCPU::~X86KvmCPU()
{
}

void
X86KvmCPU::startup()
{
    BaseKvmCPU::startup();

    updateCPUID();

    // TODO: Do we need to create an identity mapped TSS area? We
    // should call kvm.vm.setTSSAddress() here in that case. It should
    // only be needed for old versions of the virtualization
    // extensions. We should make sure that the identity range is
    // reserved in the e820 memory map in that case.
}

// Dump the complete vCPU state using the dump* helpers below.
void
X86KvmCPU::dump() const
{
    dumpIntRegs();
    if (useXSave)
        dumpXSave();
    else
        dumpFpuRegs();
    dumpSpecRegs();
    dumpDebugRegs();
    dumpXCRs();
    dumpVCpuEvents();
    dumpMSRs();
}

void
X86KvmCPU::dumpFpuRegs() const
{
    struct kvm_fpu fpu;
    getFPUState(fpu);
    dumpKvm(fpu);
}

void
X86KvmCPU::dumpIntRegs() const
{
    struct kvm_regs regs;
    getRegisters(regs);
    dumpKvm(regs);
}

void
X86KvmCPU::dumpSpecRegs() const
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);
    dumpKvm(sregs);
}

void
X86KvmCPU::dumpDebugRegs() const
{
    if (haveDebugRegs) {
#ifdef KVM_GET_DEBUGREGS
        struct kvm_debugregs dregs;
        getDebugRegisters(dregs);
        dumpKvm(dregs);
#endif
    } else {
        inform("Debug registers not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXCRs() const
{
    if (haveXCRs) {
        struct kvm_xcrs xcrs;
        getXCRs(xcrs);
        dumpKvm(xcrs);
    } else {
        inform("XCRs not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXSave() const
{
    if (haveXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);
        dumpKvm(xsave);
    } else {
        inform("XSave not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpVCpuEvents() const
{
    struct kvm_vcpu_events events;
    getVCpuEvents(events);
    dumpKvm(events);
}

void
X86KvmCPU::dumpMSRs() const
{
    // Read and dump every MSR the kernel claims to support.
    const Kvm::MSRIndexVector &supported_msrs(vm.kvm->getSupportedMSRs());
    std::unique_ptr<struct kvm_msrs> msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(
            supported_msrs.size()));

    msrs->nmsrs = supported_msrs.size();
    for (int i = 0; i < supported_msrs.size(); ++i) {
        struct kvm_msr_entry &e(msrs->entries[i]);
        e.index = supported_msrs[i];
        e.reserved = 0;
        e.data = 0;
    }
    getMSRs(*msrs.get());

    dumpKvm(*msrs.get());
}

// Push the gem5 thread context state into KVM.
void
X86KvmCPU::updateKvmState()
{
    updateKvmStateRegs();
    updateKvmStateSRegs();
    updateKvmStateFPU();
    updateKvmStateMSRs();

    DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
    if (DTRACE(KvmContext))
        dump();
}

// Synchronize the integer registers, RIP, and RFLAGS into KVM.
void
X86KvmCPU::updateKvmStateRegs()
{
    struct kvm_regs regs;

#define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg)
    FOREACH_IREG();
#undef APPLY_IREG

    // gem5's instAddr() includes the CS base; KVM wants the raw RIP.
    regs.rip = tc->instAddr() - tc->readMiscReg(MISCREG_CS_BASE);

    /* You might think that setting regs.rflags to the contents
     * MISCREG_RFLAGS here would suffice. In that case you're
     * mistaken. We need to reconstruct it from a bunch of ucode
     * registers and wave a dead chicken over it (aka mask out and set
     * reserved bits) to get it to work.
     */
    regs.rflags = X86ISA::getRFlags(tc);

    setRegisters(regs);
}

// Fill in a kvm_segment from gem5's segment misc registers for the
// segment with the given index (relative to MISCREG_SEG_SEL_BASE).
static inline void
setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg,
                 const int index)
{
    SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index)));

    kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
    kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index));
    kvm_seg.type = attr.type;
    kvm_seg.present = attr.present;
    kvm_seg.dpl = attr.dpl;
    kvm_seg.db = attr.defaultSize;
    kvm_seg.s = attr.system;
    kvm_seg.l = attr.longMode;
    kvm_seg.g = attr.granularity;
    kvm_seg.avl = attr.avl;

    // A segment is normally unusable when the selector is zero. There
    // is a attr.unusable flag in gem5, but it seems unused. qemu
    // seems to set this to 0 all the time, so we just do the same and
    // hope for the best.
    kvm_seg.unusable = 0;
}

// Fill in a kvm_dtable (GDT/IDT) from gem5's misc registers.
static inline void
setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable,
                const int index)
{
    kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
}

static void
forceSegAccessed(struct kvm_segment &seg)
{
    // Intel's VMX requires that (some) usable segments are flagged as
    // 'accessed' (i.e., the lowest bit in the segment type is set)
    // when entering VMX. This wouldn't necessary be the case even if
    // gem5 did set the access bits correctly, so we force it to one
    // in that case.
    if (!seg.unusable)
        seg.type |= SEG_TYPE_BIT_ACCESSED;
}

// Synchronize the special registers (control regs, segments,
// descriptor tables) into KVM, fixing up state that would otherwise
// violate VMX guest-state requirements.
void
X86KvmCPU::updateKvmStateSRegs()
{
    struct kvm_sregs sregs;

#define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg)
#define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx)

    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE

    // Clear the interrupt bitmap
    memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));

    // VMX requires CS, SS, DS, ES, FS, and GS to have the accessed
    // bit in the type field set.
    forceSegAccessed(sregs.cs);
    forceSegAccessed(sregs.ss);
    forceSegAccessed(sregs.ds);
    forceSegAccessed(sregs.es);
    forceSegAccessed(sregs.fs);
    forceSegAccessed(sregs.gs);

    // There are currently some cases where the active task isn't
    // marked as busy. This is illegal in VMX, so we force it to busy.
    if (sregs.tr.type == SEG_SYS_TYPE_TSS_AVAILABLE) {
        hack("tr.type (%i) is not busy. Forcing the busy bit.\n",
             sregs.tr.type);
        sregs.tr.type = SEG_SYS_TYPE_TSS_BUSY;
    }

    // VMX requires the DPL of SS and CS to be the same for
    // non-conforming code segments. It seems like m5 doesn't set the
    // DPL of SS correctly when taking interrupts, so we need to fix
    // that here.
    if ((sregs.cs.type == SEG_CS_TYPE_ACCESSED ||
         sregs.cs.type == SEG_CS_TYPE_READ_ACCESSED) &&
        sregs.cs.dpl != sregs.ss.dpl) {

        hack("CS.DPL (%i) != SS.DPL (%i): Forcing SS.DPL to %i\n",
             sregs.cs.dpl, sregs.ss.dpl, sregs.cs.dpl);
        sregs.ss.dpl = sregs.cs.dpl;
    }

    // Do checks after fixing up the state to avoid getting excessive
    // amounts of warnings.
    RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS));
    if (!rflags_nocc.vm) {
        // Do segment verification if the CPU isn't entering virtual
        // 8086 mode. We currently assume that unrestricted guest
        // mode is available.

#define APPLY_SEGMENT(kreg, idx)                                        \
        checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs)

        FOREACH_SEGMENT();
#undef APPLY_SEGMENT
    }

    setSpecialRegisters(sregs);
}

// Copy the FPU/SSE state shared between the legacy and XSave layouts
// from the thread context into the given KVM FPU structure.
template <typename T>
static void
updateKvmStateFPUCommon(ThreadContext *tc, T &fpu)
{
    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    fpu.mxcsr = tc->readMiscRegNoEffect(MISCREG_MXCSR);
    fpu.fcw = tc->readMiscRegNoEffect(MISCREG_FCW);
    // No need to rebuild from MISCREG_FSW and MISCREG_TOP if we read
    // with effects.
    fpu.fsw = tc->readMiscReg(MISCREG_FSW);

    uint64_t ftw(tc->readMiscRegNoEffect(MISCREG_FTW));
    fpu.ftwx = X86ISA::convX87TagsToXTags(ftw);

    fpu.last_opcode = tc->readMiscRegNoEffect(MISCREG_FOP);

    // Walk the x87 stack in physical-register order starting at TOP.
    const unsigned top((fpu.fsw >> 11) & 0x7);
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(tc->readFloatReg(FLOATREG_FPR(reg_idx)));
        DPRINTF(KvmContext, "Setting KVM FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        X86ISA::storeFloat80(fpu.fpr[i], value);
    }

    // TODO: We should update the MMX state

    for (int i = 0; i < 16; ++i) {
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][0] =
            tc->readFloatRegBits(FLOATREG_XMM_LOW(i));
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][8] =
            tc->readFloatRegBits(FLOATREG_XMM_HIGH(i));
    }
}

// Synchronize FPU state into KVM using the legacy KVM_SET_FPU API.
void
X86KvmCPU::updateKvmStateFPULegacy()
{
    struct kvm_fpu fpu;

    // There is some padding in the FP registers, so we'd better zero
    // the whole struct.
    memset(&fpu, 0, sizeof(fpu));

    updateKvmStateFPUCommon(tc, fpu);

    // The legacy interface has no room for segmented FPU pointers, so
    // warn if the segment parts would be lost.
    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    fpu.last_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    fpu.last_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setFPUState(fpu);
}

// Synchronize FPU state into KVM using the KVM_SET_XSAVE API.
void
X86KvmCPU::updateKvmStateFPUXSave()
{
    struct kvm_xsave kxsave;
    FXSave &xsave(*(FXSave *)kxsave.region);

    // There is some padding and reserved fields in the structure, so
    // we'd better zero the whole thing.
    memset(&kxsave, 0, sizeof(kxsave));

    updateKvmStateFPUCommon(tc, xsave);

    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    xsave.ctrl64.fpu_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    xsave.ctrl64.fpu_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setXSave(kxsave);
}

// Dispatch to the XSave or legacy FPU synchronization path.
void
X86KvmCPU::updateKvmStateFPU()
{
    if (useXSave)
        updateKvmStateFPUXSave();
    else
        updateKvmStateFPULegacy();
}

// Synchronize all MSRs shared between gem5 and KVM into the guest.
void
X86KvmCPU::updateKvmStateMSRs()
{
    KvmMSRVector msrs;

    const Kvm::MSRIndexVector &indices(getMsrIntersection());

    for (auto it = indices.cbegin(); it != indices.cend(); ++it) {
        struct kvm_msr_entry e;

        e.index = *it;
        e.reserved = 0;
        e.data = tc->readMiscReg(msrMap.at(*it));
        DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n",
                e.index, e.data);

        msrs.push_back(e);
    }

    setMSRs(msrs);
}

// Pull the current KVM vCPU state back into the gem5 thread context.
void
X86KvmCPU::updateThreadContext()
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;

    getRegisters(regs);
    getSpecialRegisters(sregs);

    DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n");
    if (DTRACE(KvmContext))
        dump();

    updateThreadContextRegs(regs, sregs);
    updateThreadContextSRegs(sregs);
    if (useXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);

        updateThreadContextXSave(xsave);
    } else {
        struct kvm_fpu fpu;
        getFPUState(fpu);

        updateThreadContextFPU(fpu);
    }
    updateThreadContextMSRs();

    // The M5 misc reg caches some values from other
    // registers. Writing to it with side effects causes it to be
    // updated from its source registers.
    tc->setMiscReg(MISCREG_M5_REG, 0);
}

// Copy the integer registers, PC, and RFLAGS from KVM into gem5.
void
X86KvmCPU::updateThreadContextRegs(const struct kvm_regs &regs,
                                   const struct kvm_sregs &sregs)
{
#define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    // gem5's PC is linear (includes the CS base); KVM's RIP is not.
    tc->pcState(PCState(regs.rip + sregs.cs.base));

    // Flags are spread out across multiple semi-magic registers so we
    // need some special care when updating them.
    X86ISA::setRFlags(tc, regs.rflags);
}


// Copy one KVM segment register into gem5's segment misc registers.
inline void
setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
                  const int index)
{
    SegAttr attr(0);

    attr.type = kvm_seg.type;
    attr.present = kvm_seg.present;
    attr.dpl = kvm_seg.dpl;
    attr.defaultSize = kvm_seg.db;
    attr.system = kvm_seg.s;
    attr.longMode = kvm_seg.l;
    attr.granularity = kvm_seg.g;
    attr.avl = kvm_seg.avl;
    attr.unusable = kvm_seg.unusable;

    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
    tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
    tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
}

// Copy one KVM descriptor table register (GDT/IDT) into gem5.
inline void
setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
                  const int index)
{
    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
}

// Copy the special registers from KVM into the gem5 thread context.
void
X86KvmCPU::updateThreadContextSRegs(const struct kvm_sregs &sregs)
{
    // These are mirrored in the kvm_run struct by kvmRunWrapper(), so
    // the two copies must agree at this point.
    assert(getKvmRunState()->apic_base == sregs.apic_base);
    assert(getKvmRunState()->cr8 == sregs.cr8);

#define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
#define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();
#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

// Copy the FPU/SSE state shared between the legacy and XSave layouts
// from the given KVM FPU structure into the thread context.
template<typename T>
static void
updateThreadContextFPUCommon(ThreadContext *tc, const T &fpu)
{
    const unsigned top((fpu.fsw >> 11) & 0x7);

    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        DPRINTF(KvmContext, "Setting gem5 FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        tc->setFloatReg(FLOATREG_FPR(reg_idx), value);
    }

    // TODO: We should update the MMX state

    tc->setMiscRegNoEffect(MISCREG_X87_TOP, top);
    tc->setMiscRegNoEffect(MISCREG_MXCSR, fpu.mxcsr);
    tc->setMiscRegNoEffect(MISCREG_FCW, fpu.fcw);
    tc->setMiscRegNoEffect(MISCREG_FSW, fpu.fsw);

    uint64_t ftw(convX87XTagsToTags(fpu.ftwx));
    // TODO: Are these registers really the same?
    tc->setMiscRegNoEffect(MISCREG_FTW, ftw);
    tc->setMiscRegNoEffect(MISCREG_FTAG, ftw);

    tc->setMiscRegNoEffect(MISCREG_FOP, fpu.last_opcode);

    for (int i = 0; i < 16; ++i) {
        tc->setFloatRegBits(FLOATREG_XMM_LOW(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][0]);
        tc->setFloatRegBits(FLOATREG_XMM_HIGH(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][8]);
    }
}

// Copy legacy-layout KVM FPU state into the thread context.
void
X86KvmCPU::updateThreadContextFPU(const struct kvm_fpu &fpu)
{
    updateThreadContextFPUCommon(tc, fpu);

    // The legacy layout carries flat pointers only; clear the
    // segment parts.
    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, fpu.last_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, fpu.last_dp);
}

// Copy XSave-layout KVM FPU state into the thread context.
void
X86KvmCPU::updateThreadContextXSave(const struct kvm_xsave &kxsave)
{
    const FXSave &xsave(*(const FXSave *)kxsave.region);

    updateThreadContextFPUCommon(tc, xsave);

    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, xsave.ctrl64.fpu_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, xsave.ctrl64.fpu_dp);
}

// Read all shared MSRs from KVM and store them in the thread context.
void
X86KvmCPU::updateThreadContextMSRs()
{
    const Kvm::MSRIndexVector &msrs(getMsrIntersection());

    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
    struct kvm_msr_entry *entry;

    // Create a list of MSRs to read
    kvm_msrs->nmsrs = msrs.size();
    entry = &kvm_msrs->entries[0];
    for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
        entry->index = *it;
        entry->reserved = 0;
        entry->data = 0;
    }

    getMSRs(*kvm_msrs.get());

    // Update M5's state
    entry = &kvm_msrs->entries[0];
    for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
        DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
                entry->index, entry->data);

        tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
    }
}

// Fetch the highest-priority pending interrupt from the simulated
// interrupt controller and deliver it to the KVM vCPU.
void
X86KvmCPU::deliverInterrupts()
{
    Fault fault;

    syncThreadContext();

    {
        // Migrate to the interrupt controller's thread to get the
        // interrupt. Even though the individual methods are safe to
        // call across threads, we might still lose interrupts unless
        // getInterrupt() and updateIntrInfo() are called atomically.
        EventQueue::ScopedMigration migrate(interrupts[0]->eventQueue());
        fault = interrupts[0]->getInterrupt(tc);
        interrupts[0]->updateIntrInfo(tc);
    }

    X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
    if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "Delivering NMI\n");
        kvmNonMaskableInterrupt();
    } else if (dynamic_cast<InitInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "INIT interrupt\n");
        fault.get()->invoke(tc);
        // Delay the kvm state update since we won't enter KVM on this
        // tick.
        threadContextDirty = true;
        // HACK: gem5 doesn't actually have any BIOS code, which means
        // that we need to halt the thread and wait for a startup
        // interrupt before restarting the thread. The simulated CPUs
        // use the same kind of hack using a microcode routine.
        thread->suspend();
    } else if (dynamic_cast<StartupInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "STARTUP interrupt\n");
        fault.get()->invoke(tc);
        // The kvm state is assumed to have been updated when entering
        // kvmRun(), so we need to update manually it here.
        updateKvmState();
    } else if (x86int) {
        // Ordinary maskable interrupt: inject its vector into KVM.
        struct kvm_interrupt kvm_int;
        kvm_int.irq = x86int->getVector();

        DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
                fault->name(), kvm_int.irq);

        kvmInterrupt(kvm_int);
    } else {
        panic("KVM: Unknown interrupt type\n");
    }

}

// Deliver any pending interrupts and then execute in KVM for (at
// most) 'ticks' ticks.
Tick
X86KvmCPU::kvmRun(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (interrupts[0]->checkInterruptsRaw()) {
        if (interrupts[0]->hasPendingUnmaskable()) {
            DPRINTF(KvmInt,
                    "Delivering unmaskable interrupt.\n");
            syncThreadContext();
            deliverInterrupts();
        } else if (kvm_run.ready_for_interrupt_injection) {
            // KVM claims that it is ready for an interrupt. It might
            // be lying if we just updated rflags and disabled
            // interrupts (e.g., by doing a CPU handover). Let's sync
            // the thread context and check if there are /really/
            // interrupts that should be delivered now.
            syncThreadContext();
            if (interrupts[0]->checkInterrupts(tc)) {
                DPRINTF(KvmInt,
                        "M5 has pending interrupts, delivering interrupt.\n");

                deliverInterrupts();
            } else {
                DPRINTF(KvmInt,
                        "Interrupt delivery delayed due to KVM confusion.\n");
                kvm_run.request_interrupt_window = 1;
            }
        } else if (!kvm_run.request_interrupt_window) {
            DPRINTF(KvmInt,
                    "M5 has pending interrupts, requesting interrupt "
                    "window.\n");
            kvm_run.request_interrupt_window = 1;
        }
    } else {
        kvm_run.request_interrupt_window = 0;
    }

    // The CPU might have been suspended as a result of the INIT
    // interrupt delivery hack. In that case, don't enter into KVM.
    if (_status == Idle)
        return 0;
    else
        return kvmRunWrapper(ticks);
}

// Run the vCPU while draining, making sure pending interrupts and IO
// get delivered so the architectural state becomes consistent.
Tick
X86KvmCPU::kvmRunDrain()
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (!archIsDrained()) {
        DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");

        // Tell KVM to find a suitable place to deliver interrupts. This
        // should ensure that pending interrupts have been delivered and
        // things are reasonably consistent (i.e., no interrupts pending
        // in the guest).
        kvm_run.request_interrupt_window = 1;

        // Limit the run to 1 millisecond. That is hopefully enough to
        // reach an interrupt window. Otherwise, we'll just try again
        // later.
        return kvmRunWrapper(1 * SimClock::Float::ms);
    } else {
        DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");

        return kvmRunWrapper(0);
    }
}

Tick
X86KvmCPU::kvmRunWrapper(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Synchronize the APIC base and CR8 here since they are present
    // in the kvm_run struct, which makes the synchronization really
    // cheap.
1264 kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE); 1265 kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8); 1266 1267 const Tick run_ticks(BaseKvmCPU::kvmRun(ticks)); 1268 1269 tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base); 1270 kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8); 1271 1272 return run_ticks; 1273} 1274 1275uint64_t 1276X86KvmCPU::getHostCycles() const 1277{ 1278 return getMSR(MSR_TSC); 1279} 1280 1281void 1282X86KvmCPU::handleIOMiscReg32(int miscreg) 1283{ 1284 struct kvm_run &kvm_run(*getKvmRunState()); 1285 const uint16_t port(kvm_run.io.port); 1286 1287 assert(kvm_run.exit_reason == KVM_EXIT_IO); 1288 1289 if (kvm_run.io.size != 4) { 1290 panic("Unexpected IO size (%u) for address 0x%x.\n", 1291 kvm_run.io.size, port); 1292 } 1293 1294 if (kvm_run.io.count != 1) { 1295 panic("Unexpected IO count (%u) for address 0x%x.\n", 1296 kvm_run.io.count, port); 1297 } 1298 1299 uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset)); 1300 if (kvm_run.io.direction == KVM_EXIT_IO_OUT) 1301 tc->setMiscReg(miscreg, *data); 1302 else 1303 *data = tc->readMiscRegNoEffect(miscreg); 1304} 1305 1306Tick 1307X86KvmCPU::handleKvmExitIO() 1308{ 1309 struct kvm_run &kvm_run(*getKvmRunState()); 1310 bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT); 1311 unsigned char *guestData(getGuestData(kvm_run.io.data_offset)); 1312 Tick delay(0); 1313 uint16_t port(kvm_run.io.port); 1314 Addr pAddr; 1315 const int count(kvm_run.io.count); 1316 1317 assert(kvm_run.io.direction == KVM_EXIT_IO_IN || 1318 kvm_run.io.direction == KVM_EXIT_IO_OUT); 1319 1320 DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n", 1321 (isWrite ? "out" : "in"), kvm_run.io.port); 1322 1323 /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we 1324 * don't use the TLB component, we need to intercept and handle 1325 * the PCI configuration space IO ports here. 1326 * 1327 * The IO port PCI discovery mechanism uses one address register 1328 * and one data register. 
We map the address register to a misc 1329 * reg and use that to re-route data register accesses to the 1330 * right location in the PCI configuration space. 1331 */ 1332 if (port == IO_PCI_CONF_ADDR) { 1333 handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS); 1334 return 0; 1335 } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) { 1336 Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS)); 1337 if (pciConfigAddr & 0x80000000) { 1338 pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) | 1339 (port & 0x3)); 1340 } else { 1341 pAddr = X86ISA::x86IOAddress(port); 1342 } 1343 } else { 1344 pAddr = X86ISA::x86IOAddress(port); 1345 } 1346 1347 const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq); 1348 // Temporarily lock and migrate to the event queue of the 1349 // VM. This queue is assumed to "own" all devices we need to 1350 // access if running in multi-core mode. 1351 EventQueue::ScopedMigration migrate(vm.eventQueue()); 1352 for (int i = 0; i < count; ++i) { 1353 RequestPtr io_req = new Request(pAddr, kvm_run.io.size, 1354 Request::UNCACHEABLE, dataMasterId()); 1355 io_req->setContext(tc->contextId()); 1356 1357 PacketPtr pkt = new Packet(io_req, cmd); 1358 1359 pkt->dataStatic(guestData); 1360 delay += dataPort.submitIO(pkt); 1361 1362 guestData += kvm_run.io.size; 1363 } 1364 1365 return delay; 1366} 1367 1368Tick 1369X86KvmCPU::handleKvmExitIRQWindowOpen() 1370{ 1371 // We don't need to do anything here since this is caught the next 1372 // time we execute kvmRun(). We still overload the exit event to 1373 // silence the warning about an unhandled exit event. 1374 return 0; 1375} 1376 1377bool 1378X86KvmCPU::archIsDrained() const 1379{ 1380 struct kvm_vcpu_events events; 1381 1382 getVCpuEvents(events); 1383 1384 // We could probably handle this in a by re-inserting interrupts 1385 // that are pending into gem5 on a drain. 
However, that would 1386 // probably be tricky to do reliably, so we'll just prevent a 1387 // drain if there is anything pending in the 1388 // guest. X86KvmCPU::kvmRunDrain() minimizes the amount of code 1389 // executed in the guest by requesting an interrupt window if 1390 // there are pending interrupts. 1391 const bool pending_events(events.exception.injected || 1392 events.interrupt.injected || 1393 events.nmi.injected || events.nmi.pending); 1394 1395 if (pending_events) { 1396 DPRINTF(Drain, "archIsDrained: Pending events: %s %s %s %s\n", 1397 events.exception.injected ? "exception" : "", 1398 events.interrupt.injected ? "interrupt" : "", 1399 events.nmi.injected ? "nmi[i]" : "", 1400 events.nmi.pending ? "nmi[p]" : ""); 1401 } 1402 1403 return !pending_events; 1404} 1405 1406static struct kvm_cpuid_entry2 1407makeKvmCpuid(uint32_t function, uint32_t index, 1408 CpuidResult &result) 1409{ 1410 struct kvm_cpuid_entry2 e; 1411 e.function = function; 1412 e.index = index; 1413 e.flags = 0; 1414 e.eax = (uint32_t)result.rax; 1415 e.ebx = (uint32_t)result.rbx; 1416 e.ecx = (uint32_t)result.rcx; 1417 e.edx = (uint32_t)result.rdx; 1418 1419 return e; 1420} 1421 1422void 1423X86KvmCPU::updateCPUID() 1424{ 1425 Kvm::CPUIDVector m5_supported; 1426 1427 /* TODO: We currently don't support any of the functions that 1428 * iterate through data structures in the CPU using an index. It's 1429 * currently not a problem since M5 doesn't expose any of them at 1430 * the moment. 
     */

    /* Basic features: standard CPUID functions 0x0 .. func0.rax */
    CpuidResult func0;
    X86ISA::doCpuid(tc, 0x0, 0, func0);
    // Function 0 returns the highest supported standard function in rax.
    for (uint32_t function = 0; function <= func0.rax; ++function) {
        CpuidResult cpuid;
        uint32_t idx(0);

        X86ISA::doCpuid(tc, function, idx, cpuid);
        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
    }

    /* Extended features: functions 0x80000000 .. efunc0.rax */
    CpuidResult efunc0;
    X86ISA::doCpuid(tc, 0x80000000, 0, efunc0);
    // Function 0x80000000 returns the highest extended function in rax.
    for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) {
        CpuidResult cpuid;
        uint32_t idx(0);

        X86ISA::doCpuid(tc, function, idx, cpuid);
        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
    }

    // Hand the complete table to the kernel.
    setCPUID(m5_supported);
}

// Install a raw CPUID table in the guest via KVM_SET_CPUID2.
void
X86KvmCPU::setCPUID(const struct kvm_cpuid2 &cpuid)
{
    if (ioctl(KVM_SET_CPUID2, (void *)&cpuid) == -1)
        panic("KVM: Failed to set guest CPUID2 (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setCPUID(const Kvm::CPUIDVector &cpuid)
{
    // kvm_cpuid2 is a variable-length struct: header followed by
    // nent entries, so allocate header + one entry per vector element.
    std::unique_ptr<struct kvm_cpuid2> kvm_cpuid(
        newVarStruct<struct kvm_cpuid2, struct kvm_cpuid_entry2>(cpuid.size()));

    kvm_cpuid->nent = cpuid.size();
    std::copy(cpuid.begin(), cpuid.end(), kvm_cpuid->entries);

    setCPUID(*kvm_cpuid);
}

// Write a batch of MSRs in the guest via KVM_SET_MSRS.
void
X86KvmCPU::setMSRs(const struct kvm_msrs &msrs)
{
    if (ioctl(KVM_SET_MSRS, (void *)&msrs) == -1)
        panic("KVM: Failed to set guest MSRs (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setMSRs(const KvmMSRVector &msrs)
{
    // kvm_msrs is a variable-length struct, same pattern as setCPUID
    // above.
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));

    kvm_msrs->nmsrs = msrs.size();
    std::copy(msrs.begin(), msrs.end(), kvm_msrs->entries);

    setMSRs(*kvm_msrs);
}

// Read the MSRs listed (by index) in msrs.entries from the guest;
// the kernel fills in the data fields in place.
void
X86KvmCPU::getMSRs(struct kvm_msrs &msrs) const
{
    if (ioctl(KVM_GET_MSRS, (void *)&msrs) == -1)
        panic("KVM: Failed to get guest MSRs 
(errno: %i)\n", 1503 errno); 1504} 1505 1506 1507void 1508X86KvmCPU::setMSR(uint32_t index, uint64_t value) 1509{ 1510 std::unique_ptr<struct kvm_msrs> kvm_msrs( 1511 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1)); 1512 struct kvm_msr_entry &entry(kvm_msrs->entries[0]); 1513 1514 kvm_msrs->nmsrs = 1; 1515 entry.index = index; 1516 entry.reserved = 0; 1517 entry.data = value; 1518 1519 setMSRs(*kvm_msrs.get()); 1520} 1521 1522uint64_t 1523X86KvmCPU::getMSR(uint32_t index) const 1524{ 1525 std::unique_ptr<struct kvm_msrs> kvm_msrs( 1526 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1)); 1527 struct kvm_msr_entry &entry(kvm_msrs->entries[0]); 1528 1529 kvm_msrs->nmsrs = 1; 1530 entry.index = index; 1531 entry.reserved = 0; 1532 entry.data = 0; 1533 1534 getMSRs(*kvm_msrs.get()); 1535 return entry.data; 1536} 1537 1538const Kvm::MSRIndexVector & 1539X86KvmCPU::getMsrIntersection() const 1540{ 1541 if (cachedMsrIntersection.empty()) { 1542 const Kvm::MSRIndexVector &kvm_msrs(vm.kvm->getSupportedMSRs()); 1543 1544 DPRINTF(Kvm, "kvm-x86: Updating MSR intersection\n"); 1545 for (auto it = kvm_msrs.cbegin(); it != kvm_msrs.cend(); ++it) { 1546 if (X86ISA::msrMap.find(*it) != X86ISA::msrMap.end()) { 1547 cachedMsrIntersection.push_back(*it); 1548 DPRINTF(Kvm, "kvm-x86: Adding MSR 0x%x\n", *it); 1549 } else { 1550 warn("kvm-x86: MSR (0x%x) unsupported by gem5. 
Skipping.\n", 1551 *it); 1552 } 1553 } 1554 } 1555 1556 return cachedMsrIntersection; 1557} 1558 1559void 1560X86KvmCPU::getDebugRegisters(struct kvm_debugregs ®s) const 1561{ 1562#ifdef KVM_GET_DEBUGREGS 1563 if (ioctl(KVM_GET_DEBUGREGS, ®s) == -1) 1564 panic("KVM: Failed to get guest debug registers\n"); 1565#else 1566 panic("KVM: Unsupported getDebugRegisters call.\n"); 1567#endif 1568} 1569 1570void 1571X86KvmCPU::setDebugRegisters(const struct kvm_debugregs ®s) 1572{ 1573#ifdef KVM_SET_DEBUGREGS 1574 if (ioctl(KVM_SET_DEBUGREGS, (void *)®s) == -1) 1575 panic("KVM: Failed to set guest debug registers\n"); 1576#else 1577 panic("KVM: Unsupported setDebugRegisters call.\n"); 1578#endif 1579} 1580 1581void 1582X86KvmCPU::getXCRs(struct kvm_xcrs ®s) const 1583{ 1584 if (ioctl(KVM_GET_XCRS, ®s) == -1) 1585 panic("KVM: Failed to get guest debug registers\n"); 1586} 1587 1588void 1589X86KvmCPU::setXCRs(const struct kvm_xcrs ®s) 1590{ 1591 if (ioctl(KVM_SET_XCRS, (void *)®s) == -1) 1592 panic("KVM: Failed to set guest debug registers\n"); 1593} 1594 1595void 1596X86KvmCPU::getXSave(struct kvm_xsave &xsave) const 1597{ 1598 if (ioctl(KVM_GET_XSAVE, &xsave) == -1) 1599 panic("KVM: Failed to get guest debug registers\n"); 1600} 1601 1602void 1603X86KvmCPU::setXSave(const struct kvm_xsave &xsave) 1604{ 1605 if (ioctl(KVM_SET_XSAVE, (void *)&xsave) == -1) 1606 panic("KVM: Failed to set guest debug registers\n"); 1607} 1608 1609 1610void 1611X86KvmCPU::getVCpuEvents(struct kvm_vcpu_events &events) const 1612{ 1613 if (ioctl(KVM_GET_VCPU_EVENTS, &events) == -1) 1614 panic("KVM: Failed to get guest debug registers\n"); 1615} 1616 1617void 1618X86KvmCPU::setVCpuEvents(const struct kvm_vcpu_events &events) 1619{ 1620 if (ioctl(KVM_SET_VCPU_EVENTS, (void *)&events) == -1) 1621 panic("KVM: Failed to set guest debug registers\n"); 1622} 1623 1624X86KvmCPU * 1625X86KvmCPUParams::create() 1626{ 1627 return new X86KvmCPU(this); 1628}
| 44#include "debug/Drain.hh" 45#include "debug/Kvm.hh" 46#include "debug/KvmContext.hh" 47#include "debug/KvmIO.hh" 48#include "debug/KvmInt.hh" 49 50using namespace X86ISA; 51 52#define MSR_TSC 0x10 53 54#define IO_PCI_CONF_ADDR 0xCF8 55#define IO_PCI_CONF_DATA_BASE 0xCFC 56 57// Task segment type of an inactive 32-bit or 64-bit task 58#define SEG_SYS_TYPE_TSS_AVAILABLE 9 59// Task segment type of an active 32-bit or 64-bit task 60#define SEG_SYS_TYPE_TSS_BUSY 11 61 62// Non-conforming accessed code segment 63#define SEG_CS_TYPE_ACCESSED 9 64// Non-conforming accessed code segment that can be read 65#define SEG_CS_TYPE_READ_ACCESSED 11 66 67// The lowest bit of the type field for normal segments (code and 68// data) is used to indicate that a segment has been accessed. 69#define SEG_TYPE_BIT_ACCESSED 1 70 71struct FXSave 72{ 73 uint16_t fcw; 74 uint16_t fsw; 75 uint8_t ftwx; 76 uint8_t pad0; 77 uint16_t last_opcode; 78 union { 79 struct { 80 uint32_t fpu_ip; 81 uint16_t fpu_cs; 82 uint16_t pad1; 83 uint32_t fpu_dp; 84 uint16_t fpu_ds; 85 uint16_t pad2; 86 } ctrl32; 87 88 struct { 89 uint64_t fpu_ip; 90 uint64_t fpu_dp; 91 } ctrl64; 92 }; 93 uint32_t mxcsr; 94 uint32_t mxcsr_mask; 95 96 uint8_t fpr[8][16]; 97 uint8_t xmm[16][16]; 98 99 uint64_t reserved[12]; 100} M5_ATTR_PACKED; 101 102static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave"); 103 104#define FOREACH_IREG() \ 105 do { \ 106 APPLY_IREG(rax, INTREG_RAX); \ 107 APPLY_IREG(rbx, INTREG_RBX); \ 108 APPLY_IREG(rcx, INTREG_RCX); \ 109 APPLY_IREG(rdx, INTREG_RDX); \ 110 APPLY_IREG(rsi, INTREG_RSI); \ 111 APPLY_IREG(rdi, INTREG_RDI); \ 112 APPLY_IREG(rsp, INTREG_RSP); \ 113 APPLY_IREG(rbp, INTREG_RBP); \ 114 APPLY_IREG(r8, INTREG_R8); \ 115 APPLY_IREG(r9, INTREG_R9); \ 116 APPLY_IREG(r10, INTREG_R10); \ 117 APPLY_IREG(r11, INTREG_R11); \ 118 APPLY_IREG(r12, INTREG_R12); \ 119 APPLY_IREG(r13, INTREG_R13); \ 120 APPLY_IREG(r14, INTREG_R14); \ 121 APPLY_IREG(r15, INTREG_R15); \ 122 } while (0) 123 
124#define FOREACH_SREG() \ 125 do { \ 126 APPLY_SREG(cr0, MISCREG_CR0); \ 127 APPLY_SREG(cr2, MISCREG_CR2); \ 128 APPLY_SREG(cr3, MISCREG_CR3); \ 129 APPLY_SREG(cr4, MISCREG_CR4); \ 130 APPLY_SREG(cr8, MISCREG_CR8); \ 131 APPLY_SREG(efer, MISCREG_EFER); \ 132 APPLY_SREG(apic_base, MISCREG_APIC_BASE); \ 133 } while (0) 134 135#define FOREACH_DREG() \ 136 do { \ 137 APPLY_DREG(db[0], MISCREG_DR0); \ 138 APPLY_DREG(db[1], MISCREG_DR1); \ 139 APPLY_DREG(db[2], MISCREG_DR2); \ 140 APPLY_DREG(db[3], MISCREG_DR3); \ 141 APPLY_DREG(dr6, MISCREG_DR6); \ 142 APPLY_DREG(dr7, MISCREG_DR7); \ 143 } while (0) 144 145#define FOREACH_SEGMENT() \ 146 do { \ 147 APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE); \ 148 APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE); \ 149 APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE); \ 150 APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE); \ 151 APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE); \ 152 APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE); \ 153 APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE); \ 154 APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \ 155 } while (0) 156 157#define FOREACH_DTABLE() \ 158 do { \ 159 APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE); \ 160 APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \ 161 } while (0) 162 163template<typename STRUCT, typename ENTRY> 164static STRUCT *newVarStruct(size_t entries) 165{ 166 return (STRUCT *)operator new(sizeof(STRUCT) + entries * sizeof(ENTRY)); 167} 168 169static void 170dumpKvm(const struct kvm_regs ®s) 171{ 172 inform("KVM register state:\n"); 173 174#define APPLY_IREG(kreg, mreg) \ 175 inform("\t" # kreg ": 0x%llx\n", regs.kreg) 176 177 FOREACH_IREG(); 178 179#undef APPLY_IREG 180 181 inform("\trip: 0x%llx\n", regs.rip); 182 inform("\trflags: 0x%llx\n", regs.rflags); 183} 184 185static void 186dumpKvm(const char *reg_name, const struct kvm_segment &seg) 187{ 188 inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n" 189 
"\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n", 190 reg_name, 191 seg.base, seg.limit, seg.selector, seg.type, 192 seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl, seg.unusable); 193} 194 195static void 196dumpKvm(const char *reg_name, const struct kvm_dtable &dtable) 197{ 198 inform("\t%s: @0x%llx+%x\n", 199 reg_name, dtable.base, dtable.limit); 200} 201 202static void 203dumpKvm(const struct kvm_sregs &sregs) 204{ 205#define APPLY_SREG(kreg, mreg) \ 206 inform("\t" # kreg ": 0x%llx\n", sregs.kreg); 207#define APPLY_SEGMENT(kreg, idx) \ 208 dumpKvm(# kreg, sregs.kreg); 209#define APPLY_DTABLE(kreg, idx) \ 210 dumpKvm(# kreg, sregs.kreg); 211 212 inform("Special registers:\n"); 213 FOREACH_SEGMENT(); 214 FOREACH_SREG(); 215 FOREACH_DTABLE(); 216 217 inform("Interrupt Bitmap:"); 218 for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64) 219 inform(" 0x%.8x", sregs.interrupt_bitmap[i / 64]); 220 221#undef APPLY_SREG 222#undef APPLY_SEGMENT 223#undef APPLY_DTABLE 224} 225 226#ifdef KVM_GET_DEBUGREGS 227static void 228dumpKvm(const struct kvm_debugregs ®s) 229{ 230 inform("KVM debug state:\n"); 231 232#define APPLY_DREG(kreg, mreg) \ 233 inform("\t" # kreg ": 0x%llx\n", regs.kreg) 234 235 FOREACH_DREG(); 236 237#undef APPLY_DREG 238 239 inform("\tflags: 0x%llx\n", regs.flags); 240} 241#endif 242 243static void 244dumpFpuSpec(const struct FXSave &xs) 245{ 246 inform("\tlast_ip: 0x%x\n", xs.ctrl64.fpu_ip); 247 inform("\tlast_dp: 0x%x\n", xs.ctrl64.fpu_dp); 248 inform("\tmxcsr_mask: 0x%x\n", xs.mxcsr_mask); 249} 250 251static void 252dumpFpuSpec(const struct kvm_fpu &fpu) 253{ 254 inform("\tlast_ip: 0x%x\n", fpu.last_ip); 255 inform("\tlast_dp: 0x%x\n", fpu.last_dp); 256} 257 258template<typename T> 259static void 260dumpFpuCommon(const T &fpu) 261{ 262 const unsigned top((fpu.fsw >> 11) & 0x7); 263 inform("\tfcw: 0x%x\n", fpu.fcw); 264 265 inform("\tfsw: 0x%x (top: %i, " 266 "conditions: %s%s%s%s, exceptions: %s%s%s%s%s%s %s%s%s)\n", 267 
fpu.fsw, top, 268 269 (fpu.fsw & CC0Bit) ? "C0" : "", 270 (fpu.fsw & CC1Bit) ? "C1" : "", 271 (fpu.fsw & CC2Bit) ? "C2" : "", 272 (fpu.fsw & CC3Bit) ? "C3" : "", 273 274 (fpu.fsw & IEBit) ? "I" : "", 275 (fpu.fsw & DEBit) ? "D" : "", 276 (fpu.fsw & ZEBit) ? "Z" : "", 277 (fpu.fsw & OEBit) ? "O" : "", 278 (fpu.fsw & UEBit) ? "U" : "", 279 (fpu.fsw & PEBit) ? "P" : "", 280 281 (fpu.fsw & StackFaultBit) ? "SF " : "", 282 (fpu.fsw & ErrSummaryBit) ? "ES " : "", 283 (fpu.fsw & BusyBit) ? "BUSY " : "" 284 ); 285 inform("\tftwx: 0x%x\n", fpu.ftwx); 286 inform("\tlast_opcode: 0x%x\n", fpu.last_opcode); 287 dumpFpuSpec(fpu); 288 inform("\tmxcsr: 0x%x\n", fpu.mxcsr); 289 inform("\tFP Stack:\n"); 290 for (int i = 0; i < 8; ++i) { 291 const unsigned reg_idx((i + top) & 0x7); 292 const bool empty(!((fpu.ftwx >> reg_idx) & 0x1)); 293 const double value(X86ISA::loadFloat80(fpu.fpr[i])); 294 char hex[33]; 295 for (int j = 0; j < 10; ++j) 296 snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]); 297 inform("\t\tST%i/%i: 0x%s (%f)%s\n", i, reg_idx, 298 hex, value, empty ? 
" (e)" : ""); 299 } 300 inform("\tXMM registers:\n"); 301 for (int i = 0; i < 16; ++i) { 302 char hex[33]; 303 for (int j = 0; j < 16; ++j) 304 snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]); 305 inform("\t\t%i: 0x%s\n", i, hex); 306 } 307} 308 309static void 310dumpKvm(const struct kvm_fpu &fpu) 311{ 312 inform("FPU registers:\n"); 313 dumpFpuCommon(fpu); 314} 315 316static void 317dumpKvm(const struct kvm_xsave &xsave) 318{ 319 inform("FPU registers (XSave):\n"); 320 dumpFpuCommon(*(FXSave *)xsave.region); 321} 322 323static void 324dumpKvm(const struct kvm_msrs &msrs) 325{ 326 inform("MSRs:\n"); 327 328 for (int i = 0; i < msrs.nmsrs; ++i) { 329 const struct kvm_msr_entry &e(msrs.entries[i]); 330 331 inform("\t0x%x: 0x%x\n", e.index, e.data); 332 } 333} 334 335static void 336dumpKvm(const struct kvm_xcrs ®s) 337{ 338 inform("KVM XCR registers:\n"); 339 340 inform("\tFlags: 0x%x\n", regs.flags); 341 for (int i = 0; i < regs.nr_xcrs; ++i) { 342 inform("\tXCR[0x%x]: 0x%x\n", 343 regs.xcrs[i].xcr, 344 regs.xcrs[i].value); 345 } 346} 347 348static void 349dumpKvm(const struct kvm_vcpu_events &events) 350{ 351 inform("vCPU events:\n"); 352 353 inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n", 354 events.exception.injected, events.exception.nr, 355 events.exception.has_error_code, events.exception.error_code); 356 357 inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n", 358 events.interrupt.injected, events.interrupt.nr, 359 events.interrupt.soft); 360 361 inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n", 362 events.nmi.injected, events.nmi.pending, 363 events.nmi.masked); 364 365 inform("\tSIPI vector: 0x%x\n", events.sipi_vector); 366 inform("\tFlags: 0x%x\n", events.flags); 367} 368 369static bool 370isCanonicalAddress(uint64_t addr) 371{ 372 // x86-64 doesn't currently use the full 64-bit virtual address 373 // space, instead it uses signed 48 bit addresses that are 374 // sign-extended to 64 bits. Such addresses are known as 375 // "canonical". 
376 uint64_t upper_half(addr & 0xffff800000000000ULL); 377 return upper_half == 0 || upper_half == 0xffff800000000000; 378} 379 380static void 381checkSeg(const char *name, const int idx, const struct kvm_segment &seg, 382 struct kvm_sregs sregs) 383{ 384 // Check the register base 385 switch (idx) { 386 case MISCREG_TSL: 387 case MISCREG_TR: 388 case MISCREG_FS: 389 case MISCREG_GS: 390 if (!isCanonicalAddress(seg.base)) 391 warn("Illegal %s base: 0x%x\n", name, seg.base); 392 break; 393 394 case MISCREG_SS: 395 case MISCREG_DS: 396 case MISCREG_ES: 397 if (seg.unusable) 398 break; 399 case MISCREG_CS: 400 if (seg.base & 0xffffffff00000000ULL) 401 warn("Illegal %s base: 0x%x\n", name, seg.base); 402 break; 403 } 404 405 // Check the type 406 switch (idx) { 407 case MISCREG_CS: 408 switch (seg.type) { 409 case 3: 410 if (seg.dpl != 0) 411 warn("CS type is 3 but dpl != 0.\n"); 412 break; 413 case 9: 414 case 11: 415 if (seg.dpl != sregs.ss.dpl) 416 warn("CS type is %i but CS DPL != SS DPL\n", seg.type); 417 break; 418 case 13: 419 case 15: 420 if (seg.dpl > sregs.ss.dpl) 421 warn("CS type is %i but CS DPL > SS DPL\n", seg.type); 422 break; 423 default: 424 warn("Illegal CS type: %i\n", seg.type); 425 break; 426 } 427 break; 428 429 case MISCREG_SS: 430 if (seg.unusable) 431 break; 432 switch (seg.type) { 433 case 3: 434 if (sregs.cs.type == 3 && seg.dpl != 0) 435 warn("CS type is 3, but SS DPL is != 0.\n"); 436 /* FALLTHROUGH */ 437 case 7: 438 if (!(sregs.cr0 & 1) && seg.dpl != 0) 439 warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl); 440 break; 441 default: 442 warn("Illegal SS type: %i\n", seg.type); 443 break; 444 } 445 break; 446 447 case MISCREG_DS: 448 case MISCREG_ES: 449 case MISCREG_FS: 450 case MISCREG_GS: 451 if (seg.unusable) 452 break; 453 if (!(seg.type & 0x1) || 454 ((seg.type & 0x8) && !(seg.type & 0x2))) 455 warn("%s has an illegal type field: %i\n", name, seg.type); 456 break; 457 458 case MISCREG_TR: 459 // TODO: We should check the CPU mode 460 
if (seg.type != 3 && seg.type != 11) 461 warn("%s: Illegal segment type (%i)\n", name, seg.type); 462 break; 463 464 case MISCREG_TSL: 465 if (seg.unusable) 466 break; 467 if (seg.type != 2) 468 warn("%s: Illegal segment type (%i)\n", name, seg.type); 469 break; 470 } 471 472 switch (idx) { 473 case MISCREG_SS: 474 case MISCREG_DS: 475 case MISCREG_ES: 476 case MISCREG_FS: 477 case MISCREG_GS: 478 if (seg.unusable) 479 break; 480 case MISCREG_CS: 481 if (!seg.s) 482 warn("%s: S flag not set\n", name); 483 break; 484 485 case MISCREG_TSL: 486 if (seg.unusable) 487 break; 488 case MISCREG_TR: 489 if (seg.s) 490 warn("%s: S flag is set\n", name); 491 break; 492 } 493 494 switch (idx) { 495 case MISCREG_SS: 496 case MISCREG_DS: 497 case MISCREG_ES: 498 case MISCREG_FS: 499 case MISCREG_GS: 500 case MISCREG_TSL: 501 if (seg.unusable) 502 break; 503 case MISCREG_TR: 504 case MISCREG_CS: 505 if (!seg.present) 506 warn("%s: P flag not set\n", name); 507 508 if (((seg.limit & 0xFFF) == 0 && seg.g) || 509 ((seg.limit & 0xFFF00000) != 0 && !seg.g)) { 510 warn("%s limit (0x%x) and g (%i) combination is illegal.\n", 511 name, seg.limit, seg.g); 512 } 513 break; 514 } 515 516 // TODO: Check CS DB 517} 518 519X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params) 520 : BaseKvmCPU(params), 521 useXSave(params->useXSave) 522{ 523 Kvm &kvm(*vm.kvm); 524 525 if (!kvm.capSetTSSAddress()) 526 panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n"); 527 if (!kvm.capExtendedCPUID()) 528 panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n"); 529 if (!kvm.capUserNMI()) 530 warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n"); 531 if (!kvm.capVCPUEvents()) 532 warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n"); 533 534 haveDebugRegs = kvm.capDebugRegs(); 535 haveXSave = kvm.capXSave(); 536 haveXCRs = kvm.capXCRs(); 537 538 if (useXSave && !haveXSave) { 539 warn("KVM: XSAVE not supported by host. 
MXCSR synchronization might be " 540 "unreliable due to kernel bugs.\n"); 541 useXSave = false; 542 } else if (!useXSave) { 543 warn("KVM: XSave FPU/SIMD synchronization disabled by user.\n"); 544 } 545} 546 547X86KvmCPU::~X86KvmCPU() 548{ 549} 550 551void 552X86KvmCPU::startup() 553{ 554 BaseKvmCPU::startup(); 555 556 updateCPUID(); 557 558 // TODO: Do we need to create an identity mapped TSS area? We 559 // should call kvm.vm.setTSSAddress() here in that case. It should 560 // only be needed for old versions of the virtualization 561 // extensions. We should make sure that the identity range is 562 // reserved in the e820 memory map in that case. 563} 564 565void 566X86KvmCPU::dump() const 567{ 568 dumpIntRegs(); 569 if (useXSave) 570 dumpXSave(); 571 else 572 dumpFpuRegs(); 573 dumpSpecRegs(); 574 dumpDebugRegs(); 575 dumpXCRs(); 576 dumpVCpuEvents(); 577 dumpMSRs(); 578} 579 580void 581X86KvmCPU::dumpFpuRegs() const 582{ 583 struct kvm_fpu fpu; 584 getFPUState(fpu); 585 dumpKvm(fpu); 586} 587 588void 589X86KvmCPU::dumpIntRegs() const 590{ 591 struct kvm_regs regs; 592 getRegisters(regs); 593 dumpKvm(regs); 594} 595 596void 597X86KvmCPU::dumpSpecRegs() const 598{ 599 struct kvm_sregs sregs; 600 getSpecialRegisters(sregs); 601 dumpKvm(sregs); 602} 603 604void 605X86KvmCPU::dumpDebugRegs() const 606{ 607 if (haveDebugRegs) { 608#ifdef KVM_GET_DEBUGREGS 609 struct kvm_debugregs dregs; 610 getDebugRegisters(dregs); 611 dumpKvm(dregs); 612#endif 613 } else { 614 inform("Debug registers not supported by kernel.\n"); 615 } 616} 617 618void 619X86KvmCPU::dumpXCRs() const 620{ 621 if (haveXCRs) { 622 struct kvm_xcrs xcrs; 623 getXCRs(xcrs); 624 dumpKvm(xcrs); 625 } else { 626 inform("XCRs not supported by kernel.\n"); 627 } 628} 629 630void 631X86KvmCPU::dumpXSave() const 632{ 633 if (haveXSave) { 634 struct kvm_xsave xsave; 635 getXSave(xsave); 636 dumpKvm(xsave); 637 } else { 638 inform("XSave not supported by kernel.\n"); 639 } 640} 641 642void 
643X86KvmCPU::dumpVCpuEvents() const 644{ 645 struct kvm_vcpu_events events; 646 getVCpuEvents(events); 647 dumpKvm(events); 648} 649 650void 651X86KvmCPU::dumpMSRs() const 652{ 653 const Kvm::MSRIndexVector &supported_msrs(vm.kvm->getSupportedMSRs()); 654 std::unique_ptr<struct kvm_msrs> msrs( 655 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>( 656 supported_msrs.size())); 657 658 msrs->nmsrs = supported_msrs.size(); 659 for (int i = 0; i < supported_msrs.size(); ++i) { 660 struct kvm_msr_entry &e(msrs->entries[i]); 661 e.index = supported_msrs[i]; 662 e.reserved = 0; 663 e.data = 0; 664 } 665 getMSRs(*msrs.get()); 666 667 dumpKvm(*msrs.get()); 668} 669 670void 671X86KvmCPU::updateKvmState() 672{ 673 updateKvmStateRegs(); 674 updateKvmStateSRegs(); 675 updateKvmStateFPU(); 676 updateKvmStateMSRs(); 677 678 DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n"); 679 if (DTRACE(KvmContext)) 680 dump(); 681} 682 683void 684X86KvmCPU::updateKvmStateRegs() 685{ 686 struct kvm_regs regs; 687 688#define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg) 689 FOREACH_IREG(); 690#undef APPLY_IREG 691 692 regs.rip = tc->instAddr() - tc->readMiscReg(MISCREG_CS_BASE); 693 694 /* You might think that setting regs.rflags to the contents 695 * MISCREG_RFLAGS here would suffice. In that case you're 696 * mistaken. We need to reconstruct it from a bunch of ucode 697 * registers and wave a dead chicken over it (aka mask out and set 698 * reserved bits) to get it to work. 
699 */ 700 regs.rflags = X86ISA::getRFlags(tc); 701 702 setRegisters(regs); 703} 704 705static inline void 706setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg, 707 const int index) 708{ 709 SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index))); 710 711 kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index)); 712 kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index)); 713 kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index)); 714 kvm_seg.type = attr.type; 715 kvm_seg.present = attr.present; 716 kvm_seg.dpl = attr.dpl; 717 kvm_seg.db = attr.defaultSize; 718 kvm_seg.s = attr.system; 719 kvm_seg.l = attr.longMode; 720 kvm_seg.g = attr.granularity; 721 kvm_seg.avl = attr.avl; 722 723 // A segment is normally unusable when the selector is zero. There 724 // is a attr.unusable flag in gem5, but it seems unused. qemu 725 // seems to set this to 0 all the time, so we just do the same and 726 // hope for the best. 727 kvm_seg.unusable = 0; 728} 729 730static inline void 731setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable, 732 const int index) 733{ 734 kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index)); 735 kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index)); 736} 737 738static void 739forceSegAccessed(struct kvm_segment &seg) 740{ 741 // Intel's VMX requires that (some) usable segments are flagged as 742 // 'accessed' (i.e., the lowest bit in the segment type is set) 743 // when entering VMX. This wouldn't necessary be the case even if 744 // gem5 did set the access bits correctly, so we force it to one 745 // in that case. 
    // Only usable segments get the accessed bit; unusable segments
    // are left untouched.
    if (!seg.unusable)
        seg.type |= SEG_TYPE_BIT_ACCESSED;
}

/**
 * Copy gem5's special register state (control registers, EFER, APIC
 * base, segment registers and descriptor tables) into the KVM vCPU,
 * fixing up combinations that VMX refuses to load.
 */
void
X86KvmCPU::updateKvmStateSRegs()
{
    struct kvm_sregs sregs;

#define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg)
#define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx)

    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE

    // Clear the interrupt bitmap
    memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));

    // VMX requires CS, SS, DS, ES, FS, and GS to have the accessed
    // bit in the type field set.
    forceSegAccessed(sregs.cs);
    forceSegAccessed(sregs.ss);
    forceSegAccessed(sregs.ds);
    forceSegAccessed(sregs.es);
    forceSegAccessed(sregs.fs);
    forceSegAccessed(sregs.gs);

    // There are currently some cases where the active task isn't
    // marked as busy. This is illegal in VMX, so we force it to busy.
    if (sregs.tr.type == SEG_SYS_TYPE_TSS_AVAILABLE) {
        hack("tr.type (%i) is not busy. Forcing the busy bit.\n",
             sregs.tr.type);
        sregs.tr.type = SEG_SYS_TYPE_TSS_BUSY;
    }

    // VMX requires the DPL of SS and CS to be the same for
    // non-conforming code segments. It seems like m5 doesn't set the
    // DPL of SS correctly when taking interrupts, so we need to fix
    // that here.
    if ((sregs.cs.type == SEG_CS_TYPE_ACCESSED ||
         sregs.cs.type == SEG_CS_TYPE_READ_ACCESSED) &&
        sregs.cs.dpl != sregs.ss.dpl) {

        hack("CS.DPL (%i) != SS.DPL (%i): Forcing SS.DPL to %i\n",
             sregs.cs.dpl, sregs.ss.dpl, sregs.cs.dpl);
        sregs.ss.dpl = sregs.cs.dpl;
    }

    // Do checks after fixing up the state to avoid getting excessive
    // amounts of warnings.
    RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS));
    if (!rflags_nocc.vm) {
        // Do segment verification if the CPU isn't entering virtual
        // 8086 mode.  We currently assume that unrestricted guest
        // mode is available.

#define APPLY_SEGMENT(kreg, idx) \
        checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs)

        FOREACH_SEGMENT();
#undef APPLY_SEGMENT
    }

    setSpecialRegisters(sregs);
}

/**
 * Copy the gem5 x87/SSE state into a KVM FPU structure.
 *
 * @param tc Thread context to read the state from.
 * @param fpu Either a struct kvm_fpu or an FXSave area (both expose
 *            the field names used here).
 *
 * Note: FISEG/FOSEG and the instruction/data pointers are NOT handled
 * here since the two supported layouts store them differently; the
 * callers (updateKvmStateFPULegacy / updateKvmStateFPUXSave) fill
 * those in.
 */
template <typename T>
static void
updateKvmStateFPUCommon(ThreadContext *tc, T &fpu)
{
    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    fpu.mxcsr = tc->readMiscRegNoEffect(MISCREG_MXCSR);
    fpu.fcw = tc->readMiscRegNoEffect(MISCREG_FCW);
    // No need to rebuild from MISCREG_FSW and MISCREG_TOP if we read
    // with effects.
    fpu.fsw = tc->readMiscReg(MISCREG_FSW);

    // KVM uses the abridged (1 bit per register) x87 tag word format.
    uint64_t ftw(tc->readMiscRegNoEffect(MISCREG_FTW));
    fpu.ftwx = X86ISA::convX87TagsToXTags(ftw);

    fpu.last_opcode = tc->readMiscRegNoEffect(MISCREG_FOP);

    // TOP lives in FSW bits 13:11. gem5 stores the x87 stack in
    // physical register order; KVM's fpr[] is in stack (st0..st7)
    // order, so rotate by TOP while converting to 80-bit format.
    const unsigned top((fpu.fsw >> 11) & 0x7);
    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(tc->readFloatReg(FLOATREG_FPR(reg_idx)));
        DPRINTF(KvmContext, "Setting KVM FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        X86ISA::storeFloat80(fpu.fpr[i], value);
    }

    // TODO: We should update the MMX state

    // Each 128-bit XMM register is split across two 64-bit gem5
    // float registers.
    for (int i = 0; i < 16; ++i) {
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][0] =
            tc->readFloatRegBits(FLOATREG_XMM_LOW(i));
        *(X86ISA::FloatRegBits *)&fpu.xmm[i][8] =
            tc->readFloatRegBits(FLOATREG_XMM_HIGH(i));
    }
}

/**
 * Copy the gem5 FPU state into KVM using the legacy KVM_SET_FPU
 * (struct kvm_fpu) interface. Only the 32-bit FP instruction/data
 * offsets are transferred; non-zero FISEG/FOSEG selectors are not
 * representable here and only trigger a warning.
 */
void
X86KvmCPU::updateKvmStateFPULegacy()
{
    struct kvm_fpu fpu;

    // There is some padding in the FP registers, so we'd better zero
    // the whole struct.
    memset(&fpu, 0, sizeof(fpu));

    updateKvmStateFPUCommon(tc, fpu);

    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    fpu.last_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    fpu.last_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setFPUState(fpu);
}

/**
 * Copy the gem5 FPU state into KVM using the KVM_SET_XSAVE
 * interface. The FX save area at the start of the xsave region has
 * the same layout as our FXSave struct, so the common helper can fill
 * in most of it; the 64-bit instruction/data pointers are set here.
 */
void
X86KvmCPU::updateKvmStateFPUXSave()
{
    struct kvm_xsave kxsave;
    FXSave &xsave(*(FXSave *)kxsave.region);

    // There is some padding and reserved fields in the structure, so
    // we'd better zero the whole thing.
    memset(&kxsave, 0, sizeof(kxsave));

    updateKvmStateFPUCommon(tc, xsave);

    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
        warn_once("MISCREG_FISEG is non-zero.\n");

    xsave.ctrl64.fpu_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);

    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
        warn_once("MISCREG_FOSEG is non-zero.\n");

    xsave.ctrl64.fpu_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);

    setXSave(kxsave);
}

/**
 * Copy the gem5 FPU state into KVM, using XSAVE if the host kernel
 * supports it and the legacy FPU interface otherwise.
 */
void
X86KvmCPU::updateKvmStateFPU()
{
    if (useXSave)
        updateKvmStateFPUXSave();
    else
        updateKvmStateFPULegacy();
}

/**
 * Copy all MSRs that both gem5 and KVM know about (see
 * getMsrIntersection()) from gem5 into the KVM vCPU.
 */
void
X86KvmCPU::updateKvmStateMSRs()
{
    KvmMSRVector msrs;

    const Kvm::MSRIndexVector &indices(getMsrIntersection());

    for (auto it = indices.cbegin(); it != indices.cend(); ++it) {
        struct kvm_msr_entry e;

        e.index = *it;
        e.reserved = 0;
        e.data = tc->readMiscReg(msrMap.at(*it));
        DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n",
                e.index, e.data);

        msrs.push_back(e);
    }

    setMSRs(msrs);
}

/**
 * Copy the complete KVM vCPU state (integer registers, special
 * registers, FPU state and MSRs) back into the gem5 thread context.
 */
void
X86KvmCPU::updateThreadContext()
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;

    getRegisters(regs);
    getSpecialRegisters(sregs);

    DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n");
    if
(DTRACE(KvmContext))
        dump();

    // sregs is needed by the register update as well (CS base for
    // the PC), so it is fetched once and passed to both helpers.
    updateThreadContextRegs(regs, sregs);
    updateThreadContextSRegs(sregs);
    if (useXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);

        updateThreadContextXSave(xsave);
    } else {
        struct kvm_fpu fpu;
        getFPUState(fpu);

        updateThreadContextFPU(fpu);
    }
    updateThreadContextMSRs();

    // The M5 misc reg caches some values from other
    // registers. Writing to it with side effects causes it to be
    // updated from its source registers.
    tc->setMiscReg(MISCREG_M5_REG, 0);
}

/**
 * Copy the KVM integer register file, PC and flags into the gem5
 * thread context.
 *
 * @param regs KVM integer register state.
 * @param sregs KVM special register state; only cs.base is used, to
 *              convert RIP into gem5's flat PC.
 */
void
X86KvmCPU::updateThreadContextRegs(const struct kvm_regs &regs,
                                   const struct kvm_sregs &sregs)
{
#define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    tc->pcState(PCState(regs.rip + sregs.cs.base));

    // Flags are spread out across multiple semi-magic registers so we
    // need some special care when updating them.
    X86ISA::setRFlags(tc, regs.rflags);
}


/**
 * Copy a KVM segment register (selector, base, limit and attributes)
 * into the gem5 misc-reg file for the segment at @p index.
 */
inline void
setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
                  const int index)
{
    // Repack the individual KVM attribute bits into gem5's SegAttr
    // bit-union.
    SegAttr attr(0);

    attr.type = kvm_seg.type;
    attr.present = kvm_seg.present;
    attr.dpl = kvm_seg.dpl;
    attr.defaultSize = kvm_seg.db;
    attr.system = kvm_seg.s;
    attr.longMode = kvm_seg.l;
    attr.granularity = kvm_seg.g;
    attr.avl = kvm_seg.avl;
    attr.unusable = kvm_seg.unusable;

    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
    tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
    tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
}

/**
 * Copy a KVM descriptor table register (GDTR/IDTR: base and limit
 * only) into the gem5 misc-reg file for the table at @p index.
 */
inline void
setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
                  const int index)
{
    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
}

/**
 * Copy the KVM special register state (control registers, segments
 * and descriptor tables) into the gem5 thread context.
 */
void
X86KvmCPU::updateThreadContextSRegs(const struct kvm_sregs &sregs)
{
    // APIC base and CR8 are mirrored in the kvm_run structure (see
    // kvmRunWrapper()); make sure the two views agree.
    assert(getKvmRunState()->apic_base == sregs.apic_base);
    assert(getKvmRunState()->cr8 == sregs.cr8);

#define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
#define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();
#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

/**
 * Copy a KVM FPU structure (struct kvm_fpu or FXSave) into the gem5
 * thread context. Mirror image of updateKvmStateFPUCommon(); the
 * instruction/data pointers are handled by the callers since their
 * layout differs between the two structures.
 */
template<typename T>
static void
updateThreadContextFPUCommon(ThreadContext *tc, const T &fpu)
{
    // TOP lives in FSW bits 13:11; KVM's fpr[] is in stack order, so
    // rotate back into gem5's physical register order.
    const unsigned top((fpu.fsw >> 11) & 0x7);

    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
                  "Unexpected size of X86ISA::FloatRegBits");

    for (int i = 0; i < 8; ++i) {
        const unsigned reg_idx((i + top) & 0x7);
        const double value(X86ISA::loadFloat80(fpu.fpr[i]));
        DPRINTF(KvmContext, "Setting gem5 FP reg %i (st[%i]) := %f\n",
                reg_idx, i, value);
        tc->setFloatReg(FLOATREG_FPR(reg_idx), value);
    }

    // TODO: We should update the MMX state

    tc->setMiscRegNoEffect(MISCREG_X87_TOP, top);
    tc->setMiscRegNoEffect(MISCREG_MXCSR, fpu.mxcsr);
    tc->setMiscRegNoEffect(MISCREG_FCW, fpu.fcw);
    tc->setMiscRegNoEffect(MISCREG_FSW, fpu.fsw);

    // Expand KVM's abridged (1 bit per register) tag word into the
    // full 2-bit-per-register x87 format.
    uint64_t ftw(convX87XTagsToTags(fpu.ftwx));
    // TODO: Are these registers really the same?
    tc->setMiscRegNoEffect(MISCREG_FTW, ftw);
    tc->setMiscRegNoEffect(MISCREG_FTAG, ftw);

    tc->setMiscRegNoEffect(MISCREG_FOP, fpu.last_opcode);

    // Each 128-bit XMM register is split across two 64-bit gem5
    // float registers.
    for (int i = 0; i < 16; ++i) {
        tc->setFloatRegBits(FLOATREG_XMM_LOW(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][0]);
        tc->setFloatRegBits(FLOATREG_XMM_HIGH(i),
                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][8]);
    }
}

/**
 * Copy FPU state from the legacy KVM_GET_FPU structure into the gem5
 * thread context. The legacy interface carries only 32-bit
 * instruction/data offsets, so FISEG/FOSEG are zeroed.
 */
void
X86KvmCPU::updateThreadContextFPU(const struct kvm_fpu &fpu)
{
    updateThreadContextFPUCommon(tc, fpu);

    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, fpu.last_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, fpu.last_dp);
}

/**
 * Copy FPU state from a KVM_GET_XSAVE region into the gem5 thread
 * context, using the 64-bit instruction/data pointer layout of the FX
 * save area. FISEG/FOSEG are zeroed since the 64-bit layout has no
 * selectors.
 */
void
X86KvmCPU::updateThreadContextXSave(const struct kvm_xsave &kxsave)
{
    const FXSave &xsave(*(const FXSave *)kxsave.region);

    updateThreadContextFPUCommon(tc, xsave);

    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FIOFF, xsave.ctrl64.fpu_ip);
    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
    tc->setMiscRegNoEffect(MISCREG_FOOFF, xsave.ctrl64.fpu_dp);
}

/**
 * Read all MSRs that both gem5 and KVM know about from the KVM vCPU
 * and copy them into the gem5 thread context.
 */
void
X86KvmCPU::updateThreadContextMSRs()
{
    const Kvm::MSRIndexVector &msrs(getMsrIntersection());

    // kvm_msrs has a variable-length entries[] tail, hence the
    // oversized allocation via newVarStruct().
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
    struct kvm_msr_entry *entry;

    // Create a list of MSRs to read
    kvm_msrs->nmsrs = msrs.size();
    entry = &kvm_msrs->entries[0];
    for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
        entry->index = *it;
        entry->reserved = 0;
        entry->data = 0;
    }

    getMSRs(*kvm_msrs.get());

    // Update M5's state
    entry = &kvm_msrs->entries[0];
    for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
        DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
                entry->index, entry->data);

        tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
    }
}

/**
 * Fetch the highest-priority pending interrupt from the gem5
 * interrupt controller and deliver it to the guest, either through
 * KVM (external interrupts, NMIs) or by invoking the fault locally
 * (INIT, STARTUP).
 */
void
X86KvmCPU::deliverInterrupts()
{
    Fault fault;

    syncThreadContext();

    {
        // Migrate to the interrupt controller's thread to get the
        // interrupt. Even though the individual methods are safe to
        // call across threads, we might still lose interrupts unless
        // getInterrupt() and updateIntrInfo() are called atomically.
        EventQueue::ScopedMigration migrate(interrupts[0]->eventQueue());
        fault = interrupts[0]->getInterrupt(tc);
        interrupts[0]->updateIntrInfo(tc);
    }

    X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
    if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "Delivering NMI\n");
        kvmNonMaskableInterrupt();
    } else if (dynamic_cast<InitInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "INIT interrupt\n");
        fault.get()->invoke(tc);
        // Delay the kvm state update since we won't enter KVM on this
        // tick.
        threadContextDirty = true;
        // HACK: gem5 doesn't actually have any BIOS code, which means
        // that we need to halt the thread and wait for a startup
        // interrupt before restarting the thread. The simulated CPUs
        // use the same kind of hack using a microcode routine.
        thread->suspend();
    } else if (dynamic_cast<StartupInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "STARTUP interrupt\n");
        fault.get()->invoke(tc);
        // The kvm state is assumed to have been updated when entering
        // kvmRun(), so we need to update it manually here.
        updateKvmState();
    } else if (x86int) {
        struct kvm_interrupt kvm_int;
        kvm_int.irq = x86int->getVector();

        DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
                fault->name(), kvm_int.irq);

        kvmInterrupt(kvm_int);
    } else {
        panic("KVM: Unknown interrupt type\n");
    }

}

/**
 * Deliver any pending gem5 interrupts to the guest (or request an
 * interrupt window from KVM if injection isn't possible right now),
 * then enter KVM for up to @p ticks.
 *
 * @return Number of ticks executed in the guest (0 if the CPU was
 *         suspended by INIT delivery and KVM was not entered).
 */
Tick
X86KvmCPU::kvmRun(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (interrupts[0]->checkInterruptsRaw()) {
        if (interrupts[0]->hasPendingUnmaskable()) {
            DPRINTF(KvmInt,
                    "Delivering unmaskable interrupt.\n");
            syncThreadContext();
            deliverInterrupts();
        } else if (kvm_run.ready_for_interrupt_injection) {
            // KVM claims that it is ready for an interrupt. It might
            // be lying if we just updated rflags and disabled
            // interrupts (e.g., by doing a CPU handover). Let's sync
            // the thread context and check if there are /really/
            // interrupts that should be delivered now.
            syncThreadContext();
            if (interrupts[0]->checkInterrupts(tc)) {
                DPRINTF(KvmInt,
                        "M5 has pending interrupts, delivering interrupt.\n");

                deliverInterrupts();
            } else {
                DPRINTF(KvmInt,
                        "Interrupt delivery delayed due to KVM confusion.\n");
                kvm_run.request_interrupt_window = 1;
            }
        } else if (!kvm_run.request_interrupt_window) {
            DPRINTF(KvmInt,
                    "M5 has pending interrupts, requesting interrupt "
                    "window.\n");
            kvm_run.request_interrupt_window = 1;
        }
    } else {
        kvm_run.request_interrupt_window = 0;
    }

    // The CPU might have been suspended as a result of the INIT
    // interrupt delivery hack. In that case, don't enter into KVM.
    if (_status == Idle)
        return 0;
    else
        return kvmRunWrapper(ticks);
}

/**
 * Run the guest with the aim of reaching a drained state: if the
 * architecture still has events in flight, request an interrupt
 * window and run briefly so they can be delivered; otherwise just
 * flush pending IO.
 */
Tick
X86KvmCPU::kvmRunDrain()
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (!archIsDrained()) {
        DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");

        // Tell KVM to find a suitable place to deliver interrupts. This
        // should ensure that pending interrupts have been delivered and
        // things are reasonably consistent (i.e., no interrupts pending
        // in the guest).
        kvm_run.request_interrupt_window = 1;

        // Limit the run to 1 millisecond. That is hopefully enough to
        // reach an interrupt window. Otherwise, we'll just try again
        // later.
        return kvmRunWrapper(1 * SimClock::Float::ms);
    } else {
        DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");

        return kvmRunWrapper(0);
    }
}

/**
 * Wrapper around BaseKvmCPU::kvmRun() that keeps the APIC base and
 * CR8 fields of the kvm_run structure in sync with the gem5 thread
 * context around the run.
 */
Tick
X86KvmCPU::kvmRunWrapper(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Synchronize the APIC base and CR8 here since they are present
    // in the kvm_run struct, which makes the synchronization really
    // cheap.
    kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    const Tick run_ticks(BaseKvmCPU::kvmRun(ticks));

    tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base);
    // NOTE(review): asymmetric with the apic_base line above -- this
    // re-reads CR8 from the thread context into kvm_run instead of
    // writing kvm_run.cr8 back. Presumably intentional (CR8/TPR is
    // owned by the simulated APIC), but confirm.
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    return run_ticks;
}

/**
 * Get the host's TSC as the cycle count via the MSR interface.
 */
uint64_t
X86KvmCPU::getHostCycles() const
{
    return getMSR(MSR_TSC);
}

/**
 * Service a single 32-bit KVM IO exit by routing the access to a gem5
 * misc register instead of the memory system. Used to emulate the PCI
 * configuration space address register.
 *
 * @param miscreg Misc register to read (IN) or write (OUT).
 */
void
X86KvmCPU::handleIOMiscReg32(int miscreg)
{
    struct kvm_run &kvm_run(*getKvmRunState());
    const uint16_t port(kvm_run.io.port);

    assert(kvm_run.exit_reason == KVM_EXIT_IO);

    // Only plain 32-bit, non-string accesses are supported here.
    if (kvm_run.io.size != 4) {
        panic("Unexpected IO size (%u) for address 0x%x.\n",
              kvm_run.io.size, port);
    }

    if (kvm_run.io.count != 1) {
        panic("Unexpected IO count (%u) for address 0x%x.\n",
              kvm_run.io.count, port);
    }

    uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset));
    if (kvm_run.io.direction == KVM_EXIT_IO_OUT)
        tc->setMiscReg(miscreg, *data);
    else
        *data = tc->readMiscRegNoEffect(miscreg);
}

/**
 * Handle a KVM_EXIT_IO exit: translate the port access into a
 * physical address in gem5's IO (or PCI configuration) space and
 * issue one uncacheable request per repetition of the instruction.
 *
 * @return Simulated delay accumulated over all issued accesses.
 */
Tick
X86KvmCPU::handleKvmExitIO()
{
    struct kvm_run &kvm_run(*getKvmRunState());
    bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT);
    unsigned char *guestData(getGuestData(kvm_run.io.data_offset));
    Tick delay(0);
    uint16_t port(kvm_run.io.port);
    Addr pAddr;
    const int count(kvm_run.io.count);

    assert(kvm_run.io.direction == KVM_EXIT_IO_IN ||
           kvm_run.io.direction == KVM_EXIT_IO_OUT);

    DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n",
            (isWrite ? "out" : "in"), kvm_run.io.port);

    /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we
     * don't use the TLB component, we need to intercept and handle
     * the PCI configuration space IO ports here.
     *
     * The IO port PCI discovery mechanism uses one address register
     * and one data register. We map the address register to a misc
     * reg and use that to re-route data register accesses to the
     * right location in the PCI configuration space.
     */
    if (port == IO_PCI_CONF_ADDR) {
        handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS);
        return 0;
    } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) {
        // Bit 31 of the address register enables configuration space
        // accesses; otherwise fall back to plain IO space.
        Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS));
        if (pciConfigAddr & 0x80000000) {
            pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) |
                                                (port & 0x3));
        } else {
            pAddr = X86ISA::x86IOAddress(port);
        }
    } else {
        pAddr = X86ISA::x86IOAddress(port);
    }

    const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
    // Temporarily lock and migrate to the event queue of the
    // VM. This queue is assumed to "own" all devices we need to
    // access if running in multi-core mode.
    EventQueue::ScopedMigration migrate(vm.eventQueue());
    for (int i = 0; i < count; ++i) {
        RequestPtr io_req = new Request(pAddr, kvm_run.io.size,
                                        Request::UNCACHEABLE, dataMasterId());
        io_req->setContext(tc->contextId());

        PacketPtr pkt = new Packet(io_req, cmd);

        pkt->dataStatic(guestData);
        delay += dataPort.submitIO(pkt);

        guestData += kvm_run.io.size;
    }

    return delay;
}

/**
 * Handle a KVM_EXIT_IRQ_WINDOW_OPEN exit.
 */
Tick
X86KvmCPU::handleKvmExitIRQWindowOpen()
{
    // We don't need to do anything here since this is caught the next
    // time we execute kvmRun(). We still overload the exit event to
    // silence the warning about an unhandled exit event.
    return 0;
}

/**
 * Check if the guest has any injected or pending events that would
 * prevent a clean drain.
 */
bool
X86KvmCPU::archIsDrained() const
{
    struct kvm_vcpu_events events;

    getVCpuEvents(events);

    // We could probably handle this by re-inserting interrupts
    // that are pending into gem5 on a drain. However, that would
    // probably be tricky to do reliably, so we'll just prevent a
    // drain if there is anything pending in the
    // guest. X86KvmCPU::kvmRunDrain() minimizes the amount of code
    // executed in the guest by requesting an interrupt window if
    // there are pending interrupts.
    const bool pending_events(events.exception.injected ||
                              events.interrupt.injected ||
                              events.nmi.injected || events.nmi.pending);

    if (pending_events) {
        DPRINTF(Drain, "archIsDrained: Pending events: %s %s %s %s\n",
                events.exception.injected ? "exception" : "",
                events.interrupt.injected ? "interrupt" : "",
                events.nmi.injected ? "nmi[i]" : "",
                events.nmi.pending ? "nmi[p]" : "");
    }

    return !pending_events;
}

/**
 * Pack a gem5 CPUID result into a KVM cpuid2 table entry.
 */
static struct kvm_cpuid_entry2
makeKvmCpuid(uint32_t function, uint32_t index,
             CpuidResult &result)
{
    struct kvm_cpuid_entry2 e;
    e.function = function;
    e.index = index;
    e.flags = 0;
    // gem5 returns 64-bit register values; CPUID results only use the
    // low 32 bits of each register.
    e.eax = (uint32_t)result.rax;
    e.ebx = (uint32_t)result.rbx;
    e.ecx = (uint32_t)result.rcx;
    e.edx = (uint32_t)result.rdx;

    return e;
}

/**
 * Populate the KVM vCPU's CPUID table with the functions gem5
 * implements, so the guest sees the same CPU identification in KVM as
 * it would on the simulated CPUs.
 */
void
X86KvmCPU::updateCPUID()
{
    Kvm::CPUIDVector m5_supported;

    /* TODO: We currently don't support any of the functions that
     * iterate through data structures in the CPU using an index. It's
     * currently not a problem since M5 doesn't expose any of them at
     * the moment.
1432 */ 1433 1434 /* Basic features */ 1435 CpuidResult func0; 1436 X86ISA::doCpuid(tc, 0x0, 0, func0); 1437 for (uint32_t function = 0; function <= func0.rax; ++function) { 1438 CpuidResult cpuid; 1439 uint32_t idx(0); 1440 1441 X86ISA::doCpuid(tc, function, idx, cpuid); 1442 m5_supported.push_back(makeKvmCpuid(function, idx, cpuid)); 1443 } 1444 1445 /* Extended features */ 1446 CpuidResult efunc0; 1447 X86ISA::doCpuid(tc, 0x80000000, 0, efunc0); 1448 for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) { 1449 CpuidResult cpuid; 1450 uint32_t idx(0); 1451 1452 X86ISA::doCpuid(tc, function, idx, cpuid); 1453 m5_supported.push_back(makeKvmCpuid(function, idx, cpuid)); 1454 } 1455 1456 setCPUID(m5_supported); 1457} 1458 1459void 1460X86KvmCPU::setCPUID(const struct kvm_cpuid2 &cpuid) 1461{ 1462 if (ioctl(KVM_SET_CPUID2, (void *)&cpuid) == -1) 1463 panic("KVM: Failed to set guest CPUID2 (errno: %i)\n", 1464 errno); 1465} 1466 1467void 1468X86KvmCPU::setCPUID(const Kvm::CPUIDVector &cpuid) 1469{ 1470 std::unique_ptr<struct kvm_cpuid2> kvm_cpuid( 1471 newVarStruct<struct kvm_cpuid2, struct kvm_cpuid_entry2>(cpuid.size())); 1472 1473 kvm_cpuid->nent = cpuid.size(); 1474 std::copy(cpuid.begin(), cpuid.end(), kvm_cpuid->entries); 1475 1476 setCPUID(*kvm_cpuid); 1477} 1478 1479void 1480X86KvmCPU::setMSRs(const struct kvm_msrs &msrs) 1481{ 1482 if (ioctl(KVM_SET_MSRS, (void *)&msrs) == -1) 1483 panic("KVM: Failed to set guest MSRs (errno: %i)\n", 1484 errno); 1485} 1486 1487void 1488X86KvmCPU::setMSRs(const KvmMSRVector &msrs) 1489{ 1490 std::unique_ptr<struct kvm_msrs> kvm_msrs( 1491 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size())); 1492 1493 kvm_msrs->nmsrs = msrs.size(); 1494 std::copy(msrs.begin(), msrs.end(), kvm_msrs->entries); 1495 1496 setMSRs(*kvm_msrs); 1497} 1498 1499void 1500X86KvmCPU::getMSRs(struct kvm_msrs &msrs) const 1501{ 1502 if (ioctl(KVM_GET_MSRS, (void *)&msrs) == -1) 1503 panic("KVM: Failed to get guest MSRs 
(errno: %i)\n", 1504 errno); 1505} 1506 1507 1508void 1509X86KvmCPU::setMSR(uint32_t index, uint64_t value) 1510{ 1511 std::unique_ptr<struct kvm_msrs> kvm_msrs( 1512 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1)); 1513 struct kvm_msr_entry &entry(kvm_msrs->entries[0]); 1514 1515 kvm_msrs->nmsrs = 1; 1516 entry.index = index; 1517 entry.reserved = 0; 1518 entry.data = value; 1519 1520 setMSRs(*kvm_msrs.get()); 1521} 1522 1523uint64_t 1524X86KvmCPU::getMSR(uint32_t index) const 1525{ 1526 std::unique_ptr<struct kvm_msrs> kvm_msrs( 1527 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1)); 1528 struct kvm_msr_entry &entry(kvm_msrs->entries[0]); 1529 1530 kvm_msrs->nmsrs = 1; 1531 entry.index = index; 1532 entry.reserved = 0; 1533 entry.data = 0; 1534 1535 getMSRs(*kvm_msrs.get()); 1536 return entry.data; 1537} 1538 1539const Kvm::MSRIndexVector & 1540X86KvmCPU::getMsrIntersection() const 1541{ 1542 if (cachedMsrIntersection.empty()) { 1543 const Kvm::MSRIndexVector &kvm_msrs(vm.kvm->getSupportedMSRs()); 1544 1545 DPRINTF(Kvm, "kvm-x86: Updating MSR intersection\n"); 1546 for (auto it = kvm_msrs.cbegin(); it != kvm_msrs.cend(); ++it) { 1547 if (X86ISA::msrMap.find(*it) != X86ISA::msrMap.end()) { 1548 cachedMsrIntersection.push_back(*it); 1549 DPRINTF(Kvm, "kvm-x86: Adding MSR 0x%x\n", *it); 1550 } else { 1551 warn("kvm-x86: MSR (0x%x) unsupported by gem5. 
Skipping.\n", 1552 *it); 1553 } 1554 } 1555 } 1556 1557 return cachedMsrIntersection; 1558} 1559 1560void 1561X86KvmCPU::getDebugRegisters(struct kvm_debugregs ®s) const 1562{ 1563#ifdef KVM_GET_DEBUGREGS 1564 if (ioctl(KVM_GET_DEBUGREGS, ®s) == -1) 1565 panic("KVM: Failed to get guest debug registers\n"); 1566#else 1567 panic("KVM: Unsupported getDebugRegisters call.\n"); 1568#endif 1569} 1570 1571void 1572X86KvmCPU::setDebugRegisters(const struct kvm_debugregs ®s) 1573{ 1574#ifdef KVM_SET_DEBUGREGS 1575 if (ioctl(KVM_SET_DEBUGREGS, (void *)®s) == -1) 1576 panic("KVM: Failed to set guest debug registers\n"); 1577#else 1578 panic("KVM: Unsupported setDebugRegisters call.\n"); 1579#endif 1580} 1581 1582void 1583X86KvmCPU::getXCRs(struct kvm_xcrs ®s) const 1584{ 1585 if (ioctl(KVM_GET_XCRS, ®s) == -1) 1586 panic("KVM: Failed to get guest debug registers\n"); 1587} 1588 1589void 1590X86KvmCPU::setXCRs(const struct kvm_xcrs ®s) 1591{ 1592 if (ioctl(KVM_SET_XCRS, (void *)®s) == -1) 1593 panic("KVM: Failed to set guest debug registers\n"); 1594} 1595 1596void 1597X86KvmCPU::getXSave(struct kvm_xsave &xsave) const 1598{ 1599 if (ioctl(KVM_GET_XSAVE, &xsave) == -1) 1600 panic("KVM: Failed to get guest debug registers\n"); 1601} 1602 1603void 1604X86KvmCPU::setXSave(const struct kvm_xsave &xsave) 1605{ 1606 if (ioctl(KVM_SET_XSAVE, (void *)&xsave) == -1) 1607 panic("KVM: Failed to set guest debug registers\n"); 1608} 1609 1610 1611void 1612X86KvmCPU::getVCpuEvents(struct kvm_vcpu_events &events) const 1613{ 1614 if (ioctl(KVM_GET_VCPU_EVENTS, &events) == -1) 1615 panic("KVM: Failed to get guest debug registers\n"); 1616} 1617 1618void 1619X86KvmCPU::setVCpuEvents(const struct kvm_vcpu_events &events) 1620{ 1621 if (ioctl(KVM_SET_VCPU_EVENTS, (void *)&events) == -1) 1622 panic("KVM: Failed to set guest debug registers\n"); 1623} 1624 1625X86KvmCPU * 1626X86KvmCPUParams::create() 1627{ 1628 return new X86KvmCPU(this); 1629}
|