base.cc revision 12085
1/* 2 * Copyright (c) 2012, 2015 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Andreas Sandberg 38 */ 39 40#include "cpu/kvm/base.hh" 41 42#include <linux/kvm.h> 43#include <sys/ioctl.h> 44#include <sys/mman.h> 45#include <unistd.h> 46 47#include <cerrno> 48#include <csignal> 49#include <ostream> 50 51#include "arch/mmapped_ipr.hh" 52#include "arch/utility.hh" 53#include "debug/Checkpoint.hh" 54#include "debug/Drain.hh" 55#include "debug/Kvm.hh" 56#include "debug/KvmIO.hh" 57#include "debug/KvmRun.hh" 58#include "params/BaseKvmCPU.hh" 59#include "sim/process.hh" 60#include "sim/system.hh" 61 62/* Used by some KVM macros */ 63#define PAGE_SIZE pageSize 64 65BaseKvmCPU::BaseKvmCPU(BaseKvmCPUParams *params) 66 : BaseCPU(params), 67 vm(*params->system->getKvmVM()), 68 _status(Idle), 69 dataPort(name() + ".dcache_port", this), 70 instPort(name() + ".icache_port", this), 71 alwaysSyncTC(params->alwaysSyncTC), 72 threadContextDirty(true), 73 kvmStateDirty(false), 74 vcpuID(vm.allocVCPUID()), vcpuFD(-1), vcpuMMapSize(0), 75 _kvmRun(NULL), mmioRing(NULL), 76 pageSize(sysconf(_SC_PAGE_SIZE)), 77 tickEvent(*this), 78 activeInstPeriod(0), 79 perfControlledByTimer(params->usePerfOverflow), 80 hostFactor(params->hostFactor), 81 ctrInsts(0) 82{ 83 if (pageSize == -1) 84 panic("KVM: Failed to determine host page size (%i)\n", 85 errno); 86 87 if (FullSystem) 88 thread = new SimpleThread(this, 0, params->system, params->itb, params->dtb, 89 params->isa[0]); 90 else 91 thread = new SimpleThread(this, /* thread_num */ 0, params->system, 92 params->workload[0], params->itb, 93 params->dtb, params->isa[0]); 94 95 thread->setStatus(ThreadContext::Halted); 96 tc = thread->getTC(); 97 threadContexts.push_back(tc); 98} 99 100BaseKvmCPU::~BaseKvmCPU() 101{ 102 if (_kvmRun) 103 munmap(_kvmRun, vcpuMMapSize); 104 close(vcpuFD); 105} 106 107void 108BaseKvmCPU::init() 109{ 110 BaseCPU::init(); 111 112 if (numThreads != 1) 113 fatal("KVM: Multithreading not supported"); 114 115 tc->initMemProxies(tc); 116 117 // initialize CPU, including PC 118 if (FullSystem && !switchedOut()) 119 TheISA::initCPU(tc, tc->contextId()); 120} 121 122void 123BaseKvmCPU::startup() 124{ 125 const BaseKvmCPUParams * const p( 126 dynamic_cast<const BaseKvmCPUParams *>(params())); 127 128 Kvm &kvm(*vm.kvm); 129 130 BaseCPU::startup(); 131 132 assert(vcpuFD == -1); 133 134 // Tell the VM that a CPU is about to start. 135 vm.cpuStartup(); 136 137 // We can't initialize KVM CPUs in BaseKvmCPU::init() since we are 138 // not guaranteed that the parent KVM VM has initialized at that 139 // point. Initialize virtual CPUs here instead. 140 vcpuFD = vm.createVCPU(vcpuID); 141 142 // Map the KVM run structure */ 143 vcpuMMapSize = kvm.getVCPUMMapSize(); 144 _kvmRun = (struct kvm_run *)mmap(0, vcpuMMapSize, 145 PROT_READ | PROT_WRITE, MAP_SHARED, 146 vcpuFD, 0); 147 if (_kvmRun == MAP_FAILED) 148 panic("KVM: Failed to map run data structure\n"); 149 150 // Setup a pointer to the MMIO ring buffer if coalesced MMIO is 151 // available. The offset into the KVM's communication page is 152 // provided by the coalesced MMIO capability. 153 int mmioOffset(kvm.capCoalescedMMIO()); 154 if (!p->useCoalescedMMIO) { 155 inform("KVM: Coalesced MMIO disabled by config.\n"); 156 } else if (mmioOffset) { 157 inform("KVM: Coalesced IO available\n"); 158 mmioRing = (struct kvm_coalesced_mmio_ring *)( 159 (char *)_kvmRun + (mmioOffset * pageSize)); 160 } else { 161 inform("KVM: Coalesced not supported by host OS\n"); 162 } 163 164 thread->startup(); 165 166 Event *startupEvent( 167 new EventFunctionWrapper([this]{ startupThread(); }, name(), true)); 168 schedule(startupEvent, curTick()); 169} 170 171BaseKvmCPU::Status 172BaseKvmCPU::KVMCpuPort::nextIOState() const 173{ 174 return (activeMMIOReqs || pendingMMIOPkts.size()) 175 ? RunningMMIOPending : RunningServiceCompletion; 176} 177 178Tick 179BaseKvmCPU::KVMCpuPort::submitIO(PacketPtr pkt) 180{ 181 if (cpu->system->isAtomicMode()) { 182 Tick delay = sendAtomic(pkt); 183 delete pkt->req; 184 delete pkt; 185 return delay; 186 } else { 187 if (pendingMMIOPkts.empty() && sendTimingReq(pkt)) { 188 activeMMIOReqs++; 189 } else { 190 pendingMMIOPkts.push(pkt); 191 } 192 // Return value is irrelevant for timing-mode accesses. 193 return 0; 194 } 195} 196 197bool 198BaseKvmCPU::KVMCpuPort::recvTimingResp(PacketPtr pkt) 199{ 200 DPRINTF(KvmIO, "KVM: Finished timing request\n"); 201 202 delete pkt->req; 203 delete pkt; 204 activeMMIOReqs--; 205 206 // We can switch back into KVM when all pending and in-flight MMIO 207 // operations have completed. 208 if (!(activeMMIOReqs || pendingMMIOPkts.size())) { 209 DPRINTF(KvmIO, "KVM: Finished all outstanding timing requests\n"); 210 cpu->finishMMIOPending(); 211 } 212 return true; 213} 214 215void 216BaseKvmCPU::KVMCpuPort::recvReqRetry() 217{ 218 DPRINTF(KvmIO, "KVM: Retry for timing request\n"); 219 220 assert(pendingMMIOPkts.size()); 221 222 // Assuming that we can issue infinite requests this cycle is a bit 223 // unrealistic, but it's not worth modeling something more complex in 224 // KVM. 225 while (pendingMMIOPkts.size() && sendTimingReq(pendingMMIOPkts.front())) { 226 pendingMMIOPkts.pop(); 227 activeMMIOReqs++; 228 } 229} 230 231void 232BaseKvmCPU::finishMMIOPending() 233{ 234 assert(_status = RunningMMIOPending); 235 assert(!tickEvent.scheduled()); 236 237 _status = RunningServiceCompletion; 238 schedule(tickEvent, nextCycle()); 239} 240 241void 242BaseKvmCPU::startupThread() 243{ 244 // Do thread-specific initialization. We need to setup signal 245 // delivery for counters and timers from within the thread that 246 // will execute the event queue to ensure that signals are 247 // delivered to the right threads. 248 const BaseKvmCPUParams * const p( 249 dynamic_cast<const BaseKvmCPUParams *>(params())); 250 251 vcpuThread = pthread_self(); 252 253 // Setup signal handlers. This has to be done after the vCPU is 254 // created since it manipulates the vCPU signal mask. 255 setupSignalHandler(); 256 257 setupCounters(); 258 259 if (p->usePerfOverflow) 260 runTimer.reset(new PerfKvmTimer(hwCycles, 261 KVM_KICK_SIGNAL, 262 p->hostFactor, 263 p->hostFreq)); 264 else 265 runTimer.reset(new PosixKvmTimer(KVM_KICK_SIGNAL, CLOCK_MONOTONIC, 266 p->hostFactor, 267 p->hostFreq)); 268 269} 270 271void 272BaseKvmCPU::regStats() 273{ 274 using namespace Stats; 275 276 BaseCPU::regStats(); 277 278 numInsts 279 .name(name() + ".committedInsts") 280 .desc("Number of instructions committed") 281 ; 282 283 numVMExits 284 .name(name() + ".numVMExits") 285 .desc("total number of KVM exits") 286 ; 287 288 numVMHalfEntries 289 .name(name() + ".numVMHalfEntries") 290 .desc("number of KVM entries to finalize pending operations") 291 ; 292 293 numExitSignal 294 .name(name() + ".numExitSignal") 295 .desc("exits due to signal delivery") 296 ; 297 298 numMMIO 299 .name(name() + ".numMMIO") 300 .desc("number of VM exits due to memory mapped IO") 301 ; 302 303 numCoalescedMMIO 304 .name(name() + ".numCoalescedMMIO") 305 .desc("number of coalesced memory mapped IO requests") 306 ; 307 308 numIO 309 .name(name() + ".numIO") 310 .desc("number of VM exits due to legacy IO") 311 ; 312 313 numHalt 314 .name(name() + ".numHalt") 315 .desc("number of VM exits due to wait for interrupt instructions") 316 ; 317 318 numInterrupts 319 .name(name() + ".numInterrupts") 320 .desc("number of interrupts delivered") 321 ; 322 323 numHypercalls 324 .name(name() + ".numHypercalls") 325 .desc("number of hypercalls") 326 ; 327} 328 329void 330BaseKvmCPU::serializeThread(CheckpointOut &cp, ThreadID tid) const 331{ 332 if (DTRACE(Checkpoint)) { 333 DPRINTF(Checkpoint, "KVM: Serializing thread %i:\n", tid); 334 dump(); 335 } 336 337 assert(tid == 0); 338 assert(_status == Idle); 339 thread->serialize(cp); 340} 341 342void 343BaseKvmCPU::unserializeThread(CheckpointIn &cp, ThreadID tid) 344{ 345 DPRINTF(Checkpoint, "KVM: Unserialize thread %i:\n", tid); 346 347 assert(tid == 0); 348 assert(_status == Idle); 349 thread->unserialize(cp); 350 threadContextDirty = true; 351} 352 353DrainState 354BaseKvmCPU::drain() 355{ 356 if (switchedOut()) 357 return DrainState::Drained; 358 359 DPRINTF(Drain, "BaseKvmCPU::drain\n"); 360 switch (_status) { 361 case Running: 362 // The base KVM code is normally ready when it is in the 363 // Running state, but the architecture specific code might be 364 // of a different opinion. This may happen when the CPU been 365 // notified of an event that hasn't been accepted by the vCPU 366 // yet. 367 if (!archIsDrained()) 368 return DrainState::Draining; 369 370 // The state of the CPU is consistent, so we don't need to do 371 // anything special to drain it. We simply de-schedule the 372 // tick event and enter the Idle state to prevent nasty things 373 // like MMIOs from happening. 374 if (tickEvent.scheduled()) 375 deschedule(tickEvent); 376 _status = Idle; 377 378 /** FALLTHROUGH */ 379 case Idle: 380 // Idle, no need to drain 381 assert(!tickEvent.scheduled()); 382 383 // Sync the thread context here since we'll need it when we 384 // switch CPUs or checkpoint the CPU. 385 syncThreadContext(); 386 387 return DrainState::Drained; 388 389 case RunningServiceCompletion: 390 // The CPU has just requested a service that was handled in 391 // the RunningService state, but the results have still not 392 // been reported to the CPU. Now, we /could/ probably just 393 // update the register state ourselves instead of letting KVM 394 // handle it, but that would be tricky. Instead, we enter KVM 395 // and let it do its stuff. 396 DPRINTF(Drain, "KVM CPU is waiting for service completion, " 397 "requesting drain.\n"); 398 return DrainState::Draining; 399 400 case RunningMMIOPending: 401 // We need to drain since there are in-flight timing accesses 402 DPRINTF(Drain, "KVM CPU is waiting for timing accesses to complete, " 403 "requesting drain.\n"); 404 return DrainState::Draining; 405 406 case RunningService: 407 // We need to drain since the CPU is waiting for service (e.g., MMIOs) 408 DPRINTF(Drain, "KVM CPU is waiting for service, requesting drain.\n"); 409 return DrainState::Draining; 410 411 default: 412 panic("KVM: Unhandled CPU state in drain()\n"); 413 return DrainState::Drained; 414 } 415} 416 417void 418BaseKvmCPU::drainResume() 419{ 420 assert(!tickEvent.scheduled()); 421 422 // We might have been switched out. In that case, we don't need to 423 // do anything. 424 if (switchedOut()) 425 return; 426 427 DPRINTF(Kvm, "drainResume\n"); 428 verifyMemoryMode(); 429 430 // The tick event is de-scheduled as a part of the draining 431 // process. Re-schedule it if the thread context is active. 432 if (tc->status() == ThreadContext::Active) { 433 schedule(tickEvent, nextCycle()); 434 _status = Running; 435 } else { 436 _status = Idle; 437 } 438} 439 440void 441BaseKvmCPU::notifyFork() 442{ 443 // We should have drained prior to forking, which means that the 444 // tick event shouldn't be scheduled and the CPU is idle. 445 assert(!tickEvent.scheduled()); 446 assert(_status == Idle); 447 448 if (vcpuFD != -1) { 449 if (close(vcpuFD) == -1) 450 warn("kvm CPU: notifyFork failed to close vcpuFD\n"); 451 452 if (_kvmRun) 453 munmap(_kvmRun, vcpuMMapSize); 454 455 vcpuFD = -1; 456 _kvmRun = NULL; 457 458 hwInstructions.detach(); 459 hwCycles.detach(); 460 } 461} 462 463void 464BaseKvmCPU::switchOut() 465{ 466 DPRINTF(Kvm, "switchOut\n"); 467 468 BaseCPU::switchOut(); 469 470 // We should have drained prior to executing a switchOut, which 471 // means that the tick event shouldn't be scheduled and the CPU is 472 // idle. 473 assert(!tickEvent.scheduled()); 474 assert(_status == Idle); 475} 476 477void 478BaseKvmCPU::takeOverFrom(BaseCPU *cpu) 479{ 480 DPRINTF(Kvm, "takeOverFrom\n"); 481 482 BaseCPU::takeOverFrom(cpu); 483 484 // We should have drained prior to executing a switchOut, which 485 // means that the tick event shouldn't be scheduled and the CPU is 486 // idle. 487 assert(!tickEvent.scheduled()); 488 assert(_status == Idle); 489 assert(threadContexts.size() == 1); 490 491 // Force an update of the KVM state here instead of flagging the 492 // TC as dirty. This is not ideal from a performance point of 493 // view, but it makes debugging easier as it allows meaningful KVM 494 // state to be dumped before and after a takeover. 495 updateKvmState(); 496 threadContextDirty = false; 497} 498 499void 500BaseKvmCPU::verifyMemoryMode() const 501{ 502 if (!(system->bypassCaches())) { 503 fatal("The KVM-based CPUs requires the memory system to be in the " 504 "'noncaching' mode.\n"); 505 } 506} 507 508void 509BaseKvmCPU::wakeup(ThreadID tid) 510{ 511 DPRINTF(Kvm, "wakeup()\n"); 512 // This method might have been called from another 513 // context. Migrate to this SimObject's event queue when 514 // delivering the wakeup signal. 515 EventQueue::ScopedMigration migrate(eventQueue()); 516 517 // Kick the vCPU to get it to come out of KVM. 518 kick(); 519 520 if (thread->status() != ThreadContext::Suspended) 521 return; 522 523 thread->activate(); 524} 525 526void 527BaseKvmCPU::activateContext(ThreadID thread_num) 528{ 529 DPRINTF(Kvm, "ActivateContext %d\n", thread_num); 530 531 assert(thread_num == 0); 532 assert(thread); 533 534 assert(_status == Idle); 535 assert(!tickEvent.scheduled()); 536 537 numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend); 538 539 schedule(tickEvent, clockEdge(Cycles(0))); 540 _status = Running; 541} 542 543 544void 545BaseKvmCPU::suspendContext(ThreadID thread_num) 546{ 547 DPRINTF(Kvm, "SuspendContext %d\n", thread_num); 548 549 assert(thread_num == 0); 550 assert(thread); 551 552 if (_status == Idle) 553 return; 554 555 assert(_status == Running || _status == RunningServiceCompletion); 556 557 // The tick event may no be scheduled if the quest has requested 558 // the monitor to wait for interrupts. The normal CPU models can 559 // get their tick events descheduled by quiesce instructions, but 560 // that can't happen here. 561 if (tickEvent.scheduled()) 562 deschedule(tickEvent); 563 564 _status = Idle; 565} 566 567void 568BaseKvmCPU::deallocateContext(ThreadID thread_num) 569{ 570 // for now, these are equivalent 571 suspendContext(thread_num); 572} 573 574void 575BaseKvmCPU::haltContext(ThreadID thread_num) 576{ 577 // for now, these are equivalent 578 suspendContext(thread_num); 579} 580 581ThreadContext * 582BaseKvmCPU::getContext(int tn) 583{ 584 assert(tn == 0); 585 syncThreadContext(); 586 return tc; 587} 588 589 590Counter 591BaseKvmCPU::totalInsts() const 592{ 593 return ctrInsts; 594} 595 596Counter 597BaseKvmCPU::totalOps() const 598{ 599 hack_once("Pretending totalOps is equivalent to totalInsts()\n"); 600 return ctrInsts; 601} 602 603void 604BaseKvmCPU::dump() const 605{ 606 inform("State dumping not implemented."); 607} 608 609void 610BaseKvmCPU::tick() 611{ 612 Tick delay(0); 613 assert(_status != Idle && _status != RunningMMIOPending); 614 615 switch (_status) { 616 case RunningService: 617 // handleKvmExit() will determine the next state of the CPU 618 delay = handleKvmExit(); 619 620 if (tryDrain()) 621 _status = Idle; 622 break; 623 624 case RunningServiceCompletion: 625 case Running: { 626 const uint64_t nextInstEvent( 627 !comInstEventQueue[0]->empty() ? 628 comInstEventQueue[0]->nextTick() : UINT64_MAX); 629 // Enter into KVM and complete pending IO instructions if we 630 // have an instruction event pending. 631 const Tick ticksToExecute( 632 nextInstEvent > ctrInsts ? 633 curEventQueue()->nextTick() - curTick() : 0); 634 635 if (alwaysSyncTC) 636 threadContextDirty = true; 637 638 // We might need to update the KVM state. 639 syncKvmState(); 640 641 // Setup any pending instruction count breakpoints using 642 // PerfEvent if we are going to execute more than just an IO 643 // completion. 644 if (ticksToExecute > 0) 645 setupInstStop(); 646 647 DPRINTF(KvmRun, "Entering KVM...\n"); 648 if (drainState() == DrainState::Draining) { 649 // Force an immediate exit from KVM after completing 650 // pending operations. The architecture-specific code 651 // takes care to run until it is in a state where it can 652 // safely be drained. 653 delay = kvmRunDrain(); 654 } else { 655 delay = kvmRun(ticksToExecute); 656 } 657 658 // The CPU might have been suspended before entering into 659 // KVM. Assume that the CPU was suspended /before/ entering 660 // into KVM and skip the exit handling. 661 if (_status == Idle) 662 break; 663 664 // Entering into KVM implies that we'll have to reload the thread 665 // context from KVM if we want to access it. Flag the KVM state as 666 // dirty with respect to the cached thread context. 667 kvmStateDirty = true; 668 669 if (alwaysSyncTC) 670 syncThreadContext(); 671 672 // Enter into the RunningService state unless the 673 // simulation was stopped by a timer. 674 if (_kvmRun->exit_reason != KVM_EXIT_INTR) { 675 _status = RunningService; 676 } else { 677 ++numExitSignal; 678 _status = Running; 679 } 680 681 // Service any pending instruction events. The vCPU should 682 // have exited in time for the event using the instruction 683 // counter configured by setupInstStop(). 684 comInstEventQueue[0]->serviceEvents(ctrInsts); 685 system->instEventQueue.serviceEvents(system->totalNumInsts); 686 687 if (tryDrain()) 688 _status = Idle; 689 } break; 690 691 default: 692 panic("BaseKvmCPU entered tick() in an illegal state (%i)\n", 693 _status); 694 } 695 696 // Schedule a new tick if we are still running 697 if (_status != Idle && _status != RunningMMIOPending) 698 schedule(tickEvent, clockEdge(ticksToCycles(delay))); 699} 700 701Tick 702BaseKvmCPU::kvmRunDrain() 703{ 704 // By default, the only thing we need to drain is a pending IO 705 // operation which assumes that we are in the 706 // RunningServiceCompletion or RunningMMIOPending state. 707 assert(_status == RunningServiceCompletion || 708 _status == RunningMMIOPending); 709 710 // Deliver the data from the pending IO operation and immediately 711 // exit. 712 return kvmRun(0); 713} 714 715uint64_t 716BaseKvmCPU::getHostCycles() const 717{ 718 return hwCycles.read(); 719} 720 721Tick 722BaseKvmCPU::kvmRun(Tick ticks) 723{ 724 Tick ticksExecuted; 725 fatal_if(vcpuFD == -1, 726 "Trying to run a KVM CPU in a forked child process. " 727 "This is not supported.\n"); 728 DPRINTF(KvmRun, "KVM: Executing for %i ticks\n", ticks); 729 730 if (ticks == 0) { 731 // Settings ticks == 0 is a special case which causes an entry 732 // into KVM that finishes pending operations (e.g., IO) and 733 // then immediately exits. 734 DPRINTF(KvmRun, "KVM: Delivering IO without full guest entry\n"); 735 736 ++numVMHalfEntries; 737 738 // Send a KVM_KICK_SIGNAL to the vCPU thread (i.e., this 739 // thread). The KVM control signal is masked while executing 740 // in gem5 and gets unmasked temporarily as when entering 741 // KVM. See setSignalMask() and setupSignalHandler(). 742 kick(); 743 744 // Start the vCPU. KVM will check for signals after completing 745 // pending operations (IO). Since the KVM_KICK_SIGNAL is 746 // pending, this forces an immediate exit to gem5 again. We 747 // don't bother to setup timers since this shouldn't actually 748 // execute any code (other than completing half-executed IO 749 // instructions) in the guest. 750 ioctlRun(); 751 752 // We always execute at least one cycle to prevent the 753 // BaseKvmCPU::tick() to be rescheduled on the same tick 754 // twice. 755 ticksExecuted = clockPeriod(); 756 } else { 757 // This method is executed as a result of a tick event. That 758 // means that the event queue will be locked when entering the 759 // method. We temporarily unlock the event queue to allow 760 // other threads to steal control of this thread to inject 761 // interrupts. They will typically lock the queue and then 762 // force an exit from KVM by kicking the vCPU. 763 EventQueue::ScopedRelease release(curEventQueue()); 764 765 if (ticks < runTimer->resolution()) { 766 DPRINTF(KvmRun, "KVM: Adjusting tick count (%i -> %i)\n", 767 ticks, runTimer->resolution()); 768 ticks = runTimer->resolution(); 769 } 770 771 // Get hardware statistics after synchronizing contexts. The KVM 772 // state update might affect guest cycle counters. 773 uint64_t baseCycles(getHostCycles()); 774 uint64_t baseInstrs(hwInstructions.read()); 775 776 // Arm the run timer and start the cycle timer if it isn't 777 // controlled by the overflow timer. Starting/stopping the cycle 778 // timer automatically starts the other perf timers as they are in 779 // the same counter group. 780 runTimer->arm(ticks); 781 if (!perfControlledByTimer) 782 hwCycles.start(); 783 784 ioctlRun(); 785 786 runTimer->disarm(); 787 if (!perfControlledByTimer) 788 hwCycles.stop(); 789 790 // The control signal may have been delivered after we exited 791 // from KVM. It will be pending in that case since it is 792 // masked when we aren't executing in KVM. Discard it to make 793 // sure we don't deliver it immediately next time we try to 794 // enter into KVM. 795 discardPendingSignal(KVM_KICK_SIGNAL); 796 797 const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles); 798 const uint64_t simCyclesExecuted(hostCyclesExecuted * hostFactor); 799 const uint64_t instsExecuted(hwInstructions.read() - baseInstrs); 800 ticksExecuted = runTimer->ticksFromHostCycles(hostCyclesExecuted); 801 802 /* Update statistics */ 803 numCycles += simCyclesExecuted;; 804 numInsts += instsExecuted; 805 ctrInsts += instsExecuted; 806 system->totalNumInsts += instsExecuted; 807 808 DPRINTF(KvmRun, 809 "KVM: Executed %i instructions in %i cycles " 810 "(%i ticks, sim cycles: %i).\n", 811 instsExecuted, hostCyclesExecuted, ticksExecuted, simCyclesExecuted); 812 } 813 814 ++numVMExits; 815 816 return ticksExecuted + flushCoalescedMMIO(); 817} 818 819void 820BaseKvmCPU::kvmNonMaskableInterrupt() 821{ 822 ++numInterrupts; 823 if (ioctl(KVM_NMI) == -1) 824 panic("KVM: Failed to deliver NMI to virtual CPU\n"); 825} 826 827void 828BaseKvmCPU::kvmInterrupt(const struct kvm_interrupt &interrupt) 829{ 830 ++numInterrupts; 831 if (ioctl(KVM_INTERRUPT, (void *)&interrupt) == -1) 832 panic("KVM: Failed to deliver interrupt to virtual CPU\n"); 833} 834 835void 836BaseKvmCPU::getRegisters(struct kvm_regs ®s) const 837{ 838 if (ioctl(KVM_GET_REGS, ®s) == -1) 839 panic("KVM: Failed to get guest registers\n"); 840} 841 842void 843BaseKvmCPU::setRegisters(const struct kvm_regs ®s) 844{ 845 if (ioctl(KVM_SET_REGS, (void *)®s) == -1) 846 panic("KVM: Failed to set guest registers\n"); 847} 848 849void 850BaseKvmCPU::getSpecialRegisters(struct kvm_sregs ®s) const 851{ 852 if (ioctl(KVM_GET_SREGS, ®s) == -1) 853 panic("KVM: Failed to get guest special registers\n"); 854} 855 856void 857BaseKvmCPU::setSpecialRegisters(const struct kvm_sregs ®s) 858{ 859 if (ioctl(KVM_SET_SREGS, (void *)®s) == -1) 860 panic("KVM: Failed to set guest special registers\n"); 861} 862 863void 864BaseKvmCPU::getFPUState(struct kvm_fpu &state) const 865{ 866 if (ioctl(KVM_GET_FPU, &state) == -1) 867 panic("KVM: Failed to get guest FPU state\n"); 868} 869 870void 871BaseKvmCPU::setFPUState(const struct kvm_fpu &state) 872{ 873 if (ioctl(KVM_SET_FPU, (void *)&state) == -1) 874 panic("KVM: Failed to set guest FPU state\n"); 875} 876 877 878void 879BaseKvmCPU::setOneReg(uint64_t id, const void *addr) 880{ 881#ifdef KVM_SET_ONE_REG 882 struct kvm_one_reg reg; 883 reg.id = id; 884 reg.addr = (uint64_t)addr; 885 886 if (ioctl(KVM_SET_ONE_REG, ®) == -1) { 887 panic("KVM: Failed to set register (0x%x) value (errno: %i)\n", 888 id, errno); 889 } 890#else 891 panic("KVM_SET_ONE_REG is unsupported on this platform.\n"); 892#endif 893} 894 895void 896BaseKvmCPU::getOneReg(uint64_t id, void *addr) const 897{ 898#ifdef KVM_GET_ONE_REG 899 struct kvm_one_reg reg; 900 reg.id = id; 901 reg.addr = (uint64_t)addr; 902 903 if (ioctl(KVM_GET_ONE_REG, ®) == -1) { 904 panic("KVM: Failed to get register (0x%x) value (errno: %i)\n", 905 id, errno); 906 } 907#else 908 panic("KVM_GET_ONE_REG is unsupported on this platform.\n"); 909#endif 910} 911 912std::string 913BaseKvmCPU::getAndFormatOneReg(uint64_t id) const 914{ 915#ifdef KVM_GET_ONE_REG 916 std::ostringstream ss; 917 918 ss.setf(std::ios::hex, std::ios::basefield); 919 ss.setf(std::ios::showbase); 920#define HANDLE_INTTYPE(len) \ 921 case KVM_REG_SIZE_U ## len: { \ 922 uint ## len ## _t value; \ 923 getOneReg(id, &value); \ 924 ss << value; \ 925 } break 926 927#define HANDLE_ARRAY(len) \ 928 case KVM_REG_SIZE_U ## len: { \ 929 uint8_t value[len / 8]; \ 930 getOneReg(id, value); \ 931 ccprintf(ss, "[0x%x", value[0]); \ 932 for (int i = 1; i < len / 8; ++i) \ 933 ccprintf(ss, ", 0x%x", value[i]); \ 934 ccprintf(ss, "]"); \ 935 } break 936 937 switch (id & KVM_REG_SIZE_MASK) { 938 HANDLE_INTTYPE(8); 939 HANDLE_INTTYPE(16); 940 HANDLE_INTTYPE(32); 941 HANDLE_INTTYPE(64); 942 HANDLE_ARRAY(128); 943 HANDLE_ARRAY(256); 944 HANDLE_ARRAY(512); 945 HANDLE_ARRAY(1024); 946 default: 947 ss << "??"; 948 } 949 950#undef HANDLE_INTTYPE 951#undef HANDLE_ARRAY 952 953 return ss.str(); 954#else 955 panic("KVM_GET_ONE_REG is unsupported on this platform.\n"); 956#endif 957} 958 959void 960BaseKvmCPU::syncThreadContext() 961{ 962 if (!kvmStateDirty) 963 return; 964 965 assert(!threadContextDirty); 966 967 updateThreadContext(); 968 kvmStateDirty = false; 969} 970 971void 972BaseKvmCPU::syncKvmState() 973{ 974 if (!threadContextDirty) 975 return; 976 977 assert(!kvmStateDirty); 978 979 updateKvmState(); 980 threadContextDirty = false; 981} 982 983Tick 984BaseKvmCPU::handleKvmExit() 985{ 986 DPRINTF(KvmRun, "handleKvmExit (exit_reason: %i)\n", _kvmRun->exit_reason); 987 assert(_status == RunningService); 988 989 // Switch into the running state by default. Individual handlers 990 // can override this. 991 _status = Running; 992 switch (_kvmRun->exit_reason) { 993 case KVM_EXIT_UNKNOWN: 994 return handleKvmExitUnknown(); 995 996 case KVM_EXIT_EXCEPTION: 997 return handleKvmExitException(); 998 999 case KVM_EXIT_IO: 1000 { 1001 ++numIO; 1002 Tick ticks = handleKvmExitIO(); 1003 _status = dataPort.nextIOState(); 1004 return ticks; 1005 } 1006 1007 case KVM_EXIT_HYPERCALL: 1008 ++numHypercalls; 1009 return handleKvmExitHypercall(); 1010 1011 case KVM_EXIT_HLT: 1012 /* The guest has halted and is waiting for interrupts */ 1013 DPRINTF(Kvm, "handleKvmExitHalt\n"); 1014 ++numHalt; 1015 1016 // Suspend the thread until the next interrupt arrives 1017 thread->suspend(); 1018 1019 // This is actually ignored since the thread is suspended. 1020 return 0; 1021 1022 case KVM_EXIT_MMIO: 1023 { 1024 /* Service memory mapped IO requests */ 1025 DPRINTF(KvmIO, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n", 1026 _kvmRun->mmio.is_write, 1027 _kvmRun->mmio.phys_addr, _kvmRun->mmio.len); 1028 1029 ++numMMIO; 1030 Tick ticks = doMMIOAccess(_kvmRun->mmio.phys_addr, _kvmRun->mmio.data, 1031 _kvmRun->mmio.len, _kvmRun->mmio.is_write); 1032 // doMMIOAccess could have triggered a suspend, in which case we don't 1033 // want to overwrite the _status. 1034 if (_status != Idle) 1035 _status = dataPort.nextIOState(); 1036 return ticks; 1037 } 1038 1039 case KVM_EXIT_IRQ_WINDOW_OPEN: 1040 return handleKvmExitIRQWindowOpen(); 1041 1042 case KVM_EXIT_FAIL_ENTRY: 1043 return handleKvmExitFailEntry(); 1044 1045 case KVM_EXIT_INTR: 1046 /* KVM was interrupted by a signal, restart it in the next 1047 * tick. */ 1048 return 0; 1049 1050 case KVM_EXIT_INTERNAL_ERROR: 1051 panic("KVM: Internal error (suberror: %u)\n", 1052 _kvmRun->internal.suberror); 1053 1054 default: 1055 dump(); 1056 panic("KVM: Unexpected exit (exit_reason: %u)\n", _kvmRun->exit_reason); 1057 } 1058} 1059 1060Tick 1061BaseKvmCPU::handleKvmExitIO() 1062{ 1063 panic("KVM: Unhandled guest IO (dir: %i, size: %i, port: 0x%x, count: %i)\n", 1064 _kvmRun->io.direction, _kvmRun->io.size, 1065 _kvmRun->io.port, _kvmRun->io.count); 1066} 1067 1068Tick 1069BaseKvmCPU::handleKvmExitHypercall() 1070{ 1071 panic("KVM: Unhandled hypercall\n"); 1072} 1073 1074Tick 1075BaseKvmCPU::handleKvmExitIRQWindowOpen() 1076{ 1077 warn("KVM: Unhandled IRQ window.\n"); 1078 return 0; 1079} 1080 1081 1082Tick 1083BaseKvmCPU::handleKvmExitUnknown() 1084{ 1085 dump(); 1086 panic("KVM: Unknown error when starting vCPU (hw reason: 0x%llx)\n", 1087 _kvmRun->hw.hardware_exit_reason); 1088} 1089 1090Tick 1091BaseKvmCPU::handleKvmExitException() 1092{ 1093 dump(); 1094 panic("KVM: Got exception when starting vCPU " 1095 "(exception: %u, error_code: %u)\n", 1096 _kvmRun->ex.exception, _kvmRun->ex.error_code); 1097} 1098 1099Tick 1100BaseKvmCPU::handleKvmExitFailEntry() 1101{ 1102 dump(); 1103 panic("KVM: Failed to enter virtualized mode (hw reason: 0x%llx)\n", 1104 _kvmRun->fail_entry.hardware_entry_failure_reason); 1105} 1106 1107Tick 1108BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write) 1109{ 1110 ThreadContext *tc(thread->getTC()); 1111 syncThreadContext(); 1112 1113 RequestPtr mmio_req = new Request(paddr, size, Request::UNCACHEABLE, 1114 dataMasterId()); 1115 mmio_req->setContext(tc->contextId()); 1116 // Some architectures do need to massage physical addresses a bit 1117 // before they are inserted into the memory system. This enables 1118 // APIC accesses on x86 and m5ops where supported through a MMIO 1119 // interface. 1120 BaseTLB::Mode tlb_mode(write ? BaseTLB::Write : BaseTLB::Read); 1121 Fault fault(tc->getDTBPtr()->finalizePhysical(mmio_req, tc, tlb_mode)); 1122 if (fault != NoFault) 1123 warn("Finalization of MMIO address failed: %s\n", fault->name()); 1124 1125 1126 const MemCmd cmd(write ? MemCmd::WriteReq : MemCmd::ReadReq); 1127 PacketPtr pkt = new Packet(mmio_req, cmd); 1128 pkt->dataStatic(data); 1129 1130 if (mmio_req->isMmappedIpr()) { 1131 // We currently assume that there is no need to migrate to a 1132 // different event queue when doing IPRs. Currently, IPRs are 1133 // only used for m5ops, so it should be a valid assumption. 1134 const Cycles ipr_delay(write ? 1135 TheISA::handleIprWrite(tc, pkt) : 1136 TheISA::handleIprRead(tc, pkt)); 1137 threadContextDirty = true; 1138 delete pkt->req; 1139 delete pkt; 1140 return clockPeriod() * ipr_delay; 1141 } else { 1142 // Temporarily lock and migrate to the event queue of the 1143 // VM. This queue is assumed to "own" all devices we need to 1144 // access if running in multi-core mode. 1145 EventQueue::ScopedMigration migrate(vm.eventQueue()); 1146 1147 return dataPort.submitIO(pkt); 1148 } 1149} 1150 1151void 1152BaseKvmCPU::setSignalMask(const sigset_t *mask) 1153{ 1154 std::unique_ptr<struct kvm_signal_mask> kvm_mask; 1155 1156 if (mask) { 1157 kvm_mask.reset((struct kvm_signal_mask *)operator new( 1158 sizeof(struct kvm_signal_mask) + sizeof(*mask))); 1159 // The kernel and the user-space headers have different ideas 1160 // about the size of sigset_t. This seems like a massive hack, 1161 // but is actually what qemu does. 1162 assert(sizeof(*mask) >= 8); 1163 kvm_mask->len = 8; 1164 memcpy(kvm_mask->sigset, mask, kvm_mask->len); 1165 } 1166 1167 if (ioctl(KVM_SET_SIGNAL_MASK, (void *)kvm_mask.get()) == -1) 1168 panic("KVM: Failed to set vCPU signal mask (errno: %i)\n", 1169 errno); 1170} 1171 1172int 1173BaseKvmCPU::ioctl(int request, long p1) const 1174{ 1175 if (vcpuFD == -1) 1176 panic("KVM: CPU ioctl called before initialization\n"); 1177 1178 return ::ioctl(vcpuFD, request, p1); 1179} 1180 1181Tick 1182BaseKvmCPU::flushCoalescedMMIO() 1183{ 1184 if (!mmioRing) 1185 return 0; 1186 1187 DPRINTF(KvmIO, "KVM: Flushing the coalesced MMIO ring buffer\n"); 1188 1189 // TODO: We might need to do synchronization when we start to 1190 // support multiple CPUs 1191 Tick ticks(0); 1192 while (mmioRing->first != mmioRing->last) { 1193 struct kvm_coalesced_mmio &ent( 1194 mmioRing->coalesced_mmio[mmioRing->first]); 1195 1196 DPRINTF(KvmIO, "KVM: Handling coalesced MMIO (addr: 0x%x, len: %u)\n", 1197 ent.phys_addr, ent.len); 1198 1199 ++numCoalescedMMIO; 1200 ticks += doMMIOAccess(ent.phys_addr, ent.data, ent.len, true); 1201 1202 mmioRing->first = (mmioRing->first + 1) % KVM_COALESCED_MMIO_MAX; 1203 } 1204 1205 return ticks; 1206} 1207 1208/** 1209 * Dummy handler for KVM kick signals. 1210 * 1211 * @note This function is usually not called since the kernel doesn't 1212 * seem to deliver signals when the signal is only unmasked when 1213 * running in KVM. This doesn't matter though since we are only 1214 * interested in getting KVM to exit, which happens as expected. See 1215 * setupSignalHandler() and kvmRun() for details about KVM signal 1216 * handling. 1217 */ 1218static void 1219onKickSignal(int signo, siginfo_t *si, void *data) 1220{ 1221} 1222 1223void 1224BaseKvmCPU::setupSignalHandler() 1225{ 1226 struct sigaction sa; 1227 1228 memset(&sa, 0, sizeof(sa)); 1229 sa.sa_sigaction = onKickSignal; 1230 sa.sa_flags = SA_SIGINFO | SA_RESTART; 1231 if (sigaction(KVM_KICK_SIGNAL, &sa, NULL) == -1) 1232 panic("KVM: Failed to setup vCPU timer signal handler\n"); 1233 1234 sigset_t sigset; 1235 if (pthread_sigmask(SIG_BLOCK, NULL, &sigset) == -1) 1236 panic("KVM: Failed get signal mask\n"); 1237 1238 // Request KVM to setup the same signal mask as we're currently 1239 // running with except for the KVM control signal. We'll sometimes 1240 // need to raise the KVM_KICK_SIGNAL to cause immediate exits from 1241 // KVM after servicing IO requests. See kvmRun(). 1242 sigdelset(&sigset, KVM_KICK_SIGNAL); 1243 setSignalMask(&sigset); 1244 1245 // Mask our control signals so they aren't delivered unless we're 1246 // actually executing inside KVM. 1247 sigaddset(&sigset, KVM_KICK_SIGNAL); 1248 if (pthread_sigmask(SIG_SETMASK, &sigset, NULL) == -1) 1249 panic("KVM: Failed mask the KVM control signals\n"); 1250} 1251 1252bool 1253BaseKvmCPU::discardPendingSignal(int signum) const 1254{ 1255 int discardedSignal; 1256 1257 // Setting the timeout to zero causes sigtimedwait to return 1258 // immediately. 1259 struct timespec timeout; 1260 timeout.tv_sec = 0; 1261 timeout.tv_nsec = 0; 1262 1263 sigset_t sigset; 1264 sigemptyset(&sigset); 1265 sigaddset(&sigset, signum); 1266 1267 do { 1268 discardedSignal = sigtimedwait(&sigset, NULL, &timeout); 1269 } while (discardedSignal == -1 && errno == EINTR); 1270 1271 if (discardedSignal == signum) 1272 return true; 1273 else if (discardedSignal == -1 && errno == EAGAIN) 1274 return false; 1275 else 1276 panic("Unexpected return value from sigtimedwait: %i (errno: %i)\n", 1277 discardedSignal, errno); 1278} 1279 1280void 1281BaseKvmCPU::setupCounters() 1282{ 1283 DPRINTF(Kvm, "Attaching cycle counter...\n"); 1284 PerfKvmCounterConfig cfgCycles(PERF_TYPE_HARDWARE, 1285 PERF_COUNT_HW_CPU_CYCLES); 1286 cfgCycles.disabled(true) 1287 .pinned(true); 1288 1289 // Try to exclude the host. We set both exclude_hv and 1290 // exclude_host since different architectures use slightly 1291 // different APIs in the kernel. 1292 cfgCycles.exclude_hv(true) 1293 .exclude_host(true); 1294 1295 if (perfControlledByTimer) { 1296 // We need to configure the cycles counter to send overflows 1297 // since we are going to use it to trigger timer signals that 1298 // trap back into m5 from KVM. In practice, this means that we 1299 // need to set some non-zero sample period that gets 1300 // overridden when the timer is armed. 1301 cfgCycles.wakeupEvents(1) 1302 .samplePeriod(42); 1303 } 1304 1305 hwCycles.attach(cfgCycles, 1306 0); // TID (0 => currentThread) 1307 1308 setupInstCounter(); 1309} 1310 1311bool 1312BaseKvmCPU::tryDrain() 1313{ 1314 if (drainState() != DrainState::Draining) 1315 return false; 1316 1317 if (!archIsDrained()) { 1318 DPRINTF(Drain, "tryDrain: Architecture code is not ready.\n"); 1319 return false; 1320 } 1321 1322 if (_status == Idle || _status == Running) { 1323 DPRINTF(Drain, 1324 "tryDrain: CPU transitioned into the Idle state, drain done\n"); 1325 signalDrainDone(); 1326 return true; 1327 } else { 1328 DPRINTF(Drain, "tryDrain: CPU not ready.\n"); 1329 return false; 1330 } 1331} 1332 1333void 1334BaseKvmCPU::ioctlRun() 1335{ 1336 if (ioctl(KVM_RUN) == -1) { 1337 if (errno != EINTR) 1338 panic("KVM: Failed to start virtual CPU (errno: %i)\n", 1339 errno); 1340 } 1341} 1342 1343void 1344BaseKvmCPU::setupInstStop() 1345{ 1346 if (comInstEventQueue[0]->empty()) { 1347 setupInstCounter(0); 1348 } else { 1349 const uint64_t next(comInstEventQueue[0]->nextTick()); 1350 1351 assert(next > ctrInsts); 1352 setupInstCounter(next - ctrInsts); 1353 } 1354} 1355 1356void 1357BaseKvmCPU::setupInstCounter(uint64_t period) 1358{ 1359 // No need to do anything if we aren't attaching for the first 1360 // time or the period isn't changing. 1361 if (period == activeInstPeriod && hwInstructions.attached()) 1362 return; 1363 1364 PerfKvmCounterConfig cfgInstructions(PERF_TYPE_HARDWARE, 1365 PERF_COUNT_HW_INSTRUCTIONS); 1366 1367 // Try to exclude the host. We set both exclude_hv and 1368 // exclude_host since different architectures use slightly 1369 // different APIs in the kernel. 1370 cfgInstructions.exclude_hv(true) 1371 .exclude_host(true); 1372 1373 if (period) { 1374 // Setup a sampling counter if that has been requested. 1375 cfgInstructions.wakeupEvents(1) 1376 .samplePeriod(period); 1377 } 1378 1379 // We need to detach and re-attach the counter to reliably change 1380 // sampling settings. See PerfKvmCounter::period() for details. 1381 if (hwInstructions.attached()) 1382 hwInstructions.detach(); 1383 assert(hwCycles.attached()); 1384 hwInstructions.attach(cfgInstructions, 1385 0, // TID (0 => currentThread) 1386 hwCycles); 1387 1388 if (period) 1389 hwInstructions.enableSignals(KVM_KICK_SIGNAL); 1390 1391 activeInstPeriod = period; 1392} 1393