51a52
> #include "debug/Drain.hh"
58a60,61
> #include <signal.h>
>
83a87
> drainManager(NULL),
97d100
< setupSignalHandler();
153a157,160
> // Set up signal handlers. This has to be done after the vCPU is
> // created since it manipulates the vCPU signal mask.
> setupSignalHandler();
>
235,237d241
< // Update the thread context so we have something to serialize.
< syncThreadContext();
<
261c265,276
< DPRINTF(Kvm, "drain\n");
---
> DPRINTF(Drain, "BaseKvmCPU::drain\n");
> switch (_status) {
> case Running:
> // The base KVM code is normally ready when it is in the
> // Running state, but the architecture-specific code might be
> // of a different opinion. This may happen when the CPU has
> // been notified of an event that hasn't been accepted by the
> // vCPU yet.
> if (!archIsDrained()) {
> drainManager = dm;
> return 1;
> }
263,266c278,284
< // De-schedule the tick event so we don't insert any more MMIOs
< // into the system while it is draining.
< if (tickEvent.scheduled())
< deschedule(tickEvent);
---
> // The state of the CPU is consistent, so we don't need to do
> // anything special to drain it. We simply de-schedule the
> // tick event and enter the Idle state to prevent nasty things
> // like MMIOs from happening.
> if (tickEvent.scheduled())
> deschedule(tickEvent);
> _status = Idle;
268,269c286,320
< _status = Idle;
< return 0;
---
> /** FALLTHROUGH */
> case Idle:
> // Idle, no need to drain
> assert(!tickEvent.scheduled());
>
> // Sync the thread context here since we'll need it when we
> // switch CPUs or checkpoint the CPU.
> syncThreadContext();
>
> return 0;
>
> case RunningServiceCompletion:
> // The CPU has just requested a service that was handled in
> // the RunningService state, but the results have still not
> // been reported to the CPU. Now, we /could/ probably just
> // update the register state ourselves instead of letting KVM
> // handle it, but that would be tricky. Instead, we enter KVM
> // and let it do its stuff.
> drainManager = dm;
>
> DPRINTF(Drain, "KVM CPU is waiting for service completion, "
> "requesting drain.\n");
> return 1;
>
> case RunningService:
> // We need to drain since the CPU is waiting for service (e.g., MMIOs)
> drainManager = dm;
>
> DPRINTF(Drain, "KVM CPU is waiting for service, requesting drain.\n");
> return 1;
>
> default:
> panic("KVM: Unhandled CPU state in drain()\n");
> return 0;
> }
300,303d350
< // Make sure to update the thread context in case, the new CPU
< // will need to access it.
< syncThreadContext();
<
327,329c374,379
< // The BaseCPU updated the thread context, make sure that we
< // synchronize next time we enter start the CPU.
< threadContextDirty = true;
---
> // Force an update of the KVM state here instead of flagging the
> // TC as dirty. This is not ideal from a performance point of
> // view, but it makes debugging easier as it allows meaningful KVM
> // state to be dumped before and after a takeover.
> updateKvmState();
> threadContextDirty = false;
439c489,490
< assert(_status == Running);
---
> Tick delay(0);
> assert(_status != Idle);
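> // 'delay' is the number of ticks until the next tick event and
> // is used to schedule the tick event at the end of this method.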
441c492,495
< DPRINTF(KvmRun, "Entering KVM...\n");
---
> switch (_status) {
> case RunningService:
> // handleKvmExit() will determine the next state of the CPU
> delay = handleKvmExit();
443,444c497,499
< Tick ticksToExecute(mainEventQueue.nextTick() - curTick());
< Tick ticksExecuted(kvmRun(ticksToExecute));
---
> if (tryDrain())
> _status = Idle;
> break;
446c501,503
< Tick delay(ticksExecuted + handleKvmExit());
---
> case RunningServiceCompletion:
> case Running: {
> Tick ticksToExecute(mainEventQueue.nextTick() - curTick());
448,451c505,506
< switch (_status) {
< case Running:
< schedule(tickEvent, clockEdge(ticksToCycles(delay)));
< break;
---
> // We might need to update the KVM state.
> syncKvmState();
452a508,534
> DPRINTF(KvmRun, "Entering KVM...\n");
> if (drainManager) {
> // Force an immediate exit from KVM after completing
> // pending operations. The architecture-specific code
> // takes care to run until it is in a state where it can
> // safely be drained.
> delay = kvmRunDrain();
> } else {
> delay = kvmRun(ticksToExecute);
> }
>
> // Entering into KVM implies that we'll have to reload the thread
> // context from KVM if we want to access it. Flag the KVM state as
> // dirty with respect to the cached thread context.
> kvmStateDirty = true;
>
> // Enter into the RunningService state unless the
> // simulation was stopped by a timer.
> if (_kvmRun->exit_reason != KVM_EXIT_INTR)
> _status = RunningService;
> else
> _status = Running;
>
> if (tryDrain())
> _status = Idle;
> } break;
>
454,456c536,537
< /* The CPU is halted or waiting for an interrupt from a
< * device. Don't start it. */
< break;
---
> panic("BaseKvmCPU entered tick() in an illegal state (%i)\n",
> _status);
457a539,542
>
> // Schedule a new tick if we are still running
> if (_status != Idle)
> schedule(tickEvent, clockEdge(ticksToCycles(delay)));
459a545,557
> Tick
> BaseKvmCPU::kvmRunDrain()
> {
> // By default, the only thing that needs to be drained is a
> // pending IO operation, which means that we must be in the
> // RunningServiceCompletion state.
> assert(_status == RunningServiceCompletion);
>
> // Deliver the data from the pending IO operation and immediately
> // exit.
> return kvmRun(0);
> }
>
469,481c567
< // We might need to update the KVM state.
< syncKvmState();
< // Entering into KVM implies that we'll have to reload the thread
< // context from KVM if we want to access it. Flag the KVM state as
< // dirty with respect to the cached thread context.
< kvmStateDirty = true;
<
< if (ticks < runTimer->resolution()) {
< DPRINTF(KvmRun, "KVM: Adjusting tick count (%i -> %i)\n",
< ticks, runTimer->resolution());
< ticks = runTimer->resolution();
< }
<
---
> Tick ticksExecuted;
485,488c571,575
< // Get hardware statistics after synchronizing contexts. The KVM
< // state update might affect guest cycle counters.
< uint64_t baseCycles(getHostCycles());
< uint64_t baseInstrs(hwInstructions.read());
---
> if (ticks == 0) {
> // Setting ticks == 0 is a special case which causes an entry
> // into KVM that finishes pending operations (e.g., IO) and
> // then immediately exits.
> DPRINTF(KvmRun, "KVM: Delivering IO without full guest entry\n");
490,496c577,580
< // Arm the run timer and start the cycle timer if it isn't
< // controlled by the overflow timer. Starting/stopping the cycle
< // timer automatically starts the other perf timers as they are in
< // the same counter group.
< runTimer->arm(ticks);
< if (!perfControlledByTimer)
< hwCycles.start();
---
> // This signal is always masked while we are executing in gem5
> // and gets unmasked temporarily as soon as we enter into
> // KVM. See setSignalMask() and setupSignalHandler().
> raise(KVM_TIMER_SIGNAL);
498,502c582,587
< if (ioctl(KVM_RUN) == -1) {
< if (errno != EINTR)
< panic("KVM: Failed to start virtual CPU (errno: %i)\n",
< errno);
< }
---
> // Enter into KVM. KVM will check for signals after completing
> // pending operations (IO). Since the KVM_TIMER_SIGNAL is
> // pending, this forces an immediate exit into gem5 again. We
> // don't bother to set up timers since this shouldn't actually
> // execute any code in the guest.
> ioctlRun();
504,506c589,598
< runTimer->disarm();
< if (!perfControlledByTimer)
< hwCycles.stop();
---
> // We always execute at least one cycle to prevent
> // BaseKvmCPU::tick() from being rescheduled on the same
> // tick twice.
> ticksExecuted = clockPeriod();
> } else {
> if (ticks < runTimer->resolution()) {
> DPRINTF(KvmRun, "KVM: Adjusting tick count (%i -> %i)\n",
> ticks, runTimer->resolution());
> ticks = runTimer->resolution();
> }
507a600,603
> // Get hardware statistics after synchronizing contexts. The KVM
> // state update might affect guest cycle counters.
> uint64_t baseCycles(getHostCycles());
> uint64_t baseInstrs(hwInstructions.read());
509,512c605,611
< const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles);
< const uint64_t simCyclesExecuted(hostCyclesExecuted * hostFactor);
< const uint64_t instsExecuted(hwInstructions.read() - baseInstrs);
< const Tick ticksExecuted(runTimer->ticksFromHostCycles(hostCyclesExecuted));
---
> // Arm the run timer and start the cycle timer if it isn't
> // controlled by the overflow timer. Starting/stopping the cycle
> // timer automatically starts the other perf timers as they are in
> // the same counter group.
> runTimer->arm(ticks);
> if (!perfControlledByTimer)
> hwCycles.start();
514,519c613,648
< if (ticksExecuted < ticks &&
< timerOverflowed &&
< _kvmRun->exit_reason == KVM_EXIT_INTR) {
< // TODO: We should probably do something clever here...
< warn("KVM: Early timer event, requested %i ticks but got %i ticks.\n",
< ticks, ticksExecuted);
---
> ioctlRun();
>
> runTimer->disarm();
> if (!perfControlledByTimer)
> hwCycles.stop();
>
> // The timer signal may have been delivered after we exited
> // from KVM. It will be pending in that case since it is
> // masked when we aren't executing in KVM. Discard it to make
> // sure we don't deliver it immediately next time we try to
> // enter into KVM.
> discardPendingSignal(KVM_TIMER_SIGNAL);
>
> const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles);
> const uint64_t simCyclesExecuted(hostCyclesExecuted * hostFactor);
> const uint64_t instsExecuted(hwInstructions.read() - baseInstrs);
> ticksExecuted = runTimer->ticksFromHostCycles(hostCyclesExecuted);
>
> if (ticksExecuted < ticks &&
> timerOverflowed &&
> _kvmRun->exit_reason == KVM_EXIT_INTR) {
> // TODO: We should probably do something clever here...
> warn("KVM: Early timer event, requested %i ticks but got %i ticks.\n",
> ticks, ticksExecuted);
> }
>
> /* Update statistics */
> numCycles += simCyclesExecuted;
> numInsts += instsExecuted;
> ctrInsts += instsExecuted;
> system->totalNumInsts += instsExecuted;
>
> DPRINTF(KvmRun,
> "KVM: Executed %i instructions in %i cycles "
> "(%i ticks, sim cycles: %i).\n",
> instsExecuted, hostCyclesExecuted, ticksExecuted, simCyclesExecuted);
522,523d650
< /* Update statistics */
< numCycles += simCyclesExecuted;;
525,527d651
< numInsts += instsExecuted;
< ctrInsts += instsExecuted;
< system->totalNumInsts += instsExecuted;
529,531d652
< DPRINTF(KvmRun, "KVM: Executed %i instructions in %i cycles (%i ticks, sim cycles: %i).\n",
< instsExecuted, hostCyclesExecuted, ticksExecuted, simCyclesExecuted);
<
702a824
> assert(_status == RunningService);
703a826,828
> // Switch into the running state by default. Individual handlers
> // can override this.
> _status = Running;
711a837
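> // The result of the request is delivered to the vCPU the next
> // time we enter into KVM (see RunningServiceCompletion in
> // drain() and tick()).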
> _status = RunningServiceCompletion;
730a857
> _status = RunningServiceCompletion;
818a946,966
> void
> BaseKvmCPU::setSignalMask(const sigset_t *mask)
> {
> std::unique_ptr<struct kvm_signal_mask> kvm_mask;
>
> if (mask) {
> kvm_mask.reset((struct kvm_signal_mask *)operator new(
> sizeof(struct kvm_signal_mask) + sizeof(*mask)));
> // The kernel and the user-space headers have different ideas
> // about the size of sigset_t. This seems like a massive hack,
> // but is actually what qemu does.
> assert(sizeof(*mask) >= 8);
> kvm_mask->len = 8;
> memcpy(kvm_mask->sigset, mask, kvm_mask->len);
> }
>
> if (ioctl(KVM_SET_SIGNAL_MASK, (void *)kvm_mask.get()) == -1)
> panic("KVM: Failed to set vCPU signal mask (errno: %i)\n",
> errno);
> }
>
864a1013,1028
>
> sigset_t sigset;
> if (sigprocmask(SIG_BLOCK, NULL, &sigset) == -1)
> panic("KVM: Failed to get signal mask\n");
>
> // Request KVM to setup the same signal mask as we're currently
> // running with. We'll sometimes need to mask the KVM_TIMER_SIGNAL
> // to cause immediate exits from KVM after servicing IO
> // requests. See kvmRun().
> setSignalMask(&sigset);
>
> // Mask the KVM_TIMER_SIGNAL so it isn't delivered unless we're
> // actually executing inside KVM.
> sigaddset(&sigset, KVM_TIMER_SIGNAL);
> if (sigprocmask(SIG_SETMASK, &sigset, NULL) == -1)
> panic("KVM: Failed to mask the KVM timer signal\n");
866a1031,1058
> bool
> BaseKvmCPU::discardPendingSignal(int signum) const
> {
> int discardedSignal;
>
> // Setting the timeout to zero causes sigtimedwait to return
> // immediately.
> struct timespec timeout;
> timeout.tv_sec = 0;
> timeout.tv_nsec = 0;
>
> sigset_t sigset;
> sigemptyset(&sigset);
> sigaddset(&sigset, signum);
>
> do {
> discardedSignal = sigtimedwait(&sigset, NULL, &timeout);
> } while (discardedSignal == -1 && errno == EINTR);
>
> if (discardedSignal == signum)
> return true;
> else if (discardedSignal == -1 && errno == EAGAIN)
> return false;
> else
> panic("Unexpected return value from sigtimedwait: %i (errno: %i)\n",
> discardedSignal, errno);
> }
>
895a1088,1120
>
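> // Check if there is a pending drain request and, if the
> // architecture-specific state is drained and the CPU is in the
> // Idle or Running state, signal the DrainManager that the drain
> // has completed.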
> bool
> BaseKvmCPU::tryDrain()
> {
> if (!drainManager)
> return false;
>
> if (!archIsDrained()) {
> DPRINTF(Drain, "tryDrain: Architecture code is not ready.\n");
> return false;
> }
>
> if (_status == Idle || _status == Running) {
> DPRINTF(Drain,
> "tryDrain: CPU transitioned into the Idle state, drain done\n");
> drainManager->signalDrainDone();
> drainManager = NULL;
> return true;
> } else {
> DPRINTF(Drain, "tryDrain: CPU not ready.\n");
> return false;
> }
> }
>
> void
> BaseKvmCPU::ioctlRun()
> {
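> // KVM_RUN returning -1 with errno == EINTR means that the run
> // was interrupted by a signal (e.g., the timer signal), which is
> // an expected way of returning to gem5; treat any other error as
> // fatal.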
> if (ioctl(KVM_RUN) == -1) {
> if (errno != EINTR)
> panic("KVM: Failed to start virtual CPU (errno: %i)\n",
> errno);
> }
> }