base.cc revision 9755
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andreas Sandberg
38 */
39
40#include <linux/kvm.h>
41#include <sys/ioctl.h>
42#include <sys/mman.h>
43#include <unistd.h>
44
45#include <cerrno>
46#include <csignal>
47#include <ostream>
48
49#include "arch/utility.hh"
50#include "cpu/kvm/base.hh"
51#include "debug/Checkpoint.hh"
52#include "debug/Drain.hh"
53#include "debug/Kvm.hh"
54#include "debug/KvmIO.hh"
55#include "debug/KvmRun.hh"
56#include "params/BaseKvmCPU.hh"
57#include "sim/process.hh"
58#include "sim/system.hh"
59
60#include <signal.h>
61
62/* Used by some KVM macros */
63#define PAGE_SIZE pageSize
64
/**
 * Set by the KVM timer signal handler. BaseKvmCPU::kvmRun() clears it
 * before entering the guest and inspects it after the guest exits.
 */
volatile bool timerOverflowed = false;

/**
 * Signal handler for the KVM timer signal. It only records that the
 * signal was delivered; none of the arguments are used.
 */
static void
onTimerOverflow(int signo, siginfo_t *si, void *data)
{
    timerOverflowed = true;
}
72
73BaseKvmCPU::BaseKvmCPU(BaseKvmCPUParams *params)
74    : BaseCPU(params),
75      vm(*params->kvmVM),
76      _status(Idle),
77      dataPort(name() + ".dcache_port", this),
78      instPort(name() + ".icache_port", this),
79      threadContextDirty(true),
80      kvmStateDirty(false),
81      vcpuID(vm.allocVCPUID()), vcpuFD(-1), vcpuMMapSize(0),
82      _kvmRun(NULL), mmioRing(NULL),
83      pageSize(sysconf(_SC_PAGE_SIZE)),
84      tickEvent(*this),
85      perfControlledByTimer(params->usePerfOverflow),
86      hostFreq(params->hostFreq),
87      hostFactor(params->hostFactor),
88      drainManager(NULL),
89      ctrInsts(0)
90{
91    if (pageSize == -1)
92        panic("KVM: Failed to determine host page size (%i)\n",
93              errno);
94
95    thread = new SimpleThread(this, 0, params->system,
96                              params->itb, params->dtb, params->isa[0]);
97    thread->setStatus(ThreadContext::Halted);
98    tc = thread->getTC();
99    threadContexts.push_back(tc);
100
101    setupCounters();
102
103    if (params->usePerfOverflow)
104        runTimer.reset(new PerfKvmTimer(hwCycles,
105                                        KVM_TIMER_SIGNAL,
106                                        params->hostFactor,
107                                        params->hostFreq));
108    else
109        runTimer.reset(new PosixKvmTimer(KVM_TIMER_SIGNAL, CLOCK_MONOTONIC,
110                                         params->hostFactor,
111                                         params->hostFreq));
112}
113
114BaseKvmCPU::~BaseKvmCPU()
115{
116    if (_kvmRun)
117        munmap(_kvmRun, vcpuMMapSize);
118    close(vcpuFD);
119}
120
121void
122BaseKvmCPU::init()
123{
124    BaseCPU::init();
125
126    if (numThreads != 1)
127        fatal("KVM: Multithreading not supported");
128
129    tc->initMemProxies(tc);
130
131    // initialize CPU, including PC
132    if (FullSystem && !switchedOut())
133        TheISA::initCPU(tc, tc->contextId());
134
135    mmio_req.setThreadContext(tc->contextId(), 0);
136}
137
138void
139BaseKvmCPU::startup()
140{
141    const BaseKvmCPUParams * const p(
142        dynamic_cast<const BaseKvmCPUParams *>(params()));
143
144    Kvm &kvm(vm.kvm);
145
146    BaseCPU::startup();
147
148    assert(vcpuFD == -1);
149
150    // Tell the VM that a CPU is about to start.
151    vm.cpuStartup();
152
153    // We can't initialize KVM CPUs in BaseKvmCPU::init() since we are
154    // not guaranteed that the parent KVM VM has initialized at that
155    // point. Initialize virtual CPUs here instead.
156    vcpuFD = vm.createVCPU(vcpuID);
157
158    // Setup signal handlers. This has to be done after the vCPU is
159    // created since it manipulates the vCPU signal mask.
160    setupSignalHandler();
161
162    // Map the KVM run structure */
163    vcpuMMapSize = kvm.getVCPUMMapSize();
164    _kvmRun = (struct kvm_run *)mmap(0, vcpuMMapSize,
165                                     PROT_READ | PROT_WRITE, MAP_SHARED,
166                                     vcpuFD, 0);
167    if (_kvmRun == MAP_FAILED)
168        panic("KVM: Failed to map run data structure\n");
169
170    // Setup a pointer to the MMIO ring buffer if coalesced MMIO is
171    // available. The offset into the KVM's communication page is
172    // provided by the coalesced MMIO capability.
173    int mmioOffset(kvm.capCoalescedMMIO());
174    if (!p->useCoalescedMMIO) {
175        inform("KVM: Coalesced MMIO disabled by config.\n");
176    } else if (mmioOffset) {
177        inform("KVM: Coalesced IO available\n");
178        mmioRing = (struct kvm_coalesced_mmio_ring *)(
179            (char *)_kvmRun + (mmioOffset * pageSize));
180    } else {
181        inform("KVM: Coalesced not supported by host OS\n");
182    }
183
184    thread->startup();
185}
186
187void
188BaseKvmCPU::regStats()
189{
190    using namespace Stats;
191
192    BaseCPU::regStats();
193
194    numInsts
195        .name(name() + ".committedInsts")
196        .desc("Number of instructions committed")
197        ;
198
199    numVMExits
200        .name(name() + ".numVMExits")
201        .desc("total number of KVM exits")
202        ;
203
204    numVMHalfEntries
205        .name(name() + ".numVMHalfEntries")
206        .desc("number of KVM entries to finalize pending operations")
207        ;
208
209    numExitSignal
210        .name(name() + ".numExitSignal")
211        .desc("exits due to signal delivery")
212        ;
213
214    numMMIO
215        .name(name() + ".numMMIO")
216        .desc("number of VM exits due to memory mapped IO")
217        ;
218
219    numCoalescedMMIO
220        .name(name() + ".numCoalescedMMIO")
221        .desc("number of coalesced memory mapped IO requests")
222        ;
223
224    numIO
225        .name(name() + ".numIO")
226        .desc("number of VM exits due to legacy IO")
227        ;
228
229    numHalt
230        .name(name() + ".numHalt")
231        .desc("number of VM exits due to wait for interrupt instructions")
232        ;
233
234    numInterrupts
235        .name(name() + ".numInterrupts")
236        .desc("number of interrupts delivered")
237        ;
238
239    numHypercalls
240        .name(name() + ".numHypercalls")
241        .desc("number of hypercalls")
242        ;
243}
244
245void
246BaseKvmCPU::serializeThread(std::ostream &os, ThreadID tid)
247{
248    if (DTRACE(Checkpoint)) {
249        DPRINTF(Checkpoint, "KVM: Serializing thread %i:\n", tid);
250        dump();
251    }
252
253    assert(tid == 0);
254    assert(_status == Idle);
255    thread->serialize(os);
256}
257
258void
259BaseKvmCPU::unserializeThread(Checkpoint *cp, const std::string &section,
260                              ThreadID tid)
261{
262    DPRINTF(Checkpoint, "KVM: Unserialize thread %i:\n", tid);
263
264    assert(tid == 0);
265    assert(_status == Idle);
266    thread->unserialize(cp, section);
267    threadContextDirty = true;
268}
269
270unsigned int
271BaseKvmCPU::drain(DrainManager *dm)
272{
273    if (switchedOut())
274        return 0;
275
276    DPRINTF(Drain, "BaseKvmCPU::drain\n");
277    switch (_status) {
278      case Running:
279        // The base KVM code is normally ready when it is in the
280        // Running state, but the architecture specific code might be
281        // of a different opinion. This may happen when the CPU been
282        // notified of an event that hasn't been accepted by the vCPU
283        // yet.
284        if (!archIsDrained()) {
285            drainManager = dm;
286            return 1;
287        }
288
289        // The state of the CPU is consistent, so we don't need to do
290        // anything special to drain it. We simply de-schedule the
291        // tick event and enter the Idle state to prevent nasty things
292        // like MMIOs from happening.
293        if (tickEvent.scheduled())
294            deschedule(tickEvent);
295        _status = Idle;
296
297        /** FALLTHROUGH */
298      case Idle:
299        // Idle, no need to drain
300        assert(!tickEvent.scheduled());
301
302        // Sync the thread context here since we'll need it when we
303        // switch CPUs or checkpoint the CPU.
304        syncThreadContext();
305
306        return 0;
307
308      case RunningServiceCompletion:
309        // The CPU has just requested a service that was handled in
310        // the RunningService state, but the results have still not
311        // been reported to the CPU. Now, we /could/ probably just
312        // update the register state ourselves instead of letting KVM
313        // handle it, but that would be tricky. Instead, we enter KVM
314        // and let it do its stuff.
315        drainManager = dm;
316
317        DPRINTF(Drain, "KVM CPU is waiting for service completion, "
318                "requesting drain.\n");
319        return 1;
320
321      case RunningService:
322        // We need to drain since the CPU is waiting for service (e.g., MMIOs)
323        drainManager = dm;
324
325        DPRINTF(Drain, "KVM CPU is waiting for service, requesting drain.\n");
326        return 1;
327
328      default:
329        panic("KVM: Unhandled CPU state in drain()\n");
330        return 0;
331    }
332}
333
334void
335BaseKvmCPU::drainResume()
336{
337    assert(!tickEvent.scheduled());
338
339    // We might have been switched out. In that case, we don't need to
340    // do anything.
341    if (switchedOut())
342        return;
343
344    DPRINTF(Kvm, "drainResume\n");
345    verifyMemoryMode();
346
347    // The tick event is de-scheduled as a part of the draining
348    // process. Re-schedule it if the thread context is active.
349    if (tc->status() == ThreadContext::Active) {
350        schedule(tickEvent, nextCycle());
351        _status = Running;
352    } else {
353        _status = Idle;
354    }
355}
356
357void
358BaseKvmCPU::switchOut()
359{
360    DPRINTF(Kvm, "switchOut\n");
361
362    BaseCPU::switchOut();
363
364    // We should have drained prior to executing a switchOut, which
365    // means that the tick event shouldn't be scheduled and the CPU is
366    // idle.
367    assert(!tickEvent.scheduled());
368    assert(_status == Idle);
369}
370
371void
372BaseKvmCPU::takeOverFrom(BaseCPU *cpu)
373{
374    DPRINTF(Kvm, "takeOverFrom\n");
375
376    BaseCPU::takeOverFrom(cpu);
377
378    // We should have drained prior to executing a switchOut, which
379    // means that the tick event shouldn't be scheduled and the CPU is
380    // idle.
381    assert(!tickEvent.scheduled());
382    assert(_status == Idle);
383    assert(threadContexts.size() == 1);
384
385    // Force an update of the KVM state here instead of flagging the
386    // TC as dirty. This is not ideal from a performance point of
387    // view, but it makes debugging easier as it allows meaningful KVM
388    // state to be dumped before and after a takeover.
389    updateKvmState();
390    threadContextDirty = false;
391}
392
393void
394BaseKvmCPU::verifyMemoryMode() const
395{
396    if (!(system->isAtomicMode() && system->bypassCaches())) {
397        fatal("The KVM-based CPUs requires the memory system to be in the "
398              "'atomic_noncaching' mode.\n");
399    }
400}
401
402void
403BaseKvmCPU::wakeup()
404{
405    DPRINTF(Kvm, "wakeup()\n");
406
407    if (thread->status() != ThreadContext::Suspended)
408        return;
409
410    thread->activate();
411}
412
413void
414BaseKvmCPU::activateContext(ThreadID thread_num, Cycles delay)
415{
416    DPRINTF(Kvm, "ActivateContext %d (%d cycles)\n", thread_num, delay);
417
418    assert(thread_num == 0);
419    assert(thread);
420
421    assert(_status == Idle);
422    assert(!tickEvent.scheduled());
423
424    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);
425
426    schedule(tickEvent, clockEdge(delay));
427    _status = Running;
428}
429
430
431void
432BaseKvmCPU::suspendContext(ThreadID thread_num)
433{
434    DPRINTF(Kvm, "SuspendContext %d\n", thread_num);
435
436    assert(thread_num == 0);
437    assert(thread);
438
439    if (_status == Idle)
440        return;
441
442    assert(_status == Running);
443
444    // The tick event may no be scheduled if the quest has requested
445    // the monitor to wait for interrupts. The normal CPU models can
446    // get their tick events descheduled by quiesce instructions, but
447    // that can't happen here.
448    if (tickEvent.scheduled())
449        deschedule(tickEvent);
450
451    _status = Idle;
452}
453
454void
455BaseKvmCPU::deallocateContext(ThreadID thread_num)
456{
457    // for now, these are equivalent
458    suspendContext(thread_num);
459}
460
461void
462BaseKvmCPU::haltContext(ThreadID thread_num)
463{
464    // for now, these are equivalent
465    suspendContext(thread_num);
466}
467
468ThreadContext *
469BaseKvmCPU::getContext(int tn)
470{
471    assert(tn == 0);
472    syncThreadContext();
473    return tc;
474}
475
476
477Counter
478BaseKvmCPU::totalInsts() const
479{
480    return ctrInsts;
481}
482
483Counter
484BaseKvmCPU::totalOps() const
485{
486    hack_once("Pretending totalOps is equivalent to totalInsts()\n");
487    return ctrInsts;
488}
489
490void
491BaseKvmCPU::dump()
492{
493    inform("State dumping not implemented.");
494}
495
496void
497BaseKvmCPU::tick()
498{
499    Tick delay(0);
500    assert(_status != Idle);
501
502    switch (_status) {
503      case RunningService:
504        // handleKvmExit() will determine the next state of the CPU
505        delay = handleKvmExit();
506
507        if (tryDrain())
508            _status = Idle;
509        break;
510
511      case RunningServiceCompletion:
512      case Running: {
513          Tick ticksToExecute(mainEventQueue.nextTick() - curTick());
514
515          // We might need to update the KVM state.
516          syncKvmState();
517
518          DPRINTF(KvmRun, "Entering KVM...\n");
519          if (drainManager) {
520              // Force an immediate exit from KVM after completing
521              // pending operations. The architecture-specific code
522              // takes care to run until it is in a state where it can
523              // safely be drained.
524              delay = kvmRunDrain();
525          } else {
526              delay = kvmRun(ticksToExecute);
527          }
528
529          // Entering into KVM implies that we'll have to reload the thread
530          // context from KVM if we want to access it. Flag the KVM state as
531          // dirty with respect to the cached thread context.
532          kvmStateDirty = true;
533
534          // Enter into the RunningService state unless the
535          // simulation was stopped by a timer.
536          if (_kvmRun->exit_reason !=  KVM_EXIT_INTR) {
537              _status = RunningService;
538          } else {
539              ++numExitSignal;
540              _status = Running;
541          }
542
543          if (tryDrain())
544              _status = Idle;
545      } break;
546
547      default:
548        panic("BaseKvmCPU entered tick() in an illegal state (%i)\n",
549              _status);
550    }
551
552    // Schedule a new tick if we are still running
553    if (_status != Idle)
554        schedule(tickEvent, clockEdge(ticksToCycles(delay)));
555}
556
557Tick
558BaseKvmCPU::kvmRunDrain()
559{
560    // By default, the only thing we need to drain is a pending IO
561    // operation which assumes that we are in the
562    // RunningServiceCompletion state.
563    assert(_status == RunningServiceCompletion);
564
565    // Deliver the data from the pending IO operation and immediately
566    // exit.
567    return kvmRun(0);
568}
569
570uint64_t
571BaseKvmCPU::getHostCycles() const
572{
573    return hwCycles.read();
574}
575
576Tick
577BaseKvmCPU::kvmRun(Tick ticks)
578{
579    Tick ticksExecuted;
580    DPRINTF(KvmRun, "KVM: Executing for %i ticks\n", ticks);
581    timerOverflowed = false;
582
583    if (ticks == 0) {
584        // Settings ticks == 0 is a special case which causes an entry
585        // into KVM that finishes pending operations (e.g., IO) and
586        // then immediately exits.
587        DPRINTF(KvmRun, "KVM: Delivering IO without full guest entry\n");
588
589        ++numVMHalfEntries;
590
591        // This signal is always masked while we are executing in gem5
592        // and gets unmasked temporarily as soon as we enter into
593        // KVM. See setSignalMask() and setupSignalHandler().
594        raise(KVM_TIMER_SIGNAL);
595
596        // Enter into KVM. KVM will check for signals after completing
597        // pending operations (IO). Since the KVM_TIMER_SIGNAL is
598        // pending, this forces an immediate exit into gem5 again. We
599        // don't bother to setup timers since this shouldn't actually
600        // execute any code in the guest.
601        ioctlRun();
602
603        // We always execute at least one cycle to prevent the
604        // BaseKvmCPU::tick() to be rescheduled on the same tick
605        // twice.
606        ticksExecuted = clockPeriod();
607    } else {
608        if (ticks < runTimer->resolution()) {
609            DPRINTF(KvmRun, "KVM: Adjusting tick count (%i -> %i)\n",
610                    ticks, runTimer->resolution());
611            ticks = runTimer->resolution();
612        }
613
614        // Get hardware statistics after synchronizing contexts. The KVM
615        // state update might affect guest cycle counters.
616        uint64_t baseCycles(getHostCycles());
617        uint64_t baseInstrs(hwInstructions.read());
618
619        // Arm the run timer and start the cycle timer if it isn't
620        // controlled by the overflow timer. Starting/stopping the cycle
621        // timer automatically starts the other perf timers as they are in
622        // the same counter group.
623        runTimer->arm(ticks);
624        if (!perfControlledByTimer)
625            hwCycles.start();
626
627        ioctlRun();
628
629        runTimer->disarm();
630        if (!perfControlledByTimer)
631            hwCycles.stop();
632
633        // The timer signal may have been delivered after we exited
634        // from KVM. It will be pending in that case since it is
635        // masked when we aren't executing in KVM. Discard it to make
636        // sure we don't deliver it immediately next time we try to
637        // enter into KVM.
638        discardPendingSignal(KVM_TIMER_SIGNAL);
639
640        const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles);
641        const uint64_t simCyclesExecuted(hostCyclesExecuted * hostFactor);
642        const uint64_t instsExecuted(hwInstructions.read() - baseInstrs);
643        ticksExecuted = runTimer->ticksFromHostCycles(hostCyclesExecuted);
644
645        if (ticksExecuted < ticks &&
646            timerOverflowed &&
647            _kvmRun->exit_reason == KVM_EXIT_INTR) {
648            // TODO: We should probably do something clever here...
649            warn("KVM: Early timer event, requested %i ticks but got %i ticks.\n",
650                 ticks, ticksExecuted);
651        }
652
653        /* Update statistics */
654        numCycles += simCyclesExecuted;;
655        numInsts += instsExecuted;
656        ctrInsts += instsExecuted;
657        system->totalNumInsts += instsExecuted;
658
659        DPRINTF(KvmRun,
660                "KVM: Executed %i instructions in %i cycles "
661                "(%i ticks, sim cycles: %i).\n",
662                instsExecuted, hostCyclesExecuted, ticksExecuted, simCyclesExecuted);
663    }
664
665    ++numVMExits;
666
667    return ticksExecuted + flushCoalescedMMIO();
668}
669
670void
671BaseKvmCPU::kvmNonMaskableInterrupt()
672{
673    ++numInterrupts;
674    if (ioctl(KVM_NMI) == -1)
675        panic("KVM: Failed to deliver NMI to virtual CPU\n");
676}
677
678void
679BaseKvmCPU::kvmInterrupt(const struct kvm_interrupt &interrupt)
680{
681    ++numInterrupts;
682    if (ioctl(KVM_INTERRUPT, (void *)&interrupt) == -1)
683        panic("KVM: Failed to deliver interrupt to virtual CPU\n");
684}
685
686void
687BaseKvmCPU::getRegisters(struct kvm_regs &regs) const
688{
689    if (ioctl(KVM_GET_REGS, &regs) == -1)
690        panic("KVM: Failed to get guest registers\n");
691}
692
693void
694BaseKvmCPU::setRegisters(const struct kvm_regs &regs)
695{
696    if (ioctl(KVM_SET_REGS, (void *)&regs) == -1)
697        panic("KVM: Failed to set guest registers\n");
698}
699
700void
701BaseKvmCPU::getSpecialRegisters(struct kvm_sregs &regs) const
702{
703    if (ioctl(KVM_GET_SREGS, &regs) == -1)
704        panic("KVM: Failed to get guest special registers\n");
705}
706
707void
708BaseKvmCPU::setSpecialRegisters(const struct kvm_sregs &regs)
709{
710    if (ioctl(KVM_SET_SREGS, (void *)&regs) == -1)
711        panic("KVM: Failed to set guest special registers\n");
712}
713
714void
715BaseKvmCPU::getFPUState(struct kvm_fpu &state) const
716{
717    if (ioctl(KVM_GET_FPU, &state) == -1)
718        panic("KVM: Failed to get guest FPU state\n");
719}
720
721void
722BaseKvmCPU::setFPUState(const struct kvm_fpu &state)
723{
724    if (ioctl(KVM_SET_FPU, (void *)&state) == -1)
725        panic("KVM: Failed to set guest FPU state\n");
726}
727
728
729void
730BaseKvmCPU::setOneReg(uint64_t id, const void *addr)
731{
732#ifdef KVM_SET_ONE_REG
733    struct kvm_one_reg reg;
734    reg.id = id;
735    reg.addr = (uint64_t)addr;
736
737    if (ioctl(KVM_SET_ONE_REG, &reg) == -1) {
738        panic("KVM: Failed to set register (0x%x) value (errno: %i)\n",
739              id, errno);
740    }
741#else
742    panic("KVM_SET_ONE_REG is unsupported on this platform.\n");
743#endif
744}
745
746void
747BaseKvmCPU::getOneReg(uint64_t id, void *addr) const
748{
749#ifdef KVM_GET_ONE_REG
750    struct kvm_one_reg reg;
751    reg.id = id;
752    reg.addr = (uint64_t)addr;
753
754    if (ioctl(KVM_GET_ONE_REG, &reg) == -1) {
755        panic("KVM: Failed to get register (0x%x) value (errno: %i)\n",
756              id, errno);
757    }
758#else
759    panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
760#endif
761}
762
763std::string
764BaseKvmCPU::getAndFormatOneReg(uint64_t id) const
765{
766#ifdef KVM_GET_ONE_REG
767    std::ostringstream ss;
768
769    ss.setf(std::ios::hex, std::ios::basefield);
770    ss.setf(std::ios::showbase);
771#define HANDLE_INTTYPE(len)                      \
772    case KVM_REG_SIZE_U ## len: {                \
773        uint ## len ## _t value;                 \
774        getOneReg(id, &value);                   \
775        ss << value;                             \
776    }  break
777
778#define HANDLE_ARRAY(len)                       \
779    case KVM_REG_SIZE_U ## len: {               \
780        uint8_t value[len / 8];                 \
781        getOneReg(id, value);                   \
782        ss << "[" << value[0];                  \
783        for (int i = 1; i < len  / 8; ++i)      \
784            ss << ", " << value[i];             \
785        ss << "]";                              \
786      } break
787
788    switch (id & KVM_REG_SIZE_MASK) {
789        HANDLE_INTTYPE(8);
790        HANDLE_INTTYPE(16);
791        HANDLE_INTTYPE(32);
792        HANDLE_INTTYPE(64);
793        HANDLE_ARRAY(128);
794        HANDLE_ARRAY(256);
795        HANDLE_ARRAY(512);
796        HANDLE_ARRAY(1024);
797      default:
798        ss << "??";
799    }
800
801#undef HANDLE_INTTYPE
802#undef HANDLE_ARRAY
803
804    return ss.str();
805#else
806    panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
807#endif
808}
809
810void
811BaseKvmCPU::syncThreadContext()
812{
813    if (!kvmStateDirty)
814        return;
815
816    assert(!threadContextDirty);
817
818    updateThreadContext();
819    kvmStateDirty = false;
820}
821
822void
823BaseKvmCPU::syncKvmState()
824{
825    if (!threadContextDirty)
826        return;
827
828    assert(!kvmStateDirty);
829
830    updateKvmState();
831    threadContextDirty = false;
832}
833
834Tick
835BaseKvmCPU::handleKvmExit()
836{
837    DPRINTF(KvmRun, "handleKvmExit (exit_reason: %i)\n", _kvmRun->exit_reason);
838    assert(_status == RunningService);
839
840    // Switch into the running state by default. Individual handlers
841    // can override this.
842    _status = Running;
843    switch (_kvmRun->exit_reason) {
844      case KVM_EXIT_UNKNOWN:
845        return handleKvmExitUnknown();
846
847      case KVM_EXIT_EXCEPTION:
848        return handleKvmExitException();
849
850      case KVM_EXIT_IO:
851        _status = RunningServiceCompletion;
852        ++numIO;
853        return handleKvmExitIO();
854
855      case KVM_EXIT_HYPERCALL:
856        ++numHypercalls;
857        return handleKvmExitHypercall();
858
859      case KVM_EXIT_HLT:
860        /* The guest has halted and is waiting for interrupts */
861        DPRINTF(Kvm, "handleKvmExitHalt\n");
862        ++numHalt;
863
864        // Suspend the thread until the next interrupt arrives
865        thread->suspend();
866
867        // This is actually ignored since the thread is suspended.
868        return 0;
869
870      case KVM_EXIT_MMIO:
871        _status = RunningServiceCompletion;
872        /* Service memory mapped IO requests */
873        DPRINTF(KvmIO, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n",
874                _kvmRun->mmio.is_write,
875                _kvmRun->mmio.phys_addr, _kvmRun->mmio.len);
876
877        ++numMMIO;
878        return doMMIOAccess(_kvmRun->mmio.phys_addr, _kvmRun->mmio.data,
879                            _kvmRun->mmio.len, _kvmRun->mmio.is_write);
880
881      case KVM_EXIT_IRQ_WINDOW_OPEN:
882        return handleKvmExitIRQWindowOpen();
883
884      case KVM_EXIT_FAIL_ENTRY:
885        return handleKvmExitFailEntry();
886
887      case KVM_EXIT_INTR:
888        /* KVM was interrupted by a signal, restart it in the next
889         * tick. */
890        return 0;
891
892      case KVM_EXIT_INTERNAL_ERROR:
893        panic("KVM: Internal error (suberror: %u)\n",
894              _kvmRun->internal.suberror);
895
896      default:
897        dump();
898        panic("KVM: Unexpected exit (exit_reason: %u)\n", _kvmRun->exit_reason);
899    }
900}
901
902Tick
903BaseKvmCPU::handleKvmExitIO()
904{
905    panic("KVM: Unhandled guest IO (dir: %i, size: %i, port: 0x%x, count: %i)\n",
906          _kvmRun->io.direction, _kvmRun->io.size,
907          _kvmRun->io.port, _kvmRun->io.count);
908}
909
910Tick
911BaseKvmCPU::handleKvmExitHypercall()
912{
913    panic("KVM: Unhandled hypercall\n");
914}
915
916Tick
917BaseKvmCPU::handleKvmExitIRQWindowOpen()
918{
919    warn("KVM: Unhandled IRQ window.\n");
920    return 0;
921}
922
923
924Tick
925BaseKvmCPU::handleKvmExitUnknown()
926{
927    dump();
928    panic("KVM: Unknown error when starting vCPU (hw reason: 0x%llx)\n",
929          _kvmRun->hw.hardware_exit_reason);
930}
931
932Tick
933BaseKvmCPU::handleKvmExitException()
934{
935    dump();
936    panic("KVM: Got exception when starting vCPU "
937          "(exception: %u, error_code: %u)\n",
938          _kvmRun->ex.exception, _kvmRun->ex.error_code);
939}
940
941Tick
942BaseKvmCPU::handleKvmExitFailEntry()
943{
944    dump();
945    panic("KVM: Failed to enter virtualized mode (hw reason: 0x%llx)\n",
946          _kvmRun->fail_entry.hardware_entry_failure_reason);
947}
948
949Tick
950BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write)
951{
952    mmio_req.setPhys(paddr, size, Request::UNCACHEABLE, dataMasterId());
953
954    const MemCmd cmd(write ? MemCmd::WriteReq : MemCmd::ReadReq);
955    Packet pkt(&mmio_req, cmd);
956    pkt.dataStatic(data);
957    return dataPort.sendAtomic(&pkt);
958}
959
960void
961BaseKvmCPU::setSignalMask(const sigset_t *mask)
962{
963    std::unique_ptr<struct kvm_signal_mask> kvm_mask;
964
965    if (mask) {
966        kvm_mask.reset((struct kvm_signal_mask *)operator new(
967                           sizeof(struct kvm_signal_mask) + sizeof(*mask)));
968        // The kernel and the user-space headers have different ideas
969        // about the size of sigset_t. This seems like a massive hack,
970        // but is actually what qemu does.
971        assert(sizeof(*mask) >= 8);
972        kvm_mask->len = 8;
973        memcpy(kvm_mask->sigset, mask, kvm_mask->len);
974    }
975
976    if (ioctl(KVM_SET_SIGNAL_MASK, (void *)kvm_mask.get()) == -1)
977        panic("KVM: Failed to set vCPU signal mask (errno: %i)\n",
978              errno);
979}
980
981int
982BaseKvmCPU::ioctl(int request, long p1) const
983{
984    if (vcpuFD == -1)
985        panic("KVM: CPU ioctl called before initialization\n");
986
987    return ::ioctl(vcpuFD, request, p1);
988}
989
990Tick
991BaseKvmCPU::flushCoalescedMMIO()
992{
993    if (!mmioRing)
994        return 0;
995
996    DPRINTF(KvmIO, "KVM: Flushing the coalesced MMIO ring buffer\n");
997
998    // TODO: We might need to do synchronization when we start to
999    // support multiple CPUs
1000    Tick ticks(0);
1001    while (mmioRing->first != mmioRing->last) {
1002        struct kvm_coalesced_mmio &ent(
1003            mmioRing->coalesced_mmio[mmioRing->first]);
1004
1005        DPRINTF(KvmIO, "KVM: Handling coalesced MMIO (addr: 0x%x, len: %u)\n",
1006                ent.phys_addr, ent.len);
1007
1008        ++numCoalescedMMIO;
1009        ticks += doMMIOAccess(ent.phys_addr, ent.data, ent.len, true);
1010
1011        mmioRing->first = (mmioRing->first + 1) % KVM_COALESCED_MMIO_MAX;
1012    }
1013
1014    return ticks;
1015}
1016
1017void
1018BaseKvmCPU::setupSignalHandler()
1019{
1020    struct sigaction sa;
1021
1022    memset(&sa, 0, sizeof(sa));
1023    sa.sa_sigaction = onTimerOverflow;
1024    sa.sa_flags = SA_SIGINFO | SA_RESTART;
1025    if (sigaction(KVM_TIMER_SIGNAL, &sa, NULL) == -1)
1026        panic("KVM: Failed to setup vCPU signal handler\n");
1027
1028    sigset_t sigset;
1029    if (sigprocmask(SIG_BLOCK, NULL, &sigset) == -1)
1030        panic("KVM: Failed get signal mask\n");
1031
1032    // Request KVM to setup the same signal mask as we're currently
1033    // running with. We'll sometimes need to mask the KVM_TIMER_SIGNAL
1034    // to cause immediate exits from KVM after servicing IO
1035    // requests. See kvmRun().
1036    setSignalMask(&sigset);
1037
1038    // Mask the KVM_TIMER_SIGNAL so it isn't delivered unless we're
1039    // actually executing inside KVM.
1040    sigaddset(&sigset, KVM_TIMER_SIGNAL);
1041    if (sigprocmask(SIG_SETMASK, &sigset, NULL) == -1)
1042        panic("KVM: Failed mask the KVM timer signal\n");
1043}
1044
1045bool
1046BaseKvmCPU::discardPendingSignal(int signum) const
1047{
1048    int discardedSignal;
1049
1050    // Setting the timeout to zero causes sigtimedwait to return
1051    // immediately.
1052    struct timespec timeout;
1053    timeout.tv_sec = 0;
1054    timeout.tv_nsec = 0;
1055
1056    sigset_t sigset;
1057    sigemptyset(&sigset);
1058    sigaddset(&sigset, signum);
1059
1060    do {
1061        discardedSignal = sigtimedwait(&sigset, NULL, &timeout);
1062    } while (discardedSignal == -1 && errno == EINTR);
1063
1064    if (discardedSignal == signum)
1065        return true;
1066    else if (discardedSignal == -1 && errno == EAGAIN)
1067        return false;
1068    else
1069        panic("Unexpected return value from sigtimedwait: %i (errno: %i)\n",
1070              discardedSignal, errno);
1071}
1072
1073void
1074BaseKvmCPU::setupCounters()
1075{
1076    DPRINTF(Kvm, "Attaching cycle counter...\n");
1077    PerfKvmCounterConfig cfgCycles(PERF_TYPE_HARDWARE,
1078                                PERF_COUNT_HW_CPU_CYCLES);
1079    cfgCycles.disabled(true)
1080        .pinned(true);
1081
1082    if (perfControlledByTimer) {
1083        // We need to configure the cycles counter to send overflows
1084        // since we are going to use it to trigger timer signals that
1085        // trap back into m5 from KVM. In practice, this means that we
1086        // need to set some non-zero sample period that gets
1087        // overridden when the timer is armed.
1088        cfgCycles.wakeupEvents(1)
1089            .samplePeriod(42);
1090    }
1091
1092    hwCycles.attach(cfgCycles,
1093                    0); // TID (0 => currentThread)
1094
1095    DPRINTF(Kvm, "Attaching instruction counter...\n");
1096    PerfKvmCounterConfig cfgInstructions(PERF_TYPE_HARDWARE,
1097                                      PERF_COUNT_HW_INSTRUCTIONS);
1098    hwInstructions.attach(cfgInstructions,
1099                          0, // TID (0 => currentThread)
1100                          hwCycles);
1101}
1102
1103bool
1104BaseKvmCPU::tryDrain()
1105{
1106    if (!drainManager)
1107        return false;
1108
1109    if (!archIsDrained()) {
1110        DPRINTF(Drain, "tryDrain: Architecture code is not ready.\n");
1111        return false;
1112    }
1113
1114    if (_status == Idle || _status == Running) {
1115        DPRINTF(Drain,
1116                "tryDrain: CPU transitioned into the Idle state, drain done\n");
1117        drainManager->signalDrainDone();
1118        drainManager = NULL;
1119        return true;
1120    } else {
1121        DPRINTF(Drain, "tryDrain: CPU not ready.\n");
1122        return false;
1123    }
1124}
1125
1126void
1127BaseKvmCPU::ioctlRun()
1128{
1129    if (ioctl(KVM_RUN) == -1) {
1130        if (errno != EINTR)
1131            panic("KVM: Failed to start virtual CPU (errno: %i)\n",
1132                  errno);
1133    }
1134}
1135