base.cc revision 9753
1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andreas Sandberg
38 */
39
40#include <linux/kvm.h>
41#include <sys/ioctl.h>
42#include <sys/mman.h>
43#include <unistd.h>
44
45#include <cerrno>
46#include <csignal>
47#include <ostream>
48
49#include "arch/utility.hh"
50#include "cpu/kvm/base.hh"
51#include "debug/Checkpoint.hh"
52#include "debug/Drain.hh"
53#include "debug/Kvm.hh"
54#include "debug/KvmIO.hh"
55#include "debug/KvmRun.hh"
56#include "params/BaseKvmCPU.hh"
57#include "sim/process.hh"
58#include "sim/system.hh"
59
60#include <signal.h>
61
/* Some KVM macros refer to PAGE_SIZE; resolve it to our pageSize. */
#define PAGE_SIZE pageSize

// Set from signal context by the handler below and cleared again at
// the start of every BaseKvmCPU::kvmRun() call.
volatile bool timerOverflowed = false;

/**
 * Signal handler installed for KVM_TIMER_SIGNAL. It only records that
 * the signal fired; none of its arguments are used.
 */
static void
onTimerOverflow(int, siginfo_t *, void *)
{
    timerOverflowed = true;
}
72
73BaseKvmCPU::BaseKvmCPU(BaseKvmCPUParams *params)
74    : BaseCPU(params),
75      vm(*params->kvmVM),
76      _status(Idle),
77      dataPort(name() + ".dcache_port", this),
78      instPort(name() + ".icache_port", this),
79      threadContextDirty(true),
80      kvmStateDirty(false),
81      vcpuID(vm.allocVCPUID()), vcpuFD(-1), vcpuMMapSize(0),
82      _kvmRun(NULL), mmioRing(NULL),
83      pageSize(sysconf(_SC_PAGE_SIZE)),
84      tickEvent(*this),
85      perfControlledByTimer(params->usePerfOverflow),
86      hostFactor(params->hostFactor),
87      drainManager(NULL),
88      ctrInsts(0)
89{
90    if (pageSize == -1)
91        panic("KVM: Failed to determine host page size (%i)\n",
92              errno);
93
94    thread = new SimpleThread(this, 0, params->system,
95                              params->itb, params->dtb, params->isa[0]);
96    thread->setStatus(ThreadContext::Halted);
97    tc = thread->getTC();
98    threadContexts.push_back(tc);
99
100    setupCounters();
101
102    if (params->usePerfOverflow)
103        runTimer.reset(new PerfKvmTimer(hwCycles,
104                                        KVM_TIMER_SIGNAL,
105                                        params->hostFactor,
106                                        params->clock));
107    else
108        runTimer.reset(new PosixKvmTimer(KVM_TIMER_SIGNAL, CLOCK_MONOTONIC,
109                                         params->hostFactor,
110                                         params->clock));
111}
112
113BaseKvmCPU::~BaseKvmCPU()
114{
115    if (_kvmRun)
116        munmap(_kvmRun, vcpuMMapSize);
117    close(vcpuFD);
118}
119
120void
121BaseKvmCPU::init()
122{
123    BaseCPU::init();
124
125    if (numThreads != 1)
126        fatal("KVM: Multithreading not supported");
127
128    tc->initMemProxies(tc);
129
130    // initialize CPU, including PC
131    if (FullSystem && !switchedOut())
132        TheISA::initCPU(tc, tc->contextId());
133
134    mmio_req.setThreadContext(tc->contextId(), 0);
135}
136
137void
138BaseKvmCPU::startup()
139{
140    const BaseKvmCPUParams * const p(
141        dynamic_cast<const BaseKvmCPUParams *>(params()));
142
143    Kvm &kvm(vm.kvm);
144
145    BaseCPU::startup();
146
147    assert(vcpuFD == -1);
148
149    // Tell the VM that a CPU is about to start.
150    vm.cpuStartup();
151
152    // We can't initialize KVM CPUs in BaseKvmCPU::init() since we are
153    // not guaranteed that the parent KVM VM has initialized at that
154    // point. Initialize virtual CPUs here instead.
155    vcpuFD = vm.createVCPU(vcpuID);
156
157    // Setup signal handlers. This has to be done after the vCPU is
158    // created since it manipulates the vCPU signal mask.
159    setupSignalHandler();
160
161    // Map the KVM run structure */
162    vcpuMMapSize = kvm.getVCPUMMapSize();
163    _kvmRun = (struct kvm_run *)mmap(0, vcpuMMapSize,
164                                     PROT_READ | PROT_WRITE, MAP_SHARED,
165                                     vcpuFD, 0);
166    if (_kvmRun == MAP_FAILED)
167        panic("KVM: Failed to map run data structure\n");
168
169    // Setup a pointer to the MMIO ring buffer if coalesced MMIO is
170    // available. The offset into the KVM's communication page is
171    // provided by the coalesced MMIO capability.
172    int mmioOffset(kvm.capCoalescedMMIO());
173    if (!p->useCoalescedMMIO) {
174        inform("KVM: Coalesced MMIO disabled by config.\n");
175    } else if (mmioOffset) {
176        inform("KVM: Coalesced IO available\n");
177        mmioRing = (struct kvm_coalesced_mmio_ring *)(
178            (char *)_kvmRun + (mmioOffset * pageSize));
179    } else {
180        inform("KVM: Coalesced not supported by host OS\n");
181    }
182
183    thread->startup();
184}
185
186void
187BaseKvmCPU::regStats()
188{
189    using namespace Stats;
190
191    BaseCPU::regStats();
192
193    numInsts
194        .name(name() + ".committedInsts")
195        .desc("Number of instructions committed")
196        ;
197
198    numVMExits
199        .name(name() + ".numVMExits")
200        .desc("total number of KVM exits")
201        ;
202
203    numMMIO
204        .name(name() + ".numMMIO")
205        .desc("number of VM exits due to memory mapped IO")
206        ;
207
208    numCoalescedMMIO
209        .name(name() + ".numCoalescedMMIO")
210        .desc("number of coalesced memory mapped IO requests")
211        ;
212
213    numIO
214        .name(name() + ".numIO")
215        .desc("number of VM exits due to legacy IO")
216        ;
217
218    numHalt
219        .name(name() + ".numHalt")
220        .desc("number of VM exits due to wait for interrupt instructions")
221        ;
222
223    numInterrupts
224        .name(name() + ".numInterrupts")
225        .desc("number of interrupts delivered")
226        ;
227
228    numHypercalls
229        .name(name() + ".numHypercalls")
230        .desc("number of hypercalls")
231        ;
232}
233
234void
235BaseKvmCPU::serializeThread(std::ostream &os, ThreadID tid)
236{
237    if (DTRACE(Checkpoint)) {
238        DPRINTF(Checkpoint, "KVM: Serializing thread %i:\n", tid);
239        dump();
240    }
241
242    assert(tid == 0);
243    assert(_status == Idle);
244    thread->serialize(os);
245}
246
247void
248BaseKvmCPU::unserializeThread(Checkpoint *cp, const std::string &section,
249                              ThreadID tid)
250{
251    DPRINTF(Checkpoint, "KVM: Unserialize thread %i:\n", tid);
252
253    assert(tid == 0);
254    assert(_status == Idle);
255    thread->unserialize(cp, section);
256    threadContextDirty = true;
257}
258
259unsigned int
260BaseKvmCPU::drain(DrainManager *dm)
261{
262    if (switchedOut())
263        return 0;
264
265    DPRINTF(Drain, "BaseKvmCPU::drain\n");
266    switch (_status) {
267      case Running:
268        // The base KVM code is normally ready when it is in the
269        // Running state, but the architecture specific code might be
270        // of a different opinion. This may happen when the CPU been
271        // notified of an event that hasn't been accepted by the vCPU
272        // yet.
273        if (!archIsDrained()) {
274            drainManager = dm;
275            return 1;
276        }
277
278        // The state of the CPU is consistent, so we don't need to do
279        // anything special to drain it. We simply de-schedule the
280        // tick event and enter the Idle state to prevent nasty things
281        // like MMIOs from happening.
282        if (tickEvent.scheduled())
283            deschedule(tickEvent);
284        _status = Idle;
285
286        /** FALLTHROUGH */
287      case Idle:
288        // Idle, no need to drain
289        assert(!tickEvent.scheduled());
290
291        // Sync the thread context here since we'll need it when we
292        // switch CPUs or checkpoint the CPU.
293        syncThreadContext();
294
295        return 0;
296
297      case RunningServiceCompletion:
298        // The CPU has just requested a service that was handled in
299        // the RunningService state, but the results have still not
300        // been reported to the CPU. Now, we /could/ probably just
301        // update the register state ourselves instead of letting KVM
302        // handle it, but that would be tricky. Instead, we enter KVM
303        // and let it do its stuff.
304        drainManager = dm;
305
306        DPRINTF(Drain, "KVM CPU is waiting for service completion, "
307                "requesting drain.\n");
308        return 1;
309
310      case RunningService:
311        // We need to drain since the CPU is waiting for service (e.g., MMIOs)
312        drainManager = dm;
313
314        DPRINTF(Drain, "KVM CPU is waiting for service, requesting drain.\n");
315        return 1;
316
317      default:
318        panic("KVM: Unhandled CPU state in drain()\n");
319        return 0;
320    }
321}
322
323void
324BaseKvmCPU::drainResume()
325{
326    assert(!tickEvent.scheduled());
327
328    // We might have been switched out. In that case, we don't need to
329    // do anything.
330    if (switchedOut())
331        return;
332
333    DPRINTF(Kvm, "drainResume\n");
334    verifyMemoryMode();
335
336    // The tick event is de-scheduled as a part of the draining
337    // process. Re-schedule it if the thread context is active.
338    if (tc->status() == ThreadContext::Active) {
339        schedule(tickEvent, nextCycle());
340        _status = Running;
341    } else {
342        _status = Idle;
343    }
344}
345
346void
347BaseKvmCPU::switchOut()
348{
349    DPRINTF(Kvm, "switchOut\n");
350
351    BaseCPU::switchOut();
352
353    // We should have drained prior to executing a switchOut, which
354    // means that the tick event shouldn't be scheduled and the CPU is
355    // idle.
356    assert(!tickEvent.scheduled());
357    assert(_status == Idle);
358}
359
360void
361BaseKvmCPU::takeOverFrom(BaseCPU *cpu)
362{
363    DPRINTF(Kvm, "takeOverFrom\n");
364
365    BaseCPU::takeOverFrom(cpu);
366
367    // We should have drained prior to executing a switchOut, which
368    // means that the tick event shouldn't be scheduled and the CPU is
369    // idle.
370    assert(!tickEvent.scheduled());
371    assert(_status == Idle);
372    assert(threadContexts.size() == 1);
373
374    // Force an update of the KVM state here instead of flagging the
375    // TC as dirty. This is not ideal from a performance point of
376    // view, but it makes debugging easier as it allows meaningful KVM
377    // state to be dumped before and after a takeover.
378    updateKvmState();
379    threadContextDirty = false;
380}
381
382void
383BaseKvmCPU::verifyMemoryMode() const
384{
385    if (!(system->isAtomicMode() && system->bypassCaches())) {
386        fatal("The KVM-based CPUs requires the memory system to be in the "
387              "'atomic_noncaching' mode.\n");
388    }
389}
390
391void
392BaseKvmCPU::wakeup()
393{
394    DPRINTF(Kvm, "wakeup()\n");
395
396    if (thread->status() != ThreadContext::Suspended)
397        return;
398
399    thread->activate();
400}
401
402void
403BaseKvmCPU::activateContext(ThreadID thread_num, Cycles delay)
404{
405    DPRINTF(Kvm, "ActivateContext %d (%d cycles)\n", thread_num, delay);
406
407    assert(thread_num == 0);
408    assert(thread);
409
410    assert(_status == Idle);
411    assert(!tickEvent.scheduled());
412
413    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend)
414        * hostFactor;
415
416    schedule(tickEvent, clockEdge(delay));
417    _status = Running;
418}
419
420
421void
422BaseKvmCPU::suspendContext(ThreadID thread_num)
423{
424    DPRINTF(Kvm, "SuspendContext %d\n", thread_num);
425
426    assert(thread_num == 0);
427    assert(thread);
428
429    if (_status == Idle)
430        return;
431
432    assert(_status == Running);
433
434    // The tick event may no be scheduled if the quest has requested
435    // the monitor to wait for interrupts. The normal CPU models can
436    // get their tick events descheduled by quiesce instructions, but
437    // that can't happen here.
438    if (tickEvent.scheduled())
439        deschedule(tickEvent);
440
441    _status = Idle;
442}
443
444void
445BaseKvmCPU::deallocateContext(ThreadID thread_num)
446{
447    // for now, these are equivalent
448    suspendContext(thread_num);
449}
450
451void
452BaseKvmCPU::haltContext(ThreadID thread_num)
453{
454    // for now, these are equivalent
455    suspendContext(thread_num);
456}
457
458ThreadContext *
459BaseKvmCPU::getContext(int tn)
460{
461    assert(tn == 0);
462    syncThreadContext();
463    return tc;
464}
465
466
467Counter
468BaseKvmCPU::totalInsts() const
469{
470    return ctrInsts;
471}
472
473Counter
474BaseKvmCPU::totalOps() const
475{
476    hack_once("Pretending totalOps is equivalent to totalInsts()\n");
477    return ctrInsts;
478}
479
480void
481BaseKvmCPU::dump()
482{
483    inform("State dumping not implemented.");
484}
485
486void
487BaseKvmCPU::tick()
488{
489    Tick delay(0);
490    assert(_status != Idle);
491
492    switch (_status) {
493      case RunningService:
494        // handleKvmExit() will determine the next state of the CPU
495        delay = handleKvmExit();
496
497        if (tryDrain())
498            _status = Idle;
499        break;
500
501      case RunningServiceCompletion:
502      case Running: {
503          Tick ticksToExecute(mainEventQueue.nextTick() - curTick());
504
505          // We might need to update the KVM state.
506          syncKvmState();
507
508          DPRINTF(KvmRun, "Entering KVM...\n");
509          if (drainManager) {
510              // Force an immediate exit from KVM after completing
511              // pending operations. The architecture-specific code
512              // takes care to run until it is in a state where it can
513              // safely be drained.
514              delay = kvmRunDrain();
515          } else {
516              delay = kvmRun(ticksToExecute);
517          }
518
519          // Entering into KVM implies that we'll have to reload the thread
520          // context from KVM if we want to access it. Flag the KVM state as
521          // dirty with respect to the cached thread context.
522          kvmStateDirty = true;
523
524          // Enter into the RunningService state unless the
525          // simulation was stopped by a timer.
526          if (_kvmRun->exit_reason !=  KVM_EXIT_INTR)
527              _status = RunningService;
528          else
529              _status = Running;
530
531          if (tryDrain())
532              _status = Idle;
533      } break;
534
535      default:
536        panic("BaseKvmCPU entered tick() in an illegal state (%i)\n",
537              _status);
538    }
539
540    // Schedule a new tick if we are still running
541    if (_status != Idle)
542        schedule(tickEvent, clockEdge(ticksToCycles(delay)));
543}
544
545Tick
546BaseKvmCPU::kvmRunDrain()
547{
548    // By default, the only thing we need to drain is a pending IO
549    // operation which assumes that we are in the
550    // RunningServiceCompletion state.
551    assert(_status == RunningServiceCompletion);
552
553    // Deliver the data from the pending IO operation and immediately
554    // exit.
555    return kvmRun(0);
556}
557
558uint64_t
559BaseKvmCPU::getHostCycles() const
560{
561    return hwCycles.read();
562}
563
564Tick
565BaseKvmCPU::kvmRun(Tick ticks)
566{
567    Tick ticksExecuted;
568    DPRINTF(KvmRun, "KVM: Executing for %i ticks\n", ticks);
569    timerOverflowed = false;
570
571    if (ticks == 0) {
572        // Settings ticks == 0 is a special case which causes an entry
573        // into KVM that finishes pending operations (e.g., IO) and
574        // then immediately exits.
575        DPRINTF(KvmRun, "KVM: Delivering IO without full guest entry\n");
576
577        // This signal is always masked while we are executing in gem5
578        // and gets unmasked temporarily as soon as we enter into
579        // KVM. See setSignalMask() and setupSignalHandler().
580        raise(KVM_TIMER_SIGNAL);
581
582        // Enter into KVM. KVM will check for signals after completing
583        // pending operations (IO). Since the KVM_TIMER_SIGNAL is
584        // pending, this forces an immediate exit into gem5 again. We
585        // don't bother to setup timers since this shouldn't actually
586        // execute any code in the guest.
587        ioctlRun();
588
589        // We always execute at least one cycle to prevent the
590        // BaseKvmCPU::tick() to be rescheduled on the same tick
591        // twice.
592        ticksExecuted = clockPeriod();
593    } else {
594        if (ticks < runTimer->resolution()) {
595            DPRINTF(KvmRun, "KVM: Adjusting tick count (%i -> %i)\n",
596                    ticks, runTimer->resolution());
597            ticks = runTimer->resolution();
598        }
599
600        // Get hardware statistics after synchronizing contexts. The KVM
601        // state update might affect guest cycle counters.
602        uint64_t baseCycles(getHostCycles());
603        uint64_t baseInstrs(hwInstructions.read());
604
605        // Arm the run timer and start the cycle timer if it isn't
606        // controlled by the overflow timer. Starting/stopping the cycle
607        // timer automatically starts the other perf timers as they are in
608        // the same counter group.
609        runTimer->arm(ticks);
610        if (!perfControlledByTimer)
611            hwCycles.start();
612
613        ioctlRun();
614
615        runTimer->disarm();
616        if (!perfControlledByTimer)
617            hwCycles.stop();
618
619        // The timer signal may have been delivered after we exited
620        // from KVM. It will be pending in that case since it is
621        // masked when we aren't executing in KVM. Discard it to make
622        // sure we don't deliver it immediately next time we try to
623        // enter into KVM.
624        discardPendingSignal(KVM_TIMER_SIGNAL);
625
626        const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles);
627        const uint64_t simCyclesExecuted(hostCyclesExecuted * hostFactor);
628        const uint64_t instsExecuted(hwInstructions.read() - baseInstrs);
629        ticksExecuted = runTimer->ticksFromHostCycles(hostCyclesExecuted);
630
631        if (ticksExecuted < ticks &&
632            timerOverflowed &&
633            _kvmRun->exit_reason == KVM_EXIT_INTR) {
634            // TODO: We should probably do something clever here...
635            warn("KVM: Early timer event, requested %i ticks but got %i ticks.\n",
636                 ticks, ticksExecuted);
637        }
638
639        /* Update statistics */
640        numCycles += simCyclesExecuted;;
641        numInsts += instsExecuted;
642        ctrInsts += instsExecuted;
643        system->totalNumInsts += instsExecuted;
644
645        DPRINTF(KvmRun,
646                "KVM: Executed %i instructions in %i cycles "
647                "(%i ticks, sim cycles: %i).\n",
648                instsExecuted, hostCyclesExecuted, ticksExecuted, simCyclesExecuted);
649    }
650
651    ++numVMExits;
652
653    return ticksExecuted + flushCoalescedMMIO();
654}
655
656void
657BaseKvmCPU::kvmNonMaskableInterrupt()
658{
659    ++numInterrupts;
660    if (ioctl(KVM_NMI) == -1)
661        panic("KVM: Failed to deliver NMI to virtual CPU\n");
662}
663
664void
665BaseKvmCPU::kvmInterrupt(const struct kvm_interrupt &interrupt)
666{
667    ++numInterrupts;
668    if (ioctl(KVM_INTERRUPT, (void *)&interrupt) == -1)
669        panic("KVM: Failed to deliver interrupt to virtual CPU\n");
670}
671
672void
673BaseKvmCPU::getRegisters(struct kvm_regs &regs) const
674{
675    if (ioctl(KVM_GET_REGS, &regs) == -1)
676        panic("KVM: Failed to get guest registers\n");
677}
678
679void
680BaseKvmCPU::setRegisters(const struct kvm_regs &regs)
681{
682    if (ioctl(KVM_SET_REGS, (void *)&regs) == -1)
683        panic("KVM: Failed to set guest registers\n");
684}
685
686void
687BaseKvmCPU::getSpecialRegisters(struct kvm_sregs &regs) const
688{
689    if (ioctl(KVM_GET_SREGS, &regs) == -1)
690        panic("KVM: Failed to get guest special registers\n");
691}
692
693void
694BaseKvmCPU::setSpecialRegisters(const struct kvm_sregs &regs)
695{
696    if (ioctl(KVM_SET_SREGS, (void *)&regs) == -1)
697        panic("KVM: Failed to set guest special registers\n");
698}
699
700void
701BaseKvmCPU::getFPUState(struct kvm_fpu &state) const
702{
703    if (ioctl(KVM_GET_FPU, &state) == -1)
704        panic("KVM: Failed to get guest FPU state\n");
705}
706
707void
708BaseKvmCPU::setFPUState(const struct kvm_fpu &state)
709{
710    if (ioctl(KVM_SET_FPU, (void *)&state) == -1)
711        panic("KVM: Failed to set guest FPU state\n");
712}
713
714
715void
716BaseKvmCPU::setOneReg(uint64_t id, const void *addr)
717{
718#ifdef KVM_SET_ONE_REG
719    struct kvm_one_reg reg;
720    reg.id = id;
721    reg.addr = (uint64_t)addr;
722
723    if (ioctl(KVM_SET_ONE_REG, &reg) == -1) {
724        panic("KVM: Failed to set register (0x%x) value (errno: %i)\n",
725              id, errno);
726    }
727#else
728    panic("KVM_SET_ONE_REG is unsupported on this platform.\n");
729#endif
730}
731
732void
733BaseKvmCPU::getOneReg(uint64_t id, void *addr) const
734{
735#ifdef KVM_GET_ONE_REG
736    struct kvm_one_reg reg;
737    reg.id = id;
738    reg.addr = (uint64_t)addr;
739
740    if (ioctl(KVM_GET_ONE_REG, &reg) == -1) {
741        panic("KVM: Failed to get register (0x%x) value (errno: %i)\n",
742              id, errno);
743    }
744#else
745    panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
746#endif
747}
748
749std::string
750BaseKvmCPU::getAndFormatOneReg(uint64_t id) const
751{
752#ifdef KVM_GET_ONE_REG
753    std::ostringstream ss;
754
755    ss.setf(std::ios::hex, std::ios::basefield);
756    ss.setf(std::ios::showbase);
757#define HANDLE_INTTYPE(len)                      \
758    case KVM_REG_SIZE_U ## len: {                \
759        uint ## len ## _t value;                 \
760        getOneReg(id, &value);                   \
761        ss << value;                             \
762    }  break
763
764#define HANDLE_ARRAY(len)                       \
765    case KVM_REG_SIZE_U ## len: {               \
766        uint8_t value[len / 8];                 \
767        getOneReg(id, value);                   \
768        ss << "[" << value[0];                  \
769        for (int i = 1; i < len  / 8; ++i)      \
770            ss << ", " << value[i];             \
771        ss << "]";                              \
772      } break
773
774    switch (id & KVM_REG_SIZE_MASK) {
775        HANDLE_INTTYPE(8);
776        HANDLE_INTTYPE(16);
777        HANDLE_INTTYPE(32);
778        HANDLE_INTTYPE(64);
779        HANDLE_ARRAY(128);
780        HANDLE_ARRAY(256);
781        HANDLE_ARRAY(512);
782        HANDLE_ARRAY(1024);
783      default:
784        ss << "??";
785    }
786
787#undef HANDLE_INTTYPE
788#undef HANDLE_ARRAY
789
790    return ss.str();
791#else
792    panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
793#endif
794}
795
796void
797BaseKvmCPU::syncThreadContext()
798{
799    if (!kvmStateDirty)
800        return;
801
802    assert(!threadContextDirty);
803
804    updateThreadContext();
805    kvmStateDirty = false;
806}
807
808void
809BaseKvmCPU::syncKvmState()
810{
811    if (!threadContextDirty)
812        return;
813
814    assert(!kvmStateDirty);
815
816    updateKvmState();
817    threadContextDirty = false;
818}
819
820Tick
821BaseKvmCPU::handleKvmExit()
822{
823    DPRINTF(KvmRun, "handleKvmExit (exit_reason: %i)\n", _kvmRun->exit_reason);
824    assert(_status == RunningService);
825
826    // Switch into the running state by default. Individual handlers
827    // can override this.
828    _status = Running;
829    switch (_kvmRun->exit_reason) {
830      case KVM_EXIT_UNKNOWN:
831        return handleKvmExitUnknown();
832
833      case KVM_EXIT_EXCEPTION:
834        return handleKvmExitException();
835
836      case KVM_EXIT_IO:
837        _status = RunningServiceCompletion;
838        ++numIO;
839        return handleKvmExitIO();
840
841      case KVM_EXIT_HYPERCALL:
842        ++numHypercalls;
843        return handleKvmExitHypercall();
844
845      case KVM_EXIT_HLT:
846        /* The guest has halted and is waiting for interrupts */
847        DPRINTF(Kvm, "handleKvmExitHalt\n");
848        ++numHalt;
849
850        // Suspend the thread until the next interrupt arrives
851        thread->suspend();
852
853        // This is actually ignored since the thread is suspended.
854        return 0;
855
856      case KVM_EXIT_MMIO:
857        _status = RunningServiceCompletion;
858        /* Service memory mapped IO requests */
859        DPRINTF(KvmIO, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n",
860                _kvmRun->mmio.is_write,
861                _kvmRun->mmio.phys_addr, _kvmRun->mmio.len);
862
863        ++numMMIO;
864        return doMMIOAccess(_kvmRun->mmio.phys_addr, _kvmRun->mmio.data,
865                            _kvmRun->mmio.len, _kvmRun->mmio.is_write);
866
867      case KVM_EXIT_IRQ_WINDOW_OPEN:
868        return handleKvmExitIRQWindowOpen();
869
870      case KVM_EXIT_FAIL_ENTRY:
871        return handleKvmExitFailEntry();
872
873      case KVM_EXIT_INTR:
874        /* KVM was interrupted by a signal, restart it in the next
875         * tick. */
876        return 0;
877
878      case KVM_EXIT_INTERNAL_ERROR:
879        panic("KVM: Internal error (suberror: %u)\n",
880              _kvmRun->internal.suberror);
881
882      default:
883        dump();
884        panic("KVM: Unexpected exit (exit_reason: %u)\n", _kvmRun->exit_reason);
885    }
886}
887
888Tick
889BaseKvmCPU::handleKvmExitIO()
890{
891    panic("KVM: Unhandled guest IO (dir: %i, size: %i, port: 0x%x, count: %i)\n",
892          _kvmRun->io.direction, _kvmRun->io.size,
893          _kvmRun->io.port, _kvmRun->io.count);
894}
895
896Tick
897BaseKvmCPU::handleKvmExitHypercall()
898{
899    panic("KVM: Unhandled hypercall\n");
900}
901
902Tick
903BaseKvmCPU::handleKvmExitIRQWindowOpen()
904{
905    warn("KVM: Unhandled IRQ window.\n");
906    return 0;
907}
908
909
910Tick
911BaseKvmCPU::handleKvmExitUnknown()
912{
913    dump();
914    panic("KVM: Unknown error when starting vCPU (hw reason: 0x%llx)\n",
915          _kvmRun->hw.hardware_exit_reason);
916}
917
918Tick
919BaseKvmCPU::handleKvmExitException()
920{
921    dump();
922    panic("KVM: Got exception when starting vCPU "
923          "(exception: %u, error_code: %u)\n",
924          _kvmRun->ex.exception, _kvmRun->ex.error_code);
925}
926
927Tick
928BaseKvmCPU::handleKvmExitFailEntry()
929{
930    dump();
931    panic("KVM: Failed to enter virtualized mode (hw reason: 0x%llx)\n",
932          _kvmRun->fail_entry.hardware_entry_failure_reason);
933}
934
935Tick
936BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write)
937{
938    mmio_req.setPhys(paddr, size, Request::UNCACHEABLE, dataMasterId());
939
940    const MemCmd cmd(write ? MemCmd::WriteReq : MemCmd::ReadReq);
941    Packet pkt(&mmio_req, cmd);
942    pkt.dataStatic(data);
943    return dataPort.sendAtomic(&pkt);
944}
945
946void
947BaseKvmCPU::setSignalMask(const sigset_t *mask)
948{
949    std::unique_ptr<struct kvm_signal_mask> kvm_mask;
950
951    if (mask) {
952        kvm_mask.reset((struct kvm_signal_mask *)operator new(
953                           sizeof(struct kvm_signal_mask) + sizeof(*mask)));
954        // The kernel and the user-space headers have different ideas
955        // about the size of sigset_t. This seems like a massive hack,
956        // but is actually what qemu does.
957        assert(sizeof(*mask) >= 8);
958        kvm_mask->len = 8;
959        memcpy(kvm_mask->sigset, mask, kvm_mask->len);
960    }
961
962    if (ioctl(KVM_SET_SIGNAL_MASK, (void *)kvm_mask.get()) == -1)
963        panic("KVM: Failed to set vCPU signal mask (errno: %i)\n",
964              errno);
965}
966
967int
968BaseKvmCPU::ioctl(int request, long p1) const
969{
970    if (vcpuFD == -1)
971        panic("KVM: CPU ioctl called before initialization\n");
972
973    return ::ioctl(vcpuFD, request, p1);
974}
975
976Tick
977BaseKvmCPU::flushCoalescedMMIO()
978{
979    if (!mmioRing)
980        return 0;
981
982    DPRINTF(KvmIO, "KVM: Flushing the coalesced MMIO ring buffer\n");
983
984    // TODO: We might need to do synchronization when we start to
985    // support multiple CPUs
986    Tick ticks(0);
987    while (mmioRing->first != mmioRing->last) {
988        struct kvm_coalesced_mmio &ent(
989            mmioRing->coalesced_mmio[mmioRing->first]);
990
991        DPRINTF(KvmIO, "KVM: Handling coalesced MMIO (addr: 0x%x, len: %u)\n",
992                ent.phys_addr, ent.len);
993
994        ++numCoalescedMMIO;
995        ticks += doMMIOAccess(ent.phys_addr, ent.data, ent.len, true);
996
997        mmioRing->first = (mmioRing->first + 1) % KVM_COALESCED_MMIO_MAX;
998    }
999
1000    return ticks;
1001}
1002
1003void
1004BaseKvmCPU::setupSignalHandler()
1005{
1006    struct sigaction sa;
1007
1008    memset(&sa, 0, sizeof(sa));
1009    sa.sa_sigaction = onTimerOverflow;
1010    sa.sa_flags = SA_SIGINFO | SA_RESTART;
1011    if (sigaction(KVM_TIMER_SIGNAL, &sa, NULL) == -1)
1012        panic("KVM: Failed to setup vCPU signal handler\n");
1013
1014    sigset_t sigset;
1015    if (sigprocmask(SIG_BLOCK, NULL, &sigset) == -1)
1016        panic("KVM: Failed get signal mask\n");
1017
1018    // Request KVM to setup the same signal mask as we're currently
1019    // running with. We'll sometimes need to mask the KVM_TIMER_SIGNAL
1020    // to cause immediate exits from KVM after servicing IO
1021    // requests. See kvmRun().
1022    setSignalMask(&sigset);
1023
1024    // Mask the KVM_TIMER_SIGNAL so it isn't delivered unless we're
1025    // actually executing inside KVM.
1026    sigaddset(&sigset, KVM_TIMER_SIGNAL);
1027    if (sigprocmask(SIG_SETMASK, &sigset, NULL) == -1)
1028        panic("KVM: Failed mask the KVM timer signal\n");
1029}
1030
1031bool
1032BaseKvmCPU::discardPendingSignal(int signum) const
1033{
1034    int discardedSignal;
1035
1036    // Setting the timeout to zero causes sigtimedwait to return
1037    // immediately.
1038    struct timespec timeout;
1039    timeout.tv_sec = 0;
1040    timeout.tv_nsec = 0;
1041
1042    sigset_t sigset;
1043    sigemptyset(&sigset);
1044    sigaddset(&sigset, signum);
1045
1046    do {
1047        discardedSignal = sigtimedwait(&sigset, NULL, &timeout);
1048    } while (discardedSignal == -1 && errno == EINTR);
1049
1050    if (discardedSignal == signum)
1051        return true;
1052    else if (discardedSignal == -1 && errno == EAGAIN)
1053        return false;
1054    else
1055        panic("Unexpected return value from sigtimedwait: %i (errno: %i)\n",
1056              discardedSignal, errno);
1057}
1058
1059void
1060BaseKvmCPU::setupCounters()
1061{
1062    DPRINTF(Kvm, "Attaching cycle counter...\n");
1063    PerfKvmCounterConfig cfgCycles(PERF_TYPE_HARDWARE,
1064                                PERF_COUNT_HW_CPU_CYCLES);
1065    cfgCycles.disabled(true)
1066        .pinned(true);
1067
1068    if (perfControlledByTimer) {
1069        // We need to configure the cycles counter to send overflows
1070        // since we are going to use it to trigger timer signals that
1071        // trap back into m5 from KVM. In practice, this means that we
1072        // need to set some non-zero sample period that gets
1073        // overridden when the timer is armed.
1074        cfgCycles.wakeupEvents(1)
1075            .samplePeriod(42);
1076    }
1077
1078    hwCycles.attach(cfgCycles,
1079                    0); // TID (0 => currentThread)
1080
1081    DPRINTF(Kvm, "Attaching instruction counter...\n");
1082    PerfKvmCounterConfig cfgInstructions(PERF_TYPE_HARDWARE,
1083                                      PERF_COUNT_HW_INSTRUCTIONS);
1084    hwInstructions.attach(cfgInstructions,
1085                          0, // TID (0 => currentThread)
1086                          hwCycles);
1087}
1088
1089bool
1090BaseKvmCPU::tryDrain()
1091{
1092    if (!drainManager)
1093        return false;
1094
1095    if (!archIsDrained()) {
1096        DPRINTF(Drain, "tryDrain: Architecture code is not ready.\n");
1097        return false;
1098    }
1099
1100    if (_status == Idle || _status == Running) {
1101        DPRINTF(Drain,
1102                "tryDrain: CPU transitioned into the Idle state, drain done\n");
1103        drainManager->signalDrainDone();
1104        drainManager = NULL;
1105        return true;
1106    } else {
1107        DPRINTF(Drain, "tryDrain: CPU not ready.\n");
1108        return false;
1109    }
1110}
1111
1112void
1113BaseKvmCPU::ioctlRun()
1114{
1115    if (ioctl(KVM_RUN) == -1) {
1116        if (errno != EINTR)
1117            panic("KVM: Failed to start virtual CPU (errno: %i)\n",
1118                  errno);
1119    }
1120}
1121