[RFC 3/4] KVM in-kernel PM Timer implementation (experimental code part 3)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



experimental code part 3 (qemu userspace)
-----------------------------------------


This code utilizes the new ioctl commands introduced by code part 2.

The KVM_CREATE_PMTMR ioctl command is simply called once when a virtual
machine is being created. However, calling KVM_CONFIGURE_PMTMR is more
challenging because it involves ...

-  passing the base address of PM I/O port range to code part 1
-  passing the clock offset to code part 1

'timers_state.cpu_clock_offset' gets updated at each vm_start() call.
However, the PM I/O port base address is not available at the first
vm_start() call. So, configuring the in-kernel PM Timer needs to be
postponed until the PIIX4 PCI configuration is initialized. This is
facilitated by the new function kvm_pmtmr_handler() which is called
by vm_start() and by pm_io_space_update().

kvm_pmtmr_handler() calls architecture-specific code through the function
pointer 'kvm_arch_pmtmr_handler'. kvm_pmtmr_handler() is a 'no-op' if an
architecture never sets this function pointer or has cleared it. The
architecture-specific code is responsible for configuring the in-kernel
PM Timer.

The experimental code provides kvm_arch_configure_pmtmr_wrapper() in
qemu-kvm-x86.c. kvm_arch_create_pmtmr() sets 'kvm_arch_pmtmr_handler'
to 'kvm_arch_configure_pmtmr_wrapper' after successful completion of
the KVM_CREATE_PMTMR ioctl command.

kvm_arch_configure_pmtmr_wrapper() requires ACPI PM code to provide a
function pointer 'kvm_arch_get_pm_io_base' thru which the PM I/O port
base address can be obtained. kvm_arch_configure_pmtmr_wrapper() is a
'no-op' too if ACPI PM code does not provide or clears this function
pointer. The experimental code provides piix4_get_pm_io_base() in
hw/acpi_piix4.c. pm_io_space_update() sets 'kvm_arch_get_pm_io_base'
to 'piix4_get_pm_io_base'.

Consider two scenarios ...

-  during virtual machine creation and startup

     kvm_arch_create
       kvm_arch_create_pmtmr
         ioctl(KVM_CREATE_PMTMR)
         kvm_arch_pmtmr_handler = kvm_arch_configure_pmtmr_wrapper
      :
     vm_start
       kvm_pmtmr_handler
         kvm_arch_configure_pmtmr_wrapper
           'no-op' because kvm_arch_get_pm_io_base not set yet
      :
     pm_io_space_update
       kvm_arch_get_pm_io_base = piix4_get_pm_io_base
       kvm_pmtmr_handler
         kvm_arch_configure_pmtmr_wrapper
           obtain PM I/O port base thru kvm_arch_get_pm_io_base
           kvm_arch_configure_pmtmr
             ioctl(KVM_CONFIGURE_PMTMR)

-  any other vm_start() call, for example after migration

     vm_start
       kvm_pmtmr_handler
         kvm_arch_configure_pmtmr_wrapper
           obtain PM I/O port base thru kvm_arch_get_pm_io_base
           kvm_arch_configure_pmtmr
             ioctl(KVM_CONFIGURE_PMTMR)



diff -up ./hw/acpi_piix4.c.orig3 ./hw/acpi_piix4.c
--- ./hw/acpi_piix4.c.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./hw/acpi_piix4.c	2010-12-10 11:26:53.943753235 +0100
@@ -23,6 +23,7 @@
 #include "acpi.h"
 #include "sysemu.h"
 #include "range.h"
+#include "qemu-kvm.h"
 
 //#define DEBUG
 
@@ -80,6 +81,9 @@ typedef struct PIIX4PMState {
 
 static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s);
 
+/* for cpu hotadd (and in-kernel PM Timer if KVM_CAP_PMTMR is defined) */
+static PIIX4PMState *global_piix4_pm_state;
+
 #define ACPI_ENABLE 0xf1
 #define ACPI_DISABLE 0xf0
 
@@ -250,6 +254,19 @@ static void acpi_dbg_writel(void *opaque
     PIIX4_DPRINTF("ACPI: DBG: 0x%08x\n", val);
 }
 
+#ifdef KVM_CAP_PMTMR
+static uint64_t piix4_get_pm_io_base(void)
+{
+    PIIX4PMState *s = global_piix4_pm_state;
+    uint32_t pm_io_base;
+
+    pm_io_base = le32_to_cpu(*(uint32_t *)(s->dev.config + 0x40));
+    pm_io_base &= 0xffc0;
+
+    return (uint64_t)pm_io_base;
+}
+#endif
+
 static void pm_io_space_update(PIIX4PMState *s)
 {
     uint32_t pm_io_base;
@@ -262,6 +279,16 @@ static void pm_io_space_update(PIIX4PMSt
         PIIX4_DPRINTF("PM: mapping to 0x%x\n", pm_io_base);
         iorange_init(&s->ioport, &pm_iorange_ops, pm_io_base, 64);
         ioport_register(&s->ioport);
+#ifdef  KVM_CAP_PMTMR
+        kvm_arch_get_pm_io_base = piix4_get_pm_io_base;
+        /*
+         * The base address of the PM I/O port address range is now known.
+         * The following call is needed to pass the base address to the
+         * in-kernel PM Timer emulation. Note that 'kvm_arch_get_pm_io_base'
+         * must be set _before_ this call.
+         */
+        kvm_pmtmr_handler();
+#endif
     }
 }
 
@@ -354,14 +381,12 @@ static void piix4_powerdown(void *opaque
     }
 }
 
-static PIIX4PMState *global_piix4_pm_state; /* cpu hotadd */
-
 static int piix4_pm_initfn(PCIDevice *dev)
 {
     PIIX4PMState *s = DO_UPCAST(PIIX4PMState, dev, dev);
     uint8_t *pci_conf;
 
-    /* for cpu hotadd */
+    /* for cpu hotadd and in-kernel PM Timer */
     global_piix4_pm_state = s;
 
     pci_conf = s->dev.config;
diff -up ./kvm/include/linux/kvm.h.orig3 ./kvm/include/linux/kvm.h
--- ./kvm/include/linux/kvm.h.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./kvm/include/linux/kvm.h	2010-12-10 10:00:11.646936579 +0100
@@ -140,6 +140,12 @@ struct kvm_pit_config {
 	__u32 pad[15];
 };
 
+/* for KVM_CONFIGURE_PMTMR */
+struct kvm_pmtmr_config {
+	__u64 pm_io_base;
+	__s64 clock_offset;
+};
+
 #define KVM_PIT_SPEAKER_DUMMY     1
 
 #define KVM_EXIT_UNKNOWN          0
@@ -530,6 +536,9 @@ struct kvm_enable_cap {
 #ifdef __KVM_HAVE_XCRS
 #define KVM_CAP_XCRS 56
 #endif
+#ifdef __KVM_HAVE_PMTMR
+#define KVM_CAP_PMTMR 60
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -660,6 +669,8 @@ struct kvm_clock_data {
 #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
 #define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
 #define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+#define KVM_CREATE_PMTMR           _IO(KVMIO,  0x7d)
+#define KVM_CONFIGURE_PMTMR       _IOW(KVMIO,  0x7e, struct kvm_pmtmr_config)
 /* Available with KVM_CAP_PIT_STATE2 */
 #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
diff -up ./kvm/include/x86/asm/kvm.h.orig3 ./kvm/include/x86/asm/kvm.h
--- ./kvm/include/x86/asm/kvm.h.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./kvm/include/x86/asm/kvm.h	2010-12-10 11:29:56.410873314 +0100
@@ -24,6 +24,7 @@
 #define __KVM_HAVE_DEBUGREGS
 #define __KVM_HAVE_XSAVE
 #define __KVM_HAVE_XCRS
+#define __KVM_HAVE_PMTMR
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff -up ./qemu-kvm.c.orig3 ./qemu-kvm.c
--- ./qemu-kvm.c.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./qemu-kvm.c	2010-12-10 10:50:42.857811776 +0100
@@ -185,6 +185,9 @@ int kvm_init(int smp_cpus)
     kvm_context->dirty_pages_log_all = 0;
     kvm_context->no_irqchip_creation = 0;
     kvm_context->no_pit_creation = 0;
+#ifdef KVM_CAP_PMTMR
+    kvm_context->no_pmtmr_creation = 0;
+#endif
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
     QTAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
@@ -237,6 +240,22 @@ void kvm_disable_pit_creation(kvm_contex
     kvm->no_pit_creation = 1;
 }
 
+#ifdef	KVM_CAP_PMTMR
+void (*kvm_arch_pmtmr_handler)(kvm_context_t kvm);
+/*
+ * This handler is called by
+ * - the monitor thread during vm_start().
+ * - the ACPI PM code during pm_io_space_update().
+ * It is a 'no-op' if an architecture-specific handler is not available.
+ * Architecture-specific code must configure the in-kernel PM Timer emulation.
+ */
+void kvm_pmtmr_handler(void)
+{
+    if (kvm_arch_pmtmr_handler)
+        kvm_arch_pmtmr_handler(kvm_context);
+}
+#endif
+
 static void kvm_reset_vcpu(void *opaque)
 {
     CPUState *env = opaque;
diff -up ./qemu-kvm.h.orig3 ./qemu-kvm.h
--- ./qemu-kvm.h.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./qemu-kvm.h	2010-12-10 11:26:43.726790319 +0100
@@ -64,6 +64,10 @@ struct kvm_context {
     int irqchip_inject_ioctl;
     /// do not create in-kernel pit if set
     int no_pit_creation;
+#ifdef KVM_CAP_PMTMR
+    /// do not create in-kernel PM Timer if set
+    int no_pmtmr_creation;
+#endif
 #ifdef KVM_CAP_IRQ_ROUTING
     struct kvm_irq_routing *irq_routes;
     int nr_allocated_irq_routes;
@@ -655,8 +659,14 @@ int kvm_qemu_create_memory_alias(uint64_
                                  uint64_t target_phys);
 int kvm_qemu_destroy_memory_alias(uint64_t phys_start);
 
-int kvm_arch_qemu_create_context(void);
+#ifdef KVM_CAP_PMTMR
+void kvm_pmtmr_handler(void);
+int kvm_arch_configure_pmtmr(kvm_context_t kvm, struct kvm_pmtmr_config *conf);
+extern void (*kvm_arch_pmtmr_handler)(kvm_context_t kvm);
+extern uint64_t (*kvm_arch_get_pm_io_base)(void);
+#endif
 
+int kvm_arch_qemu_create_context(void);
 void kvm_arch_save_regs(CPUState *env);
 void kvm_arch_load_regs(CPUState *env, int level);
 int kvm_arch_has_work(CPUState *env);
diff -up ./qemu-kvm-x86.c.orig3 ./qemu-kvm-x86.c
--- ./qemu-kvm-x86.c.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./qemu-kvm-x86.c	2010-12-10 11:26:39.665811451 +0100
@@ -15,6 +15,9 @@
 #include <sys/io.h>
 
 #include "qemu-kvm.h"
+#ifdef	KVM_CAP_PMTMR
+#include "qemu-timer.h"
+#endif
 #include "libkvm.h"
 #include <pthread.h>
 #include <sys/utsname.h>
@@ -124,6 +127,61 @@ static int kvm_create_pit(kvm_context_t 
     return 0;
 }
 
+#ifdef KVM_CAP_PMTMR
+
+int kvm_arch_configure_pmtmr(kvm_context_t kvm, struct kvm_pmtmr_config *conf)
+{
+    int r;
+
+    if (kvm_arch_pmtmr_handler) {
+        r = kvm_vm_ioctl(kvm_state, KVM_CONFIGURE_PMTMR, conf);
+        if (r < 0) {
+            fprintf(stderr, "Configure kernel PM Timer failed\n");
+            kvm_arch_pmtmr_handler = 0;
+        }
+    }
+    return 0;
+}
+
+uint64_t (*kvm_arch_get_pm_io_base)(void);
+/*
+ * Architecture-specific code called by kvm_pmtmr_handler().
+ * Configures the in-kernel PM Timer emulation if the ACPI PM code provides
+ * a function to obtain the base address of the PM I/O port address range.
+ */
+static void kvm_arch_configure_pmtmr_wrapper(kvm_context_t kvm)
+{
+    struct kvm_pmtmr_config conf;
+
+    if (kvm_arch_get_pm_io_base) {
+        conf.pm_io_base = kvm_arch_get_pm_io_base();
+        conf.clock_offset = cpu_get_clock_offset();
+        kvm_arch_configure_pmtmr(kvm, &conf);
+    }
+}
+
+static int kvm_arch_create_pmtmr(kvm_context_t kvm)
+{
+    int r;
+
+    if (!kvm->no_pmtmr_creation) {
+        r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_PMTMR);
+        if (r <= 0)
+            return 0;
+
+        r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PMTMR);
+        if (r < 0) {
+            fprintf(stderr, "Create kernel PM Timer failed\n");
+            return r;
+        }
+        /* for kvm_pmtmr_handler() */
+        kvm_arch_pmtmr_handler = kvm_arch_configure_pmtmr_wrapper;
+    }
+    return 0;
+}
+
+#endif
+
 int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes,
                         void **vm_mem)
 {
@@ -156,7 +214,9 @@ int kvm_arch_create(kvm_context_t kvm, u
     if (r < 0) {
         return r;
     }
-
+#ifdef KVM_CAP_PMTMR
+    kvm_arch_create_pmtmr(kvm);
+#endif
     return 0;
 }
 
diff -up ./qemu-timer.c.orig3 ./qemu-timer.c
--- ./qemu-timer.c.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./qemu-timer.c	2010-12-10 10:45:27.071749627 +0100
@@ -110,6 +110,11 @@ static int64_t cpu_get_clock(void)
     }
 }
 
+int64_t cpu_get_clock_offset(void)
+{
+    return timers_state.cpu_clock_offset;
+}
+
 /* FIXME: qemu-kvm hack */
 #define CONFIG_IOTHREAD 1
 #ifndef CONFIG_IOTHREAD
diff -up ./qemu-timer.h.orig3 ./qemu-timer.h
--- ./qemu-timer.h.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./qemu-timer.h	2010-12-10 10:45:33.367685692 +0100
@@ -53,6 +53,7 @@ int qemu_calculate_timeout(void);
 void init_clocks(void);
 int init_timer_alarm(void);
 void quit_timers(void);
+int64_t cpu_get_clock_offset(void);
 
 static inline int64_t get_ticks_per_sec(void)
 {
diff -up ./vl.c.orig3 ./vl.c
--- ./vl.c.orig3	2010-12-02 15:15:20.000000000 +0100
+++ ./vl.c	2010-12-10 10:34:55.388997058 +0100
@@ -1091,6 +1091,14 @@ void vm_start(void)
 {
     if (!vm_running) {
         cpu_enable_ticks();
+#ifdef  KVM_CAP_PMTMR
+        /*
+         * cpu_enable_ticks() has updated 'timers_state.cpu_clock_offset'.
+         * The following call is needed to pass the updated clock offset
+         * to the in-kernel PM Timer emulation.
+         */
+        kvm_pmtmr_handler();
+#endif
         vm_running = 1;
         vm_state_notify(1, 0);
         resume_all_vcpus();
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux