Re: [PATCH v2 3/4] target-i386: Allow changing of Hypervisor CPUIDs.

Don Slutz <Don@xxxxxxxxxxxxxxx> · Thu, 13 Sep 2012 14:43:55 -0400

On 09/12/12 13:55, Marcelo Tosatti wrote:
The problem with integrating this is that it has little or
no assurance from documentation. The Linux kernel source is a good
source, then say "accordingly to VMWare guest support code in version xyz"
in the changelog.
I will work on getting a list of the documentation and sources used to 
generate this.

Also extracting this information in a text file (or comment in the code)
would be better than just adding code.
I am not sure what information you are talking about here.  Are you 
asking about the known "Hypervisor CPUIDs", or what a lot of Linux 
version look at to determine the Hypervisor they are on, or something else?

On Tue, Sep 11, 2012 at 10:07:46AM -0400, Don Slutz wrote:
This is primarily done so that the guest will think it is running
under vmware when hypervisor-vendor=vmware is specified as a
property of a cpu.

Signed-off-by: Don Slutz <Don@xxxxxxxxxxxxxxx>
---
  target-i386/cpu.c |  214 +++++++++++++++++++++++++++++++++++++++++++++++++++++
  target-i386/cpu.h |   21 +++++
  target-i386/kvm.c |   33 +++++++--
  3 files changed, 262 insertions(+), 6 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5f9866a..9f1f390 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1135,6 +1135,36 @@ static void x86_cpuid_set_model_id(Object *obj, const char *model_id,
      }
  }
  
+static void x86_cpuid_set_vmware_extra(Object *obj)
+{
+    X86CPU *cpu = X86_CPU(obj);
+
+    if ((cpu->env.tsc_khz != 0) &&
+        (cpu->env.cpuid_hv_level == CPUID_HV_LEVEL_VMARE_4) &&
+        (cpu->env.cpuid_hv_vendor1 == CPUID_HV_VENDOR_VMWARE_1) &&
+        (cpu->env.cpuid_hv_vendor2 == CPUID_HV_VENDOR_VMWARE_2) &&
+        (cpu->env.cpuid_hv_vendor3 == CPUID_HV_VENDOR_VMWARE_3)) {
+        const uint32_t apic_khz = 1000000L;
+
+        /*
+         * From article.gmane.org/gmane.comp.emulators.kvm.devel/22643
+         *
+         *    Leaf 0x40000010, Timing Information.
+         *
+         *    VMware has defined the first generic leaf to provide timing
+         *    information.  This leaf returns the current TSC frequency and
+         *    current Bus frequency in kHz.
+         *
+         *    # EAX: (Virtual) TSC frequency in kHz.
+         *    # EBX: (Virtual) Bus (local apic timer) frequency in kHz.
+         *    # ECX, EDX: RESERVED (Per above, reserved fields are set to zero).
+         */
+        cpu->env.cpuid_hv_extra = 0x40000010;
+        cpu->env.cpuid_hv_extra_a = (uint32_t)cpu->env.tsc_khz;
+        cpu->env.cpuid_hv_extra_b = apic_khz;
+    }
+}
What happens in case you migrate the vmware guest to a host
with different frequency? How is that transmitted to the
vmware-guest-running-on-kvm ? Or is migration not supported?
As far as I know, it would be the same as for a non-vmware guest. 
http://lists.nongnu.org/archive/html/qemu-devel/2011-07/msg01656.html is 
related to this.

I did not look to see if this has been done since then.

All this change does is to allow the guest to "read" the tsc-frequency 
instead of trying to calculate it.

I will look into the current state of migration when tsc_freq=X is 
specified.  The machine I have been doing most of the testing on (Intel 
Xeon E3-1260L) when I add tsc_freq=2.0G or tsc_freq=2.4G, the guest does 
not see any difference in accel=kvm.

+static void x86_cpuid_set_hv_level(Object *obj, Visitor *v, void *opaque,
+                                const char *name, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+    uint32_t value;
+
+    visit_type_uint32(v, &value, name, errp);
+    if (error_is_set(errp)) {
+        return;
+    }
+
+    if ((value != 0) && (value < 0x40000000)) {
+        value += 0x40000000;
+    }
+    cpu->env.cpuid_hv_level = value;
+}
+
+static char *x86_cpuid_get_hv_vendor(Object *obj, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+    CPUX86State *env = &cpu->env;
+    char *value;
+    int i;
+
+    value = (char *)g_malloc(CPUID_VENDOR_SZ + 1);
+    for (i = 0; i < 4; i++) {
+        value[i + 0] = env->cpuid_hv_vendor1 >> (8 * i);
+        value[i + 4] = env->cpuid_hv_vendor2 >> (8 * i);
+        value[i + 8] = env->cpuid_hv_vendor3 >> (8 * i);
+    }
+    value[CPUID_VENDOR_SZ] = '\0';
+
+    /* Convert known names */
+    if (!strcmp(value, CPUID_HV_VENDOR_VMWARE)) {
+        if (env->cpuid_hv_level == CPUID_HV_LEVEL_VMARE_4) {
+            pstrcpy(value, sizeof(value), "vmware4");
+        } else if (env->cpuid_hv_level == CPUID_HV_LEVEL_VMARE_3) {
+            pstrcpy(value, sizeof(value), "vmware3");
+        }
+    } else if (!strcmp(value, CPUID_HV_VENDOR_XEN) &&
+               env->cpuid_hv_level == CPUID_HV_LEVEL_XEN) {
+        pstrcpy(value, sizeof(value), "xen");
+    } else if (!strcmp(value, CPUID_HV_VENDOR_KVM) &&
+               env->cpuid_hv_level == 0) {
+        pstrcpy(value, sizeof(value), "kvm");
+    }
+    return value;
+}
+
+static void x86_cpuid_set_hv_vendor(Object *obj, const char *value,
+                                     Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+    CPUX86State *env = &cpu->env;
+    int i;
+    char adj_value[CPUID_VENDOR_SZ + 1];
+
+    memset(adj_value, 0, sizeof(adj_value));
+
+    /* Convert known names */
+    if (!strcmp(value, "vmware") || !strcmp(value, "vmware4")) {
+        if (env->cpuid_hv_level == 0) {
+            env->cpuid_hv_level = CPUID_HV_LEVEL_VMARE_4;
+        }
+        pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_VMWARE);
+    } else if (!strcmp(value, "vmware3")) {
+        if (env->cpuid_hv_level == 0) {
+            env->cpuid_hv_level = CPUID_HV_LEVEL_VMARE_3;
+        }
+        pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_VMWARE);
+    } else if (!strcmp(value, "xen")) {
+        if (env->cpuid_hv_level == 0) {
+            env->cpuid_hv_level = CPUID_HV_LEVEL_XEN;
+        }
+        pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_XEN);
+    } else if (!strcmp(value, "kvm")) {
+        pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_KVM);
+    } else {
+        pstrcpy(adj_value, sizeof(adj_value), value);
+    }
+
+    env->cpuid_hv_vendor1 = 0;
+    env->cpuid_hv_vendor2 = 0;
+    env->cpuid_hv_vendor3 = 0;
+    for (i = 0; i < 4; i++) {
+        env->cpuid_hv_vendor1 |= ((uint8_t)adj_value[i + 0]) << (8 * i);
+        env->cpuid_hv_vendor2 |= ((uint8_t)adj_value[i + 4]) << (8 * i);
+        env->cpuid_hv_vendor3 |= ((uint8_t)adj_value[i + 8]) << (8 * i);
+    }
+    x86_cpuid_set_vmware_extra(obj);
+}
+
+static void x86_cpuid_get_hv_extra(Object *obj, Visitor *v, void *opaque,
+                                const char *name, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+
+    visit_type_uint32(v, &cpu->env.cpuid_hv_extra, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra(Object *obj, Visitor *v, void *opaque,
+                                const char *name, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+    uint32_t value;
+
+    visit_type_uint32(v, &value, name, errp);
+    if (error_is_set(errp)) {
+        return;
+    }
+
+    if ((value != 0) && (value < 0x40000000)) {
+        value += 0x40000000;
+    }
+    cpu->env.cpuid_hv_extra = value;
+}
+
+static void x86_cpuid_get_hv_extra_a(Object *obj, Visitor *v, void *opaque,
+                                const char *name, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+
+    visit_type_uint32(v, &cpu->env.cpuid_hv_extra_a, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra_a(Object *obj, Visitor *v, void *opaque,
+                                const char *name, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+
+    visit_type_uint32(v, &cpu->env.cpuid_hv_extra_a, name, errp);
+}
+
+static void x86_cpuid_get_hv_extra_b(Object *obj, Visitor *v, void *opaque,
+                                const char *name, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+
+    visit_type_uint32(v, &cpu->env.cpuid_hv_extra_b, name, errp);
+}
+
+static void x86_cpuid_set_hv_extra_b(Object *obj, Visitor *v, void *opaque,
+                                const char *name, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+
+    visit_type_uint32(v, &cpu->env.cpuid_hv_extra_b, name, errp);
  }
  
  #if !defined(CONFIG_USER_ONLY)
+static void x86_set_hyperv(Object *obj, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+
+    cpu->env.cpuid_hv_level = HYPERV_CPUID_MIN;
+    x86_cpuid_set_hv_vendor(obj, "Microsoft Hv", errp);
+}
+
  static void x86_get_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
                                   const char *name, Error **errp)
  {
@@ -1189,6 +1385,7 @@ static void x86_set_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
          return;
      }
      hyperv_set_spinlock_retries(value);
+    x86_set_hyperv(obj, errp);
  }
  
  static void x86_get_hv_relaxed(Object *obj, Visitor *v, void *opaque,
@@ -1209,6 +1406,7 @@ static void x86_set_hv_relaxed(Object *obj, Visitor *v, void *opaque,
          return;
      }
      hyperv_enable_relaxed_timing(value);
+    x86_set_hyperv(obj, errp);
  }
This seems unrelated to the rest of the patch?
The new call to x86_set_hyperv() is how the 1st change to 
target-i386/kvm.c still does the same thing as before.

  static void x86_get_hv_vapic(Object *obj, Visitor *v, void *opaque,
@@ -1229,6 +1427,7 @@ static void x86_set_hv_vapic(Object *obj, Visitor *v, void *opaque,
          return;
      }
      hyperv_enable_vapic_recommended(value);
+    x86_set_hyperv(obj, errp);
  }
  #endif
  
@@ -2061,6 +2260,21 @@ static void x86_cpu_initfn(Object *obj)
      object_property_add(obj, "enforce", "bool",
                          x86_cpuid_get_enforce,
                          x86_cpuid_set_enforce, NULL, NULL, NULL);
+    object_property_add(obj, "hypervisor-level", "int",
+                        x86_cpuid_get_hv_level,
+                        x86_cpuid_set_hv_level, NULL, NULL, NULL);
+    object_property_add_str(obj, "hypervisor-vendor",
+                            x86_cpuid_get_hv_vendor,
+                            x86_cpuid_set_hv_vendor, NULL);
+    object_property_add(obj, "hypervisor-extra", "int",
+                        x86_cpuid_get_hv_extra,
+                        x86_cpuid_set_hv_extra, NULL, NULL, NULL);
+    object_property_add(obj, "hypervisor-extra-a", "int",
+                        x86_cpuid_get_hv_extra_a,
+                        x86_cpuid_set_hv_extra_a, NULL, NULL, NULL);
+    object_property_add(obj, "hypervisor-extra-b", "int",
+                        x86_cpuid_get_hv_extra_b,
+                        x86_cpuid_set_hv_extra_b, NULL, NULL, NULL);
  #if !defined(CONFIG_USER_ONLY)
      object_property_add(obj, "hv_spinlocks", "int",
                          x86_get_hv_spinlocks,
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 5265c5a..a2d3588 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -488,6 +488,18 @@
  
  #define CPUID_VENDOR_VIA   "CentaurHauls"
  
+#define CPUID_HV_VENDOR_VMWARE_1 0x61774d56 /* "VMwa" */
+#define CPUID_HV_VENDOR_VMWARE_2 0x4d566572 /* "reVM" */
+#define CPUID_HV_VENDOR_VMWARE_3 0x65726177 /* "ware" */
+#define CPUID_HV_VENDOR_VMWARE "VMwareVMware"
+#define CPUID_HV_LEVEL_VMARE_3 0x40000002
+#define CPUID_HV_LEVEL_VMARE_4 0x40000010
+
+#define CPUID_HV_VENDOR_XEN "XenVMMXenVMM"
+#define CPUID_HV_LEVEL_XEN  0x40000002
+
+#define CPUID_HV_VENDOR_KVM "KVMKVMKVM"
+
  #define CPUID_MWAIT_IBE     (1 << 1) /* Interrupts can exit capability */
  #define CPUID_MWAIT_EMX     (1 << 0) /* enumeration supported */
  
@@ -782,6 +794,15 @@ typedef struct CPUX86State {
      uint32_t cpuid_ext4_features;
      /* Flags from CPUID[EAX=7,ECX=0].EBX */
      uint32_t cpuid_7_0_ebx;
+    /* Hypervisor CPUIDs */
+    uint32_t cpuid_hv_level;
+    uint32_t cpuid_hv_vendor1;
+    uint32_t cpuid_hv_vendor2;
+    uint32_t cpuid_hv_vendor3;
+    /* VMware extra data */
+    uint32_t cpuid_hv_extra;
+    uint32_t cpuid_hv_extra_a;
+    uint32_t cpuid_hv_extra_b;
  
      /* MTRRs */
      uint64_t mtrr_fixed[11];
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 895d848..17c72bc 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -389,16 +389,18 @@ int kvm_arch_init_vcpu(CPUX86State *env)
      c = &cpuid_data.entries[cpuid_i++];
      memset(c, 0, sizeof(*c));
      c->function = KVM_CPUID_SIGNATURE;
-    if (!hyperv_enabled()) {
+    if (env->cpuid_hv_level == 0) {
          memcpy(signature, "KVMKVMKVM\0\0\0", 12);
          c->eax = 0;
+        c->ebx = signature[0];
+        c->ecx = signature[1];
+        c->edx = signature[2];
      } else {
-        memcpy(signature, "Microsoft Hv", 12);
-        c->eax = HYPERV_CPUID_MIN;
+        c->eax = env->cpuid_hv_level;
+        c->ebx = env->cpuid_hv_vendor1;
+        c->ecx = env->cpuid_hv_vendor2;
+        c->edx = env->cpuid_hv_vendor3;
      }
-    c->ebx = signature[0];
-    c->ecx = signature[1];
-    c->edx = signature[2];
  
      c = &cpuid_data.entries[cpuid_i++];
      memset(c, 0, sizeof(*c));
@@ -452,6 +454,25 @@ int kvm_arch_init_vcpu(CPUX86State *env)
          c->ebx = signature[0];
          c->ecx = signature[1];
          c->edx = signature[2];
+    } else if (env->cpuid_hv_level > 0) {
+        for (i = KVM_CPUID_FEATURES + 1; i <= env->cpuid_hv_level; i++) {
+            c = &cpuid_data.entries[cpuid_i++];
+            memset(c, 0, sizeof(*c));
+            c->function = i;
+            if (i == env->cpuid_hv_extra) {
+                c->eax = env->cpuid_hv_extra_a;
+                c->ebx = env->cpuid_hv_extra_b;
+            }
+        }
+
+        c = &cpuid_data.entries[cpuid_i++];
+        memset(c, 0, sizeof(*c));
+        c->function = KVM_CPUID_SIGNATURE_NEXT;
+        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
+        c->eax = 0;
+        c->ebx = signature[0];
+        c->ecx = signature[1];
+        c->edx = signature[2];
      }
  
      has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);

Its one big patch, better split in logically correlated patches
(with better changelog). This would help reviewers.
Will re-work into a set of smaller changes.
   -Don
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html