Hypercall code page is specified in the Hyper-V TLFS to be an overlay page, ie., guest chooses a GPA and the host _places_ a page at that location, making it visible to the guest and the existing page becomes inaccessible. Similarly when disabled, the host should _remove_ the overlay and the old page should become visible to the guest. Until now, KVM patched the hypercall code directly into the guest chosen GPA which is incorrect; instead, use the new user space MSR filtering feature to trap hypercall page MSR writes, overlay it as requested and then invoke a KVM_SET_MSR from user space to bounce back control KVM. This bounce back is needed as KVM may have to write data into the newly overlaid page. Signed-off-by: Siddharth Chandrasekaran <sidcha@xxxxxxxxx> --- hw/hyperv/hyperv.c | 10 ++++- include/hw/hyperv/hyperv.h | 5 +++ target/i386/kvm/hyperv.c | 91 ++++++++++++++++++++++++++++++++++++++ target/i386/kvm/hyperv.h | 4 ++ target/i386/kvm/kvm.c | 49 +++++++++++++++++++- 5 files changed, 156 insertions(+), 3 deletions(-) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index ac45e8e139..aa5ac5226e 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -36,6 +36,7 @@ struct SynICState { OBJECT_DECLARE_SIMPLE_TYPE(SynICState, SYNIC) static bool synic_enabled; +struct hyperv_overlay_page hcall_page; static void alloc_overlay_page(struct hyperv_overlay_page *overlay, Object *owner, const char *name) @@ -50,7 +51,7 @@ static void alloc_overlay_page(struct hyperv_overlay_page *overlay, * This method must be called with iothread lock taken as it modifies * the memory hierarchy. */ -static void hyperv_overlay_update(struct hyperv_overlay_page *overlay, hwaddr addr) +void hyperv_overlay_update(struct hyperv_overlay_page *overlay, hwaddr addr) { if (addr != HYPERV_INVALID_OVERLAY_GPA) { /* check if overlay page is enabled */ @@ -70,6 +71,13 @@ static void hyperv_overlay_update(struct hyperv_overlay_page *overlay, hwaddr ad } } +void hyperv_overlay_init(void) +{ + memory_region_init_ram(&hcall_page.mr, NULL, "hyperv.hcall_page", + qemu_real_host_page_size, &error_abort); + hcall_page.addr = HYPERV_INVALID_OVERLAY_GPA; +} + static void synic_update(SynICState *synic, bool enable, hwaddr msg_page_addr, hwaddr event_page_addr) { diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index d989193e84..f444431a81 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -85,6 +85,11 @@ static inline uint32_t hyperv_vp_index(CPUState *cs) return cs->cpu_index; } +extern struct hyperv_overlay_page hcall_page; + +void hyperv_overlay_init(void); +void hyperv_overlay_update(struct hyperv_overlay_page *page, hwaddr addr); + void hyperv_synic_add(CPUState *cs); void hyperv_synic_reset(CPUState *cs); void hyperv_synic_update(CPUState *cs, bool enable, diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c index f49ed2621d..d5eb47b8b0 100644 --- a/target/i386/kvm/hyperv.c +++ b/target/i386/kvm/hyperv.c @@ -16,6 +16,83 @@ #include "hyperv.h" #include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" +#include "kvm_i386.h" + +struct x86_hv_overlay { + struct hyperv_overlay_page *page; + uint32_t msr; + hwaddr gpa; +}; + +static void async_overlay_update(CPUState *cs, run_on_cpu_data data) +{ + X86CPU *cpu = X86_CPU(cs); + struct x86_hv_overlay *overlay = data.host_ptr; + + qemu_mutex_lock_iothread(); + hyperv_overlay_update(overlay->page, overlay->gpa); + qemu_mutex_unlock_iothread(); + + /** + * Call KVM so it can keep a copy of the MSR data and do other post-overlay + * actions such as filling the overlay page contents before returning to + * guest. This works because MSR filtering is inactive for KVM_SET_MSRS + */ + kvm_put_one_msr(cpu, overlay->msr, overlay->gpa); + + g_free(overlay); +} + +static void do_overlay_update(X86CPU *cpu, struct hyperv_overlay_page *page, + uint32_t msr, uint64_t data) +{ + struct x86_hv_overlay *overlay = g_malloc(sizeof(struct x86_hv_overlay)); + + *overlay = (struct x86_hv_overlay) { + .page = page, + .msr = msr, + .gpa = data + }; + + /** + * This will run in this cpu thread before it returns to KVM, but in a + * safe environment (i.e. when all cpus are quiescent) -- this is + * necessary because memory hierarchy is being changed + */ + async_safe_run_on_cpu(CPU(cpu), async_overlay_update, + RUN_ON_CPU_HOST_PTR(overlay)); +} + +static void overlay_update(X86CPU *cpu, uint32_t msr, uint64_t data) +{ + CPUX86State *env = &cpu->env; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + /** + * When GUEST_OS_ID is cleared, hypercall overlay should be removed; + * otherwise it is a NOP. We still need to do a SET_MSR here as the + * kernel need to keep a copy of data. + */ + env->msr_hv_guest_os_id = data; + if (data != 0) { + kvm_put_one_msr(cpu, msr, data); + return; + } + /* Fake a zero write to the overlay page hcall to invalidate the mapping */ + do_overlay_update(cpu, &hcall_page, msr, 0); + break; + case HV_X64_MSR_HYPERCALL: + if (env->msr_hv_guest_os_id == 0) { + /* This is a NOP in KVM too so we don't need to write back the MSR */ + return; + } + do_overlay_update(cpu, &hcall_page, msr, data); + break; + default: + return; + } +} int hyperv_x86_synic_add(X86CPU *cpu) { @@ -44,6 +121,20 @@ static void async_synic_update(CPUState *cs, run_on_cpu_data data) qemu_mutex_unlock_iothread(); } +int kvm_hv_handle_wrmsr(X86CPU *cpu, uint32_t msr, uint64_t data) +{ + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + case HV_X64_MSR_HYPERCALL: + overlay_update(cpu, msr, data); + break; + default: + return 1; + } + + return 0; +} + int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) { CPUX86State *env = &cpu->env; diff --git a/target/i386/kvm/hyperv.h b/target/i386/kvm/hyperv.h index 67543296c3..8e90fa949f 100644 --- a/target/i386/kvm/hyperv.h +++ b/target/i386/kvm/hyperv.h @@ -20,8 +20,12 @@ #ifdef CONFIG_KVM int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); +int kvm_hv_handle_wrmsr(X86CPU *cpu, uint32_t msr, uint64_t data); + #endif +void hyperv_x86_hcall_page_update(X86CPU *cpu); + int hyperv_x86_synic_add(X86CPU *cpu); void hyperv_x86_synic_reset(X86CPU *cpu); void hyperv_x86_synic_update(X86CPU *cpu); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index b89b343acc..c279c13437 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2207,11 +2207,30 @@ static void kvm_set_msr_filter_range(struct kvm_msr_filter_range *range, uint32_ static int kvm_set_msr_filters(KVMState *s) { - int r; + int r, nmsrs, nfilt = 0, bitmap_pos = 0; struct kvm_msr_filter filter = { }; + struct kvm_msr_filter_range *range; + uint8_t bitmap_buf[KVM_MSR_FILTER_MAX_RANGES * 8] = {0}; filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW; + if (has_hyperv) { + /* + * Set filter on WRMSR to trap HV_X64_MSR_GUEST_OS_ID and + * HV_X64_MSR_HYPERCALL to userspace. These will be used to + * handle overlay requests. + */ + nmsrs = 2; + range = &filter.ranges[nfilt++]; + range->bitmap = &bitmap_buf[bitmap_pos]; + kvm_set_msr_filter_range(range, KVM_MSR_FILTER_WRITE, + HV_X64_MSR_GUEST_OS_ID, nmsrs, + true, /* HV_X64_MSR_GUEST_OS_ID */ + true /* HV_X64_MSR_HYPERCALL */); + bitmap_pos += ROUND_UP(nmsrs, 8) / 8; + assert(bitmap_pos < sizeof(bitmap_buf)); + } + r = kvm_vm_ioctl(s, KVM_X86_SET_MSR_FILTER, &filter); if (r != 0) { error_report("kvm: failed to set MSR filters"); @@ -2363,6 +2382,10 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } + if (has_hyperv && msr_filters_active) { + hyperv_overlay_init(); + } + return 0; } @@ -4644,7 +4667,29 @@ static int kvm_handle_rdmsr(X86CPU *cpu, struct kvm_run *run) static int kvm_handle_wrmsr(X86CPU *cpu, struct kvm_run *run) { - run->msr.error = 1; + int r = 1; + uint32_t msr; + uint64_t data; + + if (run->msr.reason != KVM_MSR_EXIT_REASON_FILTER) { + error_report("MSR exit without exit reason FILTER"); + goto error; + } + + msr = run->msr.index; + data = run->msr.data; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + case HV_X64_MSR_HYPERCALL: + r = kvm_hv_handle_wrmsr(cpu, msr, data); + break; + default: + error_report("Unknown MSR exit"); + } + +error: + run->msr.error = r; return 0; } -- 2.17.1 Amazon Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B Sitz: Berlin Ust-ID: DE 289 237 879