From: Tom Lendacky <thomas.lendacky@xxxxxxx> For an SEV-ES guest, MMIO is performed to a shared (un-encrypted) page so that both the hypervisor and guest can read or write to it and each see the contents. The GHCB specification provides software-defined VMGEXIT exit codes to indicate a request for an MMIO read or an MMIO write. Add support to recognize the MMIO requests and invoke SEV-ES specific routines that can complete the MMIO operation. These routines use common KVM support to complete the MMIO operation. Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx> --- arch/x86/kvm/svm/sev.c | 116 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 3 + arch/x86/kvm/svm/svm.h | 6 ++ arch/x86/kvm/x86.c | 123 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.h | 5 ++ 5 files changed, 253 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 92a4df26057a..740b44485f36 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1191,6 +1191,24 @@ static void pre_sev_es_run(struct vcpu_svm *svm) if (!svm->ghcb) return; + if (svm->ghcb_sa_free) { + /* + * The scratch area lives outside the GHCB, so there is a + * buffer that, depending on the operation performed, may + * need to be synced, then freed. + */ + if (svm->ghcb_sa_sync) { + kvm_write_guest(svm->vcpu.kvm, + ghcb_get_sw_scratch(svm->ghcb), + svm->ghcb_sa, svm->ghcb_sa_len); + svm->ghcb_sa_sync = false; + } + + kfree(svm->ghcb_sa); + svm->ghcb_sa = NULL; + svm->ghcb_sa_free = false; + } + trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb); kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true); @@ -1223,6 +1241,86 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } +#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) +static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + struct ghcb *ghcb = svm->ghcb; + u64 ghcb_scratch_beg, ghcb_scratch_end; + u64 scratch_gpa_beg, scratch_gpa_end; + void *scratch_va; + + scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); + if (!scratch_gpa_beg) { + pr_err("vmgexit: scratch gpa not provided\n"); + return false; + } + + scratch_gpa_end = scratch_gpa_beg + len; + if (scratch_gpa_end < scratch_gpa_beg) { + pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", + len, scratch_gpa_beg); + return false; + } + + if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { + /* Scratch area begins within GHCB */ + ghcb_scratch_beg = control->ghcb_gpa + + offsetof(struct ghcb, shared_buffer); + ghcb_scratch_end = control->ghcb_gpa + + offsetof(struct ghcb, reserved_1); + + /* + * If the scratch area begins within the GHCB, it must be + * completely contained in the GHCB shared buffer area. + */ + if (scratch_gpa_beg < ghcb_scratch_beg || + scratch_gpa_end > ghcb_scratch_end) { + pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", + scratch_gpa_beg, scratch_gpa_end); + return false; + } + + scratch_va = (void *)svm->ghcb; + scratch_va += (scratch_gpa_beg - control->ghcb_gpa); + } else { + /* + * The guest memory must be read into a kernel buffer, so + * limit the size + */ + if (len > GHCB_SCRATCH_AREA_LIMIT) { + pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", + len, GHCB_SCRATCH_AREA_LIMIT); + return false; + } + scratch_va = kzalloc(len, GFP_KERNEL); + if (!scratch_va) + return false; + + if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { + /* Unable to copy scratch area from guest */ + pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); + + kfree(scratch_va); + return false; + } + + /* + * The scratch area is outside the GHCB. The operation will + * dictate whether the buffer needs to be synced before running + * the vCPU next time (i.e. a read was requested so the data + * must be written back to the guest memory). + */ + svm->ghcb_sa_sync = sync; + svm->ghcb_sa_free = true; + } + + svm->ghcb_sa = scratch_va; + svm->ghcb_sa_len = len; + + return true; +} + static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, unsigned int pos) { @@ -1356,6 +1454,24 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) ret = -EINVAL; switch (ghcb_get_sw_exit_code(ghcb)) { + case SVM_VMGEXIT_MMIO_READ: + if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) + break; + + ret = kvm_sev_es_mmio_read(&svm->vcpu, + control->exit_info_1, + control->exit_info_2, + svm->ghcb_sa); + break; + case SVM_VMGEXIT_MMIO_WRITE: + if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) + break; + + ret = kvm_sev_es_mmio_write(&svm->vcpu, + control->exit_info_1, + control->exit_info_2, + svm->ghcb_sa); + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: pr_err("vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", control->exit_info_1, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 89ee9d533e9a..439b0d0e53eb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1306,6 +1306,9 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) } __free_page(virt_to_page(svm->vmsa)); + + if (svm->ghcb_sa_free) + kfree(svm->ghcb_sa); } __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 3574f52f8a1c..8de45462ff4a 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -165,6 +165,12 @@ struct vcpu_svm { struct vmcb_save_area *vmsa; struct ghcb *ghcb; struct kvm_host_map ghcb_map; + + /* SEV-ES scratch area support */ + void *ghcb_sa; + u64 ghcb_sa_len; + bool ghcb_sa_sync; + bool ghcb_sa_free; }; struct svm_cpu_data { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2a2a394126a2..a0070eeeb139 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10768,6 +10768,129 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c } EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error); +static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_mmio_fragment *frag; + unsigned int len; + + BUG_ON(!vcpu->mmio_needed); + + /* Complete previous fragment */ + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment]; + len = min(8u, frag->len); + if (!vcpu->mmio_is_write) + memcpy(frag->data, run->mmio.data, len); + + if (frag->len <= 8) { + /* Switch to the next fragment. */ + frag++; + vcpu->mmio_cur_fragment++; + } else { + /* Go forward to the next mmio piece. */ + frag->data += len; + frag->gpa += len; + frag->len -= len; + } + + if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { + vcpu->mmio_needed = 0; + + // VMG change, at this point, we're always done + // RIP has already been advanced + return 1; + } + + // More MMIO is needed + run->mmio.phys_addr = frag->gpa; + run->mmio.len = min(8u, frag->len); + run->mmio.is_write = vcpu->mmio_is_write; + if (run->mmio.is_write) + memcpy(run->mmio.data, frag->data, min(8u, frag->len)); + run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} + +int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, + void *data) +{ + int handled; + struct kvm_mmio_fragment *frag; + + if (!data) + return -EINVAL; + + handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data); + if (handled == bytes) + return 1; + + bytes -= handled; + gpa += handled; + data += handled; + + /*TODO: Check if need to increment number of frags */ + frag = vcpu->mmio_fragments; + vcpu->mmio_nr_fragments = 1; + frag->len = bytes; + frag->gpa = gpa; + frag->data = data; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.phys_addr = gpa; + vcpu->run->mmio.len = min(8u, frag->len); + vcpu->run->mmio.is_write = 1; + memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len)); + vcpu->run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write); + +int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, + void *data) +{ + int handled; + struct kvm_mmio_fragment *frag; + + if (!data) + return -EINVAL; + + handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data); + if (handled == bytes) + return 1; + + bytes -= handled; + gpa += handled; + data += handled; + + /*TODO: Check if need to increment number of frags */ + frag = vcpu->mmio_fragments; + vcpu->mmio_nr_fragments = 1; + frag->len = bytes; + frag->gpa = gpa; + frag->data = data; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.phys_addr = gpa; + vcpu->run->mmio.len = min(8u, frag->len); + vcpu->run->mmio.is_write = 0; + vcpu->run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 995ab696dcf0..ce3b7d3d8631 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -398,4 +398,9 @@ bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu); __reserved_bits; \ }) +int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, + void *dst); +int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, + void *dst); + #endif -- 2.28.0