When SEV-SNP is enabled in the guest VM, guest memory pages can be either
private or shared. A write from the hypervisor goes through the RMP checks.
If the hardware sees that the hypervisor is attempting to write to a guest
private page, it triggers an RMP violation #PF.

To avoid the RMP violation, add post_{map,unmap}_gfn() ops that can be used
to verify that it is safe to map a given guest page. Use SRCU to protect
against page state changes for existing mapped pages.

Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
---
 arch/x86/include/asm/kvm-x86-ops.h |  2 +
 arch/x86/include/asm/kvm_host.h    |  4 ++
 arch/x86/kvm/svm/sev.c             | 69 +++++++++++++++++++++-----
 arch/x86/kvm/svm/svm.c             |  4 ++
 arch/x86/kvm/svm/svm.h             |  8 +++
 arch/x86/kvm/x86.c                 | 78 +++++++++++++++++++++++++++---
 6 files changed, 146 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 371756c7f8f4..c09bd40e0160 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -124,6 +124,8 @@ KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP_NULL(complete_emulated_msr)
 KVM_X86_OP(alloc_apic_backing_page)
 KVM_X86_OP_NULL(rmp_page_level_adjust)
+KVM_X86_OP(post_map_gfn)
+KVM_X86_OP(post_unmap_gfn)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a6e764458f3e..5ac1ff097e8c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1463,7 +1463,11 @@ struct kvm_x86_ops {
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 
 	void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+
 	void (*rmp_page_level_adjust)(struct kvm *kvm, kvm_pfn_t pfn, int *level);
+
+	int (*post_map_gfn)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token);
+	void (*post_unmap_gfn)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0de85ed63e9b..65b578463271 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -336,6 +336,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 		if (ret)
 			goto e_free;
 
+		init_srcu_struct(&sev->psc_srcu);
 		ret = sev_snp_init(&argp->error);
 	} else {
 		ret = sev_platform_init(&argp->error);
@@ -2293,6 +2294,7 @@ void sev_vm_destroy(struct kvm *kvm)
 			WARN_ONCE(1, "Failed to free SNP guest context, leaking asid!\n");
 			return;
 		}
+		cleanup_srcu_struct(&sev->psc_srcu);
 	} else {
 		sev_unbind_asid(kvm, sev->handle);
 	}
@@ -2494,23 +2496,32 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
 	kfree(svm->ghcb_sa);
 }
 
-static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map, int *token)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	u64 gfn = gpa_to_gfn(control->ghcb_gpa);
+	struct kvm_vcpu *vcpu = &svm->vcpu;
 
-	if (kvm_vcpu_map(&svm->vcpu, gfn, map)) {
+	if (kvm_vcpu_map(vcpu, gfn, map)) {
 		/* Unable to map GHCB from guest */
 		pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
 		return -EFAULT;
 	}
 
+	if (sev_post_map_gfn(vcpu->kvm, map->gfn, map->pfn, token)) {
+		kvm_vcpu_unmap(vcpu, map, false);
+		return -EBUSY;
+	}
+
 	return 0;
 }
 
-static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map, int token)
 {
-	kvm_vcpu_unmap(&svm->vcpu, map, true);
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	kvm_vcpu_unmap(vcpu, map, true);
+	sev_post_unmap_gfn(vcpu->kvm, map->gfn, map->pfn, token);
 }
 
 static void dump_ghcb(struct vcpu_svm *svm)
@@ -2518,8 +2529,9 @@ static void dump_ghcb(struct vcpu_svm *svm)
 	struct kvm_host_map map;
 	unsigned int nbits;
 	struct ghcb *ghcb;
+	int token;
 
-	if (svm_map_ghcb(svm, &map))
+	if (svm_map_ghcb(svm, &map, &token))
 		return;
 
 	ghcb = map.hva;
@@ -2544,7 +2556,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
 	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
 
 e_unmap:
-	svm_unmap_ghcb(svm, &map);
+	svm_unmap_ghcb(svm, &map, token);
 }
 
 static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
@@ -2552,8 +2564,9 @@ static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	struct kvm_host_map map;
 	struct ghcb *ghcb;
+	int token;
 
-	if (svm_map_ghcb(svm, &map))
+	if (svm_map_ghcb(svm, &map, &token))
 		return false;
 
 	ghcb = map.hva;
@@ -2579,7 +2592,7 @@ static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
 
 	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
 
-	svm_unmap_ghcb(svm, &map);
+	svm_unmap_ghcb(svm, &map, token);
 
 	return true;
 }
@@ -2636,8 +2649,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	struct kvm_host_map map;
 	struct ghcb *ghcb;
+	int token;
 
-	if (svm_map_ghcb(svm, &map))
+	if (svm_map_ghcb(svm, &map, &token))
 		return -EFAULT;
 
 	ghcb = map.hva;
@@ -2739,7 +2753,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
 
 	sev_es_sync_from_ghcb(svm, ghcb);
 
-	svm_unmap_ghcb(svm, &map);
+	svm_unmap_ghcb(svm, &map, token);
 	return 0;
 
 vmgexit_err:
@@ -2760,7 +2774,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
 	vcpu->run->internal.data[0] = *exit_code;
 	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
 
-	svm_unmap_ghcb(svm, &map);
+	svm_unmap_ghcb(svm, &map, token);
 
 	return -EINVAL;
 }
@@ -3036,6 +3050,9 @@ static int __snp_handle_page_state_change(struct kvm_vcpu *vcpu, enum psc_op op,
 			return PSC_UNDEF_ERR;
 		}
 
+		/* Wait for all the existing mapped gfn to unmap */
+		synchronize_srcu_expedited(&sev->psc_srcu);
+
 		write_lock(&kvm->mmu_lock);
 
 		rc = kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level);
@@ -3604,3 +3621,33 @@ void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level)
 	/* Adjust the level to keep the NPT and RMP in sync */
 	*level = min_t(size_t, *level, rmp_level);
 }
+
+int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	int level;
+
+	if (!sev_snp_guest(kvm))
+		return 0;
+
+	*token = srcu_read_lock(&sev->psc_srcu);
+
+	/* If pfn is not added as private then fail */
+	if (snp_lookup_rmpentry(pfn, &level) == 1) {
+		srcu_read_unlock(&sev->psc_srcu, *token);
+		pr_err_ratelimited("failed to map private gfn 0x%llx pfn 0x%llx\n", gfn, pfn);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+	if (!sev_snp_guest(kvm))
+		return;
+
+	srcu_read_unlock(&sev->psc_srcu, token);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5f73f21a37a1..3784d389247b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4679,7 +4679,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 
 	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
+
 	.rmp_page_level_adjust = sev_rmp_page_level_adjust,
+
+	.post_map_gfn = sev_post_map_gfn,
+	.post_unmap_gfn = sev_post_unmap_gfn,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index d10f7166b39d..ff91184f9b4a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -76,16 +76,22 @@ struct kvm_sev_info {
 	bool active;		/* SEV enabled guest */
 	bool es_active;		/* SEV-ES enabled guest */
 	bool snp_active;	/* SEV-SNP enabled guest */
+
 	unsigned int asid;	/* ASID used for this guest */
 	unsigned int handle;	/* SEV firmware handle */
 	int fd;			/* SEV device fd */
+
 	unsigned long pages_locked; /* Number of pages locked */
 	struct list_head regions_list;  /* List of registered regions */
+
 	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
+
 	struct kvm *enc_context_owner; /* Owner of copied encryption context */
 	struct misc_cg *misc_cg; /* For misc cgroup accounting */
+
 	u64 snp_init_flags;
 	void *snp_context;      /* SNP guest context page */
+	struct srcu_struct psc_srcu;
 };
 
 struct kvm_svm {
@@ -618,6 +624,8 @@ void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
 void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level);
+int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token);
+void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token);
 
 /* vmenter.S */
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index afcdc75a99f2..bf4389ffc88f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3095,6 +3095,65 @@ static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.apf.msr_en_val & mask) == mask;
 }
 
+static int kvm_map_gfn_protected(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
+				 struct gfn_to_pfn_cache *cache, bool atomic, int *token)
+{
+	int ret;
+
+	ret = kvm_map_gfn(vcpu, gfn, map, cache, atomic);
+	if (ret)
+		return ret;
+
+	if (kvm_x86_ops.post_map_gfn) {
+		ret = static_call(kvm_x86_post_map_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+		if (ret)
+			kvm_unmap_gfn(vcpu, map, cache, false, atomic);
+	}
+
+	return ret;
+}
+
+static int kvm_unmap_gfn_protected(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+				   struct gfn_to_pfn_cache *cache, bool dirty,
+				   bool atomic, int token)
+{
+	int ret;
+
+	ret = kvm_unmap_gfn(vcpu, map, cache, dirty, atomic);
+
+	if (kvm_x86_ops.post_unmap_gfn)
+		static_call(kvm_x86_post_unmap_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+
+	return ret;
+}
+
+static int kvm_vcpu_map_protected(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
+				  int *token)
+{
+	int ret;
+
+	ret = kvm_vcpu_map(vcpu, gpa, map);
+	if (ret)
+		return ret;
+
+	if (kvm_x86_ops.post_map_gfn) {
+		ret = static_call(kvm_x86_post_map_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+		if (ret)
+			kvm_vcpu_unmap(vcpu, map, false);
+	}
+
+	return ret;
+}
+
+static void kvm_vcpu_unmap_protected(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+				     bool dirty, int token)
+{
+	kvm_vcpu_unmap(vcpu, map, dirty);
+
+	if (kvm_x86_ops.post_unmap_gfn)
+		static_call(kvm_x86_post_unmap_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+}
+
 static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
@@ -3185,6 +3244,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 {
 	struct kvm_host_map map;
 	struct kvm_steal_time *st;
+	int token;
 
 	if (kvm_xen_msr_enabled(vcpu->kvm)) {
 		kvm_xen_runstate_set_running(vcpu);
@@ -3195,8 +3255,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 		return;
 
 	/* -EAGAIN is returned in atomic context so we can just return. */
-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
-			&map, &vcpu->arch.st.cache, false))
+	if (kvm_map_gfn_protected(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+				  &map, &vcpu->arch.st.cache, false, &token))
 		return;
 
 	st = map.hva +
@@ -3234,7 +3294,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 
 	st->version += 1;
 
-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+	kvm_unmap_gfn_protected(vcpu, &map, &vcpu->arch.st.cache, true, false, token);
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -4271,6 +4331,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 {
 	struct kvm_host_map map;
 	struct kvm_steal_time *st;
+	int token;
 
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
@@ -4278,8 +4339,8 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.st.preempted)
 		return;
 
-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
-			&vcpu->arch.st.cache, true))
+	if (kvm_map_gfn_protected(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+				  &map, &vcpu->arch.st.cache, true, &token))
 		return;
 
 	st = map.hva +
@@ -4287,7 +4348,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 
 	st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
 
-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+	kvm_unmap_gfn_protected(vcpu, &map, &vcpu->arch.st.cache, true, true, token);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -6816,6 +6877,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	gpa_t gpa;
 	char *kaddr;
 	bool exchanged;
+	int token;
 
 	/* guests cmpxchg8b have to be emulated atomically */
 	if (bytes > 8 || (bytes & (bytes - 1)))
@@ -6839,7 +6901,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
 		goto emul_write;
 
-	if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
+	if (kvm_vcpu_map_protected(vcpu, gpa_to_gfn(gpa), &map, &token))
 		goto emul_write;
 
 	kaddr = map.hva + offset_in_page(gpa);
@@ -6861,7 +6923,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 		BUG();
 	}
 
-	kvm_vcpu_unmap(vcpu, &map, true);
+	kvm_vcpu_unmap_protected(vcpu, &map, true, token);
 
 	if (!exchanged)
 		return X86EMUL_CMPXCHG_FAILED;
-- 
2.17.1
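
For readers following the locking scheme, here is a condensed, illustrative
sketch of the SRCU protocol the patch relies on: the map paths take an SRCU
read-side token and verify the page is still safe to touch, while the page
state change path waits for all outstanding tokens before flipping a page's
state. The names psc_demo_srcu, demo_map_guest_page(), demo_unmap_guest_page(),
demo_change_page_state() and page_is_guest_private() are hypothetical
stand-ins for sev->psc_srcu, sev_post_map_gfn(), sev_post_unmap_gfn(), the
page state change handler and the snp_lookup_rmpentry() check; only the SRCU
calls are real kernel APIs.

	#include <linux/srcu.h>
	#include <linux/errno.h>
	#include <linux/types.h>

	/* Stand-in for sev->psc_srcu in struct kvm_sev_info. */
	DEFINE_STATIC_SRCU(psc_demo_srcu);

	/* Hypothetical stand-in for the RMP private-page check. */
	static bool page_is_guest_private(unsigned long pfn)
	{
		return true;
	}

	/* Reader side: mirrors sev_post_map_gfn(). */
	static int demo_map_guest_page(unsigned long pfn, int *token)
	{
		*token = srcu_read_lock(&psc_demo_srcu);

		if (!page_is_guest_private(pfn)) {
			/* Page is not safe to map; drop the token and bail out. */
			srcu_read_unlock(&psc_demo_srcu, *token);
			return -EBUSY;
		}

		/* Caller may now access the mapping while holding @token. */
		return 0;
	}

	/* Reader side: mirrors sev_post_unmap_gfn(). */
	static void demo_unmap_guest_page(int token)
	{
		srcu_read_unlock(&psc_demo_srcu, token);
	}

	/* Writer side: mirrors the page state change path. */
	static void demo_change_page_state(unsigned long pfn)
	{
		/* Wait for every existing mapping to drop its SRCU token ... */
		synchronize_srcu_expedited(&psc_demo_srcu);

		/* ... after which the RMP entry for @pfn can be updated safely. */
	}

SRCU fits this pattern because the read side may sleep while the GHCB or
steal-time page stays mapped, which rules out a plain spinlock, and the
read-side overhead is only the token bookkeeping.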