The kvm_write_guest{_page} and kvm_vcpu_write_guest{_page} are used by the hypevisor to write to the guest memory. The kvm_vcpu_map() and kvm_map_gfn() are used by the hypervisor to map the guest memory and and access it later. When SEV-SNP is enabled in the guest VM, the guest memory pages can either be a private or shared. A write from the hypervisor goes through the RMP checks. If hardware sees that hypervisor is attempting to write to a guest private page, then it triggers an RMP violation (i.e, #PF with RMP bit set). Enhance the KVM guest write helpers to invoke an architecture specific hooks (kvm_arch_write_gfn_{begin,end}) to track the write access from the hypervisor. When SEV-SNP is enabled, the guest uses the PAGE_STATE vmgexit to ask the hypervisor to change the page state from shared to private or vice versa. While changing the page state to private, use the kvm_host_write_track_is_active() to check whether the page is being tracked for the host write access (i.e either mapped or kvm_write_guest is in progress). If its tracked, then do not change the page state. Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx> --- arch/x86/include/asm/kvm_host.h | 6 +++ arch/x86/kvm/svm/sev.c | 51 +++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 2 + arch/x86/kvm/svm/svm.h | 1 + arch/x86/kvm/x86.c | 78 +++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 3 ++ virt/kvm/kvm_main.c | 21 +++++++-- 7 files changed, 159 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 59185b6bc82a..678992e9966a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -865,10 +865,13 @@ struct kvm_lpage_info { int disallow_lpage; }; +bool kvm_host_write_track_is_active(struct kvm *kvm, gfn_t gfn); + struct kvm_arch_memory_slot { struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; + unsigned short *host_write_track[KVM_PAGE_TRACK_MAX]; }; /* @@ -1393,6 +1396,9 @@ struct kvm_x86_ops { void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); int (*get_tdp_max_page_level)(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level); + + void (*write_page_begin)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); + void (*write_page_end)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index ddcbae37de4f..d30419e91288 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2862,6 +2862,19 @@ static int snp_make_page_shared(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, return rmpupdate(pfn_to_page(pfn), &val); } +static inline bool kvm_host_write_track_gpa_range_is_active(struct kvm *kvm, + gpa_t start, gpa_t end) +{ + while (start < end) { + if (kvm_host_write_track_is_active(kvm, gpa_to_gfn(start))) + return 1; + + start += PAGE_SIZE; + } + + return false; +} + static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level) { struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; @@ -2873,6 +2886,14 @@ static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn if (!e) return -EINVAL; + /* + * If the GPA is tracked for the write access then do not change the + * page state from shared to private. + */ + if (kvm_host_write_track_gpa_range_is_active(vcpu->kvm, + gpa, gpa + page_level_size(level))) + return -EBUSY; + /* Log if the entry is validated */ if (rmpentry_validated(e)) pr_warn_ratelimited("Asked to make a pre-validated gpa %llx private\n", gpa); @@ -3446,3 +3467,33 @@ int sev_get_tdp_max_page_level(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level) return min_t(uint32_t, level, max_level); } + +void sev_snp_write_page_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) +{ + struct rmpentry *e; + int level, rc; + kvm_pfn_t pfn; + + if (!sev_snp_guest(kvm)) + return; + + pfn = gfn_to_pfn(kvm, gfn); + if (is_error_noslot_pfn(pfn)) + return; + + e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &level); + if (unlikely(!e)) + return; + + /* + * A hypervisor should never write to the guest private page. A write to the + * guest private will cause an RMP violation. If the guest page is private, + * then make it shared. + */ + if (rmpentry_assigned(e)) { + pr_err("SEV-SNP: write to guest private gfn %llx\n", gfn); + rc = snp_make_page_shared(kvm_get_vcpu(kvm, 0), + gfn << PAGE_SHIFT, pfn, PG_LEVEL_4K); + BUG_ON(rc != 0); + } +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2632eae52aa3..4ff6fc86dd18 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4577,6 +4577,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .alloc_apic_backing_page = svm_alloc_apic_backing_page, .get_tdp_max_page_level = sev_get_tdp_max_page_level, + + .write_page_begin = sev_snp_write_page_begin, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index af4cce39b30f..e0276ad8a1ae 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -576,6 +576,7 @@ void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu); void sev_es_unmap_ghcb(struct vcpu_svm *svm); struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); int sev_get_tdp_max_page_level(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level); +void sev_snp_write_page_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); /* vmenter.S */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bbc4e04e67ad..1398b8021982 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9076,6 +9076,48 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); } +static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode, short count) +{ + int index, val; + + index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); + + val = slot->arch.host_write_track[mode][index]; + + if (WARN_ON(val + count < 0 || val + count > USHRT_MAX)) + return; + + slot->arch.host_write_track[mode][index] += count; +} + +bool kvm_host_write_track_is_active(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_memory_slot *slot; + int index; + + slot = gfn_to_memslot(kvm, gfn); + if (!slot) + return false; + + index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); + return !!READ_ONCE(slot->arch.host_write_track[KVM_PAGE_TRACK_WRITE][index]); +} +EXPORT_SYMBOL_GPL(kvm_host_write_track_is_active); + +void kvm_arch_write_gfn_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) +{ + update_gfn_track(slot, gfn, KVM_PAGE_TRACK_WRITE, 1); + + if (kvm_x86_ops.write_page_begin) + kvm_x86_ops.write_page_begin(kvm, slot, gfn); +} + +void kvm_arch_write_gfn_end(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) +{ + update_gfn_track(slot, gfn, KVM_PAGE_TRACK_WRITE, -1); +} + void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { if (!lapic_in_kernel(vcpu)) @@ -10896,6 +10938,36 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_hv_destroy_vm(kvm); } +static void kvm_write_page_track_free_memslot(struct kvm_memory_slot *slot) +{ + int i; + + for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { + kvfree(slot->arch.host_write_track[i]); + slot->arch.host_write_track[i] = NULL; + } +} + +static int kvm_write_page_track_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + int i; + + for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { + slot->arch.host_write_track[i] = + kvcalloc(npages, sizeof(*slot->arch.host_write_track[i]), + GFP_KERNEL_ACCOUNT); + if (!slot->arch.host_write_track[i]) + goto track_free; + } + + return 0; + +track_free: + kvm_write_page_track_free_memslot(slot); + return -ENOMEM; +} + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { int i; @@ -10969,8 +11041,14 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot, if (kvm_page_track_create_memslot(slot, npages)) goto out_free; + if (kvm_write_page_track_create_memslot(slot, npages)) + goto e_free_page_track; + return 0; +e_free_page_track: + kvm_page_track_free_memslot(slot); + out_free: for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { kvfree(slot->arch.rmap[i]); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2f34487e21f2..f22e22cd2179 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1550,6 +1550,9 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, unsigned long start, unsigned long end); +void kvm_arch_write_gfn_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); +void kvm_arch_write_gfn_end(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); + #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); #else diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6b4feb92dc79..bc805c15d0de 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -160,6 +160,14 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, { } +__weak void kvm_arch_write_gfn_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) +{ +} + +__weak void kvm_arch_write_gfn_end(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) +{ +} + bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) { /* @@ -2309,7 +2317,8 @@ static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn, cache->generation = gen; } -static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn, +static int __kvm_map_gfn(struct kvm *kvm, + struct kvm_memslots *slots, gfn_t gfn, struct kvm_host_map *map, struct gfn_to_pfn_cache *cache, bool atomic) @@ -2361,20 +2370,22 @@ static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn, map->pfn = pfn; map->gfn = gfn; + kvm_arch_write_gfn_begin(kvm, slot, map->gfn); + return 0; } int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, struct gfn_to_pfn_cache *cache, bool atomic) { - return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map, + return __kvm_map_gfn(vcpu->kvm, kvm_memslots(vcpu->kvm), gfn, map, cache, atomic); } EXPORT_SYMBOL_GPL(kvm_map_gfn); int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) { - return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map, + return __kvm_map_gfn(vcpu->kvm, kvm_vcpu_memslots(vcpu), gfn, map, NULL, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_map); @@ -2412,6 +2423,8 @@ static void __kvm_unmap_gfn(struct kvm *kvm, else kvm_release_pfn(map->pfn, dirty, NULL); + kvm_arch_write_gfn_end(kvm, memslot, map->gfn); + map->hva = NULL; map->page = NULL; } @@ -2612,7 +2625,9 @@ static int __kvm_write_guest_page(struct kvm *kvm, addr = gfn_to_hva_memslot(memslot, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; + kvm_arch_write_gfn_begin(kvm, memslot, gfn); r = __copy_to_user((void __user *)addr + offset, data, len); + kvm_arch_write_gfn_end(kvm, memslot, gfn); if (r) return -EFAULT; mark_page_dirty_in_slot(kvm, memslot, gfn); -- 2.17.1