In some cases, like with SEV-SNP, guest memory needs to be updated in a platform-specific manner before it can be safely freed back to the host. Add hooks to wire up handling of this sort to the invalidation notifiers for restricted memory. Also issue invalidations of all allocated pages during notifier unregistration so that the pages are not left in an unusable state when they eventually get freed back to the host upon FD release. Signed-off-by: Michael Roth <michael.roth@xxxxxxx> --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu/mmu.c | 5 +++++ include/linux/kvm_host.h | 2 ++ mm/restrictedmem.c | 16 ++++++++++++++++ virt/kvm/kvm_main.c | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 52f94a0ba5e9..c71df44b0f02 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -134,6 +134,7 @@ KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL_RET0(private_mem_enabled); KVM_X86_OP_OPTIONAL_RET0(fault_is_private); KVM_X86_OP_OPTIONAL_RET0(update_mem_attr) +KVM_X86_OP_OPTIONAL(invalidate_restricted_mem) #undef KVM_X86_OP #undef KVM_X86_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 13802389f0f9..9ef8d73455d9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1639,6 +1639,7 @@ struct kvm_x86_ops { int (*fault_is_private)(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault); int (*update_mem_attr)(struct kvm_memory_slot *slot, unsigned int attr, gfn_t start, gfn_t end); + void (*invalidate_restricted_mem)(struct kvm_memory_slot *slot, gfn_t start, gfn_t end); bool (*has_wbinvd_exit)(void); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a0c41d391547..2713632e5061 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7183,3 +7183,8 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm, kvm_update_lpage_private_shared_mixed(kvm, slot, attrs, start, end); } + +void kvm_arch_invalidate_restricted_mem(struct kvm_memory_slot *slot, gfn_t start, gfn_t end) +{ + static_call_cond(kvm_x86_invalidate_restricted_mem)(slot, start, end); +} diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f032d878e034..f72a2e0b8699 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2327,6 +2327,7 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long attrs, gfn_t start, gfn_t end); + #else static inline void kvm_arch_set_memory_attributes(struct kvm *kvm, struct kvm_memory_slot *slot, @@ -2366,6 +2367,7 @@ static inline int kvm_restricted_mem_get_pfn(struct kvm_memory_slot *slot, } void kvm_arch_memory_mce(struct kvm *kvm); +void kvm_arch_invalidate_restricted_mem(struct kvm_memory_slot *slot, gfn_t start, gfn_t end); #endif /* CONFIG_HAVE_KVM_RESTRICTED_MEM */ #endif diff --git a/mm/restrictedmem.c b/mm/restrictedmem.c index 56953c204e5c..74fa2cfb8618 100644 --- a/mm/restrictedmem.c +++ b/mm/restrictedmem.c @@ -54,6 +54,11 @@ static int restrictedmem_release(struct inode *inode, struct file *file) { struct restrictedmem_data *data = inode->i_mapping->private_data; + pr_debug("%s: releasing memfd, invalidating page offsets 0x0-0x%llx\n", + __func__, inode->i_size >> PAGE_SHIFT); + restrictedmem_invalidate_start(data, 0, inode->i_size >> PAGE_SHIFT); + restrictedmem_invalidate_end(data, 0, inode->i_size >> PAGE_SHIFT); + fput(data->memfd); kfree(data); return 0; @@ -258,6 +263,17 @@ void restrictedmem_unregister_notifier(struct file *file, struct restrictedmem_notifier *notifier) { struct restrictedmem_data *data = file->f_mapping->private_data; + struct inode *inode = file_inode(data->memfd); + + /* TODO: this will issue notifications to all registered notifiers, + * but it's only the one being unregistered that needs to process + * invalidations for any ranges still allocated at this point in + * time. For now this relies on KVM currently being the only notifier. + */ + pr_debug("%s: unregistering notifier, invalidating page offsets 0x0-0x%llx\n", + __func__, inode->i_size >> PAGE_SHIFT); + restrictedmem_invalidate_start(data, 0, inode->i_size >> PAGE_SHIFT); + restrictedmem_invalidate_end(data, 0, inode->i_size >> PAGE_SHIFT); mutex_lock(&data->lock); list_del(¬ifier->list); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d2d829d23442..d2daa049e94a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -974,6 +974,9 @@ static void kvm_restrictedmem_invalidate_begin(struct restrictedmem_notifier *no &gfn_start, &gfn_end)) return; + pr_debug("%s: start: 0x%lx, end: 0x%lx, roffset: 0x%llx, gfn_start: 0x%llx, gfn_end: 0x%llx\n", + __func__, start, end, slot->restricted_offset, gfn_start, gfn_end); + gfn_range.start = gfn_start; gfn_range.end = gfn_end; gfn_range.slot = slot; @@ -988,6 +991,8 @@ static void kvm_restrictedmem_invalidate_begin(struct restrictedmem_notifier *no if (kvm_unmap_gfn_range(kvm, &gfn_range)) kvm_flush_remote_tlbs(kvm); + kvm_arch_invalidate_restricted_mem(slot, gfn_start, gfn_end); + KVM_MMU_UNLOCK(kvm); srcu_read_unlock(&kvm->srcu, idx); } -- 2.25.1