On Fri, Feb 21, 2025 at 05:38:36PM -0800, Sean Christopherson wrote: > On Sat, Feb 22, 2025, Rick P Edgecombe wrote: > > On Thu, 2025-02-20 at 17:08 -0800, Sean Christopherson wrote: > > > Argh, after digging more, this isn't actually true. The separate "unload MMUs" > > > code is leftover from when KVM rather stupidly tried to free all MMU pages when > > > freeing a vCPU. > > > > > > Commit 7b53aa565084 ("KVM: Fix vcpu freeing for guest smp") "fixed" things by > > > unloading MMUs before destroying vCPUs, but the underlying problem was trying to > > > free _all_ MMU pages when destroying a single vCPU. That eventually got fixed > > > for good (haven't checked when), but the separate MMU unloading never got cleaned > > > up. > > > > > > So, scratch the mmu_destroy() idea. But I still want/need to move vCPU destruction > > > before vm_destroy. > > > > > > Now that kvm_arch_pre_destroy_vm() is a thing, one idea would be to add > > > kvm_x86_ops.pre_destroy_vm(), which would pair decently well with the existing > > > call to kvm_mmu_pre_destroy_vm(). > > > > So: > > kvm_x86_call(vm_destroy)(kvm); -> tdx_mmu_release_hkid() > > kvm_destroy_vcpus(kvm); -> tdx_vcpu_free() -> reclaim > > static_call_cond(kvm_x86_vm_free)(kvm); -> reclaim > > > > To: > > (pre_destroy_vm)() -> tdx_mmu_release_hkid() > > kvm_destroy_vcpus(kvm); -> reclaim > > kvm_x86_call(vm_destroy)(kvm); -> nothing > > static_call_cond(kvm_x86_vm_free)(kvm); -> reclaim > > I was thinking this last one could go away, and TDX could reclaim at vm_destroy? > Or is that not possible because it absolutely must come dead last? Hmm, below change works on my end. diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 50cf27473ffb..79406bf07a1c 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -21,7 +21,7 @@ KVM_X86_OP(has_emulated_msr) KVM_X86_OP(vcpu_after_set_cpuid) KVM_X86_OP(vm_init) KVM_X86_OP_OPTIONAL(vm_destroy) -KVM_X86_OP_OPTIONAL(vm_free) +KVM_X86_OP_OPTIONAL(vm_pre_destroy) KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate) KVM_X86_OP(vcpu_create) KVM_X86_OP(vcpu_free) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8d15e604613b..2d5b6d34d30e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1677,7 +1677,7 @@ struct kvm_x86_ops { unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); - void (*vm_free)(struct kvm *kvm); + void (*vm_pre_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ int (*vcpu_precreate)(struct kvm *kvm); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 1fa0364faa60..a8acf98dfadd 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -76,18 +76,19 @@ static int vt_vm_init(struct kvm *kvm) return vmx_vm_init(kvm); } -static void vt_vm_destroy(struct kvm *kvm) +static void vt_vm_pre_destroy(struct kvm *kvm) { if (is_td(kvm)) return tdx_mmu_release_hkid(kvm); - vmx_vm_destroy(kvm); } -static void vt_vm_free(struct kvm *kvm) +static void vt_vm_destroy(struct kvm *kvm) { if (is_td(kvm)) - tdx_vm_free(kvm); + return tdx_vm_free(kvm); + + vmx_vm_destroy(kvm); } static int vt_vcpu_precreate(struct kvm *kvm) @@ -914,7 +915,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vm_init = vt_vm_init, .vm_destroy = vt_vm_destroy, - .vm_free = vt_vm_free, + .vm_pre_destroy = vt_vm_pre_destroy, .vcpu_precreate = vt_vcpu_precreate, .vcpu_create = vt_vcpu_create, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8ae96449e6e2..cb2672a59715 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12889,6 +12889,7 @@ EXPORT_SYMBOL_GPL(__x86_set_memory_region); void kvm_arch_pre_destroy_vm(struct kvm *kvm) { kvm_mmu_pre_destroy_vm(kvm); + static_call_cond(kvm_x86_vm_pre_destroy)(kvm); } void kvm_arch_destroy_vm(struct kvm *kvm) @@ -12908,7 +12909,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); } kvm_unload_vcpu_mmus(kvm); - kvm_x86_call(vm_destroy)(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); @@ -12919,7 +12919,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_page_track_cleanup(kvm); kvm_xen_destroy_vm(kvm); kvm_hv_destroy_vm(kvm); - static_call_cond(kvm_x86_vm_free)(kvm); + kvm_x86_call(vm_destroy)(kvm); } static void memslot_rmap_free(struct kvm_memory_slot *slot)