On Thu, 31 Mar 2022 16:02:55 +0200 Janosch Frank <frankja@xxxxxxxxxxxxx> wrote: > On 3/30/22 14:26, Claudio Imbrenda wrote: > > In upcoming patches it will be possible to start tearing down a > > protected VM, and finish the teardown concurrently in a different > > thread. > > > > Protected VMs that are pending for tear down ("leftover") need to be > > cleaned properly when the userspace process (e.g. qemu) terminates. > > > > This patch makes sure that all "leftover" protected VMs are always > > properly torn down. > > > > Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx> > > --- > > arch/s390/include/asm/kvm_host.h | 2 + > > arch/s390/kvm/kvm-s390.c | 1 + > > arch/s390/kvm/pv.c | 69 ++++++++++++++++++++++++++++++++ > > 3 files changed, 72 insertions(+) > > > > diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h > > index 1bccb8561ba9..50e3516cbc03 100644 > > --- a/arch/s390/include/asm/kvm_host.h > > +++ b/arch/s390/include/asm/kvm_host.h > > @@ -922,6 +922,8 @@ struct kvm_s390_pv { > > u64 guest_len; > > unsigned long stor_base; > > void *stor_var; > > + void *async_deinit; > > + struct list_head need_cleanup; > > struct mmu_notifier mmu_notifier; > > }; > > > > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > > index 446f89db93a1..3637f556ff33 100644 > > --- a/arch/s390/kvm/kvm-s390.c > > +++ b/arch/s390/kvm/kvm-s390.c > > @@ -2788,6 +2788,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > > kvm_s390_vsie_init(kvm); > > if (use_gisa) > > kvm_s390_gisa_init(kvm); > > + INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); > > kvm->arch.pv.sync_deinit = NULL; isn't the struct allocated with __GFP_ZERO ? > > > KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); > > > > return 0; > > diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c > > index be3b467f8feb..56412617dd01 100644 > > --- a/arch/s390/kvm/pv.c > > +++ b/arch/s390/kvm/pv.c > > @@ -17,6 +17,19 @@ > > #include <linux/mmu_notifier.h> > > #include "kvm-s390.h" > > > > +/** > > + * @struct deferred_priv > > + * Represents a "leftover" protected VM that does not correspond to any > > + * active KVM VM. > > Maybe something like: > ...that is still registered with the Ultravisor but isn't registered > with KVM anymore. will fix > > > + */ > > +struct deferred_priv { > > + struct list_head list; > > + unsigned long old_table; > > + u64 handle; > > + void *stor_var; > > + unsigned long stor_base; > > +}; > > + > > static void kvm_s390_clear_pv_state(struct kvm *kvm) > > { > > kvm->arch.pv.handle = 0; > > @@ -163,6 +176,60 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) > > return -ENOMEM; > > } > > > > +/** > > + * kvm_s390_pv_cleanup_deferred - Clean up one leftover protected VM. > > + * @kvm the KVM that was associated with this leftover protected VM > > + * @deferred details about the leftover protected VM that needs a clean up > > + * Return: 0 in case of success, otherwise 1 > > + */ > > +static int kvm_s390_pv_cleanup_deferred(struct kvm *kvm, struct deferred_priv *deferred) > > +{ > > + u16 rc, rrc; > > + int cc; > > + > > + cc = uv_cmd_nodata(deferred->handle, UVC_CMD_DESTROY_SEC_CONF, &rc, &rrc); > > + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", rc, rrc); > > + WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", rc, rrc); > > + if (cc) > > + return cc; > > + /* > > + * Intentionally leak unusable memory. If the UVC fails, the memory > > + * used for the VM and its metadata is permanently unusable. > > + * This can only happen in case of a serious KVM or hardware bug; it > > + * is not expected to happen in normal operation. > > + */ > > + free_pages(deferred->stor_base, get_order(uv_info.guest_base_stor_len)); > > + free_pages(deferred->old_table, CRST_ALLOC_ORDER); > > + vfree(deferred->stor_var); > > + return 0; > > +} > > + > > +/** > > + * kvm_s390_pv_cleanup_leftovers - Clean up all leftover protected VMs. > > + * @kvm the KVM whose leftover protected VMs are to be cleaned up > > + * Return: 0 in case of success, otherwise 1 > > + */ > > +static int kvm_s390_pv_cleanup_leftovers(struct kvm *kvm) > > +{ > > + struct deferred_priv *deferred; > > + int cc = 0; > > + > > + if (kvm->arch.pv.async_deinit) > > + list_add(kvm->arch.pv.async_deinit, &kvm->arch.pv.need_cleanup); > > + > > + while (!list_empty(&kvm->arch.pv.need_cleanup)) { > > + deferred = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*deferred), list); > > + if (kvm_s390_pv_cleanup_deferred(kvm, deferred)) > > + cc = 1; > > + else > > + atomic_dec(&kvm->mm->context.protected_count); > > + list_del(&deferred->list); > > + kfree(deferred); > > + } > > + kvm->arch.pv.async_deinit = NULL; > > + return cc; > > +} > > + > > /* this should not fail, but if it does, we must not free the donated memory */ > > int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) > > { > > @@ -190,6 +257,8 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) > > KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc); > > WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc); > > > > + cc |= kvm_s390_pv_cleanup_leftovers(kvm); > > + > > return cc ? -EIO : 0; > > } > > >