On 3/30/22 14:26, Claudio Imbrenda wrote:
In upcoming patches it will be possible to start tearing down a
protected VM, and finish the teardown concurrently in a different
thread.
Protected VMs that are pending for tear down ("leftover") need to be
cleaned properly when the userspace process (e.g. qemu) terminates.
This patch makes sure that all "leftover" protected VMs are always
properly torn down.
Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
---
arch/s390/include/asm/kvm_host.h | 2 +
arch/s390/kvm/kvm-s390.c | 1 +
arch/s390/kvm/pv.c | 69 ++++++++++++++++++++++++++++++++
3 files changed, 72 insertions(+)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1bccb8561ba9..50e3516cbc03 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -922,6 +922,8 @@ struct kvm_s390_pv {
u64 guest_len;
unsigned long stor_base;
void *stor_var;
+ void *async_deinit;
+ struct list_head need_cleanup;
struct mmu_notifier mmu_notifier;
};
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 446f89db93a1..3637f556ff33 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2788,6 +2788,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_vsie_init(kvm);
if (use_gisa)
kvm_s390_gisa_init(kvm);
+ INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
kvm->arch.pv.sync_deinit = NULL;
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index be3b467f8feb..56412617dd01 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,6 +17,19 @@
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+/**
+ * @struct deferred_priv
+ * Represents a "leftover" protected VM that does not correspond to any
+ * active KVM VM.
Maybe something like:
...that is still registered with the Ultravisor but isn't registered
with KVM anymore.
+ */
+struct deferred_priv {
+ struct list_head list;
+ unsigned long old_table;
+ u64 handle;
+ void *stor_var;
+ unsigned long stor_base;
+};
+
static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
kvm->arch.pv.handle = 0;
@@ -163,6 +176,60 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
return -ENOMEM;
}
+/**
+ * kvm_s390_pv_cleanup_deferred - Clean up one leftover protected VM.
+ * @kvm the KVM that was associated with this leftover protected VM
+ * @deferred details about the leftover protected VM that needs a clean up
+ * Return: 0 in case of success, otherwise 1
+ */
+static int kvm_s390_pv_cleanup_deferred(struct kvm *kvm, struct deferred_priv *deferred)
+{
+ u16 rc, rrc;
+ int cc;
+
+ cc = uv_cmd_nodata(deferred->handle, UVC_CMD_DESTROY_SEC_CONF, &rc, &rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", rc, rrc);
+ WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", rc, rrc);
+ if (cc)
+ return cc;
+ /*
+ * Intentionally leak unusable memory. If the UVC fails, the memory
+ * used for the VM and its metadata is permanently unusable.
+ * This can only happen in case of a serious KVM or hardware bug; it
+ * is not expected to happen in normal operation.
+ */
+ free_pages(deferred->stor_base, get_order(uv_info.guest_base_stor_len));
+ free_pages(deferred->old_table, CRST_ALLOC_ORDER);
+ vfree(deferred->stor_var);
+ return 0;
+}
+
+/**
+ * kvm_s390_pv_cleanup_leftovers - Clean up all leftover protected VMs.
+ * @kvm the KVM whose leftover protected VMs are to be cleaned up
+ * Return: 0 in case of success, otherwise 1
+ */
+static int kvm_s390_pv_cleanup_leftovers(struct kvm *kvm)
+{
+ struct deferred_priv *deferred;
+ int cc = 0;
+
+ if (kvm->arch.pv.async_deinit)
+ list_add(kvm->arch.pv.async_deinit, &kvm->arch.pv.need_cleanup);
+
+ while (!list_empty(&kvm->arch.pv.need_cleanup)) {
+ deferred = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*deferred), list);
+ if (kvm_s390_pv_cleanup_deferred(kvm, deferred))
+ cc = 1;
+ else
+ atomic_dec(&kvm->mm->context.protected_count);
+ list_del(&deferred->list);
+ kfree(deferred);
+ }
+ kvm->arch.pv.async_deinit = NULL;
+ return cc;
+}
+
/* this should not fail, but if it does, we must not free the donated memory */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
@@ -190,6 +257,8 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+ cc |= kvm_s390_pv_cleanup_leftovers(kvm);
+
return cc ? -EIO : 0;
}