On Thu, Aug 19, 2021 at 8:49 AM Peter Gonda <pgonda@xxxxxxxxxx> wrote: > > For SEV to work with intra host migration, contents of the SEV info struct > such as the ASID (used to index the encryption key in the AMD SP) and > the list of memory regions need to be transferred to the target VM. > This change adds a commands for a target VMM to get a source SEV VM's sev > info. > > The target is expected to be initialized (sev_guest_init), but not > launched state (sev_launch_start) when performing receive. Once the > target has received, it will be in a launched state and will not > need to perform the typical SEV launch commands. > > Signed-off-by: Peter Gonda <pgonda@xxxxxxxxxx> > Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx> > Cc: Marc Orr <marcorr@xxxxxxxxxx> > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > Cc: Sean Christopherson <seanjc@xxxxxxxxxx> > Cc: David Rientjes <rientjes@xxxxxxxxxx> > Cc: Dr. David Alan Gilbert <dgilbert@xxxxxxxxxx> > Cc: Brijesh Singh <brijesh.singh@xxxxxxx> > Cc: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> > Cc: Wanpeng Li <wanpengli@xxxxxxxxxxx> > Cc: Jim Mattson <jmattson@xxxxxxxxxx> > Cc: Joerg Roedel <joro@xxxxxxxxxx> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Cc: Ingo Molnar <mingo@xxxxxxxxxx> > Cc: Borislav Petkov <bp@xxxxxxxxx> > Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> > Cc: kvm@xxxxxxxxxxxxxxx > Cc: linux-kernel@xxxxxxxxxxxxxxx > --- > Documentation/virt/kvm/api.rst | 15 +++++ > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/svm/sev.c | 108 ++++++++++++++++++++++++++++++++ > arch/x86/kvm/svm/svm.c | 1 + > arch/x86/kvm/svm/svm.h | 3 + > arch/x86/kvm/x86.c | 5 ++ > include/uapi/linux/kvm.h | 1 + > 7 files changed, 134 insertions(+) > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > index 86d7ad3a126c..9dc56778b421 100644 > --- a/Documentation/virt/kvm/api.rst > +++ b/Documentation/virt/kvm/api.rst > @@ -6701,6 +6701,21 @@ MAP_SHARED mmap will result in an -EINVAL return. > When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to > perform a bulk copy of tags to/from the guest. > > +7.29 KVM_CAP_VM_MIGRATE_ENC_CONTEXT_FROM > +------------------------------------- > + > +Architectures: x86 SEV enabled > +Type: vm > +Parameters: args[0] is the fd of the source vm > +Returns: 0 on success > + > +This capability enables userspace to migrate the encryption context from the vm > +indicated by the fd to the vm this is called on. > + > +This is intended to support intra-host migration of VMs between userspace VMMs. > +in-guest workloads scheduled by the host. This allows for upgrading the VMM > +process without interrupting the guest. > + > 8. Other capabilities. > ====================== > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 20daaf67a5bf..fd3a118c9e40 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1448,6 +1448,7 @@ struct kvm_x86_ops { > int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); > int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); > int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); > + int (*vm_migrate_enc_context_from)(struct kvm *kvm, unsigned int source_fd); > > int (*get_msr_feature)(struct kvm_msr_entry *entry); > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 75e0b21ad07c..2d98b56b6f8c 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1501,6 +1501,114 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) > return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); > } > > +static int svm_sev_lock_for_migration(struct kvm *kvm) > +{ > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + int ret; > + > + /* > + * Bail if this VM is already involved in a migration to avoid deadlock > + * between two VMs trying to migrate to/from each other. > + */ > + spin_lock(&sev->migration_lock); > + if (sev->migration_in_progress) > + ret = -EBUSY; > + else { > + /* > + * Otherwise indicate VM is migrating and take the KVM lock. > + */ > + sev->migration_in_progress = true; > + mutex_lock(&kvm->lock); > + ret = 0; > + } > + spin_unlock(&sev->migration_lock); > + > + return ret; > +} > + > +static void svm_unlock_after_migration(struct kvm *kvm) > +{ > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + > + mutex_unlock(&kvm->lock); > + WRITE_ONCE(sev->migration_in_progress, false); > +} > + This entire locking scheme seems over-complicated to me. Can we simply rely on `migration_lock` and get rid of `migration_in_progress`? I was chatting about these patches with Peter, while he worked on this new version. But he mentioned that this locking scheme had been suggested by Sean in a previous review. Sean: what do you think? My rationale was that this is called via a VM-level ioctl. So serializing the entire code path on `migration_lock` seems fine. But maybe I'm missing something? > +static void migrate_info_from(struct kvm_sev_info *dst, > + struct kvm_sev_info *src) > +{ > + sev_asid_free(dst); > + > + dst->asid = src->asid; > + dst->misc_cg = src->misc_cg; > + dst->handle = src->handle; > + dst->pages_locked = src->pages_locked; > + > + src->asid = 0; > + src->active = false; > + src->handle = 0; > + src->pages_locked = 0; > + src->misc_cg = NULL; > + > + INIT_LIST_HEAD(&dst->regions_list); > + list_replace_init(&src->regions_list, &dst->regions_list); > +} > + > +int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd) > +{ > + struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info; > + struct file *source_kvm_file; > + struct kvm *source_kvm; > + int ret; > + > + ret = svm_sev_lock_for_migration(kvm); > + if (ret) > + return ret; > + > + if (!sev_guest(kvm) || sev_es_guest(kvm)) { > + ret = -EINVAL; > + pr_warn_ratelimited("VM must be SEV enabled to migrate to.\n"); > + goto out_unlock; > + } > + > + if (!list_empty(&dst_sev->regions_list)) { > + ret = -EINVAL; > + pr_warn_ratelimited( > + "VM must not have encrypted regions to migrate to.\n"); > + goto out_unlock; > + } > + > + source_kvm_file = fget(source_fd); > + if (!file_is_kvm(source_kvm_file)) { > + ret = -EBADF; > + goto out_fput; > + } > + > + source_kvm = source_kvm_file->private_data; > + ret = svm_sev_lock_for_migration(source_kvm); > + if (ret) > + goto out_fput; > + > + if (!sev_guest(source_kvm) || sev_es_guest(source_kvm)) { > + ret = -EINVAL; > + pr_warn_ratelimited( > + "Source VM must be SEV enabled to migrate from.\n"); > + goto out_source; > + } > + > + migrate_info_from(dst_sev, &to_kvm_svm(source_kvm)->sev_info); > + ret = 0; > + > +out_source: > + svm_unlock_after_migration(source_kvm); > +out_fput: > + if (source_kvm_file) > + fput(source_kvm_file); > +out_unlock: > + svm_unlock_after_migration(kvm); > + return ret; > +} > + > int svm_mem_enc_op(struct kvm *kvm, void __user *argp) > { > struct kvm_sev_cmd sev_cmd; > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 7b58e445a967..8b5bcab48937 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -4627,6 +4627,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { > .mem_enc_unreg_region = svm_unregister_enc_region, > > .vm_copy_enc_context_from = svm_vm_copy_asid_from, > + .vm_migrate_enc_context_from = svm_vm_migrate_from, > > .can_emulate_instruction = svm_can_emulate_instruction, > > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index 524d943f3efc..3576a12700d7 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -80,6 +80,8 @@ struct kvm_sev_info { > u64 ap_jump_table; /* SEV-ES AP Jump Table address */ > struct kvm *enc_context_owner; /* Owner of copied encryption context */ > struct misc_cg *misc_cg; /* For misc cgroup accounting */ > + spinlock_t migration_lock; > + bool migration_in_progress; > }; > > struct kvm_svm { > @@ -552,6 +554,7 @@ int svm_register_enc_region(struct kvm *kvm, > int svm_unregister_enc_region(struct kvm *kvm, > struct kvm_enc_region *range); > int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd); > +int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd); > void pre_sev_run(struct vcpu_svm *svm, int cpu); > void __init sev_set_cpu_caps(void); > void __init sev_hardware_setup(void); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index fdc0c18339fb..ea3100134e35 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -5655,6 +5655,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > if (kvm_x86_ops.vm_copy_enc_context_from) > r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]); > return r; > + case KVM_CAP_VM_MIGRATE_ENC_CONTEXT_FROM: > + r = -EINVAL; > + if (kvm_x86_ops.vm_migrate_enc_context_from) > + r = kvm_x86_ops.vm_migrate_enc_context_from(kvm, cap->args[0]); > + return r; > case KVM_CAP_EXIT_HYPERCALL: > if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) { > r = -EINVAL; > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index a067410ebea5..49660204cdb9 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -1112,6 +1112,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_BINARY_STATS_FD 203 > #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 > #define KVM_CAP_ARM_MTE 205 > +#define KVM_CAP_VM_MIGRATE_ENC_CONTEXT_FROM 206 > > #ifdef KVM_CAP_IRQ_ROUTING > > -- > 2.33.0.rc1.237.g0d66db33f3-goog >