On Mon, Jun 21, 2021, Peter Gonda wrote:
> +static int process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
> +{
> +        struct vmsa_node *vmsa_node, *q;
> +        struct kvm_vcpu *vcpu;
> +        struct vcpu_svm *svm;
> +
> +        lockdep_assert_held(&kvm->lock);
> +
> +        if (!vmsa_list)

This is pointless, all callers pass in a list, i.e. it's mandatory.

> +                return 0;
> +
> +        list_for_each_entry(vmsa_node, vmsa_list, list) {
> +                if (!kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id)) {
> +                        WARN(1,
> +                             "Failed to find VCPU with ID %d despite presence in VMSA list.\n",
> +                             vmsa_node->vcpu_id);
> +                        return -1;
> +                }
> +        }
> +
> +        /*
> +         * Move any stashed VMSAs back to their respective VMCBs and delete
> +         * those nodes.
> +         */
> +        list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
> +                vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);

Barring a KVM bug, is it even theoretically possible for vcpu to be NULL?
If not, I'd simply drop the above sanity check.  If this can only be true
if there's a KVM bug and you really want to keep the WARN, just do:

        if (WARN_ON(!vcpu))
                continue;

since a KVM bug this egregious means all bets are off anyways.  That should
also allow you to make this a void returning helper and avoid pointless
checking.

> +                svm = to_svm(vcpu);
> +                svm->vmsa = vmsa_node->vmsa;
> +                svm->ghcb = vmsa_node->ghcb;
> +                svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
> +                svm->vcpu.arch.guest_state_protected = true;
> +                svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
> +                svm->ghcb_sa = vmsa_node->ghcb_sa;
> +                svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
> +                svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
> +                svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;
> +
> +                list_del(&vmsa_node->list);
> +                kfree(vmsa_node);
> +        }
> +
> +        return 0;
> +}
> +
> +static int create_vmsa_list(struct kvm *kvm,
> +                            struct sev_info_migration_node *entry)
> +{
> +        int i;
> +        const int num_vcpus = atomic_read(&kvm->online_vcpus);
> +        struct vmsa_node *node;
> +        struct kvm_vcpu *vcpu;
> +        struct vcpu_svm *svm;
> +
> +        INIT_LIST_HEAD(&entry->vmsa_list);
> +        for (i = 0; i < num_vcpus; ++i) {
> +                node = kzalloc(sizeof(*node), GFP_KERNEL);
> +                if (!node)
> +                        goto e_freelist;
> +
> +                vcpu = kvm->vcpus[i];
> +                node->vcpu_id = vcpu->vcpu_id;
> +
> +                svm = to_svm(vcpu);
> +                node->vmsa = svm->vmsa;
> +                svm->vmsa = NULL;
> +                node->ghcb = svm->ghcb;
> +                svm->ghcb = NULL;
> +                node->ghcb_gpa = svm->vmcb->control.ghcb_gpa;
> +                node->ghcb_sa = svm->ghcb_sa;
> +                svm->ghcb_sa = NULL;
> +                node->ghcb_sa_len = svm->ghcb_sa_len;
> +                svm->ghcb_sa_len = 0;
> +                node->ghcb_sa_sync = svm->ghcb_sa_sync;
> +                svm->ghcb_sa_sync = false;
> +                node->ghcb_sa_free = svm->ghcb_sa_free;
> +                svm->ghcb_sa_free = false;
> +
> +                list_add_tail(&node->list, &entry->vmsa_list);
> +        }
> +
> +        return 0;
> +
> +e_freelist:
> +        if (process_vmsa_list(kvm, &entry->vmsa_list))
> +                WARN(1, "Unable to move VMSA list back to source VM. Guest is in a broken state now.");

Same comments about e_freelist and using WARN_ON().  Though if
process_vmsa_list() can't return an error, this goes away entirely.
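
Completely untested sketch, but if the lookup really can't fail (or is
WARN_ON'ed and skipped), the whole helper collapses down to something like:

        static void process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
        {
                struct vmsa_node *vmsa_node, *q;
                struct kvm_vcpu *vcpu;
                struct vcpu_svm *svm;

                lockdep_assert_held(&kvm->lock);

                /*
                 * Move any stashed VMSAs back to their respective VMCBs and
                 * delete those nodes.
                 */
                list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
                        vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);
                        if (WARN_ON(!vcpu))
                                continue;

                        svm = to_svm(vcpu);
                        svm->vmsa = vmsa_node->vmsa;
                        svm->ghcb = vmsa_node->ghcb;
                        svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
                        svm->vcpu.arch.guest_state_protected = true;
                        svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
                        svm->ghcb_sa = vmsa_node->ghcb_sa;
                        svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
                        svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
                        svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;

                        list_del(&vmsa_node->list);
                        kfree(vmsa_node);
                }
        }

i.e. drop the NULL-list check and the pre-validation pass entirely, and
then the "Unable to move VMSA list back to source VM" WARNs in the callers
disappear along with the return value.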
> +        return -1;
> +}
> +
>  static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  {
>          struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1174,9 +1280,6 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>          if (!sev_guest(kvm))
>                  return -ENOTTY;
> 
> -        if (sev->es_active)
> -                return -EPERM;
> -
>          if (sev->info_token != 0)
>                  return -EEXIST;
> 
> @@ -1196,8 +1299,19 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>          INIT_LIST_HEAD(&entry->regions_list);
>          list_replace_init(&sev->regions_list, &entry->regions_list);
> 
> +        if (sev_es_guest(kvm)) {
> +                /*
> +                 * If this is an ES guest, we need to move each VMCB's VMSA into a
> +                 * list for migration.
> +                 */
> +                entry->es_enabled = true;
> +                entry->ap_jump_table = sev->ap_jump_table;
> +                if (create_vmsa_list(kvm, entry))
> +                        goto e_listdel;
> +        }
> +
>          if (place_migration_node(entry))
> -                goto e_listdel;
> +                goto e_vmsadel;
> 
>          token = entry->token;
> 
> @@ -1215,6 +1329,11 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>          hash_del(&entry->hnode);
>          spin_unlock(&sev_info_migration_hash_lock);
> 
> +e_vmsadel:
> +        if (sev_es_guest(kvm) && process_vmsa_list(kvm, &entry->vmsa_list))
> +                WARN(1,
> +                     "Unable to move VMSA list back to source VM. Guest is in a broken state now.");

Guess what today's Final Jeopardy answer is?  :-D

> +
>  e_listdel:
>          list_replace_init(&entry->regions_list, &sev->regions_list);
> 
> @@ -1233,9 +1352,6 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
>          if (!sev_guest(kvm))
>                  return -ENOTTY;
> 
> -        if (sev->es_active)
> -                return -EPERM;
> -
>          if (sev->handle != 0)
>                  return -EPERM;
> 
> @@ -1254,6 +1370,14 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
> 
>          memcpy(&old_info, sev, sizeof(old_info));
> 
> +        if (entry->es_enabled) {
> +                if (process_vmsa_list(kvm, &entry->vmsa_list))
> +                        goto err_unlock;
> +
> +                sev->es_active = true;
> +                sev->ap_jump_table = entry->ap_jump_table;
> +        }
> +
>          /*
>           * The source VM always frees @entry On the target we simply
>           * mark the token as invalid to notify the source the sev info
> @@ -2046,12 +2170,22 @@ void sev_vm_destroy(struct kvm *kvm)
>                  __unregister_region_list_locked(kvm, &sev->regions_list);
>          }
> 
> -        /*
> -         * If userspace was terminated before unregistering the memory
> -         * regions then lets unpin all the registered memory.
> -         */
> -        if (entry)
> +        if (entry) {
> +                /*
> +                 * If there are any saved VMSAs, restore them so they can be
> +                 * destructed through the normal path.
> +                 */
> +                if (entry->es_enabled)
> +                        if (process_vmsa_list(kvm, &entry->vmsa_list))
> +                                WARN(1,
> +                                     "Unable to clean up vmsa_list");

More code that can be zapped if process_vmsa_list() is less of a zealot.

> +
> +                /*
> +                 * If userspace was terminated before unregistering the memory
> +                 * regions then lets unpin all the registered memory.
> +                 */
>                  __unregister_region_list_locked(kvm, &entry->regions_list);
> +        }
> 
>          mutex_unlock(&kvm->lock);
> 
> @@ -2243,9 +2377,11 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
> 
>          svm = to_svm(vcpu);
> 
> -        if (vcpu->arch.guest_state_protected)
> +        if (svm->ghcb && vcpu->arch.guest_state_protected)
>                  sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
> -        __free_page(virt_to_page(svm->vmsa));
> +
> +        if (svm->vmsa)
> +                __free_page(virt_to_page(svm->vmsa));
> 
>          if (svm->ghcb_sa_free)
>                  kfree(svm->ghcb_sa);
> -- 
> 2.32.0.288.g62a8d224e6-goog
> 