On Mon, Jun 21, 2021, Peter Gonda wrote:
> +static int process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
> +{
> +        struct vmsa_node *vmsa_node, *q;
> +        struct kvm_vcpu *vcpu;
> +        struct vcpu_svm *svm;
> +
> +        lockdep_assert_held(&kvm->lock);
> +
> +        if (!vmsa_list)

This is pointless, all callers pass in a list, i.e. it's mandatory.

> +                return 0;
> +
> +        list_for_each_entry(vmsa_node, vmsa_list, list) {
> +                if (!kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id)) {
> +                        WARN(1,
> +                             "Failed to find VCPU with ID %d despite presence in VMSA list.\n",
> +                             vmsa_node->vcpu_id);
> +                        return -1;
> +                }
> +        }
> +
> +        /*
> +         * Move any stashed VMSAs back to their respective VMCBs and delete
> +         * those nodes.
> +         */
> +        list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
> +                vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);

Barring a KVM bug, is it even theoretically possible for vcpu to be NULL?
If not, I'd simply drop the above sanity check.  If this can only be true
if there's a KVM bug and you really want to keep the WARN, just do:

        if (WARN_ON(!vcpu))
                continue;

since a KVM bug this egregious means all bets are off anyways.  That should
also allow you to make this a void returning helper and avoid pointless
checking.

> +                svm = to_svm(vcpu);
> +                svm->vmsa = vmsa_node->vmsa;
> +                svm->ghcb = vmsa_node->ghcb;
> +                svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
> +                svm->vcpu.arch.guest_state_protected = true;
> +                svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
> +                svm->ghcb_sa = vmsa_node->ghcb_sa;
> +                svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
> +                svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
> +                svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;
> +
> +                list_del(&vmsa_node->list);
> +                kfree(vmsa_node);
> +        }
> +
> +        return 0;
> +}
> +
> +static int create_vmsa_list(struct kvm *kvm,
> +                            struct sev_info_migration_node *entry)
> +{
> +        int i;
> +        const int num_vcpus = atomic_read(&kvm->online_vcpus);
> +        struct vmsa_node *node;
> +        struct kvm_vcpu *vcpu;
> +        struct vcpu_svm *svm;
> +
> +        INIT_LIST_HEAD(&entry->vmsa_list);
> +        for (i = 0; i < num_vcpus; ++i) {
> +                node = kzalloc(sizeof(*node), GFP_KERNEL);
> +                if (!node)
> +                        goto e_freelist;
> +
> +                vcpu = kvm->vcpus[i];
> +                node->vcpu_id = vcpu->vcpu_id;
> +
> +                svm = to_svm(vcpu);
> +                node->vmsa = svm->vmsa;
> +                svm->vmsa = NULL;
> +                node->ghcb = svm->ghcb;
> +                svm->ghcb = NULL;
> +                node->ghcb_gpa = svm->vmcb->control.ghcb_gpa;
> +                node->ghcb_sa = svm->ghcb_sa;
> +                svm->ghcb_sa = NULL;
> +                node->ghcb_sa_len = svm->ghcb_sa_len;
> +                svm->ghcb_sa_len = 0;
> +                node->ghcb_sa_sync = svm->ghcb_sa_sync;
> +                svm->ghcb_sa_sync = false;
> +                node->ghcb_sa_free = svm->ghcb_sa_free;
> +                svm->ghcb_sa_free = false;
> +
> +                list_add_tail(&node->list, &entry->vmsa_list);
> +        }
> +
> +        return 0;
> +
> +e_freelist:
> +        if (process_vmsa_list(kvm, &entry->vmsa_list))
> +                WARN(1, "Unable to move VMSA list back to source VM. Guest is in a broken state now.");

Same comments about e_freelist and using WARN_ON().  Though if
process_vmsa_list() can't return an error, this goes away entirely.
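
Completely untested sketch, but if the lookup really can't fail (or is
WARN_ON'ed and skipped), the whole helper collapses down to something like:

        static void process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
        {
                struct vmsa_node *vmsa_node, *q;
                struct kvm_vcpu *vcpu;
                struct vcpu_svm *svm;

                lockdep_assert_held(&kvm->lock);

                /*
                 * Move any stashed VMSAs back to their respective VMCBs and
                 * delete those nodes.
                 */
                list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
                        vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);
                        if (WARN_ON(!vcpu))
                                continue;

                        svm = to_svm(vcpu);
                        svm->vmsa = vmsa_node->vmsa;
                        svm->ghcb = vmsa_node->ghcb;
                        svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
                        svm->vcpu.arch.guest_state_protected = true;
                        svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
                        svm->ghcb_sa = vmsa_node->ghcb_sa;
                        svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
                        svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
                        svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;

                        list_del(&vmsa_node->list);
                        kfree(vmsa_node);
                }
        }

i.e. drop the NULL-list check and the pre-validation pass entirely, and
then the "Unable to move VMSA list back to source VM" WARNs in the callers
disappear along with the return value.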
> +        return -1;
> +}
> +
>  static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  {
>          struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1174,9 +1280,6 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>          if (!sev_guest(kvm))
>                  return -ENOTTY;
> 
> -        if (sev->es_active)
> -                return -EPERM;
> -
>          if (sev->info_token != 0)
>                  return -EEXIST;
> 
> @@ -1196,8 +1299,19 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>          INIT_LIST_HEAD(&entry->regions_list);
>          list_replace_init(&sev->regions_list, &entry->regions_list);
> 
> +        if (sev_es_guest(kvm)) {
> +                /*
> +                 * If this is an ES guest, we need to move each VMCB's VMSA into a
> +                 * list for migration.
> +                 */
> +                entry->es_enabled = true;
> +                entry->ap_jump_table = sev->ap_jump_table;
> +                if (create_vmsa_list(kvm, entry))
> +                        goto e_listdel;
> +        }
> +
>          if (place_migration_node(entry))
> -                goto e_listdel;
> +                goto e_vmsadel;
> 
>          token = entry->token;
> 
> @@ -1215,6 +1329,11 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>          hash_del(&entry->hnode);
>          spin_unlock(&sev_info_migration_hash_lock);
> 
> +e_vmsadel:
> +        if (sev_es_guest(kvm) && process_vmsa_list(kvm, &entry->vmsa_list))
> +                WARN(1,
> +                     "Unable to move VMSA list back to source VM. Guest is in a broken state now.");

Guess what today's Final Jeopardy answer is?  :-D

> +
>  e_listdel:
>          list_replace_init(&entry->regions_list, &sev->regions_list);
> 
> @@ -1233,9 +1352,6 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
>          if (!sev_guest(kvm))
>                  return -ENOTTY;
> 
> -        if (sev->es_active)
> -                return -EPERM;
> -
>          if (sev->handle != 0)
>                  return -EPERM;
> 
> @@ -1254,6 +1370,14 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
> 
>          memcpy(&old_info, sev, sizeof(old_info));
> 
> +        if (entry->es_enabled) {
> +                if (process_vmsa_list(kvm, &entry->vmsa_list))
> +                        goto err_unlock;
> +
> +                sev->es_active = true;
> +                sev->ap_jump_table = entry->ap_jump_table;
> +        }
> +
>          /*
>           * The source VM always frees @entry On the target we simply
>           * mark the token as invalid to notify the source the sev info
> @@ -2046,12 +2170,22 @@ void sev_vm_destroy(struct kvm *kvm)
>                  __unregister_region_list_locked(kvm, &sev->regions_list);
>          }
> 
> -        /*
> -         * If userspace was terminated before unregistering the memory
> -         * regions then lets unpin all the registered memory.
> -         */
> -        if (entry)
> +        if (entry) {
> +                /*
> +                 * If there are any saved VMSAs, restore them so they can be
> +                 * destructed through the normal path.
> +                 */
> +                if (entry->es_enabled)
> +                        if (process_vmsa_list(kvm, &entry->vmsa_list))
> +                                WARN(1,
> +                                     "Unable to clean up vmsa_list");

More code that can be zapped if process_vmsa_list() is less of a zealot.

> +
> +                /*
> +                 * If userspace was terminated before unregistering the memory
> +                 * regions then lets unpin all the registered memory.
> +                 */
>                  __unregister_region_list_locked(kvm, &entry->regions_list);
> +        }
> 
>          mutex_unlock(&kvm->lock);
> 
> @@ -2243,9 +2377,11 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
> 
>          svm = to_svm(vcpu);
> 
> -        if (vcpu->arch.guest_state_protected)
> +        if (svm->ghcb && vcpu->arch.guest_state_protected)
>                  sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
> -        __free_page(virt_to_page(svm->vmsa));
> +
> +        if (svm->vmsa)
> +                __free_page(virt_to_page(svm->vmsa));
> 
>          if (svm->ghcb_sa_free)
>                  kfree(svm->ghcb_sa);
> -- 
> 2.32.0.288.g62a8d224e6-goog
> 