Re: [PATCH v6 17/17] KVM: PPC: Book3S HV: XIVE: replace the 'destroy' method by a 'release' method

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Apr 18, 2019 at 12:39:42PM +0200, Cédric Le Goater wrote:
> When a P9 sPAPR VM boots, the CAS negotiation process determines which
> interrupt mode to use (XICS legacy or XIVE native) and invokes a
> machine reset to activate the chosen mode.
> 
> We introduce 'release' methods for the XICS-on-XIVE and the XIVE
> native KVM devices which are called when the file descriptor of the
> device is closed after the TIMA and ESB pages have been unmapped.
> They perform the necessary cleanups : clear the vCPU interrupt
> presenters that could be attached and then destroy the device. The
> 'release' methods replace the 'destroy' methods as 'destroy' is not
> called anymore once 'release' is. Compatibility with older QEMU is
> nevertheless maintained.
> 
> This is not considered as a safe operation as the vCPUs are still
> running and could be referencing the KVM device through their
> presenters. To protect the system from any breakage, the kvmppc_xive
> objects representing both KVM devices are now stored in an array under
> the VM. Allocation is performed on first usage and memory is freed
> only when the VM exits.
> 
> Signed-off-by: Cédric Le Goater <clg@xxxxxxxx>

Reviewed-by: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx>

> ---
> 
>  Changes since v5 :
> 
>  - replaced the kvmppc_xive device array with named kvmppc_xive structs.
>  - in kvmppc_xive_cleanup_vcpu(), replaced xc->xive by vcpu->kvm->arch.xive
>    which is always valid in this context
>  - in kvmppc_xive_release(), removed the comment on the race. The
>    device being destroyed when the fd is closed, dev->private should
>    always be valid when used. Also removed the kick_all_cpus_sync() to
>    flush the IPIs.   
>  - in the both release operation, removed the test on &kvm->online_vcpus.
>  - devices are now freed when the machine exits.
>  
>  arch/powerpc/include/asm/kvm_host.h   |  6 ++-
>  arch/powerpc/kvm/book3s_xive.h        |  1 +
>  arch/powerpc/kvm/book3s_xive.c        | 67 ++++++++++++++++++++++++---
>  arch/powerpc/kvm/book3s_xive_native.c | 28 +++++++++--
>  arch/powerpc/kvm/powerpc.c            |  9 ++++
>  5 files changed, 99 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 9cc6abdce1b9..c7abc7c3c6cd 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -313,7 +313,11 @@ struct kvm_arch {
>  #endif
>  #ifdef CONFIG_KVM_XICS
>  	struct kvmppc_xics *xics;
> -	struct kvmppc_xive *xive;
> +	struct kvmppc_xive *xive;    /* Current XIVE device in use */
> +	struct {
> +		struct kvmppc_xive *native;
> +		struct kvmppc_xive *xics_on_xive;
> +	} xive_devices;
>  	struct kvmppc_passthru_irqmap *pimap;
>  #endif
>  	struct kvmppc_ops *kvm_ops;
> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
> index e011622dc038..426146332984 100644
> --- a/arch/powerpc/kvm/book3s_xive.h
> +++ b/arch/powerpc/kvm/book3s_xive.h
> @@ -283,6 +283,7 @@ void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb);
>  int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
>  int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
>  				  bool single_escalation);
> +struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
>  
>  #endif /* CONFIG_KVM_XICS */
>  #endif /* _KVM_PPC_BOOK3S_XICS_H */
> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index 480a3fc6b9fd..922689b768e6 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -1100,9 +1100,15 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
>  void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
>  {
>  	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> -	struct kvmppc_xive *xive = xc->xive;
> +	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>  	int i;
>  
> +	if (!kvmppc_xics_enabled(vcpu))
> +		return;
> +
> +	if (!xc)
> +		return;
> +
>  	pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
>  
>  	/* Ensure no interrupt is still routed to that VP */
> @@ -1141,6 +1147,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
>  	}
>  	/* Free the VP */
>  	kfree(xc);
> +
> +	/* Cleanup the vcpu */
> +	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
> +	vcpu->arch.xive_vcpu = NULL;
>  }
>  
>  int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
> @@ -1158,7 +1168,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>  	}
>  	if (xive->kvm != vcpu->kvm)
>  		return -EPERM;
> -	if (vcpu->arch.irq_type)
> +	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
>  		return -EBUSY;
>  	if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
>  		pr_devel("Duplicate !\n");
> @@ -1824,12 +1834,26 @@ void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
>  	}
>  }
>  
> -static void kvmppc_xive_free(struct kvm_device *dev)
> +/*
> + * Called when device fd is closed
> + */
> +static void kvmppc_xive_release(struct kvm_device *dev)
>  {
>  	struct kvmppc_xive *xive = dev->private;
>  	struct kvm *kvm = xive->kvm;
> +	struct kvm_vcpu *vcpu;
>  	int i;
>  
> +	pr_devel("Releasing xive device\n");
> +
> +	/*
> +	 * When releasing the KVM device fd, the vCPUs can still be
> +	 * running and we should clean up the vCPU interrupt
> +	 * presenters first.
> +	 */
> +	kvm_for_each_vcpu(i, vcpu, kvm)
> +		kvmppc_xive_cleanup_vcpu(vcpu);
> +
>  	debugfs_remove(xive->dentry);
>  
>  	if (kvm)
> @@ -1846,11 +1870,42 @@ static void kvmppc_xive_free(struct kvm_device *dev)
>  	if (xive->vp_base != XIVE_INVALID_VP)
>  		xive_native_free_vp_block(xive->vp_base);
>  
> +	/*
> +	 * A reference of the kvmppc_xive pointer is now kept under
> +	 * the xive_devices struct of the machine for reuse. It is
> +	 * freed when the VM is destroyed for now until we fix all the
> +	 * execution paths.
> +	 */
>  
> -	kfree(xive);
>  	kfree(dev);
>  }
>  
> +/*
> + * When the guest chooses the interrupt mode (XICS legacy or XIVE
> + * native), the VM will switch of KVM device. The previous device will
> + * be "released" before the new one is created.
> + *
> + * Until we are sure all execution paths are well protected, provide a
> + * fail safe (transitional) method for device destruction, in which
> + * the XIVE device pointer is recycled and not directly freed.
> + */
> +struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
> +{
> +	struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ?
> +		&kvm->arch.xive_devices.native :
> +		&kvm->arch.xive_devices.xics_on_xive;
> +	struct kvmppc_xive *xive = *kvm_xive_device;
> +
> +	if (!xive) {
> +		xive = kzalloc(sizeof(*xive), GFP_KERNEL);
> +		*kvm_xive_device = xive;
> +	} else {
> +		memset(xive, 0, sizeof(*xive));
> +	}
> +
> +	return xive;
> +}
> +
>  static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
>  {
>  	struct kvmppc_xive *xive;
> @@ -1859,7 +1914,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
>  
>  	pr_devel("Creating xive for partition\n");
>  
> -	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
> +	xive = kvmppc_xive_get_device(kvm, type);
>  	if (!xive)
>  		return -ENOMEM;
>  
> @@ -2024,7 +2079,7 @@ struct kvm_device_ops kvm_xive_ops = {
>  	.name = "kvm-xive",
>  	.create = kvmppc_xive_create,
>  	.init = kvmppc_xive_init,
> -	.destroy = kvmppc_xive_free,
> +	.release = kvmppc_xive_release,
>  	.set_attr = xive_set_attr,
>  	.get_attr = xive_get_attr,
>  	.has_attr = xive_has_attr,
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> index 62648f833adf..0497272a72fa 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -964,15 +964,27 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>  	return -ENXIO;
>  }
>  
> -static void kvmppc_xive_native_free(struct kvm_device *dev)
> +/*
> + * Called when device fd is closed
> + */
> +static void kvmppc_xive_native_release(struct kvm_device *dev)
>  {
>  	struct kvmppc_xive *xive = dev->private;
>  	struct kvm *kvm = xive->kvm;
> +	struct kvm_vcpu *vcpu;
>  	int i;
>  
>  	debugfs_remove(xive->dentry);
>  
> -	pr_devel("Destroying xive native device\n");
> +	pr_devel("Releasing xive native device\n");
> +
> +	/*
> +	 * When releasing the KVM device fd, the vCPUs can still be
> +	 * running and we should clean up the vCPU interrupt
> +	 * presenters first.
> +	 */
> +	kvm_for_each_vcpu(i, vcpu, kvm)
> +		kvmppc_xive_native_cleanup_vcpu(vcpu);
>  
>  	if (kvm)
>  		kvm->arch.xive = NULL;
> @@ -987,7 +999,13 @@ static void kvmppc_xive_native_free(struct kvm_device *dev)
>  	if (xive->vp_base != XIVE_INVALID_VP)
>  		xive_native_free_vp_block(xive->vp_base);
>  
> -	kfree(xive);
> +	/*
> +	 * A reference of the kvmppc_xive pointer is now kept under
> +	 * the xive_devices struct of the machine for reuse. It is
> +	 * freed when the VM is destroyed for now until we fix all the
> +	 * execution paths.
> +	 */
> +
>  	kfree(dev);
>  }
>  
> @@ -1002,7 +1020,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
>  	if (kvm->arch.xive)
>  		return -EEXIST;
>  
> -	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
> +	xive = kvmppc_xive_get_device(kvm, type);
>  	if (!xive)
>  		return -ENOMEM;
>  
> @@ -1182,7 +1200,7 @@ struct kvm_device_ops kvm_xive_native_ops = {
>  	.name = "kvm-xive-native",
>  	.create = kvmppc_xive_native_create,
>  	.init = kvmppc_xive_native_init,
> -	.destroy = kvmppc_xive_native_free,
> +	.release = kvmppc_xive_native_release,
>  	.set_attr = kvmppc_xive_native_set_attr,
>  	.get_attr = kvmppc_xive_native_get_attr,
>  	.has_attr = kvmppc_xive_native_has_attr,
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index f54926c78320..7f41a68d8094 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -501,6 +501,15 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  
>  	mutex_unlock(&kvm->lock);
>  
> +	/*
> +	 * Free the XIVE devices which are not directly freed by the
> +	 * device 'release' method
> +	 */
> +	kfree(kvm->arch.xive_devices.native);
> +	kvm->arch.xive_devices.native = NULL;
> +	kfree(kvm->arch.xive_devices.xics_on_xive);
> +	kvm->arch.xive_devices.xics_on_xive = NULL;
> +
>  	/* drop the module reference */
>  	module_put(kvm->arch.kvm_ops->owner);
>  }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [KVM Development]     [KVM ARM]     [KVM ia64]     [Linux Virtualization]     [Linux USB Devel]     [Linux Video]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux