Re: [PATCH] KVM: PPC: Book3S HV: XICS: Replace the 'destroy' method by a 'release' method

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2 Sep 2020 09:26:06 +0200
Cédric Le Goater <clg@xxxxxxxx> wrote:

> On 8/10/20 12:08 PM, Greg Kurz wrote:
> > Similarly to what was done with XICS-on-XIVE and XIVE native KVM devices
> > with commit 5422e95103cf ("KVM: PPC: Book3S HV: XIVE: Replace the 'destroy'
> > method by a 'release' method"), convert the historical XICS KVM device to
> > implement the 'release' method. This is needed to run nested guests with
> > an in-kernel IRQ chip. A typical POWER9 guest can select XICS or XIVE
> > during boot, which requires to be able to destroy and to re-create the
> > KVM device. Only the historical XICS KVM device is available under pseries
> > at the current time and it still uses the legacy 'destroy' method.
> > 
> > Switching to 'release' means that vCPUs might still be running when the
> > device is destroyed. In order to avoid potential use-after-free, the
> > kvmppc_xics structure is allocated on first usage and kept around until
> > the VM exits. The same pointer is used each time a KVM XICS device is
> > being created, but this is okay since we only have one per VM.
> > 
> > Clear the ICP of each vCPU with vcpu->mutex held. This ensures that the
> > next time the vCPU resumes execution, it won't be going into the XICS
> > code anymore.
> > 
> > Signed-off-by: Greg Kurz <groug@xxxxxxxx>
> 
> Reviewed-by: Cédric Le Goater <clg@xxxxxxxx>
> 
> and on a P8 host, 
> 
> Tested-by: Cédric Le Goater <clg@xxxxxxxx>
> 
> Thanks,
> 
> C. 
> 

Thanks for the review and testing !

Cheers,

--
Greg

> > ---
> >  arch/powerpc/include/asm/kvm_host.h |    1 
> >  arch/powerpc/kvm/book3s.c           |    4 +-
> >  arch/powerpc/kvm/book3s_xics.c      |   86 ++++++++++++++++++++++++++++-------
> >  3 files changed, 72 insertions(+), 19 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> > index e020d269416d..974adda2ec94 100644
> > --- a/arch/powerpc/include/asm/kvm_host.h
> > +++ b/arch/powerpc/include/asm/kvm_host.h
> > @@ -325,6 +325,7 @@ struct kvm_arch {
> >  #endif
> >  #ifdef CONFIG_KVM_XICS
> >  	struct kvmppc_xics *xics;
> > +	struct kvmppc_xics *xics_device;
> >  	struct kvmppc_xive *xive;    /* Current XIVE device in use */
> >  	struct {
> >  		struct kvmppc_xive *native;
> > diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> > index 41fedec69ac3..56618c2770e1 100644
> > --- a/arch/powerpc/kvm/book3s.c
> > +++ b/arch/powerpc/kvm/book3s.c
> > @@ -878,13 +878,15 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
> >  
> >  #ifdef CONFIG_KVM_XICS
> >  	/*
> > -	 * Free the XIVE devices which are not directly freed by the
> > +	 * Free the XIVE and XICS devices which are not directly freed by the
> >  	 * device 'release' method
> >  	 */
> >  	kfree(kvm->arch.xive_devices.native);
> >  	kvm->arch.xive_devices.native = NULL;
> >  	kfree(kvm->arch.xive_devices.xics_on_xive);
> >  	kvm->arch.xive_devices.xics_on_xive = NULL;
> > +	kfree(kvm->arch.xics_device);
> > +	kvm->arch.xics_device = NULL;
> >  #endif /* CONFIG_KVM_XICS */
> >  }
> >  
> > diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
> > index 381bf8dea193..5fee5a11550d 100644
> > --- a/arch/powerpc/kvm/book3s_xics.c
> > +++ b/arch/powerpc/kvm/book3s_xics.c
> > @@ -1334,47 +1334,97 @@ static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> >  	return -ENXIO;
> >  }
> >  
> > -static void kvmppc_xics_free(struct kvm_device *dev)
> > +/*
> > + * Called when device fd is closed. kvm->lock is held.
> > + */
> > +static void kvmppc_xics_release(struct kvm_device *dev)
> >  {
> >  	struct kvmppc_xics *xics = dev->private;
> >  	int i;
> >  	struct kvm *kvm = xics->kvm;
> > +	struct kvm_vcpu *vcpu;
> > +
> > +	pr_devel("Releasing xics device\n");
> > +
> > +	/*
> > +	 * Since this is the device release function, we know that
> > +	 * userspace does not have any open fd referring to the
> > +	 * device.  Therefore there can not be any of the device
> > +	 * attribute set/get functions being executed concurrently,
> > +	 * and similarly, the connect_vcpu and set/clr_mapped
> > +	 * functions also cannot be being executed.
> > +	 */
> >  
> >  	debugfs_remove(xics->dentry);
> >  
> > +	/*
> > +	 * We should clean up the vCPU interrupt presenters first.
> > +	 */
> > +	kvm_for_each_vcpu(i, vcpu, kvm) {
> > +		/*
> > +		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
> > +		 * (i.e. kvmppc_xics_[gs]et_icp) can be done concurrently.
> > +		 * Holding the vcpu->mutex also means that execution is
> > +		 * excluded for the vcpu until the ICP was freed. When the vcpu
> > +		 * can execute again, vcpu->arch.icp and vcpu->arch.irq_type
> > +		 * have been cleared and the vcpu will not be going into the
> > +		 * XICS code anymore.
> > +		 */
> > +		mutex_lock(&vcpu->mutex);
> > +		kvmppc_xics_free_icp(vcpu);
> > +		mutex_unlock(&vcpu->mutex);
> > +	}
> > +
> >  	if (kvm)
> >  		kvm->arch.xics = NULL;
> >  
> > -	for (i = 0; i <= xics->max_icsid; i++)
> > +	for (i = 0; i <= xics->max_icsid; i++) {
> >  		kfree(xics->ics[i]);
> > -	kfree(xics);
> > +		xics->ics[i] = NULL;
> > +	}
> > +	/*
> > +	 * A reference of the kvmppc_xics pointer is now kept under
> > +	 * the xics_device pointer of the machine for reuse. It is
> > +	 * freed when the VM is destroyed for now until we fix all the
> > +	 * execution paths.
> > +	 */
> >  	kfree(dev);
> >  }
> >  
> > +static struct kvmppc_xics *kvmppc_xics_get_device(struct kvm *kvm)
> > +{
> > +	struct kvmppc_xics **kvm_xics_device = &kvm->arch.xics_device;
> > +	struct kvmppc_xics *xics = *kvm_xics_device;
> > +
> > +	if (!xics) {
> > +		xics = kzalloc(sizeof(*xics), GFP_KERNEL);
> > +		*kvm_xics_device = xics;
> > +	} else {
> > +		memset(xics, 0, sizeof(*xics));
> > +	}
> > +
> > +	return xics;
> > +}
> > +
> >  static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
> >  {
> >  	struct kvmppc_xics *xics;
> >  	struct kvm *kvm = dev->kvm;
> > -	int ret = 0;
> >  
> > -	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
> > +	pr_devel("Creating xics for partition\n");
> > +
> > +	/* Already there ? */
> > +	if (kvm->arch.xics)
> > +		return -EEXIST;
> > +
> > +	xics = kvmppc_xics_get_device(kvm);
> >  	if (!xics)
> >  		return -ENOMEM;
> >  
> >  	dev->private = xics;
> >  	xics->dev = dev;
> >  	xics->kvm = kvm;
> > -
> > -	/* Already there ? */
> > -	if (kvm->arch.xics)
> > -		ret = -EEXIST;
> > -	else
> > -		kvm->arch.xics = xics;
> > -
> > -	if (ret) {
> > -		kfree(xics);
> > -		return ret;
> > -	}
> > +	kvm->arch.xics = xics;
> >  
> >  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> >  	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
> > @@ -1399,7 +1449,7 @@ struct kvm_device_ops kvm_xics_ops = {
> >  	.name = "kvm-xics",
> >  	.create = kvmppc_xics_create,
> >  	.init = kvmppc_xics_init,
> > -	.destroy = kvmppc_xics_free,
> > +	.release = kvmppc_xics_release,
> >  	.set_attr = xics_set_attr,
> >  	.get_attr = xics_get_attr,
> >  	.has_attr = xics_has_attr,
> > @@ -1415,7 +1465,7 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
> >  		return -EPERM;
> >  	if (xics->kvm != vcpu->kvm)
> >  		return -EPERM;
> > -	if (vcpu->arch.irq_type)
> > +	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
> >  		return -EBUSY;
> >  
> >  	r = kvmppc_xics_create_icp(vcpu, xcpu);
> > 
> > 
> 




[Index of Archives]     [KVM Development]     [KVM ARM]     [KVM ia64]     [Linux Virtualization]     [Linux USB Devel]     [Linux Video]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux