Re: [RFC PATCH 11/21] KVM: SEV: Add TIO VMGEXIT and bind TDI

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Aug 30, 2024 at 02:00:30PM +1000, Alexey Kardashevskiy wrote:
> 
> 
> On 29/8/24 20:08, Xu Yilun wrote:
> > > diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> > > index 76b7f6085dcd..a4e9db212adc 100644
> > > --- a/virt/kvm/vfio.c
> > > +++ b/virt/kvm/vfio.c
> > > @@ -15,6 +15,7 @@
> > >   #include <linux/slab.h>
> > >   #include <linux/uaccess.h>
> > >   #include <linux/vfio.h>
> > > +#include <linux/tsm.h>
> > >   #include "vfio.h"
> > >   #ifdef CONFIG_SPAPR_TCE_IOMMU
> > > @@ -29,8 +30,14 @@ struct kvm_vfio_file {
> > >   #endif
> > >   };
> > > +struct kvm_vfio_tdi {
> > > +	struct list_head node;
> > > +	struct vfio_device *vdev;
> > > +};
> > > +
> > >   struct kvm_vfio {
> > >   	struct list_head file_list;
> > > +	struct list_head tdi_list;
> > >   	struct mutex lock;
> > >   	bool noncoherent;
> > >   };
> > > @@ -80,6 +87,22 @@ static bool kvm_vfio_file_is_valid(struct file *file)
> > >   	return ret;
> > >   }
> > > +static struct vfio_device *kvm_vfio_file_device(struct file *file)
> > > +{
> > > +	struct vfio_device *(*fn)(struct file *file);
> > > +	struct vfio_device *ret;
> > > +
> > > +	fn = symbol_get(vfio_file_device);
> > > +	if (!fn)
> > > +		return NULL;
> > > +
> > > +	ret = fn(file);
> > > +
> > > +	symbol_put(vfio_file_device);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > >   #ifdef CONFIG_SPAPR_TCE_IOMMU
> > >   static struct iommu_group *kvm_vfio_file_iommu_group(struct file *file)
> > >   {
> > > @@ -297,6 +320,103 @@ static int kvm_vfio_set_file(struct kvm_device *dev, long attr,
> > >   	return -ENXIO;
> > >   }
> > > +static int kvm_dev_tsm_bind(struct kvm_device *dev, void __user *arg)
> > > +{
> > > +	struct kvm_vfio *kv = dev->private;
> > > +	struct kvm_vfio_tsm_bind tb;
> > > +	struct kvm_vfio_tdi *ktdi;
> > > +	struct vfio_device *vdev;
> > > +	struct fd fdev;
> > > +	int ret;
> > > +
> > > +	if (copy_from_user(&tb, arg, sizeof(tb)))
> > > +		return -EFAULT;
> > > +
> > > +	ktdi = kzalloc(sizeof(*ktdi), GFP_KERNEL_ACCOUNT);
> > > +	if (!ktdi)
> > > +		return -ENOMEM;
> > > +
> > > +	fdev = fdget(tb.devfd);
> > > +	if (!fdev.file)
> > > +		return -EBADF;
> > > +
> > > +	ret = -ENOENT;
> > > +
> > > +	mutex_lock(&kv->lock);
> > > +
> > > +	vdev = kvm_vfio_file_device(fdev.file);
> > > +	if (vdev) {
> > > +		ret = kvm_arch_tsm_bind(dev->kvm, vdev->dev, tb.guest_rid);
> > > +		if (!ret) {
> > > +			ktdi->vdev = vdev;
> > > +			list_add_tail(&ktdi->node, &kv->tdi_list);
> > > +		} else {
> > > +			vfio_put_device(vdev);
> > > +		}
> > > +	}
> > > +
> > > +	fdput(fdev);
> > > +	mutex_unlock(&kv->lock);
> > > +	if (ret)
> > > +		kfree(ktdi);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static int kvm_dev_tsm_unbind(struct kvm_device *dev, void __user *arg)
> > > +{
> > > +	struct kvm_vfio *kv = dev->private;
> > > +	struct kvm_vfio_tsm_bind tb;
> > > +	struct kvm_vfio_tdi *ktdi;
> > > +	struct vfio_device *vdev;
> > > +	struct fd fdev;
> > > +	int ret;
> > > +
> > > +	if (copy_from_user(&tb, arg, sizeof(tb)))
> > > +		return -EFAULT;
> > > +
> > > +	fdev = fdget(tb.devfd);
> > > +	if (!fdev.file)
> > > +		return -EBADF;
> > > +
> > > +	ret = -ENOENT;
> > > +
> > > +	mutex_lock(&kv->lock);
> > > +
> > > +	vdev = kvm_vfio_file_device(fdev.file);
> > > +	if (vdev) {
> > > +		list_for_each_entry(ktdi, &kv->tdi_list, node) {
> > > +			if (ktdi->vdev != vdev)
> > > +				continue;
> > > +
> > > +			kvm_arch_tsm_unbind(dev->kvm, vdev->dev);
> > > +			list_del(&ktdi->node);
> > > +			kfree(ktdi);
> > > +			vfio_put_device(vdev);
> > > +			ret = 0;
> > > +			break;
> > > +		}
> > > +		vfio_put_device(vdev);
> > > +	}
> > > +
> > > +	fdput(fdev);
> > > +	mutex_unlock(&kv->lock);
> > > +	return ret;
> > > +}
> > > +
> > > +static int kvm_vfio_set_device(struct kvm_device *dev, long attr,
> > > +			       void __user *arg)
> > > +{
> > > +	switch (attr) {
> > > +	case KVM_DEV_VFIO_DEVICE_TDI_BIND:
> > > +		return kvm_dev_tsm_bind(dev, arg);
> > 
> > I think the TDI bind operation should be under the control of the device
> > owner (i.e. VFIO driver), rather than in this bridge driver.
> 
> This is a valid point, although this means teaching VFIO about the KVM
> lifetime (and KVM already holds references to VFIO groups) and

Not sure if I understand, VFIO already knows KVM lifetime via
vfio_device_get_kvm_safe(), is it?

> guest BDFns (which have no meaning for VFIO in the host kernel).

KVM is not aware of the guest BDF today.

I think we need to pass a firmware recognizable TDI identifier, which
is actually a magic number and specific to vendors. For TDX, it is the
FUNCTION_ID. So I didn't think too much to whom the identifier is
meaningful.

> 
> > The TDI bind
> > means TDI would be transitioned to CONFIG_LOCKED state, and a bunch of
> > device configurations breaks the state (TDISP spec 11.4.5/8/9). So the
> > VFIO driver should be fully aware of the TDI bind and manage unwanted
> > breakage.
> 
> VFIO has no control over TDI any way, cannot even know what state it is in
> without talking to the firmware.

I think VFIO could talk to the firmware, that's part of the reason we are
working on the TSM module independent to KVM.

> When TDI goes into ERROR, this needs to be
> propagated to the VM. At the moment (afaik) it does not tell the

I assume when TDISP ERROR happens, an interrupt (e.g. AER) would be sent
to OS and VFIO driver is the one who handles it in the first place. So
maybe there has to be some TDI stuff in VFIO?

Thanks,
Yilun

> userspace/guest about IOMMU errors and it probably should but the existing
> mechanism should be able to do so. Thanks,
> 
> 
> > 
> > Thanks,
> > Yilun
> 
> -- 
> Alexey
> 




[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux