Re: [PATCH] vfio: align capability structures

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Aug 03, 2023 at 03:18:23PM -0600, Alex Williamson wrote:
> On Thu,  3 Aug 2023 10:41:09 -0400
> Stefan Hajnoczi <stefanha@xxxxxxxxxx> wrote:
> 
> > The VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, and
> > VFIO_IOMMU_GET_INFO ioctls fill in an info struct followed by capability
> > structs:
> > 
> >   +------+---------+---------+-----+
> >   | info | caps[0] | caps[1] | ... |
> >   +------+---------+---------+-----+
> > 
> > Both the info and capability struct sizes are not always multiples of
> > sizeof(u64), leaving u64 fields in later capability structs misaligned.
> > 
> > Userspace applications currently need to handle misalignment manually in
> > order to support CPU architectures and programming languages with strict
> > alignment requirements.
> > 
> > Make life easier for userspace by ensuring alignment in the kernel.
> > The new layout is as follows:
> > 
> >   +------+---+---------+---------+---+-----+
> >   | info | 0 | caps[0] | caps[1] | 0 | ... |
> >   +------+---+---------+---------+---+-----+
> > 
> > In this example info and caps[1] have sizes that are not multiples of
> > sizeof(u64), so zero padding is added to align the subsequent structure.
> > 
> > Adding zero padding between structs does not break the uapi. The memory
> > layout is specified by the info.cap_offset and caps[i].next fields
> > filled in by the kernel. Applications use these field values to locate
> > structs and are therefore unaffected by the addition of zero padding.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
> > ---
> >  include/linux/vfio.h             |  2 +-
> >  drivers/gpu/drm/i915/gvt/kvmgt.c |  7 +++--
> >  drivers/s390/cio/vfio_ccw_ops.c  |  7 +++--
> >  drivers/vfio/pci/vfio_pci_core.c | 14 ++++++---
> >  drivers/vfio/vfio_iommu_type1.c  |  7 +++--
> >  drivers/vfio/vfio_main.c         | 53 +++++++++++++++++++++++++++-----
> >  6 files changed, 71 insertions(+), 19 deletions(-)
> > 
> > diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> > index 2c137ea94a3e..ff0864e73cc3 100644
> > --- a/include/linux/vfio.h
> > +++ b/include/linux/vfio.h
> > @@ -272,7 +272,7 @@ struct vfio_info_cap {
> >  struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
> >  					       size_t size, u16 id,
> >  					       u16 version);
> > -void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
> > +ssize_t vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
> >  
> >  int vfio_info_add_capability(struct vfio_info_cap *caps,
> >  			     struct vfio_info_cap_header *cap, size_t size);
> > diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> > index de675d799c7d..9060e9c6ac7c 100644
> > --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> > +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> > @@ -1297,7 +1297,10 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
> >  				info.argsz = sizeof(info) + caps.size;
> >  				info.cap_offset = 0;
> >  			} else {
> > -				vfio_info_cap_shift(&caps, sizeof(info));
> > +				ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
> > +				if (cap_offset < 0)
> > +					return cap_offset;
> > +
> >  				if (copy_to_user((void __user *)arg +
> >  						  sizeof(info), caps.buf,
> >  						  caps.size)) {
> > @@ -1305,7 +1308,7 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
> >  					kfree(sparse);
> >  					return -EFAULT;
> >  				}
> > -				info.cap_offset = sizeof(info);
> > +				info.cap_offset = cap_offset;
> 
> The copy_to_user() above needs to be modified to make this true:
> 
> 	copy_to_user((void __user *)arg + cap_offset,...
> 
> Same for all similar below.

vfio_info_cap_shift() inserts zero padding before the first capability
in order to achieve alignment. The zero padding is part of caps.buf, not
the info struct. Therefore the copy_to_user((void __user *)arg +
sizeof(info), ...) is correct.

It's confusing though. Your suggestion for simplifying alignment below
looks good and will avoid this issue.

> 
> >  			}
> >  
> >  			kfree(caps.buf);
> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> > index 5b53b94f13c7..63d5163376a5 100644
> > --- a/drivers/s390/cio/vfio_ccw_ops.c
> > +++ b/drivers/s390/cio/vfio_ccw_ops.c
> > @@ -361,13 +361,16 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_ccw_private *private,
> >  			info->argsz = sizeof(*info) + caps.size;
> >  			info->cap_offset = 0;
> >  		} else {
> > -			vfio_info_cap_shift(&caps, sizeof(*info));
> > +			ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(*info));
> > +			if (cap_offset < 0)
> > +				return cap_offset;
> > +
> >  			if (copy_to_user((void __user *)arg + sizeof(*info),
> >  					 caps.buf, caps.size)) {
> >  				kfree(caps.buf);
> >  				return -EFAULT;
> >  			}
> > -			info->cap_offset = sizeof(*info);
> > +			info->cap_offset = cap_offset;
> >  		}
> >  
> >  		kfree(caps.buf);
> > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> > index 20d7b69ea6ff..92c093b99187 100644
> > --- a/drivers/vfio/pci/vfio_pci_core.c
> > +++ b/drivers/vfio/pci/vfio_pci_core.c
> > @@ -966,12 +966,15 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
> >  		if (info.argsz < sizeof(info) + caps.size) {
> >  			info.argsz = sizeof(info) + caps.size;
> >  		} else {
> > -			vfio_info_cap_shift(&caps, sizeof(info));
> > +			ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
> > +			if (cap_offset < 0)
> > +				return cap_offset;
> > +
> >  			if (copy_to_user(arg + 1, caps.buf, caps.size)) {
> >  				kfree(caps.buf);
> >  				return -EFAULT;
> >  			}
> > -			info.cap_offset = sizeof(*arg);
> > +			info.cap_offset = cap_offset;
> >  		}
> >  
> >  		kfree(caps.buf);
> > @@ -1107,12 +1110,15 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
> >  			info.argsz = sizeof(info) + caps.size;
> >  			info.cap_offset = 0;
> >  		} else {
> > -			vfio_info_cap_shift(&caps, sizeof(info));
> > +			ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
> > +			if (cap_offset < 0)
> > +				return cap_offset;
> > +
> >  			if (copy_to_user(arg + 1, caps.buf, caps.size)) {
> >  				kfree(caps.buf);
> >  				return -EFAULT;
> >  			}
> > -			info.cap_offset = sizeof(*arg);
> > +			info.cap_offset = cap_offset;
> >  		}
> >  
> >  		kfree(caps.buf);
> > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> > index ebe0ad31d0b0..ab64b9e3ed7c 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -2808,14 +2808,17 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> >  		if (info.argsz < sizeof(info) + caps.size) {
> >  			info.argsz = sizeof(info) + caps.size;
> >  		} else {
> > -			vfio_info_cap_shift(&caps, sizeof(info));
> > +			ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
> > +			if (cap_offset < 0)
> > +				return cap_offset;
> > +
> >  			if (copy_to_user((void __user *)arg +
> >  					sizeof(info), caps.buf,
> >  					caps.size)) {
> >  				kfree(caps.buf);
> >  				return -EFAULT;
> >  			}
> > -			info.cap_offset = sizeof(info);
> > +			info.cap_offset = cap_offset;
> >  		}
> >  
> >  		kfree(caps.buf);
> > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> > index f0ca33b2e1df..4fc8698577a7 100644
> > --- a/drivers/vfio/vfio_main.c
> > +++ b/drivers/vfio/vfio_main.c
> > @@ -1171,8 +1171,18 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
> >  {
> >  	void *buf;
> >  	struct vfio_info_cap_header *header, *tmp;
> > +	size_t header_offset;
> > +	size_t new_size;
> >  
> > -	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
> > +	/*
> > +	 * Reserve extra space when the previous capability was not a multiple of
> > +	 * the largest field size. This ensures that capabilities are properly
> > +	 * aligned.
> > +	 */
> 
> If we simply start with:
> 
> 	size = ALIGN(size, sizeof(u64));
> 
> then shouldn't there never be a previous misaligned size to correct?

Yes, I applied padding at the beginning but it could be applied at the
end instead.

> I wonder if we really need all this complexity, we're drawing from a
> finite set of info structs for the initial alignment, we can pad those
> without breaking the uapi and we can introduce a warning to avoid such
> poor alignment in the future.  Allocating an aligned size for each
> capability is then sufficiently trivial to handle runtime.  ex:
> 
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 902f06e52c48..2d074cbd371d 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -1362,6 +1362,8 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
>  	void *buf;
>  	struct vfio_info_cap_header *header, *tmp;
>  
> +	size = ALIGN(size, sizeof(u64));
> +
>  	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
>  	if (!buf) {
>  		kfree(caps->buf);
> @@ -1395,6 +1397,8 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>  	struct vfio_info_cap_header *tmp;
>  	void *buf = (void *)caps->buf;
>  
> +	WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));
> +
>  	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
>  		tmp->next += offset;
>  }
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index fa06e3eb4955..fd2761841ffe 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -217,6 +217,7 @@ struct vfio_device_info {
>  	__u32	num_regions;	/* Max region index + 1 */
>  	__u32	num_irqs;	/* Max IRQ index + 1 */
>  	__u32   cap_offset;	/* Offset within info struct of first cap */
> +	__u32	pad;		/* Size must be aligned for caps */
>  };
>  #define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
>  
> @@ -1444,6 +1445,7 @@ struct vfio_iommu_type1_info {
>  #define VFIO_IOMMU_INFO_CAPS	(1 << 1)	/* Info supports caps */
>  	__u64	iova_pgsizes;	/* Bitmap of supported page sizes */
>  	__u32   cap_offset;	/* Offset within info struct of first cap */
> +	__u32	pad;		/* Size must be aligned for caps */
>  };
>  
>  /*

Yes, that's much simpler. I'll send another revision that follows that
approach.

Stefan

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux