Re: [PATCH] vfio: align capability structures

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu,  3 Aug 2023 10:41:09 -0400
Stefan Hajnoczi <stefanha@xxxxxxxxxx> wrote:

> The VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, and
> VFIO_IOMMU_GET_INFO ioctls fill in an info struct followed by capability
> structs:
> 
>   +------+---------+---------+-----+
>   | info | caps[0] | caps[1] | ... |
>   +------+---------+---------+-----+
> 
> The sizes of the info struct and of the capability structs are not always
> multiples of sizeof(u64), leaving u64 fields in later capability structs
> misaligned.
> 
> Userspace applications currently need to handle misalignment manually in
> order to support CPU architectures and programming languages with strict
> alignment requirements.
> 
> Make life easier for userspace by ensuring alignment in the kernel.
> The new layout is as follows:
> 
>   +------+---+---------+---------+---+-----+
>   | info | 0 | caps[0] | caps[1] | 0 | ... |
>   +------+---+---------+---------+---+-----+
> 
> In this example info and caps[1] have sizes that are not multiples of
> sizeof(u64), so zero padding is added to align the subsequent structure.
> 
> Adding zero padding between structs does not break the uapi. The memory
> layout is specified by the info.cap_offset and caps[i].next fields
> filled in by the kernel. Applications use these field values to locate
> structs and are therefore unaffected by the addition of zero padding.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
> ---
>  include/linux/vfio.h             |  2 +-
>  drivers/gpu/drm/i915/gvt/kvmgt.c |  7 +++--
>  drivers/s390/cio/vfio_ccw_ops.c  |  7 +++--
>  drivers/vfio/pci/vfio_pci_core.c | 14 ++++++---
>  drivers/vfio/vfio_iommu_type1.c  |  7 +++--
>  drivers/vfio/vfio_main.c         | 53 +++++++++++++++++++++++++++-----
>  6 files changed, 71 insertions(+), 19 deletions(-)
> 
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 2c137ea94a3e..ff0864e73cc3 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -272,7 +272,7 @@ struct vfio_info_cap {
>  struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
>  					       size_t size, u16 id,
>  					       u16 version);
> -void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
> +ssize_t vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
>  
>  int vfio_info_add_capability(struct vfio_info_cap *caps,
>  			     struct vfio_info_cap_header *cap, size_t size);
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index de675d799c7d..9060e9c6ac7c 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -1297,7 +1297,10 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
>  				info.argsz = sizeof(info) + caps.size;
>  				info.cap_offset = 0;
>  			} else {
> -				vfio_info_cap_shift(&caps, sizeof(info));
> +				ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
> +				if (cap_offset < 0)
> +					return cap_offset;
> +
>  				if (copy_to_user((void __user *)arg +
>  						  sizeof(info), caps.buf,
>  						  caps.size)) {
> @@ -1305,7 +1308,7 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
>  					kfree(sparse);
>  					return -EFAULT;
>  				}
> -				info.cap_offset = sizeof(info);
> +				info.cap_offset = cap_offset;

The copy_to_user() above needs to be modified to make this true:

	copy_to_user((void __user *)arg + cap_offset,...

The same applies to all of the similar call sites below.

>  			}
>  
>  			kfree(caps.buf);
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index 5b53b94f13c7..63d5163376a5 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -361,13 +361,16 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_ccw_private *private,
>  			info->argsz = sizeof(*info) + caps.size;
>  			info->cap_offset = 0;
>  		} else {
> -			vfio_info_cap_shift(&caps, sizeof(*info));
> +			ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(*info));
> +			if (cap_offset < 0)
> +				return cap_offset;
> +
>  			if (copy_to_user((void __user *)arg + sizeof(*info),
>  					 caps.buf, caps.size)) {
>  				kfree(caps.buf);
>  				return -EFAULT;
>  			}
> -			info->cap_offset = sizeof(*info);
> +			info->cap_offset = cap_offset;
>  		}
>  
>  		kfree(caps.buf);
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 20d7b69ea6ff..92c093b99187 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -966,12 +966,15 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
>  		if (info.argsz < sizeof(info) + caps.size) {
>  			info.argsz = sizeof(info) + caps.size;
>  		} else {
> -			vfio_info_cap_shift(&caps, sizeof(info));
> +			ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
> +			if (cap_offset < 0)
> +				return cap_offset;
> +
>  			if (copy_to_user(arg + 1, caps.buf, caps.size)) {
>  				kfree(caps.buf);
>  				return -EFAULT;
>  			}
> -			info.cap_offset = sizeof(*arg);
> +			info.cap_offset = cap_offset;
>  		}
>  
>  		kfree(caps.buf);
> @@ -1107,12 +1110,15 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
>  			info.argsz = sizeof(info) + caps.size;
>  			info.cap_offset = 0;
>  		} else {
> -			vfio_info_cap_shift(&caps, sizeof(info));
> +			ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
> +			if (cap_offset < 0)
> +				return cap_offset;
> +
>  			if (copy_to_user(arg + 1, caps.buf, caps.size)) {
>  				kfree(caps.buf);
>  				return -EFAULT;
>  			}
> -			info.cap_offset = sizeof(*arg);
> +			info.cap_offset = cap_offset;
>  		}
>  
>  		kfree(caps.buf);
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index ebe0ad31d0b0..ab64b9e3ed7c 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -2808,14 +2808,17 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
>  		if (info.argsz < sizeof(info) + caps.size) {
>  			info.argsz = sizeof(info) + caps.size;
>  		} else {
> -			vfio_info_cap_shift(&caps, sizeof(info));
> +			ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
> +			if (cap_offset < 0)
> +				return cap_offset;
> +
>  			if (copy_to_user((void __user *)arg +
>  					sizeof(info), caps.buf,
>  					caps.size)) {
>  				kfree(caps.buf);
>  				return -EFAULT;
>  			}
> -			info.cap_offset = sizeof(info);
> +			info.cap_offset = cap_offset;
>  		}
>  
>  		kfree(caps.buf);
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index f0ca33b2e1df..4fc8698577a7 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -1171,8 +1171,18 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
>  {
>  	void *buf;
>  	struct vfio_info_cap_header *header, *tmp;
> +	size_t header_offset;
> +	size_t new_size;
>  
> -	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
> +	/*
> +	 * Reserve extra space when the previous capability was not a multiple of
> +	 * the largest field size. This ensures that capabilities are properly
> +	 * aligned.
> +	 */

If we simply start with:

	size = ALIGN(size, sizeof(u64));

then there should never be a previously misaligned size to correct, right?

I wonder if we really need all this complexity.  We're drawing from a
finite set of info structs for the initial alignment; we can pad those
without breaking the uapi, and we can introduce a warning to avoid such
poor alignment in the future.  Allocating an aligned size for each
capability is then trivial enough to handle at runtime.  For example:

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 902f06e52c48..2d074cbd371d 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1362,6 +1362,8 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
 	void *buf;
 	struct vfio_info_cap_header *header, *tmp;
 
+	size = ALIGN(size, sizeof(u64));
+
 	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
 	if (!buf) {
 		kfree(caps->buf);
@@ -1395,6 +1397,8 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
 	struct vfio_info_cap_header *tmp;
 	void *buf = (void *)caps->buf;
 
+	WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));
+
 	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
 		tmp->next += offset;
 }
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index fa06e3eb4955..fd2761841ffe 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -217,6 +217,7 @@ struct vfio_device_info {
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
+	__u32	pad;		/* Size must be aligned for caps */
 };
 #define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
 
@@ -1444,6 +1445,7 @@ struct vfio_iommu_type1_info {
 #define VFIO_IOMMU_INFO_CAPS	(1 << 1)	/* Info supports caps */
 	__u64	iova_pgsizes;	/* Bitmap of supported page sizes */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
+	__u32	pad;		/* Size must be aligned for caps */
 };
 
 /*

Thanks,
Alex


> +	header_offset = ALIGN(caps->size, sizeof(u64));
> +	new_size = header_offset + size;
> +
> +	buf = krealloc(caps->buf, new_size, GFP_KERNEL);
>  	if (!buf) {
>  		kfree(caps->buf);
>  		caps->buf = NULL;
> @@ -1181,10 +1191,10 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
>  	}
>  
>  	caps->buf = buf;
> -	header = buf + caps->size;
> +	header = buf + header_offset;
>  
>  	/* Eventually copied to user buffer, zero */
> -	memset(header, 0, size);
> +	memset(buf + caps->size, 0, new_size - caps->size);
>  
>  	header->id = id;
>  	header->version = version;
> @@ -1193,20 +1203,47 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
>  	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
>  		; /* nothing */
>  
> -	tmp->next = caps->size;
> -	caps->size += size;
> +	tmp->next = header_offset;
> +	caps->size = new_size;
>  
>  	return header;
>  }
>  EXPORT_SYMBOL_GPL(vfio_info_cap_add);
>  
> -void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
> +/*
> + * Adjust the capability next fields to account for the given offset at which
> + * capability structures start and any padding added for alignment. Returns the
> + * cap_offset or -errno.
> + */
> +ssize_t vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>  {
>  	struct vfio_info_cap_header *tmp;
> +	struct vfio_info_cap_header *next_tmp;
>  	void *buf = (void *)caps->buf;
> +	size_t pad = ALIGN(offset, sizeof(u64)) - offset;
> +	size_t cap_offset = offset + pad;
>  
> -	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
> -		tmp->next += offset;
> +	/* Shift the next fields to account for offset and pad */
> +	for (tmp = buf; tmp->next; tmp = next_tmp) {
> +		next_tmp = buf + tmp->next;
> +		tmp->next += cap_offset;
> +	}
> +
> +	/* Pad with zeroes so capabilities start with proper alignment */
> +	buf = krealloc(caps->buf, caps->size + pad, GFP_KERNEL);
> +	if (!buf) {
> +		kfree(caps->buf);
> +		caps->buf = NULL;
> +		caps->size = 0;
> +		return -ENOMEM;
> +	}
> +
> +	memmove(buf + pad, buf, caps->size);
> +	memset(buf, 0, pad);
> +
> +	caps->buf = buf;
> +	caps->size += pad;
> +	return cap_offset;
>  }
>  EXPORT_SYMBOL(vfio_info_cap_shift);
>  




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux