On Thu, Aug 03, 2023 at 03:18:23PM -0600, Alex Williamson wrote: > On Thu, 3 Aug 2023 10:41:09 -0400 > Stefan Hajnoczi <stefanha@xxxxxxxxxx> wrote: > > > The VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, and > > VFIO_IOMMU_GET_INFO ioctls fill in an info struct followed by capability > > structs: > > > > +------+---------+---------+-----+ > > | info | caps[0] | caps[1] | ... | > > +------+---------+---------+-----+ > > > > Both the info and capability struct sizes are not always multiples of > > sizeof(u64), leaving u64 fields in later capability structs misaligned. > > > > Userspace applications currently need to handle misalignment manually in > > order to support CPU architectures and programming languages with strict > > alignment requirements. > > > > Make life easier for userspace by ensuring alignment in the kernel. > > The new layout is as follows: > > > > +------+---+---------+---------+---+-----+ > > | info | 0 | caps[0] | caps[1] | 0 | ... | > > +------+---+---------+---------+---+-----+ > > > > In this example info and caps[1] have sizes that are not multiples of > > sizeof(u64), so zero padding is added to align the subsequent structure. > > > > Adding zero padding between structs does not break the uapi. The memory > > layout is specified by the info.cap_offset and caps[i].next fields > > filled in by the kernel. Applications use these field values to locate > > structs and are therefore unaffected by the addition of zero padding. > > > > Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx> > > --- > > include/linux/vfio.h | 2 +- > > drivers/gpu/drm/i915/gvt/kvmgt.c | 7 +++-- > > drivers/s390/cio/vfio_ccw_ops.c | 7 +++-- > > drivers/vfio/pci/vfio_pci_core.c | 14 ++++++--- > > drivers/vfio/vfio_iommu_type1.c | 7 +++-- > > drivers/vfio/vfio_main.c | 53 +++++++++++++++++++++++++++----- > > 6 files changed, 71 insertions(+), 19 deletions(-) > > > > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > > index 2c137ea94a3e..ff0864e73cc3 100644 > > --- a/include/linux/vfio.h > > +++ b/include/linux/vfio.h > > @@ -272,7 +272,7 @@ struct vfio_info_cap { > > struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, > > size_t size, u16 id, > > u16 version); > > -void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); > > +ssize_t vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); > > > > int vfio_info_add_capability(struct vfio_info_cap *caps, > > struct vfio_info_cap_header *cap, size_t size); > > diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c > > index de675d799c7d..9060e9c6ac7c 100644 > > --- a/drivers/gpu/drm/i915/gvt/kvmgt.c > > +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c > > @@ -1297,7 +1297,10 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd, > > info.argsz = sizeof(info) + caps.size; > > info.cap_offset = 0; > > } else { > > - vfio_info_cap_shift(&caps, sizeof(info)); > > + ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info)); > > + if (cap_offset < 0) > > + return cap_offset; > > + > > if (copy_to_user((void __user *)arg + > > sizeof(info), caps.buf, > > caps.size)) { > > @@ -1305,7 +1308,7 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd, > > kfree(sparse); > > return -EFAULT; > > } > > - info.cap_offset = sizeof(info); > > + info.cap_offset = cap_offset; > > The copy_to_user() above needs to be modified to make this true: > > copy_to_user((void __user *)arg + cap_offset,... > > Same for all similar below. vfio_info_cap_shift() inserts zero padding before the first capability in order to achieve alignment. The zero padding is part of caps.buf, not the info struct. Therefore the copy_to_user((void __user *)arg + sizeof(info), ...) is correct. It's confusing though. Your suggestion for simplifying alignment below looks good and will avoid this issue. > > > } > > > > kfree(caps.buf); > > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c > > index 5b53b94f13c7..63d5163376a5 100644 > > --- a/drivers/s390/cio/vfio_ccw_ops.c > > +++ b/drivers/s390/cio/vfio_ccw_ops.c > > @@ -361,13 +361,16 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_ccw_private *private, > > info->argsz = sizeof(*info) + caps.size; > > info->cap_offset = 0; > > } else { > > - vfio_info_cap_shift(&caps, sizeof(*info)); > > + ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(*info)); > > + if (cap_offset < 0) > > + return cap_offset; > > + > > if (copy_to_user((void __user *)arg + sizeof(*info), > > caps.buf, caps.size)) { > > kfree(caps.buf); > > return -EFAULT; > > } > > - info->cap_offset = sizeof(*info); > > + info->cap_offset = cap_offset; > > } > > > > kfree(caps.buf); > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > > index 20d7b69ea6ff..92c093b99187 100644 > > --- a/drivers/vfio/pci/vfio_pci_core.c > > +++ b/drivers/vfio/pci/vfio_pci_core.c > > @@ -966,12 +966,15 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev, > > if (info.argsz < sizeof(info) + caps.size) { > > info.argsz = sizeof(info) + caps.size; > > } else { > > - vfio_info_cap_shift(&caps, sizeof(info)); > > + ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info)); > > + if (cap_offset < 0) > > + return cap_offset; > > + > > if (copy_to_user(arg + 1, caps.buf, caps.size)) { > > kfree(caps.buf); > > return -EFAULT; > > } > > - info.cap_offset = sizeof(*arg); > > + info.cap_offset = cap_offset; > > } > > > > kfree(caps.buf); > > @@ -1107,12 +1110,15 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, > > info.argsz = sizeof(info) + caps.size; > > info.cap_offset = 0; > > } else { > > - vfio_info_cap_shift(&caps, sizeof(info)); > > + ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info)); > > + if (cap_offset < 0) > > + return cap_offset; > > + > > if (copy_to_user(arg + 1, caps.buf, caps.size)) { > > kfree(caps.buf); > > return -EFAULT; > > } > > - info.cap_offset = sizeof(*arg); > > + info.cap_offset = cap_offset; > > } > > > > kfree(caps.buf); > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > > index ebe0ad31d0b0..ab64b9e3ed7c 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -2808,14 +2808,17 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, > > if (info.argsz < sizeof(info) + caps.size) { > > info.argsz = sizeof(info) + caps.size; > > } else { > > - vfio_info_cap_shift(&caps, sizeof(info)); > > + ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info)); > > + if (cap_offset < 0) > > + return cap_offset; > > + > > if (copy_to_user((void __user *)arg + > > sizeof(info), caps.buf, > > caps.size)) { > > kfree(caps.buf); > > return -EFAULT; > > } > > - info.cap_offset = sizeof(info); > > + info.cap_offset = cap_offset; > > } > > > > kfree(caps.buf); > > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > > index f0ca33b2e1df..4fc8698577a7 100644 > > --- a/drivers/vfio/vfio_main.c > > +++ b/drivers/vfio/vfio_main.c > > @@ -1171,8 +1171,18 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, > > { > > void *buf; > > struct vfio_info_cap_header *header, *tmp; > > + size_t header_offset; > > + size_t new_size; > > > > - buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); > > + /* > > + * Reserve extra space when the previous capability was not a multiple of > > + * the largest field size. This ensures that capabilities are properly > > + * aligned. > > + */ > > If we simply start with: > > size = ALIGN(size, sizeof(u64)); > > then shouldn't there never be a previous misaligned size to correct? Yes, I applied padding at the beginning but it could be applied at the end instead. > I wonder if we really need all this complexity, we're drawing from a > finite set of info structs for the initial alignment, we can pad those > without breaking the uapi and we can introduce a warning to avoid such > poor alignment in the future. Allocating an aligned size for each > capability is then sufficiently trivial to handle runtime. ex: > > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > index 902f06e52c48..2d074cbd371d 100644 > --- a/drivers/vfio/vfio_main.c > +++ b/drivers/vfio/vfio_main.c > @@ -1362,6 +1362,8 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, > void *buf; > struct vfio_info_cap_header *header, *tmp; > > + size = ALIGN(size, sizeof(u64)); > + > buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); > if (!buf) { > kfree(caps->buf); > @@ -1395,6 +1397,8 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) > struct vfio_info_cap_header *tmp; > void *buf = (void *)caps->buf; > > + WARN_ON(!IS_ALIGNED(offset, sizeof(u64))); > + > for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) > tmp->next += offset; > } > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index fa06e3eb4955..fd2761841ffe 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -217,6 +217,7 @@ struct vfio_device_info { > __u32 num_regions; /* Max region index + 1 */ > __u32 num_irqs; /* Max IRQ index + 1 */ > __u32 cap_offset; /* Offset within info struct of first cap */ > + __u32 pad; /* Size must be aligned for caps */ > }; > #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) > > @@ -1444,6 +1445,7 @@ struct vfio_iommu_type1_info { > #define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ > __u64 iova_pgsizes; /* Bitmap of supported page sizes */ > __u32 cap_offset; /* Offset within info struct of first cap */ > + __u32 pad; /* Size must be aligned for caps */ > }; > > /* Yes, that's much simpler. I'll send another revision that follows that approach. Stefan
Attachment:
signature.asc
Description: PGP signature