Re: [PATCH v2 13/16] xen-blkback: Implement diskseq checks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, May 30, 2023 at 04:31:13PM -0400, Demi Marie Obenour wrote:
> This allows specifying a disk sequence number in XenStore.  If it does
> not match the disk sequence number of the underlying device, the device
> will not be exported and a warning will be logged.  Userspace can use
> this to eliminate race conditions due to major/minor number reuse.
> Old kernels do not support the new syntax, but a later patch will allow
> userspace to discover that the new syntax is supported.
> 
> Signed-off-by: Demi Marie Obenour <demi@xxxxxxxxxxxxxxxxxxxxxx>
> ---
>  drivers/block/xen-blkback/xenbus.c | 112 +++++++++++++++++++++++------
>  1 file changed, 89 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> index 4807af1d58059394d7a992335dabaf2bc3901721..9c3eb148fbd802c74e626c3d7bcd69dcb09bd921 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -24,6 +24,7 @@ struct backend_info {
>  	struct xenbus_watch	backend_watch;
>  	unsigned		major;
>  	unsigned		minor;
> +	unsigned long long	diskseq;

Since diskseq is declared as u64 in gendisk, better use the same type
here too?

>  	char			*mode;
>  };
>  
> @@ -479,7 +480,7 @@ static void xen_vbd_free(struct xen_vbd *vbd)
>  
>  static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
>  			  unsigned major, unsigned minor, int readonly,
> -			  int cdrom)
> +			  bool cdrom, u64 diskseq)
>  {
>  	struct xen_vbd *vbd;
>  	struct block_device *bdev;
> @@ -507,6 +508,26 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
>  		xen_vbd_free(vbd);
>  		return -ENOENT;
>  	}
> +
> +	if (diskseq) {
> +		struct gendisk *disk = bdev->bd_disk;

const.

> +
> +		if (unlikely(disk == NULL)) {
> +			pr_err("%s: device %08x has no gendisk\n",
> +			       __func__, vbd->pdevice);
> +			xen_vbd_free(vbd);
> +			return -EFAULT;

ENODEV or ENOENT might be more accurate IMO.

> +		}
> +
> +		if (unlikely(disk->diskseq != diskseq)) {
> +			pr_warn("%s: device %08x has incorrect sequence "
> +				"number 0x%llx (expected 0x%llx)\n",

I prefer %#llx, and likely pr_err like above.  Also I think it's now
preferred to not split printed lines, so that `grep "has incorrect
sequence number" ...` can find the instance.

> +				__func__, vbd->pdevice, disk->diskseq, diskseq);
> +			xen_vbd_free(vbd);
> +			return -ENODEV;
> +		}
> +	}
> +
>  	vbd->size = vbd_sz(vbd);
>  
>  	if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
> @@ -707,6 +728,9 @@ static void backend_changed(struct xenbus_watch *watch,
>  	int cdrom = 0;
>  	unsigned long handle;
>  	char *device_type;
> +	char *diskseq_str = NULL;

const, and I think there's no need to init to NULL.

> +	int diskseq_len;

unsigned int

> +	unsigned long long diskseq;

u64

>  
>  	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
>  
> @@ -725,10 +749,46 @@ static void backend_changed(struct xenbus_watch *watch,
>  		return;
>  	}
>  
> -	if (be->major | be->minor) {
> -		if (be->major != major || be->minor != minor)
> -			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
> -				be->major, be->minor, major, minor);
> +	diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);
> +	if (IS_ERR(diskseq_str)) {
> +		int err = PTR_ERR(diskseq_str);
> +		diskseq_str = NULL;
> +
> +		/*
> +		 * If this does not exist, it means legacy userspace that does not
> +		 * support diskseq.
> +		 */
> +		if (unlikely(!XENBUS_EXIST_ERR(err))) {
> +			xenbus_dev_fatal(dev, err, "reading diskseq");
> +			return;
> +		}
> +		diskseq = 0;
> +	} else if (diskseq_len <= 0) {
> +		xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
> +		goto fail;
> +	} else if (diskseq_len > 16) {
> +		xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
> +				 diskseq_len);
> +		goto fail;
> +	} else if (diskseq_str[0] == '0') {
> +		xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
> +		goto fail;
> +	} else {
> +		char *diskseq_end;
> +		diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
> +		if (diskseq_end != diskseq_str + diskseq_len) {
> +			xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
> +			goto fail;
> +		}
> +		kfree(diskseq_str);
> +		diskseq_str = NULL;
> +	}

Won't it be simpler to use xenbus_scanf() with %llx formatter?

Also, we might want to fetch "physical-device" and "diskseq" inside
the same xenstore transaction.

Also, you tie this logic to the "physical-device" watch, which
strictly implies that the "diskseq" node must be written to xenstore
before the "physical-device" node.  This seems fragile, but I don't
see much better optiono since the "diskseq" is optional.

The node and its behaviour should be documented in blkif.h.

> +	if (be->major | be->minor | be->diskseq) {
> +		if (be->major != major || be->minor != minor || be->diskseq != diskseq)
> +			pr_warn("changing physical device (from %x:%x:%llx to %x:%x:%llx)"
> +				" not supported.\n",
> +				be->major, be->minor, be->diskseq, major, minor, diskseq);
>  		return;

You are leaking diskseq_str here, and in all the error cases between
here and up to the call to xen_vbd_create().

It might be better to simnply free diskseq_str once you are done with
the processing, and have set diskseq.

Otherwise see my suggestion of using xenbus_scanf().

Thanks, Roger.



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux