Re: [RFC PATCH 3/4] nfs/blocklayout: Fix premature PR key unregistration

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 19 Jun 2024, at 13:39, cel@xxxxxxxxxx wrote:

> From: Chuck Lever <chuck.lever@xxxxxxxxxx>
>
> During generic/069 runs with pNFS SCSI layouts, the NFS client emits
> the following in the system journal:
>
> kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2)
> kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3)
> kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2)
> kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3)
> kernel: sd 6:0:0:1: reservation conflict
> kernel: sd 6:0:0:1: [sdb] tag#16 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
> kernel: sd 6:0:0:1: [sdb] tag#16 CDB: Write(10) 2a 00 00 00 00 50 00 00 08 00
> kernel: reservation conflict error, dev sdb, sector 80 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 2
> kernel: sd 6:0:0:1: reservation conflict
> kernel: sd 6:0:0:1: reservation conflict
> kernel: sd 6:0:0:1: [sdb] tag#18 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
> kernel: sd 6:0:0:1: [sdb] tag#17 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
> kernel: sd 6:0:0:1: [sdb] tag#18 CDB: Write(10) 2a 00 00 00 00 60 00 00 08 00
> kernel: sd 6:0:0:1: [sdb] tag#17 CDB: Write(10) 2a 00 00 00 00 58 00 00 08 00
> kernel: reservation conflict error, dev sdb, sector 96 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0
> kernel: reservation conflict error, dev sdb, sector 88 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0
> systemd[1]: fstests-generic-069.scope: Deactivated successfully.
> systemd[1]: fstests-generic-069.scope: Consumed 5.092s CPU time.
> systemd[1]: media-test.mount: Deactivated successfully.
> systemd[1]: media-scratch.mount: Deactivated successfully.
> kernel: sd 6:0:0:1: reservation conflict
> kernel: failed to unregister PR key.
>
> This appears to be due to a race. bl_alloc_lseg() calls this:
>
> 561 static struct nfs4_deviceid_node *
> 562 bl_find_get_deviceid(struct nfs_server *server,
> 563                 const struct nfs4_deviceid *id, const struct cred *cred,
> 564                 gfp_t gfp_mask)
> 565 {
> 566         struct nfs4_deviceid_node *node;
> 567         unsigned long start, end;
> 568
> 569 retry:
> 570         node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
> 571         if (!node)
> 572                 return ERR_PTR(-ENODEV);
>
> nfs4_find_get_deviceid() does a lookup without the spin lock first.
> If it can't find a matching deviceid, it creates a new device_info
> (which calls bl_alloc_deviceid_node, and that registers the device's
> PR key).
>
> Then it takes the nfs4_deviceid_lock and looks up the deviceid again.
> If it finds it this time, bl_find_get_deviceid() frees the spare
> (new) device_info, which unregisters the PR key for the same device.
>
> Any subsequent I/O from this client on that device gets EBADE.
>
> The umount later unregisters the device's PR key again.
>
> To prevent this problem, register the PR key after the deviceid_node
> lookup.

Hi Chuck - nice catch, but I'm not seeing how this patch avoids the same
problem; instead, it just seems to move the race.  What prevents another
process waiting to take the nfs4_deviceid_lock from unregistering the PR
key for the same device?  I think we need another way to signal
bl_free_device that we don't want to unregister in the case where the new
device isn't added to nfs4_deviceid_cache.

No good ideas yet - maybe we can use a flag that is set while holding the
nfs4_deviceid_lock?

Ben

>
> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
> ---
>  fs/nfs/blocklayout/blocklayout.c |  9 ++++++++-
>  fs/nfs/blocklayout/blocklayout.h |  1 +
>  fs/nfs/blocklayout/dev.c         | 29 +++++++++++++++++++++--------
>  3 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> index 6be13e0ec170..75cc5e50bd37 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -571,8 +571,14 @@ bl_find_get_deviceid(struct nfs_server *server,
>  	if (!node)
>  		return ERR_PTR(-ENODEV);
>
> -	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
> +	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) {
> +		struct pnfs_block_dev *d =
> +			container_of(node, struct pnfs_block_dev, node);
> +		if (d->pr_reg)
> +			if (d->pr_reg(d) < 0)
> +				goto out_put;
>  		return node;
> +	}
>
>  	end = jiffies;
>  	start = end - PNFS_DEVICE_RETRY_TIMEOUT;
> @@ -581,6 +587,7 @@ bl_find_get_deviceid(struct nfs_server *server,
>  		goto retry;
>  	}
>
> +out_put:
>  	nfs4_put_deviceid_node(node);
>  	return ERR_PTR(-ENODEV);
>  }
> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
> index f1eeb4914199..8aabaf5218b8 100644
> --- a/fs/nfs/blocklayout/blocklayout.h
> +++ b/fs/nfs/blocklayout/blocklayout.h
> @@ -116,6 +116,7 @@ struct pnfs_block_dev {
>
>  	bool (*map)(struct pnfs_block_dev *dev, u64 offset,
>  			struct pnfs_block_dev_map *map);
> +	int (*pr_reg)(struct pnfs_block_dev *dev);
>  };
>
>  /* sector_t fields are all in 512-byte sectors */
> diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
> index 356bc967fb5d..3d2401820ef4 100644
> --- a/fs/nfs/blocklayout/dev.c
> +++ b/fs/nfs/blocklayout/dev.c
> @@ -230,6 +230,26 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
>  	return true;
>  }
>
> +static int bl_register_scsi(struct pnfs_block_dev *d)
> +{
> +	struct block_device *bdev = file_bdev(d->bdev_file);
> +	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
> +	int error;
> +
> +	if (d->pr_registered)
> +		return 0;
> +
> +	error = ops->pr_register(bdev, 0, d->pr_key, true);
> +	if (error) {
> +		trace_bl_pr_key_reg_err(bdev->bd_disk->disk_name, d->pr_key, error);
> +		return -error;
> +	}
> +
> +	trace_bl_pr_key_reg(bdev->bd_disk->disk_name, d->pr_key);
> +	d->pr_registered = true;
> +	return 0;
> +}
> +
>  static int
>  bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
>  		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
> @@ -373,14 +393,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
>  		goto out_blkdev_put;
>  	}
>
> -	error = ops->pr_register(bdev, 0, d->pr_key, true);
> -	if (error) {
> -		trace_bl_pr_key_reg_err(bdev->bd_disk->disk_name, d->pr_key, error);
> -		goto out_blkdev_put;
> -	}
> -	trace_bl_pr_key_reg(bdev->bd_disk->disk_name, d->pr_key);
> -
> -	d->pr_registered = true;
> +	d->pr_reg = bl_register_scsi;
>  	return 0;
>
>  out_blkdev_put:
> -- 
> 2.45.1






[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux