From: Chuck Lever <chuck.lever@xxxxxxxxxx> During generic/069 runs with pNFS SCSI layouts, the NFS client emits the following in the system journal: kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2) kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3) kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2) kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3) kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: [sdb] tag#16 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#16 CDB: Write(10) 2a 00 00 00 00 50 00 00 08 00 kernel: reservation conflict error, dev sdb, sector 80 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 2 kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: [sdb] tag#18 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#17 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#18 CDB: Write(10) 2a 00 00 00 00 60 00 00 08 00 kernel: sd 6:0:0:1: [sdb] tag#17 CDB: Write(10) 2a 00 00 00 00 58 00 00 08 00 kernel: reservation conflict error, dev sdb, sector 96 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0 kernel: reservation conflict error, dev sdb, sector 88 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0 systemd[1]: fstests-generic-069.scope: Deactivated successfully. systemd[1]: fstests-generic-069.scope: Consumed 5.092s CPU time. systemd[1]: media-test.mount: Deactivated successfully. systemd[1]: media-scratch.mount: Deactivated successfully. kernel: sd 6:0:0:1: reservation conflict kernel: failed to unregister PR key. This appears to be due to a race. 
bl_alloc_lseg() calls this: 561 static struct nfs4_deviceid_node * 562 bl_find_get_deviceid(struct nfs_server *server, 563 const struct nfs4_deviceid *id, const struct cred *cred, 564 gfp_t gfp_mask) 565 { 566 struct nfs4_deviceid_node *node; 567 unsigned long start, end; 568 569 retry: 570 node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); 571 if (!node) 572 return ERR_PTR(-ENODEV); nfs4_find_get_deviceid() does a lookup without the spin lock first. If it can't find a matching deviceid, it creates a new device_info (which calls bl_alloc_deviceid_node(), and that registers the device's PR key). Then it takes the nfs4_deviceid_lock and looks up the deviceid again. If it finds it this time, nfs4_find_get_deviceid() frees the spare (new) device_info, which unregisters the PR key for the same device. Any subsequent I/O from this client on that device gets EBADE. The umount later tries to unregister the device's PR key again, which fails with the "failed to unregister PR key" message shown above. To prevent this problem, register the PR key after the deviceid_node lookup. 
Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> --- fs/nfs/blocklayout/blocklayout.c | 9 ++++++++- fs/nfs/blocklayout/blocklayout.h | 1 + fs/nfs/blocklayout/dev.c | 29 +++++++++++++++++++++-------- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 6be13e0ec170..75cc5e50bd37 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -571,8 +571,14 @@ bl_find_get_deviceid(struct nfs_server *server, if (!node) return ERR_PTR(-ENODEV); - if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) { + struct pnfs_block_dev *d = + container_of(node, struct pnfs_block_dev, node); + if (d->pr_reg) + if (d->pr_reg(d) < 0) + goto out_put; return node; + } end = jiffies; start = end - PNFS_DEVICE_RETRY_TIMEOUT; @@ -581,6 +587,7 @@ bl_find_get_deviceid(struct nfs_server *server, goto retry; } +out_put: nfs4_put_deviceid_node(node); return ERR_PTR(-ENODEV); } diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index f1eeb4914199..8aabaf5218b8 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -116,6 +116,7 @@ struct pnfs_block_dev { bool (*map)(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev_map *map); + int (*pr_reg)(struct pnfs_block_dev *dev); }; /* sector_t fields are all in 512-byte sectors */ diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 356bc967fb5d..3d2401820ef4 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -230,6 +230,26 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, return true; } +static int bl_register_scsi(struct pnfs_block_dev *d) +{ + struct block_device *bdev = file_bdev(d->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int error; + + if (d->pr_registered) + return 0; + + error = ops->pr_register(bdev, 0, 
d->pr_key, true); + if (error) { + trace_bl_pr_key_reg_err(bdev->bd_disk->disk_name, d->pr_key, error); + return -error; + } + + trace_bl_pr_key_reg(bdev->bd_disk->disk_name, d->pr_key); + d->pr_registered = true; + return 0; +} + static int bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); @@ -373,14 +393,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, goto out_blkdev_put; } - error = ops->pr_register(bdev, 0, d->pr_key, true); - if (error) { - trace_bl_pr_key_reg_err(bdev->bd_disk->disk_name, d->pr_key, error); - goto out_blkdev_put; - } - trace_bl_pr_key_reg(bdev->bd_disk->disk_name, d->pr_key); - - d->pr_registered = true; + d->pr_reg = bl_register_scsi; return 0; out_blkdev_put: -- 2.45.1