From: Chuck Lever <chuck.lever@xxxxxxxxxx> During generic/069 runs with pNFS SCSI layouts, the NFS client emits the following in the system journal: kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2) kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3) kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2) kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3) kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: [sdb] tag#16 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#16 CDB: Write(10) 2a 00 00 00 00 50 00 00 08 00 kernel: reservation conflict error, dev sdb, sector 80 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 2 kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: [sdb] tag#18 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#17 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#18 CDB: Write(10) 2a 00 00 00 00 60 00 00 08 00 kernel: sd 6:0:0:1: [sdb] tag#17 CDB: Write(10) 2a 00 00 00 00 58 00 00 08 00 kernel: reservation conflict error, dev sdb, sector 96 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0 kernel: reservation conflict error, dev sdb, sector 88 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0 systemd[1]: fstests-generic-069.scope: Deactivated successfully. systemd[1]: fstests-generic-069.scope: Consumed 5.092s CPU time. systemd[1]: media-test.mount: Deactivated successfully. systemd[1]: media-scratch.mount: Deactivated successfully. kernel: sd 6:0:0:1: reservation conflict kernel: failed to unregister PR key. This appears to be due to a race. bl_alloc_lseg() calls this: 561 static struct nfs4_deviceid_node * 562 bl_find_get_deviceid(struct nfs_server *server, 563 const struct nfs4_deviceid *id, const struct cred *cred, 564 gfp_t gfp_mask) 565 { 566 struct nfs4_deviceid_node *node; 567 unsigned long start, end; 568 569 retry: 570 node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); 571 if (!node) 572 return ERR_PTR(-ENODEV); nfs4_find_get_deviceid() does a lookup without the spin lock first. If it can't find a matching deviceid, it creates a new device_info (which calls bl_alloc_deviceid_node, and that registers the device's PR key). Then it takes the nfs4_deviceid_lock and looks up the deviceid again. If it finds it this time, bl_find_get_deviceid() frees the spare (new) device_info, which unregisters the PR key for the same device. Any subsequent I/O from this client on that device gets EBADE. The umount later unregisters the device's PR key again. To prevent this problem, register the PR key after the deviceid_node lookup. Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> --- fs/nfs/blocklayout/blocklayout.c | 13 +++++++++-- fs/nfs/blocklayout/blocklayout.h | 8 ++++++- fs/nfs/blocklayout/dev.c | 39 +++++++++++++++++++++++--------- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 6be13e0ec170..947b2c523440 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -564,6 +564,7 @@ bl_find_get_deviceid(struct nfs_server *server, gfp_t gfp_mask) { struct nfs4_deviceid_node *node; + struct pnfs_block_dev *d; unsigned long start, end; retry: @@ -571,9 +572,16 @@ bl_find_get_deviceid(struct nfs_server *server, if (!node) return ERR_PTR(-ENODEV); - if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) - return node; + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) + goto transient; + d = container_of(node, struct pnfs_block_dev, node); + if (d->pr_register) + if (!d->pr_register(d)) + goto out_put; + return node; + +transient: end = jiffies; start = end - PNFS_DEVICE_RETRY_TIMEOUT; if (!time_in_range(node->timestamp_unavailable, start, end)) { @@ -581,6 +589,7 @@ bl_find_get_deviceid(struct nfs_server *server, goto retry; } +out_put: nfs4_put_deviceid_node(node); return ERR_PTR(-ENODEV); } diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index f1eeb4914199..cc788e8ce909 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -110,12 +110,18 @@ struct pnfs_block_dev { struct file *bdev_file; u64 disk_offset; + unsigned long flags; u64 pr_key; - bool pr_registered; bool (*map)(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev_map *map); + bool (*pr_register)(struct pnfs_block_dev *dev); +}; + +/* pnfs_block_dev flag bits */ +enum { + PNFS_BDEV_REGISTERED = 0, }; /* sector_t fields are all in 512-byte sectors */ diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 519c310c745d..83753a08a19d 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -23,9 +23,9 @@ bl_free_device(struct pnfs_block_dev *dev) bl_free_device(&dev->children[i]); kfree(dev->children); } else { - if (dev->pr_registered) { - const struct pr_ops *ops = - file_bdev(dev->bdev_file)->bd_disk->fops->pr_ops; + if (test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) { + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; int error; error = ops->pr_register(file_bdev(dev->bdev_file), @@ -226,6 +226,30 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, return true; } +/** + * bl_pr_register_scsi - Register a SCSI PR key for @d + * @dev: pNFS block device, key to register is already in @d->pr_key + * + * Returns true if the device's PR key is registered, otherwise false. + */ +static bool bl_pr_register_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; + + if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + return true; + + status = ops->pr_register(bdev, 0, dev->pr_key, true); + if (status) { + pr_err("pNFS: failed to register key for block device %s.", + bdev->bd_disk->disk_name); + return false; + } + return true; +} + static int bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); @@ -367,14 +391,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, goto out_blkdev_put; } - error = ops->pr_register(file_bdev(d->bdev_file), 0, d->pr_key, true); - if (error) { - pr_err("pNFS: failed to register key for block device %s.", - file_bdev(d->bdev_file)->bd_disk->disk_name); - goto out_blkdev_put; - } - - d->pr_registered = true; + d->pr_register = bl_pr_register_scsi; return 0; out_blkdev_put: -- 2.45.1