On Sun, Jun 23, 2024 at 09:36:27AM +0200, Christoph Hellwig wrote: > On Sat, Jun 22, 2024 at 01:26:10PM -0400, Chuck Lever wrote: > > This patch currently adds the pr_reg callback to > > bl_find_get_deviceid(), which has no visibility of the volume > > hierarchy. Where should the registration be done instead? I'm > > missing something. > > Something like the patch below (untested Sunday morning coding) should > do the trick: > > diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c > index 947b2c52344097..6db54b215066e0 100644 > --- a/fs/nfs/blocklayout/blocklayout.c > +++ b/fs/nfs/blocklayout/blocklayout.c > @@ -564,34 +564,32 @@ bl_find_get_deviceid(struct nfs_server *server, > gfp_t gfp_mask) > { > struct nfs4_deviceid_node *node; > - struct pnfs_block_dev *d; > - unsigned long start, end; > + int err = -ENODEV; > > retry: > node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); > if (!node) > return ERR_PTR(-ENODEV); > > - if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) > - goto transient; > + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { > + unsigned long end = jiffies; > + unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT; > > - d = container_of(node, struct pnfs_block_dev, node); > - if (d->pr_register) > - if (!d->pr_register(d)) > - goto out_put; > - return node; > - > -transient: > - end = jiffies; > - start = end - PNFS_DEVICE_RETRY_TIMEOUT; > - if (!time_in_range(node->timestamp_unavailable, start, end)) { > - nfs4_delete_deviceid(node->ld, node->nfs_client, id); > - goto retry; > + if (!time_in_range(node->timestamp_unavailable, start, end)) { > + nfs4_delete_deviceid(node->ld, node->nfs_client, id); > + goto retry; > + } > + goto out_put; > } > > + err = bl_register_dev(container_of(node, struct pnfs_block_dev, node)); > + if (err) > + goto out_put; > + return node; > + > out_put: > nfs4_put_deviceid_node(node); > - return ERR_PTR(-ENODEV); > + return ERR_PTR(err); > } > > static int > diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h > index cc788e8ce90933..7efbef9d10dba8 100644 > --- a/fs/nfs/blocklayout/blocklayout.h > +++ b/fs/nfs/blocklayout/blocklayout.h > @@ -104,6 +104,7 @@ struct pnfs_block_dev { > u64 start; > u64 len; > > + enum pnfs_block_volume_type type; > u32 nr_children; > struct pnfs_block_dev *children; > u64 chunk_size; > @@ -116,7 +117,6 @@ struct pnfs_block_dev { > > bool (*map)(struct pnfs_block_dev *dev, u64 offset, > struct pnfs_block_dev_map *map); > - bool (*pr_register)(struct pnfs_block_dev *dev); > }; > > /* pnfs_block_dev flag bits */ > @@ -178,6 +178,7 @@ struct bl_msg_hdr { > #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ > > /* dev.c */ > +int bl_register_dev(struct pnfs_block_dev *d); > struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, > struct pnfs_device *pdev, gfp_t gfp_mask); > void bl_free_deviceid_node(struct nfs4_deviceid_node *d); > diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c > index 16fb64d4af31db..72e061e87e145a 100644 > --- a/fs/nfs/blocklayout/dev.c > +++ b/fs/nfs/blocklayout/dev.c > @@ -13,9 +13,74 @@ > > #define NFSDBG_FACILITY NFSDBG_PNFS_LD > > +static void bl_unregister_scsi(struct pnfs_block_dev *dev) > +{ > + struct block_device *bdev = file_bdev(dev->bdev_file); > + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; > + > + if (!test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) > + return; > + > + if (ops->pr_register(bdev, dev->pr_key, 0, false)) > + pr_err("failed to unregister PR key.\n"); > +} > + > +static bool bl_register_scsi(struct pnfs_block_dev *dev) > +{ > + struct block_device *bdev = file_bdev(dev->bdev_file); > + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; > + int status; > + > + if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags)) > + return true; > + > + status = ops->pr_register(bdev, 0, dev->pr_key, true); > + if (status) { > + pr_err("pNFS: failed to register key for block device %s.", > + bdev->bd_disk->disk_name); > + return false; > + } > + return true; > +} > + > +static void bl_unregister_dev(struct pnfs_block_dev *dev) > +{ > + if (dev->nr_children) { > + for (u32 i = 0; i < dev->nr_children; i++) > + bl_unregister_dev(&dev->children[i]); > + return; > + } > + > + if (dev->type == PNFS_BLOCK_VOLUME_SCSI) > + bl_unregister_scsi(dev); > +} > + > +int bl_register_dev(struct pnfs_block_dev *dev) > +{ > + if (dev->nr_children) { > + for (u32 i = 0; i < dev->nr_children; i++) { > + int ret = bl_register_dev(&dev->children[i]); > + > + if (ret) { > + while (i > 0) > + bl_unregister_dev(&dev->children[--i]); > + return ret; > + } > + } > + > + return 0; > + } > + > + if (dev->type == PNFS_BLOCK_VOLUME_SCSI) > + return bl_register_scsi(dev); > + return 0; > +} > + > static void > bl_free_device(struct pnfs_block_dev *dev) > { > + bl_unregister_dev(dev); > + > if (dev->nr_children) { > int i; > > @@ -23,17 +88,6 @@ bl_free_device(struct pnfs_block_dev *dev) > bl_free_device(&dev->children[i]); > kfree(dev->children); > } else { > - if (test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) { > - struct block_device *bdev = file_bdev(dev->bdev_file); > - const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; > - int error; > - > - error = ops->pr_register(file_bdev(dev->bdev_file), > - dev->pr_key, 0, false); > - if (error) > - pr_err("failed to unregister PR key.\n"); > - } > - > if (dev->bdev_file) > fput(dev->bdev_file); > } > @@ -226,30 +280,6 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, > return true; > } > > -/** > - * bl_pr_register_scsi - Register a SCSI PR key for @d > - * @dev: pNFS block device, key to register is already in @d->pr_key > - * > - * Returns true if the device's PR key is registered, otherwise false. > - */ > -static bool bl_pr_register_scsi(struct pnfs_block_dev *dev) > -{ > - struct block_device *bdev = file_bdev(dev->bdev_file); > - const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; > - int status; > - > - if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags)) > - return true; > - > - status = ops->pr_register(bdev, 0, dev->pr_key, true); > - if (status) { > - pr_err("pNFS: failed to register key for block device %s.", > - bdev->bd_disk->disk_name); > - return false; > - } > - return true; > -} > - > static int > bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, > struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); > @@ -392,7 +422,6 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, > goto out_blkdev_put; > } > > - d->pr_register = bl_pr_register_scsi; > return 0; > > out_blkdev_put: > @@ -478,7 +507,9 @@ static int > bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, > struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) > { > - switch (volumes[idx].type) { > + d->type = volumes[idx].type; > + > + switch (d->type) { > case PNFS_BLOCK_VOLUME_SIMPLE: > return bl_parse_simple(server, d, volumes, idx, gfp_mask); > case PNFS_BLOCK_VOLUME_SLICE: > @@ -490,7 +521,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, > case PNFS_BLOCK_VOLUME_SCSI: > return bl_parse_scsi(server, d, volumes, idx, gfp_mask); > default: > - dprintk("unsupported volume type: %d\n", volumes[idx].type); > + dprintk("unsupported volume type: %d\n", d->type); > return -EIO; > } > } Thanks. I've applied this as a separate patch (I can squash it into 1/4 once it passes testing). The first write I/O segfaults at L143 in do_add_page_to_bio() : 142 if (!offset_in_map(disk_addr, map)) { 143 if (!dev->map(dev, disk_addr, map) || !offset_in_map(disk_addr, map)) 144 return ERR_PTR(-EIO); I'm looking into it. -- Chuck Lever