The 01/29/2021 04:46, Damien Le Moal wrote: > On 2021/01/29 2:23, Sergei Shtepa wrote: > > Implement a block interposer for device-mapper to attach > > to an existing block layer stack. Using the interposer, > > we can connect the dm-linear to a device with a mounted > > file system. > > > > changes: > > * the new dm_interposer structure contains blk_interposer > > to intercept bio from the interposed disk and interval tree > > of block devices on this disk. > > * the new interval tree for device mapper. > > * the dm_submit_bio_interposer_fn() function implements > > the bio interception logic. > > * the functions dm_interposer_attach_dev() & > > dm_interposer_detach_dev() allow to attach and detach devices > > to dm_interposer. > > * the new parameter 'noexcl' allows to create dm-linear to device > > with an already mounted file system. > > * the non_exclusive parameter in dm_target structure - it`s a sign > > that target device should be opened without FMODE_EXCL mode. > > * the new ioctl IOCTL_DEV_REMAP allow to attach dm device to > > a regular block device. > > Same comment about changelog as in the previous patch. > > > Signed-off-by: Sergei Shtepa <sergei.shtepa@xxxxxxxxx> > > --- > > drivers/md/dm-core.h | 46 +++- > > drivers/md/dm-ioctl.c | 39 ++++ > > drivers/md/dm-linear.c | 17 +- > > drivers/md/dm-table.c | 12 +- > > drivers/md/dm.c | 383 ++++++++++++++++++++++++++++++++-- > > drivers/md/dm.h | 2 +- > > include/linux/device-mapper.h | 7 + > > include/uapi/linux/dm-ioctl.h | 15 +- > > 8 files changed, 493 insertions(+), 28 deletions(-) > > > > diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h > > index 086d293c2b03..0f870b1d4be4 100644 > > --- a/drivers/md/dm-core.h > > +++ b/drivers/md/dm-core.h > > @@ -13,7 +13,7 @@ > > #include <linux/ktime.h> > > #include <linux/genhd.h> > > #include <linux/blk-mq.h> > > - > > whiteline change. 
> > > +#include <linux/rbtree.h> > > #include <trace/events/block.h> I don't see any problem in the fact that a new include appeared instead of whiteline. It doesn't make sense to split the include section by whiteline. > > > > #include "dm.h" > > @@ -109,6 +109,9 @@ struct mapped_device { > > bool init_tio_pdu:1; > > > > struct srcu_struct io_barrier; > > + > > + /* interposer device for remap */ > > + struct dm_interposed_dev *ip_dev; > > }; > > > > void disable_discard(struct mapped_device *md); > > @@ -164,6 +167,47 @@ struct dm_table { > > struct dm_md_mempools *mempools; > > }; > > > > +/* > > + * Interval tree for device mapper > > + */ > > +struct dm_rb_range { > > + struct rb_node node; > > + sector_t start; /* start sector of rb node */ > > + sector_t last; /* end sector of rb node */ > > + sector_t _subtree_last; /* highest sector in subtree of rb node */ > > +}; > > + > > +void dm_rb_insert(struct dm_rb_range *node, struct rb_root_cached *root); > > +void dm_rb_remove(struct dm_rb_range *node, struct rb_root_cached *root); > > + > > +struct dm_rb_range *dm_rb_iter_first(struct rb_root_cached *root, sector_t start, sector_t last); > > +struct dm_rb_range *dm_rb_iter_next(struct dm_rb_range *node, sector_t start, sector_t last); > > + > > +/* > > + * For connecting blk_interposer and dm-targets devices. > > Is this comment about the callback or the structure ? I think the latter, so it > is in the wrong place. Please also add a comment for the callback definition > explaining what it should be doing. Ok. 
> > > + */ > > +typedef void (*dm_interpose_bio_t) (void *context, struct dm_rb_range *node, struct bio *bio); > > + > > +struct dm_interposed_dev { > > + struct gendisk *disk; > > + struct dm_rb_range node; > > + void *context; > > + dm_interpose_bio_t dm_interpose_bio; > > + > > + atomic64_t ip_cnt; /*for debug purpose*/ > > +}; > > + > > +struct dm_interposed_dev *dm_interposer_new_dev(struct gendisk *disk, > > + sector_t ofs, sector_t len, > > + void *context, > > + dm_interpose_bio_t dm_interpose_bio_t); > > +void dm_interposer_free_dev(struct dm_interposed_dev *ip_dev); > > +int dm_interposer_attach_dev(struct dm_interposed_dev *ip_dev); > > +int dm_interposer_detach_dev(struct dm_interposed_dev *ip_dev); > > + > > +int dm_remap_install(struct mapped_device *md, const char *donor_device_name); > > +int dm_remap_uninstall(struct mapped_device *md); > > + > > static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) > > { > > return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; > > diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c > > index 5e306bba4375..2944d442c256 100644 > > --- a/drivers/md/dm-ioctl.c > > +++ b/drivers/md/dm-ioctl.c > > @@ -1649,6 +1649,44 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para > > return r; > > } > > > > +static inline int dev_remap_start(struct mapped_device *md, uint8_t *params) > > +{ > > + char *donor_device_name = (char *)params; > > + > > + return dm_remap_install(md, donor_device_name); > > +} > > +static int dev_remap_finish(struct mapped_device *md) > > +{ > > + return dm_remap_uninstall(md); > > +} > > + > > +static int dev_remap(struct file *filp, struct dm_ioctl *param, size_t param_size) > > +{ > > + int ret = 0; > > + struct mapped_device *md; > > + void *bin_data; > > + struct dm_remap_param *remap_param; > > + > > + md = find_device(param); > > + if (!md) > > + return -ENXIO; > > + > > + bin_data = (void *)(param) + 
param->data_start; > > + remap_param = bin_data; > > + > > + if (remap_param->cmd == REMAP_START_CMD) > > + ret = dev_remap_start(md, remap_param->params); > > + else if (remap_param->cmd == REMAP_FINISH_CMD) > > + ret = dev_remap_finish(md); > > + else { > > + DMWARN("Invalid remap command, %d", remap_param->cmd); > > + ret = -EINVAL; > > + } > > + > > + dm_put(md); > > + return ret; > > +} > > + > > /* > > * The ioctl parameter block consists of two parts, a dm_ioctl struct > > * followed by a data buffer. This flag is set if the second part, > > @@ -1691,6 +1729,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) > > {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry}, > > {DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll}, > > {DM_GET_TARGET_VERSION, 0, get_target_version}, > > + {DM_DEV_REMAP_CMD, 0, dev_remap}, > > }; > > > > if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) > > diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c > > index 00774b5d7668..ffb8b5ca4d10 100644 > > --- a/drivers/md/dm-linear.c > > +++ b/drivers/md/dm-linear.c > > @@ -28,12 +28,13 @@ struct linear_c { > > */ > > static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) > > { > > + fmode_t mode; > > struct linear_c *lc; > > unsigned long long tmp; > > char dummy; > > int ret; > > > > - if (argc != 2) { > > + if ((argc < 2) || (argc > 3)) { > > ti->error = "Invalid argument count"; > > return -EINVAL; > > } > > @@ -51,7 +52,19 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) > > } > > lc->start = tmp; > > > > - ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev); > > + ti->non_exclusive = false; > > + if (argc > 2) { > > + if (strcmp("noexcl", argv[2]) == 0) > > + ti->non_exclusive = true; > > + else if (strcmp("excl", argv[2]) == 0) > > + ti->non_exclusive = false; > > It already is false. Yes, and even the value of the "excl" parameter is redundant, since it defines the default value. 
I think this code structure more clearly reflects the meaning of the parameter. > > > + else { > > + ti->error = "Invalid exclusive option"; > > + return -EINVAL; > > + } > > + } > > + > > + ret = dm_get_device(ti, argv[0], mode, &lc->dev); > > Where is mode initialized ? Why remove dm_table_get_mode(ti->table) ? Yes. It's a bug. dm_table_get_mode() should be used here. > > > if (ret) { > > ti->error = "Device lookup failed"; > > goto bad; > > I would prefer to see this change to dm-linear in its own patch, following this > one, with a clear explanation in the commit message how this change relates to > interposer since the explanation for this "exclusive" change is nowhere to be > seen. Also please check if there is a file describing dm-linear options under > Documentation/ (I can't remember if there is one). If there is one, it will need > to be updated too. It's a good idea. > > > diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c > > index 4acf2342f7ad..f15bc2171f25 100644 > > --- a/drivers/md/dm-table.c > > +++ b/drivers/md/dm-table.c > > @@ -322,7 +322,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, > > * device and not to touch the existing bdev field in case > > * it is accessed concurrently. 
> > */ > > -static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, > > +static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, bool non_exclusive, > > struct mapped_device *md) > > { > > int r; > > @@ -330,7 +330,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, > > > > old_dev = dd->dm_dev; > > > > - r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev, > > + r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev, non_exclusive, > > dd->dm_dev->mode | new_mode, &new_dev); > > if (r) > > return r; > > @@ -387,7 +387,8 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, > > if (!dd) > > return -ENOMEM; > > > > - if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) { > > + r = dm_get_table_device(t->md, dev, mode, ti->non_exclusive, &dd->dm_dev); > > + if (r) { > > kfree(dd); > > return r; > > } > > @@ -396,8 +397,9 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, > > list_add(&dd->list, &t->devices); > > goto out; > > > > - } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) { > > - r = upgrade_mode(dd, mode, t->md); > > + } else if ((dd->dm_dev->mode != (mode | dd->dm_dev->mode)) && > > + (dd->dm_dev->non_exclusive != ti->non_exclusive)) { > > + r = upgrade_mode(dd, mode, ti->non_exclusive, t->md); > > if (r) > > return r; > > } > > diff --git a/drivers/md/dm.c b/drivers/md/dm.c > > index 7bac564f3faa..3b871d98b7b6 100644 > > --- a/drivers/md/dm.c > > +++ b/drivers/md/dm.c > > @@ -28,6 +28,7 @@ > > #include <linux/refcount.h> > > #include <linux/part_stat.h> > > #include <linux/blk-crypto.h> > > +#include <linux/interval_tree_generic.h> > > > > #define DM_MSG_PREFIX "core" > > > > @@ -56,6 +57,8 @@ static struct workqueue_struct *deferred_remove_workqueue; > > atomic_t dm_global_event_nr = ATOMIC_INIT(0); > > DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq); > > > > +static DEFINE_MUTEX(interposer_mutex); /* synchronizing access to blk_interposer */ > > Why 
not dm_interposer_mutex as the name ? And the comment is not very useful: a > mutex is always for synchronizing :) Right. I'll do it. > > > + > > void dm_issue_global_event(void) > > { > > atomic_inc(&dm_global_event_nr); > > @@ -162,6 +165,26 @@ struct table_device { > > struct dm_dev dm_dev; > > }; > > > > +/* > > + * Device mapper`s interposer. > > + */ > > +struct dm_interposer { > > + struct blk_interposer blk_ip; > > + struct mapped_device *md; > > + > > + struct kref kref; > > + struct rw_semaphore ip_devs_lock; > > + struct rb_root_cached ip_devs_root; /* dm_interposed_dev tree */ > > +}; > > + > > +/* > > + * Interval tree for device mapper > > + */ > > +#define START(node) ((node)->start) > > +#define LAST(node) ((node)->last) > > +INTERVAL_TREE_DEFINE(struct dm_rb_range, node, sector_t, _subtree_last, > > + START, LAST,, dm_rb); > > + > > /* > > * Bio-based DM's mempools' reserved IOs set by the user. > > */ > > @@ -733,28 +756,340 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) > > rcu_read_unlock(); > > } > > > > +static void dm_submit_bio_interposer_fn(struct bio *bio) > > +{ > > + struct dm_interposer *ip; > > + unsigned int noio_flag = 0; > > + sector_t start; > > + sector_t last; > > + struct dm_rb_range *node; > > + > > + ip = container_of(bio->bi_disk->interposer, struct dm_interposer, blk_ip); > > + start = bio->bi_iter.bi_sector; > > + last = start + dm_sector_div_up(bio->bi_iter.bi_size, SECTOR_SIZE); > > + > > + noio_flag = memalloc_noio_save(); > > + down_read(&ip->ip_devs_lock); > > + node = dm_rb_iter_first(&ip->ip_devs_root, start, last); > > + while (node) { > > + struct dm_interposed_dev *ip_dev = > > + container_of(node, struct dm_interposed_dev, node); > > + > > + atomic64_inc(&ip_dev->ip_cnt); > > + ip_dev->dm_interpose_bio(ip_dev->context, node, bio); > > + > > + node = dm_rb_iter_next(node, start, last); > > + } > > + up_read(&ip->ip_devs_lock); > > + memalloc_noio_restore(noio_flag); > > +} > > + > 
> +static void free_interposer(struct kref *kref) > > +{ > > + struct dm_interposer *ip = container_of(kref, struct dm_interposer, kref); > > + > > + blk_interposer_detach(&ip->blk_ip, dm_submit_bio_interposer_fn); > > No queue freeze ? Yes. The queue should already be frozen. > > > + > > + kfree(ip); > > +} > > + > > +static struct dm_interposer *new_interposer(struct gendisk *disk) > > +{ > > + int ret = 0; > > + struct dm_interposer *ip; > > + > > + ip = kzalloc(sizeof(struct dm_interposer), GFP_NOIO); > > + if (!ip) > > + return ERR_PTR(-ENOMEM); > > + > > + kref_init(&ip->kref); > > + init_rwsem(&ip->ip_devs_lock); > > + ip->ip_devs_root = RB_ROOT_CACHED; > > + > > + ret = blk_interposer_attach(disk, &ip->blk_ip, dm_submit_bio_interposer_fn); > > No queue freeze ? Yes, again. > > > + if (ret) { > > + DMERR("Failed to attack blk_interposer"); > > + kref_put(&ip->kref, free_interposer); > > + return ERR_PTR(ret); > > + } > > + > > + return ip; > > +} > > + > > +static struct dm_interposer *get_interposer(struct gendisk *disk) > > +{ > > + struct dm_interposer *ip; > > + > > + if (!blk_has_interposer(disk)) > > + return NULL; > > + > > + if (disk->interposer->ip_submit_bio != dm_submit_bio_interposer_fn) { > > + DMERR("Disks interposer slot already occupied."); > > + return ERR_PTR(-EBUSY); > > This is weird... If there is an interposer, why not get a ref on that one. That > is what the function name suggests at least. A ref on it is taken just below in this function. But the name "get_dm_interposer" would be better. 
> > > + } > > + > > + ip = container_of(disk->interposer, struct dm_interposer, blk_ip); > > + > > + kref_get(&ip->kref); > > + return ip; > > +} > > + > > +struct dm_interposed_dev *dm_interposer_new_dev(struct gendisk *disk, sector_t ofs, sector_t len, > > + void *context, dm_interpose_bio_t dm_interpose_bio) > > +{ > > + sector_t start = ofs; > > + sector_t last = ofs + len - 1; > > + struct dm_interposed_dev *ip_dev = NULL; > > + > > + /* Allocate new ip_dev */ > > + ip_dev = kzalloc(sizeof(struct dm_interposed_dev), GFP_KERNEL); > > + if (!ip_dev) > > + return NULL; > > + > > + ip_dev->disk = disk; > > + ip_dev->node.start = start; > > + ip_dev->node.last = last; > > + > > + ip_dev->context = context; > > + ip_dev->dm_interpose_bio = dm_interpose_bio; > > + > > + atomic64_set(&ip_dev->ip_cnt, 0); > > + > > + return ip_dev; > > +} > > + > > +void dm_interposer_free_dev(struct dm_interposed_dev *ip_dev) > > +{ > > + kfree(ip_dev); > > +} > > Make this inline may be ? Yes. Or even remove this function. > > > + > > +static inline void dm_disk_freeze(struct gendisk *disk) > > +{ > > + blk_mq_freeze_queue(disk->queue); > > + blk_mq_quiesce_queue(disk->queue); > > I think you can replace this with blk_mq_freeze_queue_wait(). I think no. blk_freeze_queue_start() also is required. 
> > > +} > > + > > +static inline void dm_disk_unfreeze(struct gendisk *disk) > > +{ > > + blk_mq_unquiesce_queue(disk->queue); > > + blk_mq_unfreeze_queue(disk->queue); > > +} > > + > > +int dm_interposer_attach_dev(struct dm_interposed_dev *ip_dev) > > +{ > > + int ret = 0; > > + struct dm_interposer *ip = NULL; > > + unsigned int noio_flag = 0; > > + > > + if (!ip_dev) > > + return -EINVAL; > > + > > + dm_disk_freeze(ip_dev->disk); > > + mutex_lock(&interposer_mutex); > > + noio_flag = memalloc_noio_save(); > > + > > + ip = get_interposer(ip_dev->disk); > > + if (ip == NULL) > > + ip = new_interposer(ip_dev->disk); > > + if (IS_ERR(ip)) { > > + ret = PTR_ERR(ip); > > + goto out; > > + } > > + > > + /* Attach dm_interposed_dev to dm_interposer */ > > + down_write(&ip->ip_devs_lock); > > + do { > > + struct dm_rb_range *node; > > + > > + /* checking that ip_dev already exists for this region */ > > + node = dm_rb_iter_first(&ip->ip_devs_root, ip_dev->node.start, ip_dev->node.last); > > + if (node) { > > + DMERR("Disk part form [%llu] to [%llu] already have interposer", > > + node->start, node->last); > > + > > + ret = -EBUSY; > > + break; > > + } > > + > > + /* insert ip_dev to ip tree */ > > + dm_rb_insert(&ip_dev->node, &ip->ip_devs_root); > > + /* increment ip reference counter */ > > + kref_get(&ip->kref); > > + } while (false); > > + up_write(&ip->ip_devs_lock); > > + > > + kref_put(&ip->kref, free_interposer); > > + > > +out: > > + memalloc_noio_restore(noio_flag); > > + mutex_unlock(&interposer_mutex); > > + dm_disk_unfreeze(ip_dev->disk); > > + > > + return ret; > > +} > > + > > +int dm_interposer_detach_dev(struct dm_interposed_dev *ip_dev) > > +{ > > + int ret = 0; > > + struct dm_interposer *ip = NULL; > > + unsigned int noio_flag = 0; > > + > > + if (!ip_dev) > > + return -EINVAL; > > + > > + dm_disk_freeze(ip_dev->disk); > > + mutex_lock(&interposer_mutex); > > + noio_flag = memalloc_noio_save(); > > + > > + ip = get_interposer(ip_dev->disk); > > + if 
(IS_ERR(ip)) { > > + ret = PTR_ERR(ip); > > + DMERR("Interposer not found"); > > + goto out; > > + } > > + if (unlikely(ip == NULL)) { > > + ret = -ENXIO; > > + DMERR("Interposer not found"); > > + goto out; > > + } > > + > > + down_write(&ip->ip_devs_lock); > > + do { > > + dm_rb_remove(&ip_dev->node, &ip->ip_devs_root); > > + /* the reference counter here cannot be zero */ > > + kref_put(&ip->kref, free_interposer); > > + > > + } while (false); > > + up_write(&ip->ip_devs_lock); > > + > > + /* detach and free interposer if it`s not needed */ > > s/`/'/ Thanks. It's my problem. > > > + kref_put(&ip->kref, free_interposer); > > +out: > > + memalloc_noio_restore(noio_flag); > > + mutex_unlock(&interposer_mutex); > > + dm_disk_unfreeze(ip_dev->disk); > > + > > + return ret; > > +} > > + > > +static void dm_remap_fn(void *context, struct dm_rb_range *node, struct bio *bio) > > +{ > > + struct mapped_device *md = context; > > + > > + /* Set acceptor device. */ > > + bio->bi_disk = md->disk; > > + > > + /* Remap disks offset */ > > + bio->bi_iter.bi_sector -= node->start; > > + > > + /* > > + * bio should be resubmitted. > > + * We can just add bio to bio_list of the current process. > > + * current->bio_list must be initialized when this function is called. > > + * If call submit_bio_noacct(), the bio will be checked twice. 
> > + */ > > + BUG_ON(!current->bio_list); > > + bio_list_add(&current->bio_list[0], bio); > > +} > > + > > +int dm_remap_install(struct mapped_device *md, const char *donor_device_name) > > +{ > > + int ret = 0; > > + struct block_device *donor_bdev; > > + fmode_t mode = FMODE_READ | FMODE_WRITE; > > + > > + DMDEBUG("Dm remap install for mapped device %s and donor device %s", > > + md->name, donor_device_name); > > + > > + donor_bdev = blkdev_get_by_path(donor_device_name, mode, "device-mapper remap"); > > + if (IS_ERR(donor_bdev)) { > > + DMERR("Cannot open device [%s]", donor_device_name); > > + return PTR_ERR(donor_bdev); > > + } > > + > > + do { > > + sector_t ofs = get_start_sect(donor_bdev); > > + sector_t len = bdev_nr_sectors(donor_bdev); > > + > > + md->ip_dev = dm_interposer_new_dev(donor_bdev->bd_disk, ofs, len, md, dm_remap_fn); > > + if (!md->ip_dev) { > > + ret = -ENOMEM; > > + break; > > + } > > + > > + DMDEBUG("New interposed device 0x%p", md->ip_dev); > > + ret = dm_interposer_attach_dev(md->ip_dev); > > + if (ret) { > > + dm_interposer_free_dev(md->ip_dev); > > + > > + md->ip_dev = NULL; > > + DMERR("Failed to attach dm interposer"); > > + break; > > + } > > + > > + DMDEBUG("Attached successfully."); > > + } while (false); > > + > > + blkdev_put(donor_bdev, mode); > > + > > + return ret; > > +} > > + > > +int dm_remap_uninstall(struct mapped_device *md) > > +{ > > + int ret = 0; > > + > > + DMDEBUG("Dm remap uninstall for mapped device %s ip_dev=0x%p", md->name, md->ip_dev); > > + > > + if (!md->ip_dev) { > > + DMERR("Cannot detach dm interposer"); > > + return -EINVAL; > > + } > > + > > + ret = dm_interposer_detach_dev(md->ip_dev); > > + if (ret) { > > + DMERR("Failed to detach dm interposer"); > > + return ret; > > + } > > + > > + DMDEBUG("Detached successfully. 
%llu bios was interposed", > > + atomic64_read(&md->ip_dev->ip_cnt)); > > + dm_interposer_free_dev(md->ip_dev); > > + md->ip_dev = NULL; > > + > > + return 0; > > +} > > + > > static char *_dm_claim_ptr = "I belong to device-mapper"; > > > > /* > > * Open a table device so we can use it as a map destination. > > */ > > static int open_table_device(struct table_device *td, dev_t dev, > > - struct mapped_device *md) > > + struct mapped_device *md, bool non_exclusive) > > { > > struct block_device *bdev; > > - > > - int r; > > + int ret; > > > > BUG_ON(td->dm_dev.bdev); > > > > - bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr); > > - if (IS_ERR(bdev)) > > - return PTR_ERR(bdev); > > + if (non_exclusive) > > + bdev = blkdev_get_by_dev(dev, td->dm_dev.mode, NULL); > > + else > > + bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr); > > > > - r = bd_link_disk_holder(bdev, dm_disk(md)); > > - if (r) { > > - blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL); > > - return r; > > + if (IS_ERR(bdev)) { > > + ret = PTR_ERR(bdev); > > + if (ret != -EBUSY) > > + return ret; > > + } > > + > > + if (!non_exclusive) { > > + ret = bd_link_disk_holder(bdev, dm_disk(md)); > > + if (ret) { > > + blkdev_put(bdev, td->dm_dev.mode); > > + return ret; > > + } > > } > > > > td->dm_dev.bdev = bdev; > > @@ -770,33 +1105,38 @@ static void close_table_device(struct table_device *td, struct mapped_device *md > > if (!td->dm_dev.bdev) > > return; > > > > - bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); > > - blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); > > + if (td->dm_dev.mode & FMODE_EXCL) > > + bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); > > + > > + blkdev_put(td->dm_dev.bdev, td->dm_dev.mode); > > + > > put_dax(td->dm_dev.dax_dev); > > td->dm_dev.bdev = NULL; > > td->dm_dev.dax_dev = NULL; > > } > > > > static struct table_device *find_table_device(struct list_head *l, dev_t dev, > > - fmode_t mode) > > + fmode_t 
mode, bool non_exclusive) > > { > > struct table_device *td; > > > > list_for_each_entry(td, l, list) > > - if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode) > > + if (td->dm_dev.bdev->bd_dev == dev && > > + td->dm_dev.mode == mode && > > + td->dm_dev.non_exclusive == non_exclusive) > > return td; > > > > return NULL; > > } > > > > -int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, > > +int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, bool non_exclusive, > > struct dm_dev **result) > > { > > int r; > > struct table_device *td; > > > > mutex_lock(&md->table_devices_lock); > > - td = find_table_device(&md->table_devices, dev, mode); > > + td = find_table_device(&md->table_devices, dev, mode, non_exclusive); > > if (!td) { > > td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id); > > if (!td) { > > @@ -807,7 +1147,8 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, > > td->dm_dev.mode = mode; > > td->dm_dev.bdev = NULL; > > > > - if ((r = open_table_device(td, dev, md))) { > > + r = open_table_device(td, dev, md, non_exclusive); > > + if (r) { > > mutex_unlock(&md->table_devices_lock); > > kfree(td); > > return r; > > @@ -2182,6 +2523,14 @@ static void __dm_destroy(struct mapped_device *md, bool wait) > > > > might_sleep(); > > > > + if (md->ip_dev) { > > + if (dm_interposer_detach_dev(md->ip_dev)) > > + DMERR("Failed to detach dm interposer"); > > + > > + dm_interposer_free_dev(md->ip_dev); > > + md->ip_dev = NULL; > > + } > > + > > spin_lock(&_minor_lock); > > idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); > > set_bit(DMF_FREEING, &md->flags); > > diff --git a/drivers/md/dm.h b/drivers/md/dm.h > > index fffe1e289c53..7bf20fb2de74 100644 > > --- a/drivers/md/dm.h > > +++ b/drivers/md/dm.h > > @@ -179,7 +179,7 @@ int dm_open_count(struct mapped_device *md); > > int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool 
only_deferred); > > int dm_cancel_deferred_remove(struct mapped_device *md); > > int dm_request_based(struct mapped_device *md); > > -int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, > > +int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, bool non_exclusive, > > struct dm_dev **result); > > void dm_put_table_device(struct mapped_device *md, struct dm_dev *d); > > > > diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h > > index 61a66fb8ebb3..70002363bfc0 100644 > > --- a/include/linux/device-mapper.h > > +++ b/include/linux/device-mapper.h > > @@ -150,6 +150,7 @@ struct dm_dev { > > struct block_device *bdev; > > struct dax_device *dax_dev; > > fmode_t mode; > > + bool non_exclusive; > > char name[16]; > > }; > > > > @@ -325,6 +326,12 @@ struct dm_target { > > * whether or not its underlying devices have support. > > */ > > bool discards_supported:1; > > + > > + /* > > + * Set if this target needs to open device without FMODE_EXCL > > + * mode. 
> > + */ > > + bool non_exclusive:1; > > }; > > > > void *dm_per_bio_data(struct bio *bio, size_t data_size); > > diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h > > index 4933b6b67b85..08d7dbff80f4 100644 > > --- a/include/uapi/linux/dm-ioctl.h > > +++ b/include/uapi/linux/dm-ioctl.h > > @@ -214,6 +214,15 @@ struct dm_target_msg { > > char message[0]; > > }; > > > > +enum { > > + REMAP_START_CMD = 1, > > + REMAP_FINISH_CMD, > > +}; > > + > > +struct dm_remap_param { > > + uint8_t cmd; > > + uint8_t params[0]; > > +}; > > /* > > * If you change this make sure you make the corresponding change > > * to dm-ioctl.c:lookup_ioctl() > > @@ -244,6 +253,7 @@ enum { > > DM_DEV_SET_GEOMETRY_CMD, > > DM_DEV_ARM_POLL_CMD, > > DM_GET_TARGET_VERSION_CMD, > > + DM_DEV_REMAP_CMD > > }; > > > > #define DM_IOCTL 0xfd > > @@ -259,6 +269,7 @@ enum { > > #define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) > > #define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl) > > #define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_ARM_POLL_CMD, struct dm_ioctl) > > +#define DM_DEV_REMAP _IOWR(DM_IOCTL, DM_DEV_REMAP_CMD, struct dm_ioctl) > > > > #define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl) > > #define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl) > > @@ -272,9 +283,9 @@ enum { > > #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) > > > > #define DM_VERSION_MAJOR 4 > > -#define DM_VERSION_MINOR 43 > > +#define DM_VERSION_MINOR 44 > > #define DM_VERSION_PATCHLEVEL 0 > > -#define DM_VERSION_EXTRA "-ioctl (2020-10-01)" > > +#define DM_VERSION_EXTRA "-ioctl (2020-12-25)" > > > > /* Status bits */ > > #define DM_READONLY_FLAG (1 << 0) /* In/Out */ > > > > > -- > Damien Le Moal > Western Digital Research > -- Sergei Shtepa Veeam Software developer.