On Tue, Mar 16, 2021 at 03:23:14PM +0000, Christoph Hellwig wrote: > On Mon, Mar 15, 2021 at 04:25:09PM +0300, Sergei Shtepa wrote: > > The 03/14/2021 12:30, Christoph Hellwig wrote: > > > On Fri, Mar 12, 2021 at 06:44:55PM +0300, Sergei Shtepa wrote: > > > > DM_INTERPOSED_FLAG allows creating DM targets "on the fly". > > > > The underlying block device is opened without the FMODE_EXCL flag. > > > > The DM target receives bios from the original device via > > > > bdev_interposer. > > > > > > This is more of a philosophical comment, but the idea of just letting the > > > interposer reopen the device by itself seems like a bad idea. I think > > > that is probably better hidden in the block layer interposer attachment > > > function, which could do the extra blkdev_get_by_dev for the caller. > > > > I suppose this cannot be implemented, since we need to change the behavior > > for block devices that have already been opened. > > That's not what I mean. Take a look at the patch relative to your > series and let me know what you think. The new blkdev_interposer_attach > now takes a dev_t + mode for the original device and opens it on > behalf of the interposer. It also moves the queue freezing into the > API, which should address the concerns about the helper, and adds a few > more sanity checks. 
And now actually with the diff: diff --git a/block/blk-mq.c b/block/blk-mq.c index 2f188a865024ac..d4d7c1caa43966 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -161,19 +161,6 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); -bool blk_mq_is_queue_frozen(struct request_queue *q) -{ - bool frozen; - - mutex_lock(&q->mq_freeze_lock); - frozen = percpu_ref_is_dying(&q->q_usage_counter) && - percpu_ref_is_zero(&q->q_usage_counter); - mutex_unlock(&q->mq_freeze_lock); - - return frozen; -} -EXPORT_SYMBOL_GPL(blk_mq_is_queue_frozen); - /* * Guarantee no request is in use, so we can change any data structure of * the queue afterward. diff --git a/block/genhd.c b/block/genhd.c index fa406b972371ae..64d6338b08cc87 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1944,51 +1944,70 @@ static void disk_release_events(struct gendisk *disk) kfree(disk->ev); } -int bdev_interposer_attach(struct block_device *original, +struct block_device *blkdev_interposer_attach(dev_t dev, fmode_t mode, struct block_device *interposer) { + struct block_device *bdev; int ret = 0; - if (WARN_ON(((!original) || (!interposer)))) - return -EINVAL; - /* - * interposer should be simple, no a multi-queue device - */ - if (!interposer->bd_disk->fops->submit_bio) - return -EINVAL; + if (WARN_ON_ONCE(!bdev_is_partition(interposer))) + return ERR_PTR(-EINVAL); + if (WARN_ON_ONCE(!queue_is_mq(interposer->bd_disk->queue))) + return ERR_PTR(-EINVAL); - if (WARN_ON(!blk_mq_is_queue_frozen(original->bd_disk->queue))) - return -EPERM; + bdev = blkdev_get_by_dev(dev, mode, NULL); + if (IS_ERR(bdev)) + return bdev; - mutex_lock(&bdev_interposer_attach_lock); + ret = -EINVAL; + if (WARN_ON_ONCE(bdev_nr_sectors(bdev) != bdev_nr_sectors(interposer))) + goto out; - if (bdev_has_interposer(original)) - ret = -EBUSY; - else { - original->bd_interposer = bdgrab(interposer); - if (!original->bd_interposer) - ret = -ENODEV; - } + 
blk_mq_freeze_queue(bdev->bd_disk->queue); + blk_mq_quiesce_queue(bdev->bd_disk->queue); + mutex_lock(&bdev_interposer_attach_lock); + ret = -EBUSY; + if (bdev_has_interposer(bdev)) + goto out_unlock; + ret = -ENODEV; + bdev->bd_interposer = bdgrab(interposer); + if (!bdev->bd_interposer) + goto out_unlock; + ret = 0; +out_unlock: mutex_unlock(&bdev_interposer_attach_lock); - return ret; + blk_mq_unquiesce_queue(bdev->bd_disk->queue); + blk_mq_unfreeze_queue(bdev->bd_disk->queue); +out: + if (ret) { + blkdev_put(bdev, mode); + bdev = ERR_PTR(ret); + } + + return bdev; } -EXPORT_SYMBOL_GPL(bdev_interposer_attach); +EXPORT_SYMBOL_GPL(blkdev_interposer_attach); -void bdev_interposer_detach(struct block_device *original) +void blkdev_interposer_detach(struct block_device *bdev, fmode_t mode) { - if (WARN_ON(!original)) - return; + struct block_device *interposer; - if (WARN_ON(!blk_mq_is_queue_frozen(original->bd_disk->queue))) + if (WARN_ON_ONCE(!bdev_has_interposer(bdev))) return; + blk_mq_freeze_queue(bdev->bd_disk->queue); + blk_mq_quiesce_queue(bdev->bd_disk->queue); + mutex_lock(&bdev_interposer_attach_lock); - if (bdev_has_interposer(original)) { - bdput(original->bd_interposer); - original->bd_interposer = NULL; - } + interposer = bdev->bd_interposer; + bdev->bd_interposer = NULL; mutex_unlock(&bdev_interposer_attach_lock); + + blk_mq_unquiesce_queue(bdev->bd_disk->queue); + blk_mq_unfreeze_queue(bdev->bd_disk->queue); + + blkdev_put(interposer, mode); } -EXPORT_SYMBOL_GPL(bdev_interposer_detach); +EXPORT_SYMBOL_GPL(blkdev_interposer_detach); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f6e2eb3f894940..fde57bb5105025 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -359,18 +359,6 @@ dev_t dm_get_dev_t(const char *path) } EXPORT_SYMBOL_GPL(dm_get_dev_t); -static inline void dm_disk_freeze(struct gendisk *disk) -{ - blk_mq_freeze_queue(disk->queue); - blk_mq_quiesce_queue(disk->queue); -} - -static inline void 
dm_disk_unfreeze(struct gendisk *disk) -{ - blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); -} - /* * Add a device to the list, or just increment the usage count if * it's already present. @@ -418,29 +406,11 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, refcount_inc(&dd->count); } - if (t->md->is_interposed) { - struct block_device *original = dd->dm_dev->bdev; - struct block_device *interposer = t->md->disk->part0; - - if ((ti->begin != 0) || (ti->len < bdev_nr_sectors(original))) { - dm_put_device(ti, dd->dm_dev); - DMERR("The interposer device should not be less than the original."); - return -EINVAL; - } - - /* - * Attach mapped interposer device to original. - * It is quite convenient that device mapper creates - * one disk for one block device. - */ - dm_disk_freeze(original->bd_disk); - r = bdev_interposer_attach(original, interposer); - dm_disk_unfreeze(original->bd_disk); - if (r) { - dm_put_device(ti, dd->dm_dev); - DMERR("Failed to attach dm interposer."); - return r; - } + if (t->md->is_interposed && + (ti->begin != 0 || ti->len < bdev_nr_sectors(dd->dm_dev->bdev))) { + dm_put_device(ti, dd->dm_dev); + DMERR("The interposer device should not be less than the original."); + return -EINVAL; } *result = dd->dm_dev; @@ -496,11 +466,6 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) dm_device_name(md), d->name); return; } - if (md->is_interposed) { - dm_disk_freeze(d->bdev->bd_disk); - bdev_interposer_detach(d->bdev); - dm_disk_unfreeze(d->bdev->bd_disk); - } if (refcount_dec_and_test(&dd->count)) { dm_put_table_device(md, d); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c488e9554aa000..532ce17064b1c1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -763,10 +763,12 @@ static int open_table_device(struct table_device *td, dev_t dev, BUG_ON(td->dm_dev.bdev); if (md->is_interposed) { - - bdev = blkdev_get_by_dev(dev, td->dm_dev.mode, NULL); - if (IS_ERR(bdev)) + bdev = 
blkdev_interposer_attach(dev, td->dm_dev.mode, + md->disk->part0); + if (IS_ERR(bdev)) { + DMERR("Failed to attach dm interposer."); return PTR_ERR(bdev); + } } else { bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr); if (IS_ERR(bdev)) @@ -793,9 +795,9 @@ static void close_table_device(struct table_device *td, struct mapped_device *md if (!td->dm_dev.bdev) return; - if (td->dm_dev.is_interposed) - blkdev_put(td->dm_dev.bdev, td->dm_dev.mode); - else { + if (td->dm_dev.is_interposed) { + blkdev_interposer_detach(td->dm_dev.bdev, td->dm_dev.mode); + } else { bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 6f01971abf7b9b..2c473c9b899089 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -533,7 +533,6 @@ void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -bool blk_mq_is_queue_frozen(struct request_queue *q); int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90f62b4197da91..fbc510162c3827 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2036,8 +2036,8 @@ static inline bool bdev_has_interposer(struct block_device *bdev) return (bdev->bd_interposer != NULL); }; -int bdev_interposer_attach(struct block_device *original, +struct block_device *blkdev_interposer_attach(dev_t dev, fmode_t mode, struct block_device *interposer); -void bdev_interposer_detach(struct block_device *original); +void blkdev_interposer_detach(struct block_device *bdev, fmode_t mode); #endif /* _LINUX_BLKDEV_H */