Defer allocating the gendisk and request_queue until UBLK_CMD_START_DEV is called. This avoids funky life times where a disk is allocated and then can be added and removed multiple times, which has never been supported by the block layer. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- drivers/block/ublk_drv.c | 127 ++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 74 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 748247c0435be..81bfdda0f1af4 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -112,7 +112,6 @@ struct ublk_queue { struct ublk_device { struct gendisk *ub_disk; - struct request_queue *ub_queue; char *__queues; @@ -126,6 +125,7 @@ struct ublk_device { struct device cdev_dev; #define UB_STATE_OPEN (1 << 0) +#define UB_STATE_USED (1 << 1) unsigned long state; int ub_number; @@ -156,8 +156,6 @@ static DEFINE_MUTEX(ublk_ctl_mutex); static struct miscdevice ublk_misc; -static struct lock_class_key ublk_bio_compl_lkclass; - static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) { if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) && @@ -209,8 +207,17 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) PAGE_SIZE); } +static void ublk_free_disk(struct gendisk *disk) +{ + struct ublk_device *ub = disk->private_data; + + clear_bit(UB_STATE_USED, &ub->state); + put_device(&ub->cdev_dev); +} + static const struct block_device_operations ub_fops = { .owner = THIS_MODULE, + .free_disk = ublk_free_disk, }; #define UBLK_MAX_PIN_PAGES 32 @@ -801,13 +808,15 @@ static void ublk_cancel_dev(struct ublk_device *ub) static void ublk_stop_dev(struct ublk_device *ub) { mutex_lock(&ub->mutex); - if (!disk_live(ub->ub_disk)) + if (ub->dev_info.state != UBLK_S_DEV_LIVE) goto unlock; del_gendisk(ub->ub_disk); ub->dev_info.state = UBLK_S_DEV_DEAD; ub->dev_info.ublksrv_pid = -1; ublk_cancel_dev(ub); + put_disk(ub->ub_disk); + ub->ub_disk = NULL; unlock: mutex_unlock(&ub->mutex); cancel_delayed_work_sync(&ub->monitor_work); @@ -1033,12 +1042,7 @@ static void ublk_cdev_rel(struct device *dev) { struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev); - blk_mq_destroy_queue(ub->ub_queue); - - put_disk(ub->ub_disk); - blk_mq_free_tag_set(&ub->tag_set); - ublk_deinit_queues(ub); __ublk_destroy_dev(ub); @@ -1078,31 +1082,24 @@ static void ublk_stop_work_fn(struct work_struct *work) ublk_stop_dev(ub); } -static void ublk_update_capacity(struct ublk_device *ub) +/* align maximum I/O size to PAGE_SIZE */ +static void ublk_align_max_io_size(struct ublk_device *ub) { - unsigned int max_rq_bytes; + unsigned int max_rq_bytes = ub->dev_info.rq_max_blocks << ub->bs_shift; - /* make max request buffer size aligned with PAGE_SIZE */ - max_rq_bytes = round_down(ub->dev_info.rq_max_blocks << - ub->bs_shift, PAGE_SIZE); - ub->dev_info.rq_max_blocks = max_rq_bytes >> ub->bs_shift; - - set_capacity(ub->ub_disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9)); + ub->dev_info.rq_max_blocks = + round_down(max_rq_bytes, PAGE_SIZE) >> ub->bs_shift; } -/* add disk & cdev, cleanup everything in case of failure */ +/* add tag_set & cdev, cleanup everything in case of failure */ static int ublk_add_dev(struct ublk_device *ub) { - struct gendisk *disk; int err = -ENOMEM; - int bsize; /* We are not ready to support zero copy */ ub->dev_info.flags[0] &= ~UBLK_F_SUPPORT_ZERO_COPY; - bsize = ub->dev_info.block_size; - ub->bs_shift = ilog2(bsize); - + ub->bs_shift = ilog2(ub->dev_info.block_size); ub->dev_info.nr_hw_queues = min_t(unsigned int, ub->dev_info.nr_hw_queues, nr_cpu_ids); @@ -1119,59 +1116,16 @@ static int ublk_add_dev(struct ublk_device *ub) ub->tag_set.cmd_size = sizeof(struct ublk_rq_data); ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ub->tag_set.driver_data = ub; - err = blk_mq_alloc_tag_set(&ub->tag_set); if (err) goto out_deinit_queues; - ub->ub_queue = blk_mq_init_queue(&ub->tag_set); - if (IS_ERR(ub->ub_queue)) { - err = PTR_ERR(ub->ub_queue); - goto out_cleanup_tags; - } - ub->ub_queue->queuedata = ub; - - disk = ub->ub_disk = blk_mq_alloc_disk_for_queue(ub->ub_queue, - &ublk_bio_compl_lkclass); - if (!disk) { - err = -ENOMEM; - goto out_free_request_queue; - } - - blk_queue_logical_block_size(ub->ub_queue, bsize); - blk_queue_physical_block_size(ub->ub_queue, bsize); - blk_queue_io_min(ub->ub_queue, bsize); - - blk_queue_max_hw_sectors(ub->ub_queue, ub->dev_info.rq_max_blocks << - (ub->bs_shift - 9)); - - ub->ub_queue->limits.discard_granularity = PAGE_SIZE; - - blk_queue_max_discard_sectors(ub->ub_queue, UINT_MAX >> 9); - blk_queue_max_write_zeroes_sectors(ub->ub_queue, UINT_MAX >> 9); - - ublk_update_capacity(ub); - - disk->fops = &ub_fops; - disk->private_data = ub; - disk->queue = ub->ub_queue; - sprintf(disk->disk_name, "ublkb%d", ub->ub_number); - + ublk_align_max_io_size(ub); mutex_init(&ub->mutex); /* add char dev so that ublksrv daemon can be setup */ - err = ublk_add_chdev(ub); - if (err) - return err; - - /* don't expose disk now until we got start command from cdev */ + return ublk_add_chdev(ub); - return 0; - -out_free_request_queue: - blk_mq_destroy_queue(ub->ub_queue); -out_cleanup_tags: - blk_mq_free_tag_set(&ub->tag_set); out_deinit_queues: ublk_deinit_queues(ub); out_destroy_dev: @@ -1209,6 +1163,7 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) int ublksrv_pid = (int)header->data[0]; unsigned long dev_blocks = header->data[1]; struct ublk_device *ub; + struct gendisk *disk; int ret = -EINVAL; if (ublksrv_pid <= 0) @@ -1223,21 +1178,45 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD); mutex_lock(&ub->mutex); - if (disk_live(ub->ub_disk)) { + if (ub->dev_info.state == UBLK_S_DEV_LIVE || + test_bit(UB_STATE_USED, &ub->state)) { ret = -EEXIST; goto out_unlock; } /* We may get disk size updated */ - if (dev_blocks) { + if (dev_blocks) ub->dev_info.dev_blocks = dev_blocks; - ublk_update_capacity(ub); + + disk = blk_mq_alloc_disk(&ub->tag_set, ub); + if (IS_ERR(disk)) { + ret = PTR_ERR(disk); + goto out_unlock; } + sprintf(disk->disk_name, "ublkb%d", ub->ub_number); + disk->fops = &ub_fops; + disk->private_data = ub; + + blk_queue_logical_block_size(disk->queue, ub->dev_info.block_size); + blk_queue_physical_block_size(disk->queue, ub->dev_info.block_size); + blk_queue_io_min(disk->queue, ub->dev_info.block_size); + blk_queue_max_hw_sectors(disk->queue, + ub->dev_info.rq_max_blocks << (ub->bs_shift - 9)); + disk->queue->limits.discard_granularity = PAGE_SIZE; + blk_queue_max_discard_sectors(disk->queue, UINT_MAX >> 9); + blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX >> 9); + + set_capacity(disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9)); + ub->dev_info.ublksrv_pid = ublksrv_pid; - ret = add_disk(ub->ub_disk); - if (ret) + ub->ub_disk = disk; + get_device(&ub->cdev_dev); + ret = add_disk(disk); + if (ret) { + put_disk(disk); goto out_unlock; - + } + set_bit(UB_STATE_USED, &ub->state); ub->dev_info.state = UBLK_S_DEV_LIVE; out_unlock: mutex_unlock(&ub->mutex); -- 2.30.2