There can't be FS IO in disk_release(), so move blk_exit_queue() there. We still need to freeze queue here since request is freed after bio is ended, meantime passthrough request relies on scheduler tags too, but either q->q_usage_counter is in atomic mode, such as scsi, or it has been drained already, such as most of other drivers, so the added queue freeze is pretty fast, and RCU grace period isn't supposed to be involved. disk can be released before or after queue is cleaned up, and we have to free scheduler request pool before blk_cleanup_queue returns, meantime the request pool has to be freed before exiting the scheduler. So move scheduler pool freeing into both the two functions. Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-mq.c | 3 +++ block/blk-sysfs.c | 16 ---------------- block/genhd.c | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index d51b0aa2e4e4..72ae9955cf27 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3101,6 +3101,9 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, struct blk_mq_tags *drv_tags; struct page *page; + if (list_empty(&tags->page_list)) + return; + if (blk_mq_is_shared_tags(set->flags)) drv_tags = set->shared_tags; else diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5f14fd333182..e0f29b56e8e2 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -739,20 +739,6 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head) kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q); } -/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */ -static void blk_exit_queue(struct request_queue *q) -{ - /* - * Since the I/O scheduler exit code may access cgroup information, - * perform I/O scheduler exit before disassociating from the block - * cgroup controller. - */ - if (q->elevator) { - ioc_clear_queue(q); - elevator_exit(q); - } -} - /** * blk_release_queue - releases all allocated resources of the request_queue * @kobj: pointer to a kobject, whose container is a request_queue @@ -786,8 +772,6 @@ static void blk_release_queue(struct kobject *kobj) blk_stat_remove_callback(q, q->poll_cb); blk_stat_free_callback(q->poll_cb); - blk_exit_queue(q); - blk_free_queue_stats(q->stats); kfree(q->poll_stat); diff --git a/block/genhd.c b/block/genhd.c index a86027619683..f1aef5d13afa 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1085,11 +1085,48 @@ static const struct attribute_group *disk_attr_groups[] = { NULL }; +/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */ +static void blk_exit_queue(struct request_queue *q) +{ + /* + * Since the I/O scheduler exit code may access cgroup information, + * perform I/O scheduler exit before disassociating from the block + * cgroup controller. + */ + if (q->elevator) { + ioc_clear_queue(q); + + mutex_lock(&q->sysfs_lock); + blk_mq_sched_free_rqs(q); + elevator_exit(q); + mutex_unlock(&q->sysfs_lock); + } +} + static void disk_release_queue(struct gendisk *disk) { struct request_queue *q = disk->queue; - blk_mq_cancel_work_sync(q); + if (queue_is_mq(q)) { + blk_mq_cancel_work_sync(q); + + /* + * All FS bios have been done, however FS request may not + * be freed yet since we end bio before freeing request, + * meantime passthrough request replies on scheduler tags, + * so queue needs to be frozen here. + * + * Most of drivers release disk after blk_cleanup_queue() + * returns, and SCSI may release disk before calling + * blk_cleanup_queue, but request queue has been in atomic + * mode already, see scsi_disk_release(), so the following + * queue freeze is pretty fast, and RCU grace period isn't + * supposed to be involved. + */ + blk_mq_freeze_queue(q); + blk_exit_queue(q); + __blk_mq_unfreeze_queue(q, true); + } /* * Remove all references to @q from the block cgroup controller before -- 2.31.1