Currently, the getting and putting of io scheduler module references are not paired. The root cause is that elevator_alloc relies on its caller to get the references to io scheduler modules instead of getting the references by itself, while the corresponding elevator_release does put the references to io scheduler modules back on its own. This results in some weird code containing bugs: 1. Both elevator_switch_mq and elevator_init_mq call blk_mq_init_sched, but elevator_init_mq calls elevator_put on failure while elevator_switch_mq does not. These inconsistent behaviors may cause negative refcnt or ghost refcnt due to the position where the failure happens in blk_mq_init_sched. 2. blk_mq_elv_switch_none gets references to the io scheduler modules to prevent them from being removed. But blk_mq_elv_switch_back does not put the references back. This is confusing. To address the problem, firstly, we make elevator_alloc to get its own references to io scheduler modules. These references will be put back by elevator_release later. Then, we can just pair each elevator_get with an elevator_put. The bugs and the patch can be validated with tools here: https://github.com/nickyc975/linux_elv_refcnt_bug.git Signed-off-by: Jinlong Chen <nickyc975@xxxxxxxxxx> --- block/blk-mq.c | 6 ++++++ block/elevator.c | 29 ++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 8070b6c10e8d..2adfd52786dc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4595,6 +4595,12 @@ static void blk_mq_elv_switch_back(struct list_head *head, mutex_lock(&q->sysfs_lock); elevator_switch(q, t); + /** + * We got a reference of the io scheduler module in blk_mq_elv_switch_none + * to prevent the module from being removed. We need to put that reference + * back once we are done. + */ + module_put(t->elevator_owner); mutex_unlock(&q->sysfs_lock); } diff --git a/block/elevator.c b/block/elevator.c index bd71f0fc4e4b..aaafd415f922 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -166,9 +166,12 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, { struct elevator_queue *eq; + if (!try_module_get(e->elevator_owner)) + goto err_out; + eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node); if (unlikely(!eq)) - return NULL; + goto err_put_elevator; eq->type = e; kobject_init(&eq->kobj, &elv_ktype); @@ -176,6 +179,11 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, hash_init(eq->hash); return eq; + +err_put_elevator: + elevator_put(e); +err_out: + return NULL; } EXPORT_SYMBOL(elevator_alloc); @@ -713,8 +721,9 @@ void elevator_init_mq(struct request_queue *q) if (err) { pr_warn("\"%s\" elevator initialization failed, " "falling back to \"none\"\n", e->elevator_name); - elevator_put(e); } + + elevator_put(e); } /* @@ -747,6 +756,7 @@ static int __elevator_change(struct request_queue *q, const char *name) { char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; + int ret = 0; /* Make sure queue is not in the middle of being removed */ if (!blk_queue_registered(q)) @@ -762,17 +772,18 @@ static int __elevator_change(struct request_queue *q, const char *name) } strlcpy(elevator_name, name, sizeof(elevator_name)); - e = elevator_get(q, strstrip(elevator_name), true); - if (!e) - return -EINVAL; - if (q->elevator && - elevator_match(q->elevator->type, elevator_name, 0)) { - elevator_put(e); + elevator_match(q->elevator->type, strstrip(elevator_name), 0)) { return 0; } - return elevator_switch(q, e); + e = elevator_get(q, strstrip(elevator_name), true); + if (!e) + return -EINVAL; + + ret = elevator_switch(q, e); + elevator_put(e); + return ret; } ssize_t elv_iosched_store(struct request_queue *q, const char *name, -- 2.31.1