From: Yu Kuai <yukuai3@xxxxxxxxxx> Currently, icq is tracked by both request_queue(icq->q_node) and task(icq->ioc_node), and ioc_clear_queue() from elevator exit is not safe because it can access the list without protection: ioc_clear_queue ioc_release_fn lock queue_lock list_splice /* move queue list to a local list */ unlock queue_lock /* * lock is released, the local list * can be accessed through task exit. */ lock ioc->lock while (!hlist_empty) icq = hlist_entry lock queue_lock ioc_destroy_icq delete icq->ioc_node while (!list_empty) icq = list_entry() list_del icq->q_node /* * This is not protected by any lock, * list_entry concurrent with list_del * is not safe. */ unlock queue_lock unlock ioc->lock Fix this problem by protecting list 'icq->q_node' by queue_lock from ioc_clear_queue(). Reported-and-tested-by: Pradeep Pragallapati <quic_pragalla@xxxxxxxxxxx> Link: https://lore.kernel.org/lkml/20230517084434.18932-1-quic_pragalla@xxxxxxxxxxx/ Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> --- block/blk-ioc.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 63fc02042408..d5db92e62c43 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -77,6 +77,10 @@ static void ioc_destroy_icq(struct io_cq *icq) struct elevator_type *et = q->elevator->type; lockdep_assert_held(&ioc->lock); + lockdep_assert_held(&q->queue_lock); + + if (icq->flags & ICQ_DESTROYED) + return; radix_tree_delete(&ioc->icq_tree, icq->q->id); hlist_del_init(&icq->ioc_node); @@ -128,12 +132,7 @@ static void ioc_release_fn(struct work_struct *work) spin_lock(&q->queue_lock); spin_lock(&ioc->lock); - /* - * The icq may have been destroyed when the ioc lock - * was released. - */ - if (!(icq->flags & ICQ_DESTROYED)) - ioc_destroy_icq(icq); + ioc_destroy_icq(icq); spin_unlock(&q->queue_lock); rcu_read_unlock(); @@ -171,23 +170,20 @@ static bool ioc_delay_free(struct io_context *ioc) */ void ioc_clear_queue(struct request_queue *q) { - LIST_HEAD(icq_list); - spin_lock_irq(&q->queue_lock); - list_splice_init(&q->icq_list, &icq_list); - spin_unlock_irq(&q->queue_lock); - - rcu_read_lock(); - while (!list_empty(&icq_list)) { + while (!list_empty(&q->icq_list)) { struct io_cq *icq = - list_entry(icq_list.next, struct io_cq, q_node); + list_first_entry(&q->icq_list, struct io_cq, q_node); + /* + * Other context won't hold ioc lock to wait for queue_lock, see + * details in ioc_release_fn(). + */ spin_lock_irq(&icq->ioc->lock); - if (!(icq->flags & ICQ_DESTROYED)) - ioc_destroy_icq(icq); + ioc_destroy_icq(icq); spin_unlock_irq(&icq->ioc->lock); } - rcu_read_unlock(); + spin_unlock_irq(&q->queue_lock); } #else /* CONFIG_BLK_ICQ */ static inline void ioc_exit_icqs(struct io_context *ioc) -- 2.39.2