On 12/21/18 6:50 AM, Jens Axboe wrote: ... > @@ -228,13 +228,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) > > if (!reserved) > bitnr += tags->nr_reserved_tags; > - rq = tags->rqs[bitnr]; > + if (tags->rqs[bitnr].queue != hctx->queue) > + return true; > > /* > * We can hit rq == NULL here, because the tagging functions > * test and set the bit before assigning ->rqs[]. > */ > - if (rq && rq->q == hctx->queue) > + rq = tags->rqs[bitnr].rq; > + if (rq) > return iter_data->fn(hctx, rq, iter_data->data, reserved); > return true; > } > @@ -263,7 +265,9 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt, > .reserved = reserved, > }; > > + rcu_read_lock(); > sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data); > + rcu_read_unlock(); > } > > struct bt_tags_iter_data { ... > @@ -2038,15 +2071,30 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, > } > } Just checking for a request_queue match in bt_iter does not seem to be enough. blk_mq_free_rqs could also be invoked by switching the I/O scheduler, and at that moment the request_queue would still match. Maybe we also need to clear the associated tags->rqs[] entries. By clearing the associated tags->rqs[] entries and issuing a synchronize_rcu afterwards, bt_iter would only ever see a stable request in the idle state, never a freed one. Thanks Jianchao > > - while (!list_empty(&tags->page_list)) { > page = list_first_entry(&tags->page_list, struct page, lru); > - list_del_init(&page->lru); > + if (list_empty(&tags->page_list)) > + return; > + > + rpl = kmalloc(sizeof(*rpl), GFP_NOIO); > + if (rpl) { > + INIT_LIST_HEAD(&rpl->list); > + list_splice_init(&tags->page_list, &rpl->list); > + > + /* Punt to RCU free, so we don't race with tag iteration */ > + INIT_RCU_WORK(&rpl->rcu_work, blk_mq_rcu_free_pages); > + rpl->on_stack = false; > + queue_rcu_work(system_wq, &rpl->rcu_work); > + } else {