> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c > index 7513c8eaabee..b24334f99c5d 100644 > --- a/block/blk-mq-tag.c > +++ b/block/blk-mq-tag.c > @@ -332,7 +332,7 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt, > * true to continue iterating tags, false to stop. > * @priv: Will be passed as second argument to @fn. > */ > -static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, > +void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, > busy_tag_iter_fn *fn, void *priv) How about moving blk_mq_tags_inflight_rqs to blk-mq-tag.c instead? > + #define BLK_MQ_TAGS_DRAINED 0 Please don't indent #defines. > > +static int blk_mq_hctx_notify_prepare(unsigned int cpu, struct hlist_node *node) > +{ > + struct blk_mq_hw_ctx *hctx; > + struct blk_mq_tags *tags; > + > + tags = hctx->tags; > + > + if (tags) > + clear_bit(BLK_MQ_TAGS_DRAINED, &tags->flags); > + > + return 0; I'd write this as: { struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead); if (hctx->tags) clear_bit(BLK_MQ_TAGS_DRAINED, &hctx->tags->flags); return 0; } > +static void blk_mq_drain_inflight_rqs(struct blk_mq_tags *tags, int dead_cpu) > +{ > + unsigned long msecs_left = 1000 * 5; > + > + if (!tags) > + return; > + > + if (test_and_set_bit(BLK_MQ_TAGS_DRAINED, &tags->flags)) > + return; > + > + while (msecs_left > 0) { > + if (!blk_mq_tags_inflight_rqs(tags)) > + break; > + msleep(5); > + msecs_left -= 5; > + } > + > + if (msecs_left > 0) > + printk(KERN_WARNING "requests not completed from dead " > + "CPU %d\n", dead_cpu); > +} Isn't this condition inverted? If we break out early msecs_left will be larger than 0 and we are fine. Why not: for (attempts = 0; attempts < 1000; attempts++) { if (!blk_mq_tags_inflight_rqs(tags)) return; } ... But more importantly I'm not sure we can just give up that easily. Shouldn't we at lest wait the same timeout we otherwise have for requests, and if the command isn't finished in time kick off error handling?