On Mon, Apr 26, 2021 at 09:19:20AM +0200, Hannes Reinecke wrote: > On 4/22/21 2:20 PM, Ming Lei wrote: > > Limit at most 8 queues are polled in each blk_pull(), avoid to > > add extra latency when queue depth is high. > > > > Reviewed-by: Jeffle Xu <jefflexu@xxxxxxxxxxxxxxxxx> > > Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> > > --- > > block/blk-poll.c | 78 ++++++++++++++++++++++++++++++++++-------------- > > 1 file changed, 55 insertions(+), 23 deletions(-) > > > > diff --git a/block/blk-poll.c b/block/blk-poll.c > > index 249d73ff6f81..20e7c47cc984 100644 > > --- a/block/blk-poll.c > > +++ b/block/blk-poll.c > > @@ -288,36 +288,32 @@ static void bio_grp_list_move(struct bio_grp_list *dst, > > src->nr_grps -= cnt; > > } > > > > -static int blk_mq_poll_io(struct bio *bio) > > +#define POLL_HCTX_MAX_CNT 8 > > + > > +static bool blk_add_unique_hctx(struct blk_mq_hw_ctx **data, int *cnt, > > + struct blk_mq_hw_ctx *hctx) > > { > > - struct request_queue *q = bio->bi_bdev->bd_disk->queue; > > - blk_qc_t cookie = bio_get_poll_data(bio); > > - int ret = 0; > > + int i; > > > > - /* wait until the bio is submitted really */ > > - if (!blk_qc_t_ready(cookie)) > > - return 0; > > > > - if (!bio_flagged(bio, BIO_DONE) && blk_qc_t_valid(cookie)) { > > - struct blk_mq_hw_ctx *hctx = > > - q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; > > + for (i = 0; i < *cnt; i++) { > > + if (data[i] == hctx) > > + goto exit; > > + } > > > > - ret += blk_mq_poll_hctx(q, hctx); > > + if (i < POLL_HCTX_MAX_CNT) { > > + data[i] = hctx; > > + (*cnt)++; > > } > > - return ret; > > + exit: > > + return *cnt == POLL_HCTX_MAX_CNT; > > } > > > > -static int blk_bio_poll_and_end_io(struct bio_grp_list *grps) > > +static void blk_build_poll_queues(struct bio_grp_list *grps, > > + struct blk_mq_hw_ctx **data, int *cnt) > > { > > - int ret = 0; > > int i; > > > > - /* > > - * Poll hw queue first. > > - * > > - * TODO: limit max poll times and make sure to not poll same > > - * hw queue one more time. 
> > - */ > > for (i = 0; i < grps->nr_grps; i++) { > > struct bio_grp_list_data *grp = &grps->head[i]; > > struct bio *bio; > > @@ -325,11 +321,31 @@ static int blk_bio_poll_and_end_io(struct bio_grp_list *grps) > > if (bio_grp_list_grp_empty(grp)) > > continue; > > > > - for (bio = grp->list.head; bio; bio = bio->bi_poll) > > - ret += blk_mq_poll_io(bio); > > + for (bio = grp->list.head; bio; bio = bio->bi_poll) { > > + blk_qc_t cookie; > > + struct blk_mq_hw_ctx *hctx; > > + struct request_queue *q; > > + > > + if (bio_flagged(bio, BIO_DONE)) > > + continue; > > + > > + /* wait until the bio is submitted really */ > > + cookie = bio_get_poll_data(bio); > > + if (!blk_qc_t_ready(cookie) || !blk_qc_t_valid(cookie)) > > + continue; > > + > > + q = bio->bi_bdev->bd_disk->queue; > > + hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; > > + if (blk_add_unique_hctx(data, cnt, hctx)) > > + return; > > + } > > } > > +} > > + > > +static void blk_bio_poll_reap_ios(struct bio_grp_list *grps) > > +{ > > + int i; > > > > - /* reap bios */ > > for (i = 0; i < grps->nr_grps; i++) { > > struct bio_grp_list_data *grp = &grps->head[i]; > > struct bio *bio; > > @@ -354,6 +370,22 @@ static int blk_bio_poll_and_end_io(struct bio_grp_list *grps) > > } > > __bio_grp_list_merge(&grp->list, &bl); > > } > > +} > > + > > +static int blk_bio_poll_and_end_io(struct bio_grp_list *grps) > > +{ > > + int ret = 0; > > + int i; > > + struct blk_mq_hw_ctx *hctx[POLL_HCTX_MAX_CNT]; > > + int cnt = 0; > > + > > + blk_build_poll_queues(grps, hctx, &cnt); > > + > > + for (i = 0; i < cnt; i++) > > + ret += blk_mq_poll_hctx(hctx[i]->queue, hctx[i]); > > + > > + blk_bio_poll_reap_ios(grps); > > + > > return ret; > > } > > > > > Can't we make it a sysfs attribute instead of hard-coding it? > '8' seems a bit arbitrary to me, I'd rather have the ability to modify it... I'd rather not add such code at the feature 'enablement' stage, since I haven't observed the number playing a big role yet.
It is added so that the hw queues to be polled can be held in an on-stack array, and also to avoid adding too much latency when there are too many bios from too many hw queues to be reaped. Also, the hw queues that are actually polled can be observed easily via bpftrace, so a sysfs attribute isn't necessary for debugging purposes either. Thanks, Ming