From: Artur Paszkiewicz <artur.paszkiewicz@xxxxxxxxx> Use generic io accounting functions to manage io stats. There was an attempt to do this earlier in commit 18c0b223cf990172 ("md: use generic io stats accounting functions to simplify io stat accounting"), but it did not include a call to generic_end_io_acct() and caused issues with tracking in-flight IOs, so it was later removed in commit 74672d069b29 ("md: fix md io stats accounting broken"). This patch attempts to fix this by using both generic_start_io_acct() and generic_end_io_acct(). To make it possible, in md_make_request() a bio is cloned with additional data - struct md_io, which includes the io start_time. A new bioset is introduced for this purpose. We call generic_start_io_acct() and pass the clone instead of the original to md_handle_request(). When it completes, we call generic_end_io_acct() and complete the original bio. This adds correct statistics about in-flight IOs and IO processing time, interpreted e.g. in iostat as await, svctm, aqu-sz and %util. It also fixes a situation where too many IOs where reported if a bio was re-submitted to the mddev, because io accounting is now performed only on newly arriving bios. Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@xxxxxxxxx> [Guoqing: rebase and make generic accounting applies to personalities which don't have clone infrastructure] Signed-off-by: Guoqing Jiang <jiangguoqing@xxxxxxxxxx> --- Delete not necessary bioset_exit in md_integrity_register, thanks for Artur's review. drivers/md/md.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++- drivers/md/md.h | 1 + 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 7ba00e4c862d..d8823db843db 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -441,6 +441,25 @@ void md_handle_request(struct mddev *mddev, struct bio *bio) } EXPORT_SYMBOL(md_handle_request); +struct md_io { + struct mddev *mddev; + struct bio *orig_bio; + unsigned long start_time; + struct bio bio_clone; +}; + +static void md_end_io(struct bio *bio) +{ + struct md_io *md_io = bio->bi_private; + struct bio *orig_bio = md_io->orig_bio; + + orig_bio->bi_status = bio->bi_status; + + bio_end_io_acct(orig_bio, md_io->start_time); + bio_put(bio); + bio_endio(orig_bio); +} + static blk_qc_t md_submit_bio(struct bio *bio) { const int rw = bio_data_dir(bio); @@ -465,6 +484,30 @@ static blk_qc_t md_submit_bio(struct bio *bio) return BLK_QC_T_NONE; } + /* + * clone bio under conditions: + * 1. QUEUE_FLAG_IO_STAT flag is set. + * 2. bio just enters md and it is not split from personality. + */ + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue) && + (bio->bi_pool != &mddev->md_io_bs) && + (mddev->pers->accounting_bio && + mddev->pers->accounting_bio(mddev, bio))) { + struct md_io *md_io; + struct bio *clone; + + clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs); + + md_io = container_of(clone, struct md_io, bio_clone); + md_io->mddev = mddev; + md_io->orig_bio = bio; + md_io->start_time = bio_start_io_acct(bio); + + clone->bi_end_io = md_end_io; + clone->bi_private = md_io; + bio = clone; + } + /* bio could be mergeable after passing to underlayer */ bio->bi_opf &= ~REQ_NOMERGE; @@ -2340,7 +2383,8 @@ int md_integrity_register(struct mddev *mddev) bdev_get_integrity(reference->bdev)); pr_debug("md: data integrity enabled on %s\n", mdname(mddev)); - if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) { + if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) || + bioset_integrity_create(&mddev->md_io_bs, BIO_POOL_SIZE)) { pr_err("md: failed to create integrity pool for %s\n", mdname(mddev)); return -EINVAL; @@ -5569,6 +5613,7 @@ static void md_free(struct kobject *ko) bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); + bioset_exit(&mddev->md_io_bs); kfree(mddev); } @@ -5864,6 +5909,12 @@ int md_run(struct mddev *mddev) if (err) return err; } + if (!bioset_initialized(&mddev->md_io_bs)) { + err = bioset_init(&mddev->md_io_bs, BIO_POOL_SIZE, + offsetof(struct md_io, bio_clone), 0); + if (err) + return err; + } spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); @@ -6041,6 +6092,7 @@ int md_run(struct mddev *mddev) abort: bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); + bioset_exit(&mddev->md_io_bs); return err; } EXPORT_SYMBOL_GPL(md_run); @@ -6264,6 +6316,7 @@ void md_stop(struct mddev *mddev) __md_stop(mddev); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); + bioset_exit(&mddev->md_io_bs); } EXPORT_SYMBOL_GPL(md_stop); diff --git a/drivers/md/md.h b/drivers/md/md.h index 5125ccf9df06..d2f476c427a9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -487,6 +487,7 @@ struct mddev { struct bio_set sync_set; /* for sync operations like * metadata and bitmap writes */ + struct bio_set md_io_bs; /* for io accounting */ /* Generic flush handling. * The last to finish preflush schedules a worker to submit -- 2.25.1