[PATCH V2 3/7] md: the latest try for improve io stats accounting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Artur Paszkiewicz <artur.paszkiewicz@xxxxxxxxx>

Use generic io accounting functions to manage io stats. There was an
attempt to do this earlier in commit 18c0b223cf990172 ("md: use generic
io stats accounting functions to simplify io stat accounting"), but it
did not include a call to generic_end_io_acct() and caused issues with
tracking in-flight IOs, so it was later removed in commit 74672d069b29
("md: fix md io stats accounting broken").

This patch attempts to fix this by using both generic_start_io_acct()
and generic_end_io_acct(). To make it possible, in md_make_request() a
bio is cloned with additional data - struct md_io, which includes the io
start_time. A new bioset is introduced for this purpose. We call
generic_start_io_acct() and pass the clone instead of the original to
md_handle_request(). When it completes, we call generic_end_io_acct()
and complete the original bio.

This adds correct statistics about in-flight IOs and IO processing time,
interpreted e.g. in iostat as await, svctm, aqu-sz and %util.

It also fixes a situation where too many IOs where reported if a bio was
re-submitted to the mddev, because io accounting is now performed only
on newly arriving bios.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@xxxxxxxxx>
[Guoqing: rebase and make generic accounting applies to personalities
	  which don't have clone infrastructure]
Signed-off-by: Guoqing Jiang <jiangguoqing@xxxxxxxxxx>
---
 drivers/md/md.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/md/md.h |  1 +
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7ba00e4c862d..13392fe9379c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -441,6 +441,25 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
 }
 EXPORT_SYMBOL(md_handle_request);
 
+struct md_io {
+	struct mddev *mddev;
+	struct bio *orig_bio;
+	unsigned long start_time;
+	struct bio bio_clone;
+};
+
+static void md_end_io(struct bio *bio)
+{
+	struct md_io *md_io = bio->bi_private;
+	struct bio *orig_bio = md_io->orig_bio;
+
+	orig_bio->bi_status = bio->bi_status;
+
+	bio_end_io_acct(orig_bio, md_io->start_time);
+	bio_put(bio);
+	bio_endio(orig_bio);
+}
+
 static blk_qc_t md_submit_bio(struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
@@ -465,6 +484,30 @@ static blk_qc_t md_submit_bio(struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
+	/*
+	 * clone bio under conditions:
+	 * 1. QUEUE_FLAG_IO_STAT flag is set.
+	 * 2. bio just enters md and it is not split from personality.
+	 */
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue) &&
+	    (bio->bi_pool != &mddev->md_io_bs) &&
+	    (mddev->pers->accounting_bio &&
+	     mddev->pers->accounting_bio(mddev, bio))) {
+		struct md_io *md_io;
+		struct bio *clone;
+
+		clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
+
+		md_io = container_of(clone, struct md_io, bio_clone);
+		md_io->mddev = mddev;
+		md_io->orig_bio = bio;
+		md_io->start_time = bio_start_io_acct(bio);
+
+		clone->bi_end_io = md_end_io;
+		clone->bi_private = md_io;
+		bio = clone;
+	}
+
 	/* bio could be mergeable after passing to underlayer */
 	bio->bi_opf &= ~REQ_NOMERGE;
 
@@ -2340,9 +2383,12 @@ int md_integrity_register(struct mddev *mddev)
 			       bdev_get_integrity(reference->bdev));
 
 	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
-	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
+	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
+	    bioset_integrity_create(&mddev->md_io_bs, BIO_POOL_SIZE)) {
 		pr_err("md: failed to create integrity pool for %s\n",
 		       mdname(mddev));
+		bioset_exit(&mddev->bio_set);
+		bioset_exit(&mddev->md_io_bs);
 		return -EINVAL;
 	}
 	return 0;
@@ -5569,6 +5615,7 @@ static void md_free(struct kobject *ko)
 
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->md_io_bs);
 	kfree(mddev);
 }
 
@@ -5864,6 +5911,12 @@ int md_run(struct mddev *mddev)
 		if (err)
 			return err;
 	}
+	if (!bioset_initialized(&mddev->md_io_bs)) {
+		err = bioset_init(&mddev->md_io_bs, BIO_POOL_SIZE,
+				  offsetof(struct md_io, bio_clone), 0);
+		if (err)
+			return err;
+	}
 
 	spin_lock(&pers_lock);
 	pers = find_pers(mddev->level, mddev->clevel);
@@ -6041,6 +6094,7 @@ int md_run(struct mddev *mddev)
 abort:
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->md_io_bs);
 	return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6264,6 +6318,7 @@ void md_stop(struct mddev *mddev)
 	__md_stop(mddev);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->md_io_bs);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 5125ccf9df06..d2f476c427a9 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -487,6 +487,7 @@ struct mddev {
 	struct bio_set			sync_set; /* for sync operations like
 						   * metadata and bitmap writes
 						   */
+	struct bio_set			md_io_bs; /* for io accounting */
 
 	/* Generic flush handling.
 	 * The last to finish preflush schedules a worker to submit
-- 
2.25.1




[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux