[PATCH] mdraid: fix read/write bytes accounting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jeff Mahoney <jeffm@xxxxxxxx>

The i/o accounting published in /proc/diskstats for mdraid is currently
broken.  md_make_request does the accounting for every bio passed but
when a bio needs to be split, all the split bios are also submitted
through md_make_request, resulting in multiple accounting.

As a result, a quick test on a RAID6 volume displayed the following
behavior:

131072+0 records in
131072+0 records out
67108864 bytes (67 MB, 64 MiB) copied, 13.9227 s, 4.8 MB/s

... shows 131072 sectors read -- 64 MiB.  Correct.

512+0 records in
512+0 records out
67108864 bytes (67 MB, 64 MiB) copied, 0.287365 s, 234 MB/s

... shows 196608 sectors read -- 96 MiB.  With a 64k stripe size, we're
seeing some splitting.

512+0 records in
512+0 records out
536870912 bytes (537 MB, 512 MiB) copied, 0.705004 s, 762 MB/s

... shows 4718624 sectors read -- 2.25 GiB transferred.  Lots of splitting
and a stats explosion.

The fix is to push the accounting into the personality make_request
callbacks so that only the bios actually submitted for I/O will be
accounted.

Signed-off-by: Jeff Mahoney <jeffm@xxxxxxxx>
---
 drivers/md/md-faulty.c    |  1 +
 drivers/md/md-linear.c    |  1 +
 drivers/md/md-multipath.c |  1 +
 drivers/md/md.c           | 21 +++++++++++++++------
 drivers/md/md.h           |  1 +
 drivers/md/raid0.c        |  1 +
 drivers/md/raid1.c        |  3 +++
 drivers/md/raid10.c       |  3 +++
 drivers/md/raid5.c        |  3 +++
 9 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
index 50ad4ba86f0e..25e2c72ef920 100644
--- a/drivers/md/md-faulty.c
+++ b/drivers/md/md-faulty.c
@@ -214,6 +214,7 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
 	} else
 		bio_set_dev(bio, conf->rdev->bdev);
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	generic_make_request(bio);
 	return true;
 }
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 26c75c0199fa..ec0dbc0f1d76 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -271,6 +271,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		bio = split;
 	}
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	bio_set_dev(bio, tmp_dev->rdev->bdev);
 	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
 		start_sector + data_offset;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 152f9e65a226..c3afe3f62a88 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -131,6 +131,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
 	mp_bh->bio.bi_private = mp_bh;
 	mddev_check_writesame(mddev, &mp_bh->bio);
 	mddev_check_write_zeroes(mddev, &mp_bh->bio);
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	generic_make_request(&mp_bh->bio);
 	return true;
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f567f536b529..e8078a16419b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -463,10 +463,24 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
 }
 EXPORT_SYMBOL(md_handle_request);
 
+/*
+ * This is generic_start_io_acct without the inflight tracking.  Since
+ * we can't reliably call generic_end_io_acct, the inflight counter
+ * would also be unreliable and it's better to keep it at 0.
+ */
+void md_io_acct(struct mddev *mddev, int sgrp, unsigned int sectors)
+{
+	part_stat_lock();
+	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
+	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
+	part_stat_unlock();
+
+}
+EXPORT_SYMBOL_GPL(md_io_acct);
+
 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
-	const int sgrp = op_stat_group(bio_op(bio));
 	struct mddev *mddev = bio->bi_disk->private_data;
 	unsigned int sectors;
 
@@ -498,11 +512,6 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 
 	md_handle_request(mddev, bio);
 
-	part_stat_lock();
-	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
-	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
-	part_stat_unlock();
-
 	return BLK_QC_T_NONE;
 }
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 612814d07d35..5834427f7557 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -741,6 +741,7 @@ extern void mddev_suspend(struct mddev *mddev);
 extern void mddev_resume(struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 				   struct mddev *mddev);
+extern void md_io_acct(struct mddev *mddev, int sgrp, unsigned int sectors);
 
 extern void md_reload_sb(struct mddev *mddev, int raid_disk);
 extern void md_update_sb(struct mddev *mddev, int force);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 322386ff5d22..ac1109d1d48d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -633,6 +633,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 				disk_devt(mddev->gendisk), bio_sector);
 	mddev_check_writesame(mddev, bio);
 	mddev_check_write_zeroes(mddev, bio);
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	generic_make_request(bio);
 	return true;
 }
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index dcd27f3da84e..f0d620e2b90f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1318,6 +1318,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 		r1_bio->sectors = max_sectors;
 	}
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	r1_bio->read_disk = rdisk;
 
 	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
@@ -1489,6 +1490,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		r1_bio->sectors = max_sectors;
 	}
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
+
 	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ec136e44aef7..b8e8d7f67f65 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1202,6 +1202,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	}
 	slot = r10_bio->read_slot;
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
+
 	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
 
 	r10_bio->devs[slot].bio = read_bio;
@@ -1485,6 +1487,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 		r10_bio->master_bio = bio;
 	}
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	atomic_set(&r10_bio->remaining, 1);
 	md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ab8067f9ce8c..e57109e39fcd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5316,6 +5316,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
 	if (!raid5_read_one_chunk(mddev, raid_bio))
 		return raid_bio;
 
+	md_io_acct(mddev, bio_op(raid_bio), bio_sectors(raid_bio));
 	return NULL;
 }
 
@@ -5634,6 +5635,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 
+	md_io_acct(mddev, bio_op(bi), bio_sectors(bi));
+
 	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		int previous;
-- 
2.16.4




[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux