For the far layout, the discard region is not continuous on the disks,
so it needs far_copies r10bios to cover all the regions. It also needs
a way to know whether all the r10bios have finished. Similar to
raid10_sync_request, only the first r10bio's master_bio records the
discard bio; the other r10bios' master_bio record the first r10bio.
The first r10bio can only finish after all the other r10bios have
finished, and then it returns the discard bio. (A minimal userspace
sketch of this completion pattern is appended after the patch.)

Signed-off-by: Xiao Ni <xni@xxxxxxxxxx>
---
 drivers/md/raid10.c | 89 ++++++++++++++++++++++++++++++++++++++---------------
 drivers/md/raid10.h |  1 +
 2 files changed, 66 insertions(+), 24 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5431e1b..97f673a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1582,6 +1582,27 @@ static struct bio *raid10_split_bio(struct r10conf *conf,
 	return bio;
 }
 
+static void raid_end_discard_bio(struct r10bio *r10bio)
+{
+	struct r10conf *conf = r10bio->mddev->private;
+
+	while (atomic_dec_and_test(&r10bio->remaining)) {
+
+		allow_barrier(conf);
+
+		if (test_bit(R10BIO_Discard, &r10bio->state)) {
+			md_write_end(r10bio->mddev);
+			bio_endio(r10bio->master_bio);
+			free_r10bio(r10bio);
+			break;
+		} else {
+			struct r10bio *first_r10bio = (struct r10bio *)r10bio->master_bio;
+			free_r10bio(r10bio);
+			r10bio = first_r10bio;
+		}
+	}
+}
+
 static void raid10_end_discard_request(struct bio *bio)
 {
 	struct r10bio *r10_bio = bio->bi_private;
@@ -1605,10 +1626,7 @@ static void raid10_end_discard_request(struct bio *bio)
 		rdev = conf->mirrors[dev].rdev;
 	}
 
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		md_write_end(r10_bio->mddev);
-		raid_end_bio_io(r10_bio);
-	}
+	raid_end_discard_bio(r10_bio);
 
 	rdev_dec_pending(rdev, conf->mddev);
 }
@@ -1622,7 +1640,9 @@ static bool raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
 	struct geom geo = conf->geo;
-	struct r10bio *r10_bio;
+	struct r10bio *r10_bio, *first_r10bio;
+	int far_copies = geo.far_copies;
+	bool first_copy = true;
 
 	int disk;
 	sector_t chunk;
@@ -1649,9 +1669,9 @@ static bool raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 	bio_end = bio_end_sector(bio);
 	/* Maybe one discard bio is smaller than strip size or across one stripe
-	 * and discard region is larger than one stripe size. For far offset layout,
-	 * if the discard region is not aligned with stripe size, there is hole
-	 * when we submit discard bio to member disk. For simplicity, we only
+	 * and discard region is larger than one stripe size. For far and far offset
+	 * layouts, if the discard region is not aligned with stripe size, there is a
+	 * hole when we submit discard bios to the member disks. For simplicity, we only
 	 * handle discard bio which discard region is bigger than stripe_size*2
 	 */
 	if (bio_sectors(bio) < stripe_size*2)
 		goto out;
@@ -1662,29 +1682,20 @@ static bool raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 	    bio_end > conf->reshape_progress)
 		goto out;
 
-	/* For far offset layout, if bio is not aligned with stripe size, it splits
-	 * the part that is not aligned with strip size.
+	/* For far and far offset layouts, if bio is not aligned with stripe size,
+	 * it splits the part that is not aligned with stripe size.
 	 */
-	if (geo.far_offset && (bio_start & stripe_mask)) {
+	if ((far_copies > 1) && (bio_start & stripe_mask)) {
 		sector_t split_size;
 		split_size = round_up(bio_start, stripe_size) - bio_start;
 		bio = raid10_split_bio(conf, bio, split_size, false);
 	}
-	if (geo.far_offset && (bio_end & stripe_mask)) {
+	if ((far_copies > 1) && (bio_end & stripe_mask)) {
 		sector_t split_size;
 		split_size = bio_sectors(bio) - (bio_end & stripe_mask);
 		bio = raid10_split_bio(conf, bio, split_size, true);
 	}
 
-	r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
-	r10_bio->mddev = mddev;
-	r10_bio->state = 0;
-	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->geo.raid_disks);
-
-	wait_blocked_dev(mddev, geo.raid_disks);
-
-	r10_bio->master_bio = bio;
-
 	bio_start = bio->bi_iter.bi_sector;
 	bio_end = bio_end_sector(bio);
 
@@ -1710,6 +1721,28 @@ static bool raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 	end_disk_offset = (bio_end & geo.chunk_mask) +
 				(last_stripe_index << geo.chunk_shift);
 
+	wait_blocked_dev(mddev, geo.raid_disks);
+
+retry_discard:
+	r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
+	r10_bio->mddev = mddev;
+	r10_bio->state = 0;
+	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->geo.raid_disks);
+
+	/* The far layout needs more than one r10bio to cover all the regions.
+	 * Inspired by raid10_sync_request, we can use the first r10bio->master_bio
+	 * to record the discard bio. The other r10bios' master_bio record the
+	 * first r10bio. The first r10bio is released only after all the other
+	 * r10bios finish, and only then does the discard bio return.
+	 */
+	if (first_copy) {
+		r10_bio->master_bio = bio;
+		set_bit(R10BIO_Discard, &r10_bio->state);
+		first_copy = false;
+		first_r10bio = r10_bio;
+	} else
+		r10_bio->master_bio = (struct bio *)first_r10bio;
+
 	rcu_read_lock();
 	for (disk = 0; disk < geo.raid_disks; disk++) {
 		struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
@@ -1796,11 +1829,19 @@ static bool raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 		}
 	}
 
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		md_write_end(r10_bio->mddev);
-		raid_end_bio_io(r10_bio);
+	if (!geo.far_offset && --far_copies) {
+		first_stripe_index += geo.stride >> geo.chunk_shift;
+		start_disk_offset += geo.stride;
+		last_stripe_index += geo.stride >> geo.chunk_shift;
+		end_disk_offset += geo.stride;
+		atomic_inc(&first_r10bio->remaining);
+		raid_end_discard_bio(r10_bio);
+		wait_barrier(conf);
+		goto retry_discard;
 	}
 
+	raid_end_discard_bio(r10_bio);
+
 	return 0;
 out:
 	allow_barrier(conf);
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 79cd2b7..1461fd5 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -179,5 +179,6 @@ enum r10bio_state {
 	R10BIO_Previous,
 /* failfast devices did receive failfast requests. */
 	R10BIO_FailFast,
+	R10BIO_Discard,
 };
 #endif
-- 
2.7.5
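
The chaining scheme above can be exercised in isolation. Below is a minimal
userspace sketch of the same completion pattern. The names region_io and
end_region_io are hypothetical stand-ins for r10bio and
raid_end_discard_bio, plain malloc/free replaces the mempool and barrier
machinery, and the member-disk discards complete synchronously.

/*
 * chain_demo.c: userspace sketch of the r10bio chaining pattern above.
 * Build: gcc -std=c11 chain_demo.c -o chain_demo
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

struct region_io {
	atomic_int remaining;	/* like r10bio->remaining */
	int is_first;		/* like R10BIO_Discard on the first r10bio */
	void *master;		/* master request, or pointer to the first io */
};

/*
 * Modeled on raid_end_discard_bio(): when a chained io drops its last
 * reference, it releases the reference it holds on the first io; the
 * master request completes only when the first io's count reaches zero.
 */
static void end_region_io(struct region_io *io)
{
	/* fetch_sub returning 1 means we just dropped the last reference */
	while (atomic_fetch_sub(&io->remaining, 1) == 1) {
		if (io->is_first) {
			printf("master request %p completes\n", io->master);
			free(io);
			break;
		}
		struct region_io *first = io->master;

		free(io);
		io = first;	/* now drop this chained io's ref on the first */
	}
}

int main(void)
{
	int far_copies = 3;		/* one region per far copy */
	void *discard = (void *)0x1;	/* stand-in for the discard bio */
	struct region_io *first = NULL;

	for (int copy = 0; copy < far_copies; copy++) {
		struct region_io *io = malloc(sizeof(*io));

		atomic_init(&io->remaining, 1);		/* submitter's reference */
		io->is_first = (copy == 0);
		io->master = io->is_first ? discard : (void *)first;
		if (io->is_first)
			first = io;

		/* "submit" one member-disk discard and complete it at once */
		atomic_fetch_add(&io->remaining, 1);
		end_region_io(io);

		/*
		 * As in retry_discard: take a reference on the first io for
		 * the next chained io before dropping our own reference.
		 */
		if (copy + 1 < far_copies)
			atomic_fetch_add(&first->remaining, 1);
		end_region_io(io);	/* drop the submitter's reference */
	}
	return 0;
}

Running it prints a single completion line only after the last chained
region_io drops its reference, showing that the master request cannot end
before the IO for every far copy has finished. Note the ordering: the
reference on the first io is always taken before the current io's own
reference is dropped, which is why the first copy cannot complete early.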