[md PATCH 4/4] async_tx: fix asynchronous raid6 recovery for ddf layouts

The raid6 recovery code currently requires special handling of the
4-disk and 5-disk recovery scenarios for the native layout.  Quoting
from commit 0a82a623:

     In these situations the default N-disk algorithm will present
     0-source or 1-source operations to dma devices.  To cover for
     dma devices where the minimum source count is 2 we implement
     4-disk and 5-disk handling in the recovery code.

Recovery in the ddf layout case needs explicit handling of the 6-disk
and 7-disk cases, which pose the same problems as the ones described
above.  Note that "N-disk" refers to the number of syndrome disks, not
the width of the array.

Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
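
For reference only (not part of the patch): a stand-alone user-space
sketch of the dispatch this change adds to async_raid6_2data_recov().
The recov_path() helper and the test harness are purely illustrative;
the helper names are the ones used in the diff below, and "disks"
counts syndrome disks plus P and Q, not the width of the array.

/* illustration only: which recovery helper the new switch in
 * async_raid6_2data_recov() would select for a given syndrome-disk
 * count and layout flag
 */
#include <stdbool.h>
#include <stdio.h>

static const char *recov_path(int disks, bool is_ddf)
{
	switch (disks) {
	case 4:			/* native: 2 data + P + Q */
		return "__2data_recov_4";
	case 5:			/* native: 3 data + P + Q */
		return "__2data_recov_5";
	case 6:			/* ddf: devolves to the 4-disk native case */
		if (is_ddf)
			return "__2data_recov_4 (ddf)";
		/* fall through */
	case 7:			/* ddf: devolves to the 5-disk native case */
		if (is_ddf)
			return "__2data_recov_5 (ddf)";
		/* fall through */
	default:		/* enough sources for the generic path */
		return "__2data_recov_n";
	}
}

int main(void)
{
	printf("native 4-disk : %s\n", recov_path(4, false));
	printf("native 5-disk : %s\n", recov_path(5, false));
	printf("ddf 6-disk    : %s\n", recov_path(6, true));
	printf("ddf 7-disk    : %s\n", recov_path(7, true));
	printf("native 8-disk : %s\n", recov_path(8, false));
	return 0;
}
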
 crypto/async_tx/async_raid6_recov.c |   98 ++++++++++++++++++++++++++++-------
 crypto/async_tx/raid6test.c         |    6 +-
 drivers/md/raid5.c                  |    6 +-
 include/linux/async_tx.h            |    6 +-
 4 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 8e30b6e..54b39d2 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -132,7 +132,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
 
 static struct dma_async_tx_descriptor *
 __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
-	      struct async_submit_ctl *submit)
+		bool is_ddf, struct async_submit_ctl *submit)
 {
 	struct dma_async_tx_descriptor *tx = NULL;
 	struct page *p, *q, *a, *b;
@@ -143,8 +143,13 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
 	void *cb_param = submit->cb_param;
 	void *scribble = submit->scribble;
 
-	p = blocks[4-2];
-	q = blocks[4-1];
+	if (is_ddf) {
+		p = blocks[6-2];
+		q = blocks[6-1];
+	} else {
+		p = blocks[4-2];
+		q = blocks[4-1];
+	}
 
 	a = blocks[faila];
 	b = blocks[failb];
@@ -171,7 +176,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
 
 static struct dma_async_tx_descriptor *
 __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
-	      struct async_submit_ctl *submit)
+		bool is_ddf, struct async_submit_ctl *submit)
 {
 	struct dma_async_tx_descriptor *tx = NULL;
 	struct page *p, *q, *g, *dp, *dq;
@@ -181,21 +186,34 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
 	dma_async_tx_callback cb_fn = submit->cb_fn;
 	void *cb_param = submit->cb_param;
 	void *scribble = submit->scribble;
-	int uninitialized_var(good);
+	int good = -1;
 	int i;
 
-	for (i = 0; i < 3; i++) {
-		if (i == faila || i == failb)
-			continue;
-		else {
+	if (is_ddf) {
+		/* a 7-disk ddf operation devolves to the 5-disk native
+		 * layout case modulo these fixups
+		 */
+		for (i = 0; i < 7-2; i++) {
+			if (blocks[i] == NULL)
+				continue;
+			if (i == faila || i == failb)
+				continue;
+			BUG_ON(good != -1);
 			good = i;
-			break;
 		}
+		p = blocks[7-2];
+		q = blocks[7-1];
+	} else {
+		for (i = 0; i < 5-2; i++) {
+			if (i == faila || i == failb)
+				continue;
+			BUG_ON(good != -1);
+			good = i;
+		}
+		p = blocks[5-2];
+		q = blocks[5-1];
 	}
-	BUG_ON(i >= 3);
-
-	p = blocks[5-2];
-	q = blocks[5-1];
+	BUG_ON(good == -1);
 	g = blocks[good];
 
 	/* Compute syndrome with zero for the missing data pages
@@ -317,11 +335,13 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
  * @faila: first failed drive index
  * @failb: second failed drive index
  * @blocks: array of source pointers where the last two entries are p and q
+ * @is_ddf: flag to indicate whether 'blocks' is in the ddf layout
  * @submit: submission/completion modifiers
  */
 struct dma_async_tx_descriptor *
 async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
-			struct page **blocks, struct async_submit_ctl *submit)
+			struct page **blocks, bool is_ddf,
+			struct async_submit_ctl *submit)
 {
 	BUG_ON(faila == failb);
 	if (failb < faila)
@@ -356,13 +376,23 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
 		 * operation (in contrast to the synchronous case), so
 		 * explicitly handle the 4 disk special case
 		 */
-		return __2data_recov_4(bytes, faila, failb, blocks, submit);
+		return __2data_recov_4(bytes, faila, failb, blocks, false, submit);
 	case 5:
 		/* dma devices do not uniformly understand a single
 		 * source pq operation (in contrast to the synchronous
 		 * case), so explicitly handle the 5 disk special case
 		 */
-		return __2data_recov_5(bytes, faila, failb, blocks, submit);
+		return __2data_recov_5(bytes, faila, failb, blocks, false, submit);
+	case 6:
+		if (is_ddf)
+			return __2data_recov_4(bytes, faila, failb, blocks,
+					       true, submit);
+		/* fall through */
+	case 7:
+		if (is_ddf)
+			return __2data_recov_5(bytes, faila, failb, blocks,
+					       true, submit);
+		/* fall through */
 	default:
 		return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
 	}
@@ -375,11 +405,13 @@ EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
  * @bytes: block size
  * @faila: failed drive index
  * @blocks: array of source pointers where the last two entries are p and q
+ * @is_ddf: flag to indicate whether 'blocks' is in the ddf layout
  * @submit: submission/completion modifiers
  */
 struct dma_async_tx_descriptor *
 async_raid6_datap_recov(int disks, size_t bytes, int faila,
-			struct page **blocks, struct async_submit_ctl *submit)
+			struct page **blocks, bool is_ddf,
+			struct async_submit_ctl *submit)
 {
 	struct dma_async_tx_descriptor *tx = NULL;
 	struct page *p, *q, *dq;
@@ -423,10 +455,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 	blocks[faila] = NULL;
 	blocks[disks-1] = dq;
 
-	/* in the 4 disk case we only need to perform a single source
-	 * multiplication
+	/* in the 4-disk (or 6-disk ddf layout) case we only need to
+	 * perform a single source multiplication with the one good data
+	 * block.
 	 */
-	if (disks == 4) {
+	if (disks == 4 && !is_ddf) {
 		int good = faila == 0 ? 1 : 0;
 		struct page *g = blocks[good];
 
@@ -437,6 +470,29 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
 				  scribble);
 		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+	} else if (disks == 6 && is_ddf) {
+		struct page *g;
+		int good = -1;
+		int i;
+
+		for (i = 0; i < 6-2; i++) {
+			if (blocks[i] == NULL)
+				continue;
+			if (i == faila)
+				continue;
+			BUG_ON(good != -1);
+			good = i;
+		}
+		BUG_ON(good == -1);
+		g = blocks[good];
+
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
+		tx = async_memcpy(p, g, 0, 0, bytes, submit);
+
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
+		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
 	} else {
 		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
 				  scribble);
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 3ec27c7..9d7b3e1 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -108,11 +108,13 @@ static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, stru
 		if (failb == disks-2) {
 			/* data+P failure. */
 			init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
-			tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
+			tx = async_raid6_datap_recov(disks, bytes, faila, ptrs,
+						     false, &submit);
 		} else {
 			/* data+data failure. */
 			init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
-			tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
+			tx = async_raid6_2data_recov(disks, bytes, faila, failb,
+						     ptrs, false, &submit);
 		}
 	}
 	init_completion(&cmp);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 81abefc..18d6ed4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -888,12 +888,14 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
 			/* We're missing D+P. */
 			return async_raid6_datap_recov(syndrome_disks+2,
 						       STRIPE_SIZE, faila,
-						       blocks, &submit);
+						       blocks, sh->ddf_layout,
+						       &submit);
 		} else {
 			/* We're missing D+D. */
 			return async_raid6_2data_recov(syndrome_disks+2,
 						       STRIPE_SIZE, faila, failb,
-						       blocks, &submit);
+						       blocks, sh->ddf_layout,
+						       &submit);
 		}
 	}
 }
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index a1c486a..7520204 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -199,11 +199,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
 
 struct dma_async_tx_descriptor *
 async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
-			struct page **ptrs, struct async_submit_ctl *submit);
+			struct page **ptrs, bool is_ddf,
+			struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_raid6_datap_recov(int src_num, size_t bytes, int faila,
-			struct page **ptrs, struct async_submit_ctl *submit);
+			struct page **ptrs, bool is_ddf,
+			struct async_submit_ctl *submit);
 
 void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 #endif /* _ASYNC_TX_H_ */
