[patch 1/2 v2] RAID5: make stripe size configurable


 



The stripe size defaults to 4k. A bigger stripe size is considered harmful,
because if the IO size is small, a big stripe size can cause a lot of
unnecessary IO and parity calculation. But if the upper layer always sends
full-stripe writes to the RAID5 array, this drawback goes away, and a bigger
stripe size actually improves performance in that case because of bigger IOs
and fewer stripes to handle. In my full-stripe-write test case, a 16k stripe
size improves throughput by 40% - 120% depending on the RAID5 configuration.
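
For illustration only (not part of the patch), here is a minimal user-space
sketch of the arithmetic behind the new STRIPE_SIZE/STRIPE_SECTORS/
STRIPE_PAGES macros, assuming PAGE_SIZE is 4k and using stripe_size_order = 2
for the 16k case above:

	#include <stdio.h>

	/* Stand-ins for the kernel macros added by this patch; PAGE_SIZE is
	 * assumed to be 4096 here, and "order" plays the role of the new
	 * r5conf stripe_size_order field. */
	#define PAGE_SIZE		4096UL
	#define PAGE_SHIFT		12
	#define STRIPE_SIZE(order)	(PAGE_SIZE << (order))
	#define STRIPE_SHIFT(order)	(PAGE_SHIFT - 9 + (order))
	#define STRIPE_SECTORS(order)	(STRIPE_SIZE(order) >> 9)
	#define STRIPE_PAGES(order)	(1UL << (order))

	int main(void)
	{
		int order = 2;	/* 16k stripe, as in the test case above */

		/* prints: size 16384 bytes, 32 sectors, 4 pages, shift 5 */
		printf("size %lu bytes, %lu sectors, %lu pages, shift %d\n",
		       STRIPE_SIZE(order), STRIPE_SECTORS(order),
		       STRIPE_PAGES(order), STRIPE_SHIFT(order));
		return 0;
	}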

V2: use order-0 page allocation

Signed-off-by: Shaohua Li <shli@xxxxxxxxxxxx>
---
 drivers/md/raid5.c |  738 +++++++++++++++++++++++++++++++++++------------------
 drivers/md/raid5.h |    8 
 2 files changed, 502 insertions(+), 244 deletions(-)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2014-07-23 14:09:45.844570945 +0800
+++ linux/drivers/md/raid5.c	2014-07-23 14:09:45.836571048 +0800
@@ -70,9 +70,10 @@ static struct workqueue_struct *raid5_wq
  */
 
 #define NR_STRIPES		256
-#define STRIPE_SIZE		PAGE_SIZE
-#define STRIPE_SHIFT		(PAGE_SHIFT - 9)
-#define STRIPE_SECTORS		(STRIPE_SIZE>>9)
+#define STRIPE_SIZE(conf)	(PAGE_SIZE << conf->stripe_size_order)
+#define STRIPE_SHIFT(conf)	(PAGE_SHIFT - 9 + conf->stripe_size_order)
+#define STRIPE_SECTORS(conf)	(STRIPE_SIZE(conf) >> 9)
+#define STRIPE_PAGES(conf)	(1 << conf->stripe_size_order)
 #define	IO_THRESHOLD		1
 #define BYPASS_THRESHOLD	1
 #define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
@@ -81,13 +82,13 @@ static struct workqueue_struct *raid5_wq
 
 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
 {
-	int hash = (sect >> STRIPE_SHIFT) & HASH_MASK;
+	int hash = (sect >> STRIPE_SHIFT(conf)) & HASH_MASK;
 	return &conf->stripe_hashtbl[hash];
 }
 
-static inline int stripe_hash_locks_hash(sector_t sect)
+static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
 {
-	return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK;
+	return (sect >> STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK;
 }
 
 static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
@@ -130,10 +131,10 @@ static inline void unlock_all_device_has
  * This function is used to determine the 'next' bio in the list, given the sector
  * of the current stripe+device
  */
-static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
+static inline struct bio *r5_next_bio(struct r5conf *conf, struct bio *bio, sector_t sector)
 {
 	int sectors = bio_sectors(bio);
-	if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
+	if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS(conf))
 		return bio->bi_next;
 	else
 		return NULL;
@@ -483,36 +484,51 @@ out:
 static void shrink_buffers(struct stripe_head *sh)
 {
 	struct page *p;
-	int i;
+	int i, j;
 	int num = sh->raid_conf->pool_size;
 
 	for (i = 0; i < num ; i++) {
-		WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
-		p = sh->dev[i].page;
-		if (!p)
-			continue;
-		sh->dev[i].page = NULL;
-		put_page(p);
+		for (j = 0; j < STRIPE_PAGES(sh->raid_conf); j++) {
+			p = sh->dev[i].orig_pages[j];
+			if (!p)
+				continue;
+			WARN_ON(sh->dev[i].pages[j] !=
+					sh->dev[i].orig_pages[j]);
+			put_page(p);
+			sh->dev[i].pages[j] = NULL;
+			sh->dev[i].orig_pages[j] = NULL;
+		}
 	}
 }
 
 static int grow_buffers(struct stripe_head *sh)
 {
-	int i;
+	int i, j;
 	int num = sh->raid_conf->pool_size;
 
 	for (i = 0; i < num; i++) {
 		struct page *page;
 
-		if (!(page = alloc_page(GFP_KERNEL))) {
-			return 1;
+		for (j = 0; j < STRIPE_PAGES(sh->raid_conf); j++) {
+			page = alloc_page(GFP_KERNEL);
+			if (!page)
+				return 1;
+			sh->dev[i].pages[j] = page;
+			sh->dev[i].orig_pages[j] = page;
 		}
-		sh->dev[i].page = page;
-		sh->dev[i].orig_page = page;
 	}
 	return 0;
 }
 
+static void reset_stripe_devpage(struct stripe_head *sh, int i)
+{
+	struct r5conf *conf = sh->raid_conf;
+	int j;
+
+	for (j = 0; j < STRIPE_PAGES(conf); j++)
+		sh->dev[i].pages[j] = sh->dev[i].orig_pages[j];
+}
+
 static void raid5_build_block(struct stripe_head *sh, int i, int previous);
 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
 			    struct stripe_head *sh);
@@ -659,7 +675,7 @@ get_active_stripe(struct r5conf *conf, s
 		  int previous, int noblock, int noquiesce)
 {
 	struct stripe_head *sh;
-	int hash = stripe_hash_locks_hash(sector);
+	int hash = stripe_hash_locks_hash(conf, sector);
 
 	pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
@@ -740,7 +756,7 @@ raid5_end_write_request(struct bio *bi,
 static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 {
 	struct r5conf *conf = sh->raid_conf;
-	int i, disks = sh->disks;
+	int i, disks = sh->disks, j;
 
 	might_sleep();
 
@@ -808,7 +824,7 @@ static void ops_run_io(struct stripe_hea
 		       test_bit(WriteErrorSeen, &rdev->flags)) {
 			sector_t first_bad;
 			int bad_sectors;
-			int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+			int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS(conf),
 					      &first_bad, &bad_sectors);
 			if (!bad)
 				break;
@@ -840,7 +856,7 @@ static void ops_run_io(struct stripe_hea
 		if (rdev) {
 			if (s->syncing || s->expanding || s->expanded
 			    || s->replacing)
-				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+				md_sync_acct(rdev->bdev, STRIPE_SECTORS(conf));
 
 			set_bit(STRIPE_IO_STARTED, &sh->state);
 
@@ -867,11 +883,12 @@ static void ops_run_io(struct stripe_hea
 
 			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
 				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
-			sh->dev[i].vec.bv_page = sh->dev[i].page;
-			bi->bi_vcnt = 1;
-			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
-			bi->bi_io_vec[0].bv_offset = 0;
-			bi->bi_iter.bi_size = STRIPE_SIZE;
+
+			bi->bi_max_vecs = 1 << conf->stripe_size_order;
+			bi->bi_io_vec = sh->dev[i].vecs;
+
+			for (j = 0; j < STRIPE_PAGES(conf); j++)
+				bio_add_page(bi, sh->dev[i].pages[j], PAGE_SIZE, 0);
 			/*
 			 * If this is discard request, set bi_vcnt 0. We don't
 			 * want to confuse SCSI because SCSI will replace payload
@@ -890,7 +907,7 @@ static void ops_run_io(struct stripe_hea
 		if (rrdev) {
 			if (s->syncing || s->expanding || s->expanded
 			    || s->replacing)
-				md_sync_acct(rrdev->bdev, STRIPE_SECTORS);
+				md_sync_acct(rrdev->bdev, STRIPE_SECTORS(conf));
 
 			set_bit(STRIPE_IO_STARTED, &sh->state);
 
@@ -914,11 +931,12 @@ static void ops_run_io(struct stripe_hea
 						  + rrdev->data_offset);
 			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
 				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
-			sh->dev[i].rvec.bv_page = sh->dev[i].page;
-			rbi->bi_vcnt = 1;
-			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
-			rbi->bi_io_vec[0].bv_offset = 0;
-			rbi->bi_iter.bi_size = STRIPE_SIZE;
+
+			rbi->bi_max_vecs = 1 << conf->stripe_size_order;
+			rbi->bi_io_vec = sh->dev[i].rvecs;
+
+			for (j = 0; j < STRIPE_PAGES(conf); j++)
+				bio_add_page(rbi, sh->dev[i].pages[j], PAGE_SIZE, 0);
 			/*
 			 * If this is discard request, set bi_vcnt 0. We don't
 			 * want to confuse SCSI because SCSI will replace payload
@@ -943,7 +961,7 @@ static void ops_run_io(struct stripe_hea
 }
 
 static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page **page,
+async_copy_one_page(int frombio, struct bio *bio, struct page **page,
 	sector_t sector, struct dma_async_tx_descriptor *tx,
 	struct stripe_head *sh)
 {
@@ -974,8 +992,8 @@ async_copy_data(int frombio, struct bio
 			len -= b_offset;
 		}
 
-		if (len > 0 && page_offset + len > STRIPE_SIZE)
-			clen = STRIPE_SIZE - page_offset;
+		if (len > 0 && page_offset + len > PAGE_SIZE)
+			clen = PAGE_SIZE - page_offset;
 		else
 			clen = len;
 
@@ -985,7 +1003,7 @@ async_copy_data(int frombio, struct bio
 			if (frombio) {
 				if (sh->raid_conf->skip_copy &&
 				    b_offset == 0 && page_offset == 0 &&
-				    clen == STRIPE_SIZE)
+				    clen == PAGE_SIZE)
 					*page = bio_page;
 				else
 					tx = async_memcpy(*page, bio_page, page_offset,
@@ -997,14 +1015,42 @@ async_copy_data(int frombio, struct bio
 		/* chain the operations */
 		submit.depend_tx = tx;
 
-		if (clen < len) /* hit end of page */
-			break;
 		page_offset +=  len;
+		/* hit end of page */
+		if (page_offset > 0 && (page_offset % PAGE_SIZE) == 0)
+			break;
 	}
 
 	return tx;
 }
 
+static struct dma_async_tx_descriptor *
+async_copy_data(int frombio, struct bio *bio, struct page **pages,
+	sector_t sector, struct dma_async_tx_descriptor *tx,
+	struct stripe_head *sh, int *skip_copy)
+{
+	sector_t offset;
+	struct page **cur_page, *tmp;
+
+	*skip_copy = 0;
+	if (sector > bio->bi_iter.bi_sector)
+		offset = sector;
+	else {
+		offset = bio->bi_iter.bi_sector >> 3;
+		offset <<= 3;
+	}
+	while (offset < bio_end_sector(bio) &&
+	       offset < sector + STRIPE_SECTORS(sh->raid_conf)) {
+		cur_page = &pages[(offset - sector) >> 3];
+		tmp = *cur_page;
+		tx = async_copy_one_page(frombio, bio, cur_page, offset, tx, sh);
+		if (tmp != *cur_page)
+			*skip_copy = 1;
+		offset += PAGE_SIZE >> 9;
+	}
+	return tx;
+}
+
 static void ops_complete_biofill(void *stripe_head_ref)
 {
 	struct stripe_head *sh = stripe_head_ref;
@@ -1030,8 +1076,8 @@ static void ops_complete_biofill(void *s
 			rbi = dev->read;
 			dev->read = NULL;
 			while (rbi && rbi->bi_iter.bi_sector <
-				dev->sector + STRIPE_SECTORS) {
-				rbi2 = r5_next_bio(rbi, dev->sector);
+				dev->sector + STRIPE_SECTORS(sh->raid_conf)) {
+				rbi2 = r5_next_bio(sh->raid_conf, rbi, dev->sector);
 				if (!raid5_dec_bi_active_stripes(rbi)) {
 					rbi->bi_next = return_bi;
 					return_bi = rbi;
@@ -1052,7 +1098,7 @@ static void ops_run_biofill(struct strip
 {
 	struct dma_async_tx_descriptor *tx = NULL;
 	struct async_submit_ctl submit;
-	int i;
+	int i, dummy;
 
 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);
@@ -1066,10 +1112,10 @@ static void ops_run_biofill(struct strip
 			dev->toread = NULL;
 			spin_unlock_irq(&sh->stripe_lock);
 			while (rbi && rbi->bi_iter.bi_sector <
-				dev->sector + STRIPE_SECTORS) {
-				tx = async_copy_data(0, rbi, &dev->page,
-					dev->sector, tx, sh);
-				rbi = r5_next_bio(rbi, dev->sector);
+				dev->sector + STRIPE_SECTORS(sh->raid_conf)) {
+				tx = async_copy_data(0, rbi, dev->pages,
+					dev->sector, tx, sh, &dummy);
+				rbi = r5_next_bio(sh->raid_conf, rbi, dev->sector);
 			}
 		}
 	}
@@ -1112,40 +1158,64 @@ static void ops_complete_compute(void *s
 
 /* return a pointer to the address conversion region of the scribble buffer */
 static addr_conv_t *to_addr_conv(struct stripe_head *sh,
-				 struct raid5_percpu *percpu)
+				 struct raid5_percpu *percpu, int page_index)
+{
+
+	return percpu->scribble + sizeof(struct page *) * (sh->disks + 2) +
+		page_index * (sh->raid_conf->scribble_len /
+		STRIPE_PAGES(sh->raid_conf));
+}
+
+static struct page **to_scribble_page(struct stripe_head *sh,
+				struct raid5_percpu *percpu, int page_index)
 {
-	return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+	return percpu->scribble + page_index * (sh->raid_conf->scribble_len /
+		STRIPE_PAGES(sh->raid_conf));
 }
 
 static struct dma_async_tx_descriptor *
 ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
 	int disks = sh->disks;
-	struct page **xor_srcs = percpu->scribble;
+	struct page **xor_srcs;
 	int target = sh->ops.target;
 	struct r5dev *tgt = &sh->dev[target];
-	struct page *xor_dest = tgt->page;
-	int count = 0;
-	struct dma_async_tx_descriptor *tx;
+	struct page *xor_dest;
+	int count;
+	struct dma_async_tx_descriptor *tx = NULL;
 	struct async_submit_ctl submit;
-	int i;
+	int i, j = 0;
 
 	pr_debug("%s: stripe %llu block: %d\n",
 		__func__, (unsigned long long)sh->sector, target);
 	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
 
+again:
+	count = 0;
+	xor_srcs = to_scribble_page(sh, percpu, j);
+	xor_dest = tgt->pages[j];
+
 	for (i = disks; i--; )
 		if (i != target)
-			xor_srcs[count++] = sh->dev[i].page;
+			xor_srcs[count++] = sh->dev[i].pages[j];
 
-	atomic_inc(&sh->count);
+	if (j == STRIPE_PAGES(sh->raid_conf) - 1) {
+		atomic_inc(&sh->count);
+
+		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx,
+			ops_complete_compute, sh, to_addr_conv(sh, percpu, j));
+	} else
+		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx,
+				  NULL, NULL, to_addr_conv(sh, percpu, j));
 
-	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
-			  ops_complete_compute, sh, to_addr_conv(sh, percpu));
 	if (unlikely(count == 1))
-		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, PAGE_SIZE, &submit);
 	else
-		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+		tx = async_xor(xor_dest, xor_srcs, 0, count, PAGE_SIZE, &submit);
+
+	j++;
+	if (j < STRIPE_PAGES(sh->raid_conf))
+		goto again;
 
 	return tx;
 }
@@ -1159,7 +1229,8 @@ ops_run_compute5(struct stripe_head *sh,
  * destination buffer is recorded in srcs[count] and the Q destination
  * is recorded in srcs[count+1]].
  */
-static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh,
+	int page_index)
 {
 	int disks = sh->disks;
 	int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
@@ -1175,7 +1246,7 @@ static int set_syndrome_sources(struct p
 	do {
 		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
 
-		srcs[slot] = sh->dev[i].page;
+		srcs[slot] = sh->dev[i].pages[page_index];
 		i = raid6_next_disk(i, disks);
 	} while (i != d0_idx);
 
@@ -1186,14 +1257,14 @@ static struct dma_async_tx_descriptor *
 ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
 	int disks = sh->disks;
-	struct page **blocks = percpu->scribble;
+	struct page **blocks;
 	int target;
 	int qd_idx = sh->qd_idx;
-	struct dma_async_tx_descriptor *tx;
+	struct dma_async_tx_descriptor *tx = NULL;
 	struct async_submit_ctl submit;
 	struct r5dev *tgt;
 	struct page *dest;
-	int i;
+	int i, j = 0;
 	int count;
 
 	if (sh->ops.target < 0)
@@ -1209,40 +1280,57 @@ ops_run_compute6_1(struct stripe_head *s
 
 	tgt = &sh->dev[target];
 	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
-	dest = tgt->page;
 
-	atomic_inc(&sh->count);
+again:
+	dest = tgt->pages[j];
+	blocks = to_scribble_page(sh, percpu, j);
+
+	if (j == STRIPE_PAGES(sh->raid_conf) - 1)
+		atomic_inc(&sh->count);
 
 	if (target == qd_idx) {
-		count = set_syndrome_sources(blocks, sh);
+		count = set_syndrome_sources(blocks, sh, j);
 		blocks[count] = NULL; /* regenerating p is not necessary */
 		BUG_ON(blocks[count+1] != dest); /* q should already be set */
-		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
-				  ops_complete_compute, sh,
-				  to_addr_conv(sh, percpu));
-		tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+
+		if (j == STRIPE_PAGES(sh->raid_conf) - 1)
+			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+ 				  ops_complete_compute, sh,
+				  to_addr_conv(sh, percpu, j));
+		else
+			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+				  NULL, NULL, to_addr_conv(sh, percpu, j));
+		tx = async_gen_syndrome(blocks, 0, count+2, PAGE_SIZE, &submit);
 	} else {
 		/* Compute any data- or p-drive using XOR */
 		count = 0;
 		for (i = disks; i-- ; ) {
 			if (i == target || i == qd_idx)
 				continue;
-			blocks[count++] = sh->dev[i].page;
+			blocks[count++] = sh->dev[i].pages[j];
 		}
 
-		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
-				  NULL, ops_complete_compute, sh,
-				  to_addr_conv(sh, percpu));
-		tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
+		if (j == STRIPE_PAGES(sh->raid_conf) - 1)
+			init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+				  tx, ops_complete_compute, sh,
+				  to_addr_conv(sh, percpu, j));
+		else
+			init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+				  tx, NULL, NULL,
+				  to_addr_conv(sh, percpu, j));
+		tx = async_xor(dest, blocks, 0, count, PAGE_SIZE, &submit);
 	}
 
+	j++;
+	if (j < STRIPE_PAGES(sh->raid_conf))
+		goto again;
 	return tx;
 }
 
 static struct dma_async_tx_descriptor *
 ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-	int i, count, disks = sh->disks;
+	int i, count, disks = sh->disks, j = 0;
 	int syndrome_disks = sh->ddf_layout ? disks : disks-2;
 	int d0_idx = raid6_d0(sh);
 	int faila = -1, failb = -1;
@@ -1250,8 +1338,8 @@ ops_run_compute6_2(struct stripe_head *s
 	int target2 = sh->ops.target2;
 	struct r5dev *tgt = &sh->dev[target];
 	struct r5dev *tgt2 = &sh->dev[target2];
-	struct dma_async_tx_descriptor *tx;
-	struct page **blocks = percpu->scribble;
+	struct dma_async_tx_descriptor *tx = NULL;
+	struct page **blocks;
 	struct async_submit_ctl submit;
 
 	pr_debug("%s: stripe %llu block1: %d block2: %d\n",
@@ -1260,6 +1348,8 @@ ops_run_compute6_2(struct stripe_head *s
 	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
 	BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
 
+again:
+	blocks = to_scribble_page(sh, percpu, j);
 	/* we need to open-code set_syndrome_sources to handle the
 	 * slot number conversion for 'faila' and 'failb'
 	 */
@@ -1270,7 +1360,7 @@ ops_run_compute6_2(struct stripe_head *s
 	do {
 		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
 
-		blocks[slot] = sh->dev[i].page;
+		blocks[slot] = sh->dev[i].pages[j];
 
 		if (i == target)
 			faila = slot;
@@ -1285,17 +1375,23 @@ ops_run_compute6_2(struct stripe_head *s
 	pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
 		 __func__, (unsigned long long)sh->sector, faila, failb);
 
-	atomic_inc(&sh->count);
+	if (j == STRIPE_PAGES(sh->raid_conf) - 1)
+		atomic_inc(&sh->count);
 
 	if (failb == syndrome_disks+1) {
 		/* Q disk is one of the missing disks */
 		if (faila == syndrome_disks) {
 			/* Missing P+Q, just recompute */
-			init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+			if (j == STRIPE_PAGES(sh->raid_conf) - 1)
+				init_async_submit(&submit, ASYNC_TX_FENCE, tx,
 					  ops_complete_compute, sh,
-					  to_addr_conv(sh, percpu));
-			return async_gen_syndrome(blocks, 0, syndrome_disks+2,
-						  STRIPE_SIZE, &submit);
+					  to_addr_conv(sh, percpu, j));
+			else
+				init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+					  NULL, NULL,
+					  to_addr_conv(sh, percpu, j));
+			tx = async_gen_syndrome(blocks, 0, syndrome_disks+2,
+						  PAGE_SIZE, &submit);
 		} else {
 			struct page *dest;
 			int data_target;
@@ -1311,39 +1407,55 @@ ops_run_compute6_2(struct stripe_head *s
 			for (i = disks; i-- ; ) {
 				if (i == data_target || i == qd_idx)
 					continue;
-				blocks[count++] = sh->dev[i].page;
+				blocks[count++] = sh->dev[i].pages[j];
 			}
-			dest = sh->dev[data_target].page;
+			dest = sh->dev[data_target].pages[j];
 			init_async_submit(&submit,
 					  ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
-					  NULL, NULL, NULL,
-					  to_addr_conv(sh, percpu));
-			tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
+					  tx, NULL, NULL,
+					  to_addr_conv(sh, percpu, j));
+			tx = async_xor(dest, blocks, 0, count, PAGE_SIZE,
 				       &submit);
 
-			count = set_syndrome_sources(blocks, sh);
-			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+			count = set_syndrome_sources(blocks, sh, j);
+			if (j == STRIPE_PAGES(sh->raid_conf) - 1)
+				init_async_submit(&submit, ASYNC_TX_FENCE, tx,
 					  ops_complete_compute, sh,
-					  to_addr_conv(sh, percpu));
-			return async_gen_syndrome(blocks, 0, count+2,
-						  STRIPE_SIZE, &submit);
+					  to_addr_conv(sh, percpu, j));
+			else
+				init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+					  NULL, NULL,
+					  to_addr_conv(sh, percpu, j));
+			tx = async_gen_syndrome(blocks, 0, count+2,
+						  PAGE_SIZE, &submit);
 		}
 	} else {
-		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+		if (j == STRIPE_PAGES(sh->raid_conf) - 1)
+			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
 				  ops_complete_compute, sh,
-				  to_addr_conv(sh, percpu));
+				  to_addr_conv(sh, percpu, j));
+		else
+			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+				  NULL, NULL, to_addr_conv(sh, percpu, j));
+
 		if (failb == syndrome_disks) {
 			/* We're missing D+P. */
-			return async_raid6_datap_recov(syndrome_disks+2,
-						       STRIPE_SIZE, faila,
+			tx = async_raid6_datap_recov(syndrome_disks+2,
+						       PAGE_SIZE, faila,
 						       blocks, &submit);
 		} else {
 			/* We're missing D+D. */
-			return async_raid6_2data_recov(syndrome_disks+2,
-						       STRIPE_SIZE, faila, failb,
+			tx = async_raid6_2data_recov(syndrome_disks+2,
+						       PAGE_SIZE, faila, failb,
 						       blocks, &submit);
 		}
 	}
+
+	j++;
+	if (j < STRIPE_PAGES(sh->raid_conf))
+		goto again;
+
+	return tx;
 }
 
 
@@ -1360,26 +1472,40 @@ ops_run_prexor(struct stripe_head *sh, s
 	       struct dma_async_tx_descriptor *tx)
 {
 	int disks = sh->disks;
-	struct page **xor_srcs = percpu->scribble;
-	int count = 0, pd_idx = sh->pd_idx, i;
+	struct page **xor_srcs;
+	int count, pd_idx = sh->pd_idx, i, j = 0;
 	struct async_submit_ctl submit;
 
 	/* existing parity data subtracted */
-	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
+	struct page *xor_dest;
 
 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);
 
+again:
+	count = 0;
+	xor_srcs = to_scribble_page(sh, percpu, j);
+	/* existing parity data subtracted */
+	xor_dest = xor_srcs[count++] = sh->dev[pd_idx].pages[j];
+
 	for (i = disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
 		/* Only process blocks that are known to be uptodate */
 		if (test_bit(R5_Wantdrain, &dev->flags))
-			xor_srcs[count++] = dev->page;
+			xor_srcs[count++] = dev->pages[j];
 	}
 
-	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
-			  ops_complete_prexor, sh, to_addr_conv(sh, percpu));
-	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+	if (j == STRIPE_PAGES(sh->raid_conf) - 1)
+		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  ops_complete_prexor, sh, to_addr_conv(sh, percpu, j));
+	else
+		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, to_addr_conv(sh, percpu, j));
+	tx = async_xor(xor_dest, xor_srcs, 0, count, PAGE_SIZE, &submit);
+
+	j++;
+	if (j < STRIPE_PAGES(sh->raid_conf))
+		goto again;
 
 	return tx;
 }
@@ -1406,10 +1532,10 @@ ops_run_biodrain(struct stripe_head *sh,
 			BUG_ON(dev->written);
 			wbi = dev->written = chosen;
 			spin_unlock_irq(&sh->stripe_lock);
-			WARN_ON(dev->page != dev->orig_page);
+			WARN_ON(dev->pages[0] != dev->orig_pages[0]);
 
 			while (wbi && wbi->bi_iter.bi_sector <
-				dev->sector + STRIPE_SECTORS) {
+				dev->sector + STRIPE_SECTORS(sh->raid_conf)) {
 				if (wbi->bi_rw & REQ_FUA)
 					set_bit(R5_WantFUA, &dev->flags);
 				if (wbi->bi_rw & REQ_SYNC)
@@ -1417,15 +1543,16 @@ ops_run_biodrain(struct stripe_head *sh,
 				if (wbi->bi_rw & REQ_DISCARD)
 					set_bit(R5_Discard, &dev->flags);
 				else {
-					tx = async_copy_data(1, wbi, &dev->page,
-						dev->sector, tx, sh);
-					if (dev->page != dev->orig_page) {
+					int skip_copy;
+					tx = async_copy_data(1, wbi, dev->pages,
+						dev->sector, tx, sh, &skip_copy);
+					if (skip_copy) {
 						set_bit(R5_SkipCopy, &dev->flags);
 						clear_bit(R5_UPTODATE, &dev->flags);
 						clear_bit(R5_OVERWRITE, &dev->flags);
 					}
 				}
-				wbi = r5_next_bio(wbi, dev->sector);
+				wbi = r5_next_bio(sh->raid_conf, wbi, dev->sector);
 			}
 		}
 	}
@@ -1482,9 +1609,9 @@ ops_run_reconstruct5(struct stripe_head
 		     struct dma_async_tx_descriptor *tx)
 {
 	int disks = sh->disks;
-	struct page **xor_srcs = percpu->scribble;
+	struct page **xor_srcs;
 	struct async_submit_ctl submit;
-	int count = 0, pd_idx = sh->pd_idx, i;
+	int count, pd_idx = sh->pd_idx, i, j = 0;
 	struct page *xor_dest;
 	int prexor = 0;
 	unsigned long flags;
@@ -1504,23 +1631,27 @@ ops_run_reconstruct5(struct stripe_head
 		ops_complete_reconstruct(sh);
 		return;
 	}
+
+again:
+	count = 0;
+	xor_srcs = to_scribble_page(sh, percpu, j);
 	/* check if prexor is active which means only process blocks
 	 * that are part of a read-modify-write (written)
 	 */
 	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
 		prexor = 1;
-		xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
+		xor_dest = xor_srcs[count++] = sh->dev[pd_idx].pages[j];
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if (dev->written)
-				xor_srcs[count++] = dev->page;
+				xor_srcs[count++] = dev->pages[j];
 		}
 	} else {
-		xor_dest = sh->dev[pd_idx].page;
+		xor_dest = sh->dev[pd_idx].pages[j];
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if (i != pd_idx)
-				xor_srcs[count++] = dev->page;
+				xor_srcs[count++] = dev->pages[j];
 		}
 	}
 
@@ -1529,17 +1660,28 @@ ops_run_reconstruct5(struct stripe_head
 	 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
 	 * for the synchronous xor case
 	 */
-	flags = ASYNC_TX_ACK |
-		(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
+	if (j == STRIPE_PAGES(sh->raid_conf) - 1) {
+		flags = ASYNC_TX_ACK |
+			(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
 
-	atomic_inc(&sh->count);
+		atomic_inc(&sh->count);
+
+		init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
+			  to_addr_conv(sh, percpu, j));
+	} else {
+		flags = prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST;
+		init_async_submit(&submit, flags, tx, NULL, NULL,
+			  to_addr_conv(sh, percpu, j));
+	}
 
-	init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
-			  to_addr_conv(sh, percpu));
 	if (unlikely(count == 1))
-		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, PAGE_SIZE, &submit);
 	else
-		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+		tx = async_xor(xor_dest, xor_srcs, 0, count, PAGE_SIZE, &submit);
+
+	j++;
+	if (j < STRIPE_PAGES(sh->raid_conf))
+		goto again;
 }
 
 static void
@@ -1547,8 +1689,8 @@ ops_run_reconstruct6(struct stripe_head
 		     struct dma_async_tx_descriptor *tx)
 {
 	struct async_submit_ctl submit;
-	struct page **blocks = percpu->scribble;
-	int count, i;
+	struct page **blocks;
+	int count, i, j = 0;
 
 	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
 
@@ -1566,22 +1708,38 @@ ops_run_reconstruct6(struct stripe_head
 		return;
 	}
 
-	count = set_syndrome_sources(blocks, sh);
+again:
+	blocks = to_scribble_page(sh, percpu, j);
 
-	atomic_inc(&sh->count);
+	count = set_syndrome_sources(blocks, sh, j);
+
+	if (j == STRIPE_PAGES(sh->raid_conf) - 1) {
+		atomic_inc(&sh->count);
 
-	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
-			  sh, to_addr_conv(sh, percpu));
-	async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
+		init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
+			  sh, to_addr_conv(sh, percpu, j));
+	} else
+		init_async_submit(&submit, 0, tx, NULL,
+			  NULL, to_addr_conv(sh, percpu, j));
+	tx = async_gen_syndrome(blocks, 0, count+2, PAGE_SIZE,  &submit);
+
+	j++;
+	if (j < STRIPE_PAGES(sh->raid_conf))
+		goto again;
 }
 
 static void ops_complete_check(void *stripe_head_ref)
 {
 	struct stripe_head *sh = stripe_head_ref;
+	int i;
 
 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);
 
+	sh->ops.zero_sum_result = 0;
+	for (i = 0; i < STRIPE_PAGES(sh->raid_conf); i++)
+		sh->ops.zero_sum_result |= sh->ops.sum_results[i];
+
 	sh->check_state = check_state_check_result;
 	set_bit(STRIPE_HANDLE, &sh->state);
 	release_stripe(sh);
@@ -1593,28 +1751,34 @@ static void ops_run_check_p(struct strip
 	int pd_idx = sh->pd_idx;
 	int qd_idx = sh->qd_idx;
 	struct page *xor_dest;
-	struct page **xor_srcs = percpu->scribble;
-	struct dma_async_tx_descriptor *tx;
+	struct page **xor_srcs;
+	struct dma_async_tx_descriptor *tx = NULL;
 	struct async_submit_ctl submit;
 	int count;
-	int i;
+	int i, j = 0;
 
 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);
 
+again:
+	xor_srcs = to_scribble_page(sh, percpu, j);
 	count = 0;
-	xor_dest = sh->dev[pd_idx].page;
+	xor_dest = sh->dev[pd_idx].pages[j];
 	xor_srcs[count++] = xor_dest;
 	for (i = disks; i--; ) {
 		if (i == pd_idx || i == qd_idx)
 			continue;
-		xor_srcs[count++] = sh->dev[i].page;
+		xor_srcs[count++] = sh->dev[i].pages[j];
 	}
 
-	init_async_submit(&submit, 0, NULL, NULL, NULL,
-			  to_addr_conv(sh, percpu));
-	tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-			   &sh->ops.zero_sum_result, &submit);
+	init_async_submit(&submit, 0, tx, NULL, NULL,
+			  to_addr_conv(sh, percpu, j));
+	tx = async_xor_val(xor_dest, xor_srcs, 0, count, PAGE_SIZE,
+			   &sh->ops.sum_results[j], &submit);
+
+	j++;
+	if (j < STRIPE_PAGES(sh->raid_conf))
+		goto again;
 
 	atomic_inc(&sh->count);
 	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
@@ -1623,22 +1787,32 @@ static void ops_run_check_p(struct strip
 
 static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
 {
-	struct page **srcs = percpu->scribble;
+	struct page **srcs;
 	struct async_submit_ctl submit;
-	int count;
+	int count, j = 0;
+	struct dma_async_tx_descriptor *tx = NULL;
 
 	pr_debug("%s: stripe %llu checkp: %d\n", __func__,
 		(unsigned long long)sh->sector, checkp);
 
-	count = set_syndrome_sources(srcs, sh);
+again:
+	srcs = to_scribble_page(sh, percpu, j);
+	count = set_syndrome_sources(srcs, sh, j);
 	if (!checkp)
 		srcs[count] = NULL;
 
-	atomic_inc(&sh->count);
-	init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
-			  sh, to_addr_conv(sh, percpu));
-	async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
-			   &sh->ops.zero_sum_result, percpu->spare_page, &submit);
+	init_async_submit(&submit, 0, tx, NULL,
+			  NULL, to_addr_conv(sh, percpu, j));
+	async_syndrome_val(srcs, 0, count+2, PAGE_SIZE,
+			   &sh->ops.sum_results[j], percpu->spare_pages[j], &submit);
+
+	j++;
+	if (j < STRIPE_PAGES(sh->raid_conf))
+		goto again;
+
+ 	atomic_inc(&sh->count);
+	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
+	tx = async_trigger_callback(&submit);
 }
 
 static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
@@ -1706,6 +1880,37 @@ static void raid_run_ops(struct stripe_h
 	put_cpu();
 }
 
+#define STRIPE_ALLOC_SIZE(conf, devs) \
+	(sizeof(struct stripe_head) + (devs - 1) * sizeof(struct r5dev) + \
+	 sizeof(enum sum_check_flags) * STRIPE_PAGES(conf) + \
+	 sizeof(struct bio_vec) * devs * STRIPE_PAGES(conf) * 2 + \
+	 sizeof(struct page *) * devs * STRIPE_PAGES(conf) * 2)
+
+static void init_stripe_pointer(struct r5conf *conf, struct stripe_head *sh, int devs)
+{
+	void *p = sh;
+	struct bio_vec *vecs, *rvecs;
+	struct page **pages, **orig_pages;
+	int i;
+
+	p += sizeof(struct stripe_head) + (devs - 1) * sizeof(struct r5dev);
+	sh->ops.sum_results = p;
+	p += sizeof(enum sum_check_flags) * STRIPE_PAGES(conf);
+	vecs = p;
+	p += sizeof(struct bio_vec) * devs * STRIPE_PAGES(conf);
+	rvecs = p;
+	p += sizeof(struct bio_vec) * devs * STRIPE_PAGES(conf);
+	pages = p;
+	p += sizeof(struct page *) * devs * STRIPE_PAGES(conf);
+	orig_pages = p;
+	for (i = 0; i < devs; i++) {
+		sh->dev[i].vecs = vecs + i * STRIPE_PAGES(conf);
+		sh->dev[i].rvecs = rvecs + i * STRIPE_PAGES(conf);
+		sh->dev[i].pages = pages + i * STRIPE_PAGES(conf);
+		sh->dev[i].orig_pages = orig_pages + i * STRIPE_PAGES(conf);
+	}
+}
+
 static int grow_one_stripe(struct r5conf *conf, int hash)
 {
 	struct stripe_head *sh;
@@ -1713,6 +1918,7 @@ static int grow_one_stripe(struct r5conf
 	if (!sh)
 		return 0;
 
+	init_stripe_pointer(conf, sh, conf->pool_size);
 	sh->raid_conf = conf;
 
 	spin_lock_init(&sh->stripe_lock);
@@ -1747,7 +1953,7 @@ static int grow_stripes(struct r5conf *c
 
 	conf->active_name = 0;
 	sc = kmem_cache_create(conf->cache_name[conf->active_name],
-			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
+			       STRIPE_ALLOC_SIZE(conf, devs),
 			       0, 0, NULL);
 	if (!sc)
 		return 1;
@@ -1776,11 +1982,12 @@ static int grow_stripes(struct r5conf *c
  * calculate over all devices (not just the data blocks), using zeros in place
  * of the P and Q blocks.
  */
-static size_t scribble_len(int num)
+static size_t scribble_len(struct r5conf *conf, int num)
 {
 	size_t len;
 
 	len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
+	len *= STRIPE_PAGES(conf);
 
 	return len;
 }
@@ -1816,7 +2023,7 @@ static int resize_stripes(struct r5conf
 	unsigned long cpu;
 	int err;
 	struct kmem_cache *sc;
-	int i;
+	int i, j;
 	int hash, cnt;
 
 	if (newsize <= conf->pool_size)
@@ -1828,7 +2035,7 @@ static int resize_stripes(struct r5conf
 
 	/* Step 1 */
 	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
-			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+			       STRIPE_ALLOC_SIZE(conf, newsize),
 			       0, 0, NULL);
 	if (!sc)
 		return -ENOMEM;
@@ -1838,6 +2045,8 @@ static int resize_stripes(struct r5conf
 		if (!nsh)
 			break;
 
+		init_stripe_pointer(conf, nsh, newsize);
+
 		nsh->raid_conf = conf;
 		spin_lock_init(&nsh->stripe_lock);
 
@@ -1869,11 +2078,17 @@ static int resize_stripes(struct r5conf
 		unlock_device_hash_lock(conf, hash);
 		atomic_set(&nsh->count, 1);
 		for(i=0; i<conf->pool_size; i++) {
-			nsh->dev[i].page = osh->dev[i].page;
-			nsh->dev[i].orig_page = osh->dev[i].page;
+			for (j = 0; j < STRIPE_PAGES(conf); j++) {
+				nsh->dev[i].pages[j] = osh->dev[i].pages[j];
+				nsh->dev[i].orig_pages[j] = osh->dev[i].orig_pages[j];
+			}
+		}
+		for( ; i < newsize; i++) {
+			for (j = 0; j < STRIPE_PAGES(conf); j++) {
+				nsh->dev[i].pages[j] = NULL;
+				nsh->dev[i].orig_pages[j] = NULL;
+			}
 		}
-		for( ; i<newsize; i++)
-			nsh->dev[i].page = NULL;
 		nsh->hash_lock_index = hash;
 		kmem_cache_free(conf->slab_cache, osh);
 		cnt++;
@@ -1900,7 +2115,7 @@ static int resize_stripes(struct r5conf
 		err = -ENOMEM;
 
 	get_online_cpus();
-	conf->scribble_len = scribble_len(newsize);
+	conf->scribble_len = scribble_len(conf, newsize);
 	for_each_present_cpu(cpu) {
 		struct raid5_percpu *percpu;
 		void *scribble;
@@ -1923,14 +2138,21 @@ static int resize_stripes(struct r5conf
 		nsh = list_entry(newstripes.next, struct stripe_head, lru);
 		list_del_init(&nsh->lru);
 
-		for (i=conf->raid_disks; i < newsize; i++)
-			if (nsh->dev[i].page == NULL) {
-				struct page *p = alloc_page(GFP_NOIO);
-				nsh->dev[i].page = p;
-				nsh->dev[i].orig_page = p;
-				if (!p)
+		for (i=conf->raid_disks; i < newsize; i++) {
+			for (j = 0; j < STRIPE_PAGES(conf); j++) {
+				struct page *p;
+				if (nsh->dev[i].orig_pages[j])
+					continue;
+
+				p = alloc_page(GFP_NOIO);
+				if (!p) {
 					err = -ENOMEM;
+					continue;
+				}
+				nsh->dev[i].orig_pages[j] = p;
+				nsh->dev[i].pages[j] = p;
 			}
+		}
 		release_stripe(nsh);
 	}
 	/* critical section pass, GFP_NOIO no longer needed */
@@ -2015,10 +2237,10 @@ static void raid5_end_read_request(struc
 				KERN_INFO
 				"md/raid:%s: read error corrected"
 				" (%lu sectors at %llu on %s)\n",
-				mdname(conf->mddev), STRIPE_SECTORS,
+				mdname(conf->mddev), STRIPE_SECTORS(conf),
 				(unsigned long long)s,
 				bdevname(rdev->bdev, b));
-			atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
+			atomic_add(STRIPE_SECTORS(conf), &rdev->corrected_errors);
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
 		} else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
@@ -2082,7 +2304,7 @@ static void raid5_end_read_request(struc
 			if (!(set_bad
 			      && test_bit(In_sync, &rdev->flags)
 			      && rdev_set_badblocks(
-				      rdev, sh->sector, STRIPE_SECTORS, 0)))
+				      rdev, sh->sector, STRIPE_SECTORS(conf), 0)))
 				md_error(conf->mddev, rdev);
 		}
 	}
@@ -2133,7 +2355,7 @@ static void raid5_end_write_request(stru
 		if (!uptodate)
 			md_error(conf->mddev, rdev);
 		else if (is_badblock(rdev, sh->sector,
-				     STRIPE_SECTORS,
+				     STRIPE_SECTORS(conf),
 				     &first_bad, &bad_sectors))
 			set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
 	} else {
@@ -2145,7 +2367,7 @@ static void raid5_end_write_request(stru
 				set_bit(MD_RECOVERY_NEEDED,
 					&rdev->mddev->recovery);
 		} else if (is_badblock(rdev, sh->sector,
-				       STRIPE_SECTORS,
+				       STRIPE_SECTORS(conf),
 				       &first_bad, &bad_sectors)) {
 			set_bit(R5_MadeGood, &sh->dev[i].flags);
 			if (test_bit(R5_ReadError, &sh->dev[i].flags))
@@ -2171,13 +2393,9 @@ static void raid5_build_block(struct str
 	struct r5dev *dev = &sh->dev[i];
 
 	bio_init(&dev->req);
-	dev->req.bi_io_vec = &dev->vec;
-	dev->req.bi_max_vecs = 1;
 	dev->req.bi_private = sh;
 
 	bio_init(&dev->rreq);
-	dev->rreq.bi_io_vec = &dev->rvec;
-	dev->rreq.bi_max_vecs = 1;
 	dev->rreq.bi_private = sh;
 
 	dev->flags = 0;
@@ -2674,13 +2892,13 @@ static int add_stripe_bio(struct stripe_
 		/* check if page is covered */
 		sector_t sector = sh->dev[dd_idx].sector;
 		for (bi=sh->dev[dd_idx].towrite;
-		     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
+		     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS(conf) &&
 			     bi && bi->bi_iter.bi_sector <= sector;
-		     bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
+		     bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) {
 			if (bio_end_sector(bi) >= sector)
 				sector = bio_end_sector(bi);
 		}
-		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
+		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS(conf))
 			set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
 	}
 
@@ -2691,7 +2909,7 @@ static int add_stripe_bio(struct stripe_
 
 	if (conf->mddev->bitmap && firstwrite) {
 		bitmap_startwrite(conf->mddev->bitmap, sh->sector,
-				  STRIPE_SECTORS, 0);
+				  STRIPE_SECTORS(conf), 0);
 		sh->bm_seq = conf->seq_flush+1;
 		set_bit(STRIPE_BIT_DELAY, &sh->state);
 	}
@@ -2744,7 +2962,7 @@ handle_failed_stripe(struct r5conf *conf
 				if (!rdev_set_badblocks(
 					    rdev,
 					    sh->sector,
-					    STRIPE_SECTORS, 0))
+					    STRIPE_SECTORS(conf), 0))
 					md_error(conf->mddev, rdev);
 				rdev_dec_pending(rdev, conf->mddev);
 			}
@@ -2761,8 +2979,8 @@ handle_failed_stripe(struct r5conf *conf
 			wake_up(&conf->wait_for_overlap);
 
 		while (bi && bi->bi_iter.bi_sector <
-			sh->dev[i].sector + STRIPE_SECTORS) {
-			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+			sh->dev[i].sector + STRIPE_SECTORS(conf)) {
+			struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
 			if (!raid5_dec_bi_active_stripes(bi)) {
 				md_write_end(conf->mddev);
@@ -2773,20 +2991,20 @@ handle_failed_stripe(struct r5conf *conf
 		}
 		if (bitmap_end)
 			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
-				STRIPE_SECTORS, 0, 0);
+				STRIPE_SECTORS(conf), 0, 0);
 		bitmap_end = 0;
 		/* and fail all 'written' */
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
 		if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
 			WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
-			sh->dev[i].page = sh->dev[i].orig_page;
+			reset_stripe_devpage(sh, i);
 		}
 
 		if (bi) bitmap_end = 1;
 		while (bi && bi->bi_iter.bi_sector <
-		       sh->dev[i].sector + STRIPE_SECTORS) {
-			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
+		       sh->dev[i].sector + STRIPE_SECTORS(conf)) {
+			struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
 			if (!raid5_dec_bi_active_stripes(bi)) {
 				md_write_end(conf->mddev);
@@ -2809,9 +3027,9 @@ handle_failed_stripe(struct r5conf *conf
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
 			while (bi && bi->bi_iter.bi_sector <
-			       sh->dev[i].sector + STRIPE_SECTORS) {
+			       sh->dev[i].sector + STRIPE_SECTORS(conf)) {
 				struct bio *nextbi =
-					r5_next_bio(bi, sh->dev[i].sector);
+					r5_next_bio(conf, bi, sh->dev[i].sector);
 				clear_bit(BIO_UPTODATE, &bi->bi_flags);
 				if (!raid5_dec_bi_active_stripes(bi)) {
 					bi->bi_next = *return_bi;
@@ -2822,7 +3040,7 @@ handle_failed_stripe(struct r5conf *conf
 		}
 		if (bitmap_end)
 			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
-					STRIPE_SECTORS, 0, 0);
+					STRIPE_SECTORS(conf), 0, 0);
 		/* If we were in the middle of a write the parity block might
 		 * still be locked - so just clear all R5_LOCKED flags
 		 */
@@ -2863,21 +3081,21 @@ handle_failed_sync(struct r5conf *conf,
 			    && !test_bit(Faulty, &rdev->flags)
 			    && !test_bit(In_sync, &rdev->flags)
 			    && !rdev_set_badblocks(rdev, sh->sector,
-						   STRIPE_SECTORS, 0))
+						   STRIPE_SECTORS(conf), 0))
 				abort = 1;
 			rdev = conf->disks[i].replacement;
 			if (rdev
 			    && !test_bit(Faulty, &rdev->flags)
 			    && !test_bit(In_sync, &rdev->flags)
 			    && !rdev_set_badblocks(rdev, sh->sector,
-						   STRIPE_SECTORS, 0))
+						   STRIPE_SECTORS(conf), 0))
 				abort = 1;
 		}
 		if (abort)
 			conf->recovery_disabled =
 				conf->mddev->recovery_disabled;
 	}
-	md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
+	md_done_sync(conf->mddev, STRIPE_SECTORS(conf), !abort);
 }
 
 static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -3036,13 +3254,13 @@ static void handle_stripe_clean_event(st
 					clear_bit(R5_UPTODATE, &dev->flags);
 				if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
 					WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
-					dev->page = dev->orig_page;
+					reset_stripe_devpage(sh, i);
 				}
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_iter.bi_sector <
-					dev->sector + STRIPE_SECTORS) {
-					wbi2 = r5_next_bio(wbi, dev->sector);
+					dev->sector + STRIPE_SECTORS(conf)) {
+					wbi2 = r5_next_bio(conf, wbi, dev->sector);
 					if (!raid5_dec_bi_active_stripes(wbi)) {
 						md_write_end(conf->mddev);
 						wbi->bi_next = *return_bi;
@@ -3051,13 +3269,13 @@ static void handle_stripe_clean_event(st
 					wbi = wbi2;
 				}
 				bitmap_endwrite(conf->mddev->bitmap, sh->sector,
-						STRIPE_SECTORS,
+						STRIPE_SECTORS(conf),
 					 !test_bit(STRIPE_DEGRADED, &sh->state),
 						0);
 			} else if (test_bit(R5_Discard, &dev->flags))
 				discard_pending = 1;
 			WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
-			WARN_ON(dev->page != dev->orig_page);
+			WARN_ON(dev->pages[0] != dev->orig_pages[0]);
 		}
 	if (!discard_pending &&
 	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
@@ -3274,7 +3492,7 @@ static void handle_parity_checks5(struct
 			 */
 			set_bit(STRIPE_INSYNC, &sh->state);
 		else {
-			atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
+			atomic64_add(STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches);
 			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
 				/* don't try to repair!! */
 				set_bit(STRIPE_INSYNC, &sh->state);
@@ -3426,7 +3644,7 @@ static void handle_parity_checks6(struct
 				 */
 			}
 		} else {
-			atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
+			atomic64_add(STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches);
 			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
 				/* don't try to repair!! */
 				set_bit(STRIPE_INSYNC, &sh->state);
@@ -3466,7 +3684,7 @@ static void handle_parity_checks6(struct
 
 static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
 {
-	int i;
+	int i, k;
 
 	/* We have read all the blocks in this stripe and now we need to
 	 * copy some of them into a target stripe for expand.
@@ -3496,11 +3714,13 @@ static void handle_stripe_expansion(stru
 				continue;
 			}
 
-			/* place all the copies on one channel */
-			init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
-			tx = async_memcpy(sh2->dev[dd_idx].page,
-					  sh->dev[i].page, 0, 0, STRIPE_SIZE,
-					  &submit);
+			for (k = 0; k < STRIPE_PAGES(sh->raid_conf); k++) {
+				/* place all the copies on one channel */
+				init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
+				tx = async_memcpy(sh2->dev[dd_idx].pages[k],
+						  sh->dev[i].pages[k], 0, 0, PAGE_SIZE,
+						  &submit);
+			}
 
 			set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
 			set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@ -3597,8 +3817,8 @@ static void analyse_stripe(struct stripe
 		 */
 		rdev = rcu_dereference(conf->disks[i].replacement);
 		if (rdev && !test_bit(Faulty, &rdev->flags) &&
-		    rdev->recovery_offset >= sh->sector + STRIPE_SECTORS &&
-		    !is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+		    rdev->recovery_offset >= sh->sector + STRIPE_SECTORS(conf) &&
+		    !is_badblock(rdev, sh->sector, STRIPE_SECTORS(conf),
 				 &first_bad, &bad_sectors))
 			set_bit(R5_ReadRepl, &dev->flags);
 		else {
@@ -3610,7 +3830,7 @@ static void analyse_stripe(struct stripe
 		if (rdev && test_bit(Faulty, &rdev->flags))
 			rdev = NULL;
 		if (rdev) {
-			is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+			is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS(conf),
 					     &first_bad, &bad_sectors);
 			if (s->blocked_rdev == NULL
 			    && (test_bit(Blocked, &rdev->flags)
@@ -3637,7 +3857,7 @@ static void analyse_stripe(struct stripe
 			}
 		} else if (test_bit(In_sync, &rdev->flags))
 			set_bit(R5_Insync, &dev->flags);
-		else if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
+		else if (sh->sector + STRIPE_SECTORS(conf) <= rdev->recovery_offset)
 			/* in sync if before recovery_offset */
 			set_bit(R5_Insync, &dev->flags);
 		else if (test_bit(R5_UPTODATE, &dev->flags) &&
@@ -3903,7 +4123,7 @@ static void handle_stripe(struct stripe_
 	if ((s.syncing || s.replacing) && s.locked == 0 &&
 	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
 	    test_bit(STRIPE_INSYNC, &sh->state)) {
-		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+		md_done_sync(conf->mddev, STRIPE_SECTORS(conf), 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
 		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
 			wake_up(&conf->wait_for_overlap);
@@ -3972,7 +4192,7 @@ static void handle_stripe(struct stripe_
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
-		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+		md_done_sync(conf->mddev, STRIPE_SECTORS(conf), 1);
 	}
 
 	if (s.expanding && s.locked == 0 &&
@@ -4002,14 +4222,14 @@ finish:
 				/* We own a safe reference to the rdev */
 				rdev = conf->disks[i].rdev;
 				if (!rdev_set_badblocks(rdev, sh->sector,
-							STRIPE_SECTORS, 0))
+							STRIPE_SECTORS(conf), 0))
 					md_error(conf->mddev, rdev);
 				rdev_dec_pending(rdev, conf->mddev);
 			}
 			if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
 				rdev = conf->disks[i].rdev;
 				rdev_clear_badblocks(rdev, sh->sector,
-						     STRIPE_SECTORS, 0);
+						     STRIPE_SECTORS(conf), 0);
 				rdev_dec_pending(rdev, conf->mddev);
 			}
 			if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
@@ -4018,7 +4238,7 @@ finish:
 					/* rdev have been moved down */
 					rdev = conf->disks[i].rdev;
 				rdev_clear_badblocks(rdev, sh->sector,
-						     STRIPE_SECTORS, 0);
+						     STRIPE_SECTORS(conf), 0);
 				rdev_dec_pending(rdev, conf->mddev);
 			}
 		}
@@ -4502,7 +4722,7 @@ static void make_discard_request(struct
 		/* Skip discard while reshape is happening */
 		return;
 
-	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS(conf)-1);
 	last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
 
 	bi->bi_next = NULL;
@@ -4518,7 +4738,7 @@ static void make_discard_request(struct
 	last_sector *= conf->chunk_sectors;
 
 	for (; logical_sector < last_sector;
-	     logical_sector += STRIPE_SECTORS) {
+	     logical_sector += STRIPE_SECTORS(conf)) {
 		DEFINE_WAIT(w);
 		int d;
 	again:
@@ -4560,7 +4780,7 @@ static void make_discard_request(struct
 			     d++)
 				bitmap_startwrite(mddev->bitmap,
 						  sh->sector,
-						  STRIPE_SECTORS,
+						  STRIPE_SECTORS(conf),
 						  0);
 			sh->bm_seq = conf->seq_flush + 1;
 			set_bit(STRIPE_BIT_DELAY, &sh->state);
@@ -4609,13 +4829,13 @@ static void make_request(struct mddev *m
 		return;
 	}
 
-	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS(conf)-1);
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
 
 	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
-	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
+	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS(conf)) {
 		int previous;
 		int seq;
 
@@ -4895,7 +5115,7 @@ static sector_t reshape_request(struct m
 	}
 
 	INIT_LIST_HEAD(&stripes);
-	for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
+	for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS(conf)) {
 		int j;
 		int skipped_disk = 0;
 		sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
@@ -4906,6 +5126,7 @@ static sector_t reshape_request(struct m
 		 */
 		for (j=sh->disks; j--;) {
 			sector_t s;
+			int k;
 			if (j == sh->pd_idx)
 				continue;
 			if (conf->level == 6 &&
@@ -4916,7 +5137,8 @@ static sector_t reshape_request(struct m
 				skipped_disk = 1;
 				continue;
 			}
-			memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
+			for (k = 0; k < STRIPE_PAGES(conf); k++)
+				memset(page_address(sh->dev[j].pages[k]), 0, PAGE_SIZE);
 			set_bit(R5_Expanded, &sh->dev[j].flags);
 			set_bit(R5_UPTODATE, &sh->dev[j].flags);
 		}
@@ -4951,7 +5173,7 @@ static sector_t reshape_request(struct m
 		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		release_stripe(sh);
-		first_sector += STRIPE_SECTORS;
+		first_sector += STRIPE_SECTORS(conf);
 	}
 	/* Now that the sources are clearly marked, we can release
 	 * the destination stripes
@@ -5046,11 +5268,11 @@ static inline sector_t sync_request(stru
 	if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
 	    !conf->fullsync &&
 	    !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
-	    sync_blocks >= STRIPE_SECTORS) {
+	    sync_blocks >= STRIPE_SECTORS(conf)) {
 		/* we can skip this block, and probably more */
-		sync_blocks /= STRIPE_SECTORS;
+		sync_blocks /= STRIPE_SECTORS(conf);
 		*skipped = 1;
-		return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
+		return sync_blocks * STRIPE_SECTORS(conf); /* keep things rounded to whole stripes */
 	}
 
 	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
@@ -5078,7 +5300,7 @@ static inline sector_t sync_request(stru
 
 	release_stripe(sh);
 
-	return STRIPE_SECTORS;
+	return STRIPE_SECTORS(conf);
 }
 
 static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
@@ -5101,14 +5323,14 @@ static int  retry_aligned_read(struct r5
 	int handled = 0;
 
 	logical_sector = raid_bio->bi_iter.bi_sector &
-		~((sector_t)STRIPE_SECTORS-1);
+		~((sector_t)STRIPE_SECTORS(conf)-1);
 	sector = raid5_compute_sector(conf, logical_sector,
 				      0, &dd_idx, NULL);
 	last_sector = bio_end_sector(raid_bio);
 
 	for (; logical_sector < last_sector;
-	     logical_sector += STRIPE_SECTORS,
-		     sector += STRIPE_SECTORS,
+	     logical_sector += STRIPE_SECTORS(conf),
+		     sector += STRIPE_SECTORS(conf),
 		     scnt++) {
 
 		if (scnt < raid5_bi_processed_stripes(raid_bio))
@@ -5607,20 +5829,42 @@ raid5_size(struct mddev *mddev, sector_t
 
 static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
 {
-	safe_put_page(percpu->spare_page);
+	int i;
+	if (percpu->spare_pages) {
+		for (i = 0; i < STRIPE_PAGES(conf); i++)
+			safe_put_page(percpu->spare_pages[i]);
+		kfree(percpu->spare_pages);
+	}
 	kfree(percpu->scribble);
-	percpu->spare_page = NULL;
+	percpu->spare_pages = NULL;
 	percpu->scribble = NULL;
 }
 
 static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
 {
-	if (conf->level == 6 && !percpu->spare_page)
-		percpu->spare_page = alloc_page(GFP_KERNEL);
+	bool sp_alloc_fail = false;
+	if (conf->level == 6 && !percpu->spare_pages) {
+		struct page **pages;
+		int i;
+
+		pages = kzalloc(sizeof(struct page *) * STRIPE_PAGES(conf),
+			GFP_KERNEL);
+		sp_alloc_fail = true;
+		if (pages) {
+			percpu->spare_pages = pages;
+			for (i = 0; i < STRIPE_PAGES(conf); i++) {
+				pages[i] = alloc_page(GFP_KERNEL);
+				if (!pages[i])
+					break;
+			}
+			if (i == STRIPE_PAGES(conf))
+				sp_alloc_fail = false;
+		}
+	}
 	if (!percpu->scribble)
 		percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
 
-	if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) {
+	if (!percpu->scribble || sp_alloc_fail) {
 		free_scratch_buffer(conf, percpu);
 		return -ENOMEM;
 	}
@@ -5788,7 +6032,7 @@ static struct r5conf *setup_conf(struct
 	else
 		conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
 	max_disks = max(conf->raid_disks, conf->previous_raid_disks);
-	conf->scribble_len = scribble_len(max_disks);
+	conf->scribble_len = scribble_len(conf, max_disks);
 
 	conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
 			      GFP_KERNEL);
@@ -6512,14 +6756,25 @@ static int check_stripe_cache(struct mdd
 	 * stripe_heads first.
 	 */
 	struct r5conf *conf = mddev->private;
-	if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
+
+	/*
+	 * stripe size is bigger than chunk size is possible, but not very
+	 * useful. We don't allow it at this point.
+	 */
+	if ((mddev->new_chunk_sectors << 9) < STRIPE_SIZE(conf)) {
+		printk(KERN_WARNING
+		  "md/raid:%s: reshape: chunk size is smaller than stripe cache size\n",
+		  mdname(mddev));
+		return 0;
+	}
+	if (((mddev->chunk_sectors << 9) / STRIPE_SIZE(conf)) * 4
 	    > conf->max_nr_stripes ||
-	    ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
+	    ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE(conf)) * 4
 	    > conf->max_nr_stripes) {
 		printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes.  Needed %lu\n",
 		       mdname(mddev),
 		       ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
-			/ STRIPE_SIZE)*4);
+			/ STRIPE_SIZE(conf))*4);
 		return 0;
 	}
 	return 1;
@@ -6827,6 +7082,7 @@ static void *raid45_takeover_raid0(struc
 static void *raid5_takeover_raid1(struct mddev *mddev)
 {
 	int chunksect;
+	struct r5conf *conf = mddev->private;
 
 	if (mddev->raid_disks != 2 ||
 	    mddev->degraded > 1)
@@ -6840,7 +7096,7 @@ static void *raid5_takeover_raid1(struct
 	while (chunksect && (mddev->array_sectors & (chunksect-1)))
 		chunksect >>= 1;
 
-	if ((chunksect<<9) < STRIPE_SIZE)
+	if ((chunksect<<9) < STRIPE_SIZE(conf))
 		/* array size does not allow a suitable chunk size */
 		return ERR_PTR(-EINVAL);
 
Index: linux/drivers/md/raid5.h
===================================================================
--- linux.orig/drivers/md/raid5.h	2014-07-23 14:09:45.844570945 +0800
+++ linux/drivers/md/raid5.h	2014-07-23 14:09:45.836571048 +0800
@@ -225,14 +225,15 @@ struct stripe_head {
 	struct stripe_operations {
 		int 		     target, target2;
 		enum sum_check_flags zero_sum_result;
+		enum sum_check_flags *sum_results;
 	} ops;
 	struct r5dev {
 		/* rreq and rvec are used for the replacement device when
 		 * writing data to both devices.
 		 */
 		struct bio	req, rreq;
-		struct bio_vec	vec, rvec;
-		struct page	*page, *orig_page;
+		struct bio_vec	*vecs, *rvecs;
+		struct page	**pages, **orig_pages;
 		struct bio	*toread, *read, *towrite, *written;
 		sector_t	sector;			/* sector of this page */
 		unsigned long	flags;
@@ -458,7 +459,7 @@ struct r5conf {
 	int			recovery_disabled;
 	/* per cpu variables */
 	struct raid5_percpu {
-		struct page	*spare_page; /* Used when checking P/Q in raid6 */
+		struct page	**spare_pages; /* Used when checking P/Q in raid6 */
 		void		*scribble;   /* space for constructing buffer
 					      * lists and performing address
 					      * conversions
@@ -487,6 +488,7 @@ struct r5conf {
 	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;
+	int			stripe_size_order;
 
 	/* When taking over an array from a different personality, we store
 	 * the new thread here until we fully activate the array.
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



