[PATCH v3 2/2] md/raid6: fix algorithm choice under larger PAGE_SIZE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



There are several algorithms available for raid6 to generate xor and syndrome
parity, including the basic int1, int2 ... int32 and SIMD-optimized
implementations like sse and neon.  To test and choose the best algorithm at
the initial stage, we need to provide enough disk data to feed the algorithms.
However, the number of disks we provide depends on the page size and the gfmul
table, as shown below:

    const int disks = (65536/PAGE_SIZE) + 2;

So when it comes to a 64K PAGE_SIZE, there is only one data disk plus 2 parity
disks, and as a result the chosen algorithm is not reliable. For example, on my
arm64 machine with 64K pages enabled, it chooses intx32 as the best one,
although the NEON implementation is better.

This patch fixes the problem by defining a constant raid6 disk number, so that
arbitrary page sizes are supported.

Suggested-by: H. Peter Anvin <hpa@xxxxxxxxx>
Signed-off-by: Zhengyuan Liu <liuzhengyuan@xxxxxxxxxx>
---
 include/linux/raid/pq.h | 17 +++++++---
 lib/raid6/algos.c       | 71 +++++++++++++++++++++++++++--------------
 2 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index e0ddb47f4402..6b68b9590a6b 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -8,6 +8,8 @@
 #ifndef LINUX_RAID_RAID6_H
 #define LINUX_RAID_RAID6_H
 
+#define RAID6_DISKS 8
+
 #ifdef __KERNEL__
 
 /* Set to 1 to use kernel-wide empty_zero_page */
@@ -31,6 +33,7 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
 #include <sys/mman.h>
 #include <sys/time.h>
 #include <sys/types.h>
+#include <string.h>
 
 /* Not standard, but glibc defines it */
 #define BITS_PER_LONG __WORDSIZE
@@ -43,6 +46,9 @@ typedef uint64_t u64;
 #ifndef PAGE_SIZE
 # define PAGE_SIZE 4096
 #endif
+#ifndef PAGE_SHIFT
+# define PAGE_SHIFT 12
+#endif
 extern const char raid6_empty_zero_page[PAGE_SIZE];
 
 #define __init
@@ -168,11 +174,12 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 # define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
 # define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
 # define GFP_KERNEL	0
-# define __get_free_pages(x, y)	((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
-						     PROT_READ|PROT_WRITE,   \
-						     MAP_PRIVATE|MAP_ANONYMOUS,\
-						     0, 0))
-# define free_pages(x, y)	munmap((void *)(x), PAGE_SIZE << (y))
+# define kmalloc(x, y)	((unsigned long)mmap(NULL, (x), PROT_READ|PROT_WRITE, \
+						 MAP_PRIVATE|MAP_ANONYMOUS,   \
+						 0, 0))
+# define kfree(x)	munmap((void *)(x), (RAID6_DISKS - 2) * PAGE_SIZE     \
+						<= 65536 ? 2 * PAGE_SIZE :    \
+						(RAID6_DISKS - 2) * PAGE_SIZE)
 
 static inline void cpu_relax(void)
 {
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 17417eee0866..959e6e23aa5f 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -146,7 +146,7 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 }
 
 static inline const struct raid6_calls *raid6_choose_gen(
-	void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
+	void *(*const dptrs)[RAID6_DISKS], const int disks)
 {
 	unsigned long perf, bestgenperf, bestxorperf, j0, j1;
 	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
@@ -181,7 +181,8 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				best = *algo;
 			}
 			pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
-			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+				(perf * HZ * (disks-2)) >>
+				(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
 
 			if (!(*algo)->xor_syndrome)
 				continue;
@@ -204,17 +205,24 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				bestxorperf = perf;
 
 			pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
-				(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
+				(perf * HZ * (disks-2)) >>
+				(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
 		}
 	}
 
 	if (best) {
-		pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
-		       best->name,
-		       (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
-		if (best->xor_syndrome)
-			pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-			       (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
+		if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) {
+			pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
+				best->name,
+				(bestgenperf * HZ * (disks-2)) >>
+				(20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2));
+			if (best->xor_syndrome)
+				pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+					(bestxorperf * HZ * (disks-2)) >>
+					(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+		} else
+			pr_info("raid6: skip pq benchmark and using algorithm %s\n",
+				best->name);
 		raid6_call = *best;
 	} else
 		pr_err("raid6: Yikes!  No algorithm found!\n");
@@ -228,27 +236,42 @@ static inline const struct raid6_calls *raid6_choose_gen(
 
 int __init raid6_select_algo(void)
 {
-	const int disks = (65536/PAGE_SIZE)+2;
+	const int disks = RAID6_DISKS;
 
 	const struct raid6_calls *gen_best;
 	const struct raid6_recov_calls *rec_best;
-	char *syndromes;
-	void *dptrs[(65536/PAGE_SIZE)+2];
-	int i;
-
-	for (i = 0; i < disks-2; i++)
-		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
+	char *alloc_ptr, *p;
+	void *dptrs[RAID6_DISKS];
+	int i, cycle;
+
+	/*
+	 * use raid6_gfmul table to fill the RAID6_DISKS-2 page-sized data disks
+	 * if the total disk size is less then raid6_gfmul, just make dptrs point
+	 * to it, otherwise do a dynamic allocation and copy the table circularly
+	 */
+	if ((disks - 2) * PAGE_SIZE <= 65536 ) {
+		for (i = 0; i < disks - 2; i++)
+			dptrs[i] = (char *)raid6_gfmul + PAGE_SIZE * i;
+
+		alloc_ptr = (char *)kmalloc(2 * PAGE_SIZE, GFP_KERNEL | __GFP_NOFAIL);
+		dptrs[disks-2] = alloc_ptr;
+		dptrs[disks-1] = alloc_ptr + PAGE_SIZE;
+	} else {
+		alloc_ptr = (char *)kmalloc(disks * PAGE_SIZE, GFP_KERNEL | __GFP_NOFAIL);
+		p = alloc_ptr;
+		cycle = ((disks - 2) * PAGE_SIZE) / 65536;
+		for (i = 0; i < cycle; i++) {
+			memcpy(p, raid6_gfmul, 65536);
+			p += 65536;
+		}
 
-	/* Normal code - use a 2-page allocation to avoid D$ conflict */
-	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+		if ((disks - 2) * PAGE_SIZE % 65536)
+			memcpy(p, raid6_gfmul, (disks - 2) * PAGE_SIZE % 65536);
 
-	if (!syndromes) {
-		pr_err("raid6: Yikes!  No memory available.\n");
-		return -ENOMEM;
+		for (i=0; i < disks; i++)
+			dptrs[i] = alloc_ptr + PAGE_SIZE * i;
 	}
 
-	dptrs[disks-2] = syndromes;
-	dptrs[disks-1] = syndromes + PAGE_SIZE;
 
 	/* select raid gen_syndrome function */
 	gen_best = raid6_choose_gen(&dptrs, disks);
@@ -256,7 +279,7 @@ int __init raid6_select_algo(void)
 	/* select raid recover functions */
 	rec_best = raid6_choose_recov();
 
-	free_pages((unsigned long)syndromes, 1);
+	kfree(alloc_ptr);
 
 	return gen_best && rec_best ? 0 : -EINVAL;
 }
-- 
2.20.1






[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux