On December 16, 2019 5:09:33 AM PST, Zhengyuan Liu <liuzhengyuan@xxxxxxxxxx> wrote: >There are several algorithms available for raid6 to generate xor and >syndrome >parity, including basic int1, int2 ... int32 and SIMD-optimized >implementations >like SSE and NEON. To test and choose the best algorithm at the >initial >stage, we need to provide enough disk data to feed the algorithms. >However, the >number of disks we provide depends on the page size and the gfmul table, as seen >below: > > const int disks = (65536/PAGE_SIZE) + 2; > >So when it comes to a 64K PAGE_SIZE, there is only one data disk plus 2 >parity disks; >as a result, the chosen algorithm is not reliable. For example, on my >arm64 >machine with 64K pages enabled, it will choose intx32 as the best one, >although >the NEON implementation is better. > >This patch tries to fix the problem by defining a constant raid6 disk >number to >support arbitrary page sizes. > >Suggested-by: H. Peter Anvin <hpa@xxxxxxxxx> >Signed-off-by: Zhengyuan Liu <liuzhengyuan@xxxxxxxxxx> >--- > include/linux/raid/pq.h | 17 +++++++--- > lib/raid6/algos.c | 71 +++++++++++++++++++++++++++-------------- > 2 files changed, 59 insertions(+), 29 deletions(-) > >diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h >index e0ddb47f4402..6b68b9590a6b 100644 >--- a/include/linux/raid/pq.h >+++ b/include/linux/raid/pq.h >@@ -8,6 +8,8 @@ > #ifndef LINUX_RAID_RAID6_H > #define LINUX_RAID_RAID6_H > >+#define RAID6_DISKS 8 >+ > #ifdef __KERNEL__ > > /* Set to 1 to use kernel-wide empty_zero_page */ >@@ -31,6 +33,7 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; > #include <sys/mman.h> > #include <sys/time.h> > #include <sys/types.h> >+#include <string.h> > > /* Not standard, but glibc defines it */ > #define BITS_PER_LONG __WORDSIZE >@@ -43,6 +46,9 @@ typedef uint64_t u64; > #ifndef PAGE_SIZE > # define PAGE_SIZE 4096 > #endif >+#ifndef PAGE_SHIFT >+# define PAGE_SHIFT 12 >+#endif > extern const char raid6_empty_zero_page[PAGE_SIZE]; > > #define 
__init >@@ -168,11 +174,12 @@ void raid6_dual_recov(int disks, size_t bytes, >int faila, int failb, > # define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__) > # define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__) > # define GFP_KERNEL 0 >-# define __get_free_pages(x, y) ((unsigned long)mmap(NULL, PAGE_SIZE ><< (y), \ >- PROT_READ|PROT_WRITE, \ >- MAP_PRIVATE|MAP_ANONYMOUS,\ >- 0, 0)) >-# define free_pages(x, y) munmap((void *)(x), PAGE_SIZE << (y)) >+# define kmalloc(x, y) ((unsigned long)mmap(NULL, (x), >PROT_READ|PROT_WRITE, \ >+ MAP_PRIVATE|MAP_ANONYMOUS, \ >+ 0, 0)) >+# define kfree(x) munmap((void *)(x), (RAID6_DISKS - 2) * PAGE_SIZE > \ >+ <= 65536 ? 2 * PAGE_SIZE : \ >+ (RAID6_DISKS - 2) * PAGE_SIZE) > > static inline void cpu_relax(void) > { >diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c >index 17417eee0866..959e6e23aa5f 100644 >--- a/lib/raid6/algos.c >+++ b/lib/raid6/algos.c >@@ -146,7 +146,7 @@ static inline const struct raid6_recov_calls >*raid6_choose_recov(void) > } > > static inline const struct raid6_calls *raid6_choose_gen( >- void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) >+ void *(*const dptrs)[RAID6_DISKS], const int disks) > { > unsigned long perf, bestgenperf, bestxorperf, j0, j1; > int start = (disks>>1)-1, stop = disks-3; /* work on the second half >of the disks */ >@@ -181,7 +181,8 @@ static inline const struct raid6_calls >*raid6_choose_gen( > best = *algo; > } > pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, >- (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); >+ (perf * HZ * (disks-2)) >> >+ (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); > > if (!(*algo)->xor_syndrome) > continue; >@@ -204,17 +205,24 @@ static inline const struct raid6_calls >*raid6_choose_gen( > bestxorperf = perf; > > pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, >- (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); >+ (perf * HZ * (disks-2)) >> >+ (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); > } > } > > if 
(best) { >- pr_info("raid6: using algorithm %s gen() %ld MB/s\n", >- best->name, >- (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); >- if (best->xor_syndrome) >- pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", >- (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); >+ if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { >+ pr_info("raid6: using algorithm %s gen() %ld MB/s\n", >+ best->name, >+ (bestgenperf * HZ * (disks-2)) >> >+ (20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2)); >+ if (best->xor_syndrome) >+ pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", >+ (bestxorperf * HZ * (disks-2)) >> >+ (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); >+ } else >+ pr_info("raid6: skip pq benchmark and using algorithm %s\n", >+ best->name); > raid6_call = *best; > } else > pr_err("raid6: Yikes! No algorithm found!\n"); >@@ -228,27 +236,42 @@ static inline const struct raid6_calls >*raid6_choose_gen( > > int __init raid6_select_algo(void) > { >- const int disks = (65536/PAGE_SIZE)+2; >+ const int disks = RAID6_DISKS; > > const struct raid6_calls *gen_best; > const struct raid6_recov_calls *rec_best; >- char *syndromes; >- void *dptrs[(65536/PAGE_SIZE)+2]; >- int i; >- >- for (i = 0; i < disks-2; i++) >- dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; >+ char *alloc_ptr, *p; >+ void *dptrs[RAID6_DISKS]; >+ int i, cycle; >+ >+ /* >+ * use raid6_gfmul table to fill the RAID6_DISKS-2 page-sized data >disks >+ * if the total disk size is less then raid6_gfmul, just make dptrs >point >+ * to it, otherwise do a dynamic allocation and copy the table >circularly >+ */ >+ if ((disks - 2) * PAGE_SIZE <= 65536 ) { >+ for (i = 0; i < disks - 2; i++) >+ dptrs[i] = (char *)raid6_gfmul + PAGE_SIZE * i; >+ >+ alloc_ptr = (char *)kmalloc(2 * PAGE_SIZE, GFP_KERNEL | >__GFP_NOFAIL); >+ dptrs[disks-2] = alloc_ptr; >+ dptrs[disks-1] = alloc_ptr + PAGE_SIZE; >+ } else { >+ alloc_ptr = (char *)kmalloc(disks * PAGE_SIZE, GFP_KERNEL | >__GFP_NOFAIL); >+ p = alloc_ptr; >+ cycle = ((disks - 2) * 
PAGE_SIZE) / 65536; >+ for (i = 0; i < cycle; i++) { >+ memcpy(p, raid6_gfmul, 65536); >+ p += 65536; >+ } > >- /* Normal code - use a 2-page allocation to avoid D$ conflict */ >- syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); >+ if ((disks - 2) * PAGE_SIZE % 65536) >+ memcpy(p, raid6_gfmul, (disks - 2) * PAGE_SIZE % 65536); > >- if (!syndromes) { >- pr_err("raid6: Yikes! No memory available.\n"); >- return -ENOMEM; >+ for (i=0; i < disks; i++) >+ dptrs[i] = alloc_ptr + PAGE_SIZE * i; > } > >- dptrs[disks-2] = syndromes; >- dptrs[disks-1] = syndromes + PAGE_SIZE; > > /* select raid gen_syndrome function */ > gen_best = raid6_choose_gen(&dptrs, disks); >@@ -256,7 +279,7 @@ int __init raid6_select_algo(void) > /* select raid recover functions */ > rec_best = raid6_choose_recov(); > >- free_pages((unsigned long)syndromes, 1); >+ kfree(alloc_ptr); > > return gen_best && rec_best ? 0 : -EINVAL; > } Don't bother making this two cases. The order-3 allocation is actually better for measurement purposes in all cases, as it has the most predictable cache effects and therefore should be the most deterministic. -- Sent from my Android device with K-9 Mail. Please excuse my brevity.