The performance measurement of xor() was introduced in fe5cbc6e06c7d8b3,
but its result was unused. Given that all implementations should give
the same output, it makes sense to use the best function for each
operation.

For example, on my machine more unrolling can benefit gen but not xor:

raid6: sse2x1   gen()  9560 MB/s
raid6: sse2x1   xor()  7021 MB/s
raid6: sse2x2   gen() 11741 MB/s
raid6: sse2x2   xor()  8111 MB/s
raid6: sse2x4   gen() 13801 MB/s
raid6: sse2x4   xor()  8002 MB/s
raid6: avx2x1   gen() 19298 MB/s
raid6: avx2x1   xor() 13780 MB/s
raid6: avx2x2   gen() 23303 MB/s
raid6: avx2x2   xor() 15258 MB/s
raid6: avx2x4   gen() 27255 MB/s
raid6: avx2x4   xor() 14617 MB/s
raid6: using algorithm avx2x4 gen() 27255 MB/s
raid6: and algorithm avx2x2 xor() 15258 MB/s, rmw enabled

Signed-off-by: Hristo Venev <hristo@xxxxxxxxxx>
---
 lib/raid6/algos.c | 57 ++++++++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 5065b1e7e327..961348622655 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -150,19 +150,19 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 	return best;
 }
 
-static inline const struct raid6_calls *raid6_choose_gen(
+static inline bool raid6_choose_gen(
 	void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
 {
-	unsigned long perf, bestgenperf, bestxorperf, j0, j1;
+	unsigned long perf, bestgenperf = 0, bestxorperf = 0, j0, j1;
 	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
 	const struct raid6_calls *const *algo;
-	const struct raid6_calls *best;
+	const struct raid6_calls *bestgen = NULL, *bestxor = NULL;
 
-	for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
-		if (!best || (*algo)->prefer >= best->prefer) {
-			if ((*algo)->valid && !(*algo)->valid())
-				continue;
+	for (algo = raid6_algos; *algo; algo++) {
+		if ((*algo)->valid && !(*algo)->valid())
+			continue;
 
+		if (!bestgen || (*algo)->prefer >= bestgen->prefer) {
 			perf = 0;
 
 			preempt_disable();
@@ -178,14 +178,16 @@ static inline const struct raid6_calls *raid6_choose_gen(
 
 			if (perf > bestgenperf) {
 				bestgenperf = perf;
-				best = *algo;
+				bestgen = *algo;
 			}
 			pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
 				(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+		}
 
-			if (!(*algo)->xor_syndrome)
-				continue;
+		if(!(*algo)->xor_syndrome)
+			continue;
 
+		if (!bestxor || (*algo)->prefer >= bestxor->prefer) {
 			perf = 0;
 
 			preempt_disable();
@@ -200,26 +202,35 @@ static inline const struct raid6_calls *raid6_choose_gen(
 			}
 			preempt_enable();
 
-			if (best == *algo)
+			if (perf > bestxorperf) {
 				bestxorperf = perf;
-
+				bestxor = *algo;
+			}
 			pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
 				(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
 		}
 	}
 
-	if (best) {
-		pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
-		       best->name,
-		       (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
-		if (best->xor_syndrome)
-			pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-			       (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
-		raid6_call = *best;
-	} else
+	if (!bestgen) {
 		pr_err("raid6: Yikes! No algorithm found!\n");
+		return false;
+	}
 
-	return best;
+	pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
+		bestgen->name,
+		(bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+
+	raid6_call = *bestgen;
+
+	if(bestxor) {
+		pr_info("raid6: and algorithm %s xor() %ld MB/s, rmw enabled\n",
+			bestxor->name,
+			(bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
+
+		raid6_call.xor_syndrome = bestxor->xor_syndrome;
+	}
+
+	return true;
 }
 
 
@@ -230,7 +241,7 @@ int __init raid6_select_algo(void)
 {
 	const int disks = (65536/PAGE_SIZE)+2;
 
-	const struct raid6_calls *gen_best;
+	bool gen_best;
 	const struct raid6_recov_calls *rec_best;
 	char *syndromes;
 	void *dptrs[(65536/PAGE_SIZE)+2];
-- 
2.20.1
Attachment:
signature.asc
Description: This is a digitally signed message part