Optimized the performance of the three functions (load_ecc8 store_ecc8 and bch_encode) using a larger calculation length. Signed-off-by: John Sanpe <sanpeqf@xxxxxxxxx> --- lib/bch.c | 87 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/lib/bch.c b/lib/bch.c index 5f71fd76eca8..e9a9517e7b1b 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -73,6 +73,7 @@ #include <linux/bitops.h> #include <linux/bitrev.h> #include <asm/byteorder.h> +#include <asm/unaligned.h> #include <linux/bch.h> #if defined(CONFIG_BCH_CONST_PARAMS) @@ -115,14 +116,6 @@ struct gf_poly_deg1 { unsigned int c[2]; }; -static u8 swap_bits(struct bch_control *bch, u8 in) -{ - if (!bch->swap_bits) - return in; - - return bitrev8(in); -} - /* * same as bch_encode(), but process input data one byte at a time */ @@ -135,7 +128,10 @@ static void bch_encode_unaligned(struct bch_control *bch, const int l = BCH_ECC_WORDS(bch)-1; while (len--) { - u8 tmp = swap_bits(bch, *data++); + u8 tmp = *data++; + + if (bch->swap_bits) + tmp = bitrev8(tmp); p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff); @@ -152,20 +148,27 @@ static void bch_encode_unaligned(struct bch_control *bch, static void load_ecc8(struct bch_control *bch, uint32_t *dst, const uint8_t *src) { - uint8_t pad[4] = {0, 0, 0, 0}; unsigned int i, nwords = BCH_ECC_WORDS(bch)-1; + uint32_t value, bytes = 0; + + for (i = 0; i < nwords; i++, src += 4) { + if (!bch->swap_bits) + value = get_unaligned_be32(src); + else { + value = get_unaligned_le32(src); + value = bitrev32(value); + } + dst[i] = value; + } - for (i = 0; i < nwords; i++, src += 4) - dst[i] = ((u32)swap_bits(bch, src[0]) << 24) | - ((u32)swap_bits(bch, src[1]) << 16) | - ((u32)swap_bits(bch, src[2]) << 8) | - swap_bits(bch, src[3]); - - memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords); - dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) | - ((u32)swap_bits(bch, pad[1]) << 16) | - ((u32)swap_bits(bch, pad[2]) << 8) | - swap_bits(bch, pad[3]); + memcpy(&bytes, src, BCH_ECC_BYTES(bch)-4*nwords); + if (!bch->swap_bits) + value = be32_to_cpup((__be32 *)&bytes); + else { + value = le32_to_cpup((__le32 *)&bytes); + value = bitrev32(value); + } + dst[nwords] = value; } /* @@ -174,20 +177,27 @@ static void load_ecc8(struct bch_control *bch, uint32_t *dst, static void store_ecc8(struct bch_control *bch, uint8_t *dst, const uint32_t *src) { - uint8_t pad[4]; unsigned int i, nwords = BCH_ECC_WORDS(bch)-1; + uint32_t value, bytes; + + for (i = 0; i < nwords; i++, dst += 4) { + value = src[i]; + if (!bch->swap_bits) + put_unaligned_be32(value, dst); + else { + value = bitrev32(value); + put_unaligned_le32(value, dst); + } + } - for (i = 0; i < nwords; i++) { - *dst++ = swap_bits(bch, src[i] >> 24); - *dst++ = swap_bits(bch, src[i] >> 16); - *dst++ = swap_bits(bch, src[i] >> 8); - *dst++ = swap_bits(bch, src[i]); + value = src[nwords]; + if (!bch->swap_bits) + *(__be32 *)&bytes = cpu_to_be32(value); + else { + value = bitrev32(value); + *(__le32 *)&bytes = cpu_to_le32(value); } - pad[0] = swap_bits(bch, src[nwords] >> 24); - pad[1] = swap_bits(bch, src[nwords] >> 16); - pad[2] = swap_bits(bch, src[nwords] >> 8); - pad[3] = swap_bits(bch, src[nwords]); - memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords); + memcpy(dst, &bytes, BCH_ECC_BYTES(bch)-4*nwords); } /** @@ -257,12 +267,13 @@ void bch_encode(struct bch_control *bch, const uint8_t *data, */ while (mlen--) { /* input data is read in big-endian format */ - w = cpu_to_be32(*pdata++); - if (bch->swap_bits) - w = (u32)swap_bits(bch, w) | - ((u32)swap_bits(bch, w >> 8) << 8) | - ((u32)swap_bits(bch, w >> 16) << 16) | - ((u32)swap_bits(bch, w >> 24) << 24); + if (!bch->swap_bits) + w = be32_to_cpup((__be32 *)pdata++); + else { + w = le32_to_cpup((__le32 *)pdata++); + w = bitrev32(w); + } + w ^= r[0]; p0 = tab0 + (l+1)*((w >> 0) & 0xff); p1 = tab1 + (l+1)*((w >> 8) & 0xff); -- 2.43.0