Signed-off-by: Charlie Jenkins <charlie@xxxxxxxxxxxx> --- lib/raid6/Makefile | 2 -- lib/raid6/recov_rvv.c | 12 ++++--- lib/raid6/rvv.c | 81 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 77 insertions(+), 18 deletions(-) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index e62fb7cd773e..5be0a4e60ab1 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -11,8 +11,6 @@ raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o re raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o raid6_pq-$(CONFIG_RISCV_ISA_V) += rvv.o recov_rvv.o -CFLAGS_rvv.o += -march=rv64gcv -CFLAGS_recov_rvv.o += -march=rv64gcv hostprogs += mktables diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c index 8ae74803ea7f..02b97d885510 100644 --- a/lib/raid6/recov_rvv.c +++ b/lib/raid6/recov_rvv.c @@ -17,6 +17,7 @@ static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp, ".option push\n" ".option arch,+v\n" "vsetvli x0, %[avl], e8, m1, ta, ma\n" + ".option pop\n" : : [avl]"r"(16) ); @@ -42,6 +43,8 @@ static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp, * v14:p/qm[vx], v15:p/qm[vy] */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v0, (%[px])\n" "vle8.v v1, (%[dp])\n" "vxor.vv v0, v0, v1\n" @@ -67,6 +70,7 @@ static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp, "vxor.vv v1, v3, v0\n" /* v1 = db ^ px; */ "vse8.v v3, (%[dq])\n" "vse8.v v1, (%[dp])\n" + ".option pop\n" : : [px]"r"(p), [dp]"r"(dp), @@ -84,8 +88,6 @@ static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp, dp += 16; dq += 16; } - - asm volatile (".option pop\n"); } static void __raid6_datap_recov_rvv(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, @@ -95,6 +97,7 @@ static void __raid6_datap_recov_rvv(int bytes, uint8_t *p, uint8_t *q, uint8_t * ".option push\n" ".option arch,+v\n" "vsetvli x0, %[avl], e8, m1, ta, ma\n" + ".option pop\n" : : [avl]"r"(16) ); @@ -113,6 +116,8 @@ static void __raid6_datap_recov_rvv(int bytes, uint8_t *p, uint8_t *q, uint8_t * * v10:m[vx], v11:m[vy] */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v0, (%[vx])\n" "vle8.v v2, (%[dq])\n" "vxor.vv v0, v0, v2\n" @@ -127,6 +132,7 @@ static void __raid6_datap_recov_rvv(int bytes, uint8_t *p, uint8_t *q, uint8_t * "vxor.vv v1, v0, v1\n" "vse8.v v0, (%[dq])\n" "vse8.v v1, (%[vy])\n" + ".option pop\n" : : [vx]"r"(q), [vy]"r"(p), @@ -140,8 +146,6 @@ static void __raid6_datap_recov_rvv(int bytes, uint8_t *p, uint8_t *q, uint8_t * q += 16; dq += 16; } - - asm volatile (".option pop\n"); } diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c index 21f5432506da..81b38dcafeb6 100644 --- a/lib/raid6/rvv.c +++ b/lib/raid6/rvv.c @@ -31,14 +31,18 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** ".option push\n" ".option arch,+v\n" "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" ); /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ for (d = 0 ; d < bytes ; d += NSIZE*1) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" "vle8.v v1, (%[wp0])\n" + ".option pop\n" : : [wp0]"r"(&dptr[z0][d+0*NSIZE]) ); @@ -54,6 +58,8 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** * wp$$ ^= wd$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" @@ -61,6 +67,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** "vle8.v v2, (%[wd0])\n" "vxor.vv v1, v3, v2\n" "vxor.vv v0, v0, v2\n" + ".option pop\n" : : [wd0]"r"(&dptr[z][d+0*NSIZE]), [x1d]"r"(0x1d) @@ -72,15 +79,16 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** * *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vse8.v v0, (%[wp0])\n" "vse8.v v1, (%[wq0])\n" + ".option pop\n" : : [wp0]"r"(&p[d+NSIZE*0]), [wq0]"r"(&q[d+NSIZE*0]) ); } - - asm volatile (".option pop\n"); } static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, @@ -98,14 +106,18 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, ".option push\n" ".option arch,+v\n" "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" ); /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ for (d = 0 ; d < bytes ; d += NSIZE*1) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" "vle8.v v1, (%[wp0])\n" + ".option pop\n" : : [wp0]"r"(&dptr[z0][d+0*NSIZE]) ); @@ -122,6 +134,8 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, * wp$$ ^= wd$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" @@ -129,6 +143,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, "vle8.v v2, (%[wd0])\n" "vxor.vv v1, v3, v2\n" "vxor.vv v0, v0, v2\n" + ".option pop\n" : : [wd0]"r"(&dptr[z][d+0*NSIZE]), [x1d]"r"(0x1d) @@ -144,10 +159,13 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, * wq$$ = w1$$ ^ w2$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" "vxor.vv v1, v3, v2\n" + ".option pop\n" : : [x1d]"r"(0x1d) ); @@ -159,19 +177,20 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, * v0:wp0, v1:wq0, v2:p0, v3:q0 */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v2, (%[wp0])\n" "vle8.v v3, (%[wq0])\n" "vxor.vv v2, v2, v0\n" "vxor.vv v3, v3, v1\n" "vse8.v v2, (%[wp0])\n" "vse8.v v3, (%[wq0])\n" + ".option pop\n" : : [wp0]"r"(&p[d+NSIZE*0]), [wq0]"r"(&q[d+NSIZE*0]) ); } - - asm volatile (".option pop\n"); } static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) @@ -188,6 +207,7 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** ".option push\n" ".option arch,+v\n" "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" ); /* @@ -197,10 +217,13 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** for (d = 0 ; d < bytes ; d += NSIZE*2) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" "vle8.v v1, (%[wp0])\n" "vle8.v v4, (%[wp1])\n" "vle8.v v5, (%[wp1])\n" + ".option pop\n" : : [wp0]"r"(&dptr[z0][d+0*NSIZE]), [wp1]"r"(&dptr[z0][d+1*NSIZE]) @@ -217,6 +240,8 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** * wp$$ ^= wd$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" @@ -232,6 +257,7 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** "vle8.v v6, (%[wd1])\n" "vxor.vv v5, v7, v6\n" "vxor.vv v4, v4, v6\n" + ".option pop\n" : : [wd0]"r"(&dptr[z][d+0*NSIZE]), [wd1]"r"(&dptr[z][d+1*NSIZE]), @@ -244,10 +270,13 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** * *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vse8.v v0, (%[wp0])\n" "vse8.v v1, (%[wq0])\n" "vse8.v v4, (%[wp1])\n" "vse8.v v5, (%[wq1])\n" + ".option pop\n" : : [wp0]"r"(&p[d+NSIZE*0]), [wq0]"r"(&q[d+NSIZE*0]), @@ -255,8 +284,6 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** [wq1]"r"(&q[d+NSIZE*1]) ); } - - asm volatile (".option pop\n"); } static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, @@ -274,6 +301,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, ".option push\n" ".option arch,+v\n" "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" ); /* @@ -283,10 +311,13 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, for (d = 0 ; d < bytes ; d += NSIZE*2) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" "vle8.v v1, (%[wp0])\n" "vle8.v v4, (%[wp1])\n" "vle8.v v5, (%[wp1])\n" + ".option pop\n" : : [wp0]"r"(&dptr[z0][d+0*NSIZE]), [wp1]"r"(&dptr[z0][d+1*NSIZE]) @@ -304,6 +335,8 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, * wp$$ ^= wd$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" @@ -319,6 +352,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vle8.v v6, (%[wd1])\n" "vxor.vv v5, v7, v6\n" "vxor.vv v4, v4, v6\n" + ".option pop\n" : : [wd0]"r"(&dptr[z][d+0*NSIZE]), [wd1]"r"(&dptr[z][d+1*NSIZE]), @@ -335,6 +369,8 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, * wq$$ = w1$$ ^ w2$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" @@ -344,6 +380,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vsll.vi v7, v5, 1\n" "vand.vx v6, v6, %[x1d]\n" "vxor.vv v5, v7, v6\n" + ".option pop\n" : : [x1d]"r"(0x1d) ); @@ -356,6 +393,8 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, * v4:wp1, v5:wq1, v6:p1, v7:q1 */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v2, (%[wp0])\n" "vle8.v v3, (%[wq0])\n" "vxor.vv v2, v2, v0\n" @@ -369,6 +408,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v7, v7, v5\n" "vse8.v v6, (%[wp1])\n" "vse8.v v7, (%[wq1])\n" + ".option pop\n" : : [wp0]"r"(&p[d+NSIZE*0]), [wq0]"r"(&q[d+NSIZE*0]), @@ -376,8 +416,6 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, [wq1]"r"(&q[d+NSIZE*1]) ); } - - asm volatile (".option pop\n"); } static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) @@ -394,6 +432,7 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** ".option push\n" ".option arch,+v\n" "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" ); /* @@ -405,6 +444,8 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** for (d = 0 ; d < bytes ; d += NSIZE*4) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" "vle8.v v1, (%[wp0])\n" "vle8.v v4, (%[wp1])\n" @@ -413,6 +454,7 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vle8.v v9, (%[wp2])\n" "vle8.v v12, (%[wp3])\n" "vle8.v v13, (%[wp3])\n" + ".option pop\n" : : [wp0]"r"(&dptr[z0][d+0*NSIZE]), [wp1]"r"(&dptr[z0][d+1*NSIZE]), @@ -431,6 +473,8 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** * wp$$ ^= wd$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" @@ -462,6 +506,7 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vle8.v v14, (%[wd3])\n" "vxor.vv v13, v15, v14\n" "vxor.vv v12, v12, v14\n" + ".option pop\n" : : [wd0]"r"(&dptr[z][d+0*NSIZE]), [wd1]"r"(&dptr[z][d+1*NSIZE]), @@ -476,6 +521,8 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** * *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vse8.v v0, (%[wp0])\n" "vse8.v v1, (%[wq0])\n" "vse8.v v4, (%[wp1])\n" @@ -484,6 +531,7 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v9, (%[wq2])\n" "vse8.v v12, (%[wp3])\n" "vse8.v v13, (%[wq3])\n" + ".option pop\n" : : [wp0]"r"(&p[d+NSIZE*0]), [wq0]"r"(&q[d+NSIZE*0]), @@ -495,8 +543,6 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** [wq3]"r"(&q[d+NSIZE*3]) ); } - - asm volatile (".option pop\n"); } static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, @@ -514,6 +560,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, ".option push\n" ".option arch,+v\n" "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" ); /* @@ -525,6 +572,8 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, for (d = 0 ; d < bytes ; d += NSIZE*4) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" "vle8.v v1, (%[wp0])\n" "vle8.v v4, (%[wp1])\n" @@ -533,6 +582,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vle8.v v9, (%[wp2])\n" "vle8.v v12, (%[wp3])\n" "vle8.v v13, (%[wp3])\n" + ".option pop\n" : : [wp0]"r"(&dptr[z0][d+0*NSIZE]), [wp1]"r"(&dptr[z0][d+1*NSIZE]), @@ -552,6 +602,8 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, * wp$$ ^= wd$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" @@ -583,6 +635,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vle8.v v14, (%[wd3])\n" "vxor.vv v13, v15, v14\n" "vxor.vv v12, v12, v14\n" + ".option pop\n" : : [wd0]"r"(&dptr[z][d+0*NSIZE]), [wd1]"r"(&dptr[z][d+1*NSIZE]), @@ -601,6 +654,8 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, * wq$$ = w1$$ ^ w2$$; */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vsra.vi v2, v1, 7\n" "vsll.vi v3, v1, 1\n" "vand.vx v2, v2, %[x1d]\n" @@ -620,6 +675,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vsll.vi v15, v13, 1\n" "vand.vx v14, v14, %[x1d]\n" "vxor.vv v13, v15, v14\n" + ".option pop\n" : : [x1d]"r"(0x1d) ); @@ -634,6 +690,8 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, * v12:wp3, v13:wq3, v14:p3, v15:q3 */ asm volatile ( + ".option push\n" + ".option arch,+v\n" "vle8.v v2, (%[wp0])\n" "vle8.v v3, (%[wq0])\n" "vxor.vv v2, v2, v0\n" @@ -661,6 +719,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v15, v15, v13\n" "vse8.v v14, (%[wp3])\n" "vse8.v v15, (%[wq3])\n" + ".option pop\n" : : [wp0]"r"(&p[d+NSIZE*0]), [wq0]"r"(&q[d+NSIZE*0]), @@ -672,8 +731,6 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, [wq3]"r"(&q[d+NSIZE*3]) ); } - - asm volatile (".option pop\n"); } #define RAID6_RVV_WRAPPER(_n) \ -- 2.34.1 - Charlie