From: Guo Ren <ren_guo@xxxxxxxxx> The implementation follow the RISC-V "V" Vector Extension draft v0.8 with 128bit-vlen and it's based on linux-5.5-rc4. The patch implement basic context switch, signal and ptrace interface with a new regset NT_RISCV_VECTOR. Only fixed 128bit-vlen is implemented. We need to discuss about the abi for libc sigcontext. (The maximum size of vlen for libc signal context structure 128/256/512 ?) Signed-off-by: Guo Ren <ren_guo@xxxxxxxxx> Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx> Cc: Palmer Dabbelt <palmer@xxxxxxxxxxx> Cc: Albert Ou <aou@xxxxxxxxxxxxxxxxx> Cc: Alan Kao <alankao@xxxxxxxxxxxxx> Cc: Anup Patel <Anup.Patel@xxxxxxx> CC: Andreas Schwab <schwab@xxxxxxxxxxxxxx> --- Changelog V2: - Fixup typo "vecotr, fstate_save->vstate_save". - Fixup wrong saved registers' length in vector.S. - Seperate unrelated patches from this one. --- arch/riscv/Kconfig | 9 ++ arch/riscv/Makefile | 1 + arch/riscv/include/asm/csr.h | 17 +- arch/riscv/include/asm/processor.h | 1 + arch/riscv/include/asm/switch_to.h | 48 ++++++ arch/riscv/include/uapi/asm/elf.h | 1 + arch/riscv/include/uapi/asm/ptrace.h | 9 ++ arch/riscv/include/uapi/asm/sigcontext.h | 1 + arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/asm-offsets.c | 188 +++++++++++++++++++++++ arch/riscv/kernel/cpufeature.c | 8 + arch/riscv/kernel/entry.S | 2 +- arch/riscv/kernel/head.S | 49 +++++- arch/riscv/kernel/process.c | 10 ++ arch/riscv/kernel/ptrace.c | 41 +++++ arch/riscv/kernel/signal.c | 40 +++++ arch/riscv/kernel/vector.S | 84 ++++++++++ include/uapi/linux/elf.h | 1 + 18 files changed, 506 insertions(+), 5 deletions(-) create mode 100644 arch/riscv/kernel/vector.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b16954229c96..c5b2e5df1349 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -292,6 +292,15 @@ config FPU If you don't know what to do here, say Y. +config VECTOR + bool "VECTOR support" + default n + help + Say N here if you want to disable all vector related procedure + in the kernel. + + If you don't know what to do here, say Y. + endmenu menu "Kernel features" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 6d09b53cf106..071eb1148e01 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -44,6 +44,7 @@ riscv-march-aflags-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-aflags-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-aflags-$(CONFIG_FPU) := $(riscv-march-aflags-y)fd riscv-march-aflags-$(CONFIG_RISCV_ISA_C) := $(riscv-march-aflags-y)c +riscv-march-aflags-$(CONFIG_VECTOR) := $(riscv-march-aflags-y)v KBUILD_CFLAGS += -march=$(riscv-march-cflags-y) KBUILD_AFLAGS += -march=$(riscv-march-aflags-y) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 6a06277a84d6..ce3aa826bb16 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -24,6 +24,12 @@ #define SR_FS_CLEAN _AC(0x00004000, UL) #define SR_FS_DIRTY _AC(0x00006000, UL) +#define SR_VS _AC(0x01800000, UL) /* Vector Status */ +#define SR_VS_OFF _AC(0x00000000, UL) +#define SR_VS_INITIAL _AC(0x00800000, UL) +#define SR_VS_CLEAN _AC(0x01000000, UL) +#define SR_VS_DIRTY _AC(0x01800000, UL) + #define SR_XS _AC(0x00018000, UL) /* Extension Status */ #define SR_XS_OFF _AC(0x00000000, UL) #define SR_XS_INITIAL _AC(0x00008000, UL) @@ -31,9 +37,9 @@ #define SR_XS_DIRTY _AC(0x00018000, UL) #ifndef CONFIG_64BIT -#define SR_SD _AC(0x80000000, UL) /* FS/XS dirty */ +#define SR_SD _AC(0x80000000, UL) /* FS/VS/XS dirty */ #else -#define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */ +#define SR_SD _AC(0x8000000000000000, UL) /* FS/VS/XS dirty */ #endif /* SATP flags */ @@ -108,6 +114,13 @@ #define CSR_MIP 0x344 #define CSR_MHARTID 0xf14 +#define CSR_VSTART 0x8 +#define CSR_VXSAT 0x9 +#define CSR_VXRM 0xa +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 +#define CSR_VLENB 0xc22 + #ifdef CONFIG_RISCV_M_MODE # define CSR_STATUS CSR_MSTATUS # define CSR_IE CSR_MIE diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3ddb798264f1..217273375cfb 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -32,6 +32,7 @@ struct thread_struct { unsigned long sp; /* Kernel mode stack */ unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; + struct __riscv_v_state vstate; }; #define INIT_THREAD { \ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index b9234e7178d0..6e1c7fa599be 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -63,6 +63,52 @@ extern bool has_fpu; #define __switch_to_fpu(__prev, __next) do { } while (0) #endif +#ifdef CONFIG_VECTOR +extern void __vstate_save(struct task_struct *save_to); +extern void __vstate_restore(struct task_struct *restore_from); + +static inline void __vstate_clean(struct pt_regs *regs) +{ + regs->status |= (regs->status & ~(SR_VS)) | SR_VS_CLEAN; +} + +static inline void vstate_save(struct task_struct *task, + struct pt_regs *regs) +{ + if ((regs->status & SR_VS) == SR_VS_DIRTY) { + __vstate_save(task); + __vstate_clean(regs); + } +} + +static inline void vstate_restore(struct task_struct *task, + struct pt_regs *regs) +{ + if ((regs->status & SR_VS) != SR_VS_OFF) { + __vstate_restore(task); + __vstate_clean(regs); + } +} + +static inline void __switch_to_vector(struct task_struct *prev, + struct task_struct *next) +{ + struct pt_regs *regs; + + regs = task_pt_regs(prev); + if (unlikely(regs->status & SR_SD)) + vstate_save(prev, regs); + vstate_restore(next, task_pt_regs(next)); +} + +extern bool has_vector; +#else +#define has_vector false +#define vstate_save(task, regs) do { } while (0) +#define vstate_restore(task, regs) do { } while (0) +#define __switch_to_vector(__prev, __next) do { } while (0) +#endif + extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); @@ -72,6 +118,8 @@ do { \ struct task_struct *__next = (next); \ if (has_fpu) \ __switch_to_fpu(__prev, __next); \ + if (has_vector) \ + __switch_to_vector(__prev, __next); \ ((last) = __switch_to(__prev, __next)); \ } while (0) diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index d696d6610231..099434d075a7 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -23,6 +23,7 @@ typedef struct user_regs_struct elf_gregset_t; typedef __u64 elf_fpreg_t; typedef union __riscv_fp_state elf_fpregset_t; #define ELF_NFPREG (sizeof(struct __riscv_d_ext_state) / sizeof(elf_fpreg_t)) +#define ELF_NVREG (sizeof(struct __riscv_v_state) / sizeof(elf_greg_t)) #if __riscv_xlen == 64 #define ELF_RISCV_R_SYM(r_info) ELF64_R_SYM(r_info) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index 882547f6bd5c..d913e8949b87 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -71,6 +71,15 @@ struct __riscv_q_ext_state { __u32 reserved[3]; }; +struct __riscv_v_state { + __uint128_t v[32]; + unsigned long vstart; + unsigned long vxsat; + unsigned long vxrm; + unsigned long vl; + unsigned long vtype; +}; + union __riscv_fp_state { struct __riscv_f_ext_state f; struct __riscv_d_ext_state d; diff --git a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h index 84f2dfcfdbce..f74b3c814423 100644 --- a/arch/riscv/include/uapi/asm/sigcontext.h +++ b/arch/riscv/include/uapi/asm/sigcontext.h @@ -17,6 +17,7 @@ struct sigcontext { struct user_regs_struct sc_regs; union __riscv_fp_state sc_fpregs; + struct __riscv_v_state sc_vregs; }; #endif /* _UAPI_ASM_RISCV_SIGCONTEXT_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f40205cb9a22..e5276c3bdffc 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_M_MODE) += clint.o obj-$(CONFIG_FPU) += fpu.o +obj-$(CONFIG_VECTOR) += vector.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 07cb9c10de4e..b662db7c71c0 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -70,6 +70,45 @@ void asm_offsets(void) OFFSET(TASK_THREAD_F31, task_struct, thread.fstate.f[31]); OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr); + OFFSET(TASK_THREAD_V0, task_struct, thread.vstate.v[0]); + OFFSET(TASK_THREAD_V1, task_struct, thread.vstate.v[1]); + OFFSET(TASK_THREAD_V2, task_struct, thread.vstate.v[2]); + OFFSET(TASK_THREAD_V3, task_struct, thread.vstate.v[3]); + OFFSET(TASK_THREAD_V4, task_struct, thread.vstate.v[4]); + OFFSET(TASK_THREAD_V5, task_struct, thread.vstate.v[5]); + OFFSET(TASK_THREAD_V6, task_struct, thread.vstate.v[6]); + OFFSET(TASK_THREAD_V7, task_struct, thread.vstate.v[7]); + OFFSET(TASK_THREAD_V8, task_struct, thread.vstate.v[8]); + OFFSET(TASK_THREAD_V9, task_struct, thread.vstate.v[9]); + OFFSET(TASK_THREAD_V10, task_struct, thread.vstate.v[10]); + OFFSET(TASK_THREAD_V11, task_struct, thread.vstate.v[11]); + OFFSET(TASK_THREAD_V12, task_struct, thread.vstate.v[12]); + OFFSET(TASK_THREAD_V13, task_struct, thread.vstate.v[13]); + OFFSET(TASK_THREAD_V14, task_struct, thread.vstate.v[14]); + OFFSET(TASK_THREAD_V15, task_struct, thread.vstate.v[15]); + OFFSET(TASK_THREAD_V16, task_struct, thread.vstate.v[16]); + OFFSET(TASK_THREAD_V17, task_struct, thread.vstate.v[17]); + OFFSET(TASK_THREAD_V18, task_struct, thread.vstate.v[18]); + OFFSET(TASK_THREAD_V19, task_struct, thread.vstate.v[19]); + OFFSET(TASK_THREAD_V20, task_struct, thread.vstate.v[20]); + OFFSET(TASK_THREAD_V21, task_struct, thread.vstate.v[21]); + OFFSET(TASK_THREAD_V22, task_struct, thread.vstate.v[22]); + OFFSET(TASK_THREAD_V23, task_struct, thread.vstate.v[23]); + OFFSET(TASK_THREAD_V24, task_struct, thread.vstate.v[24]); + OFFSET(TASK_THREAD_V25, task_struct, thread.vstate.v[25]); + OFFSET(TASK_THREAD_V26, task_struct, thread.vstate.v[26]); + OFFSET(TASK_THREAD_V27, task_struct, thread.vstate.v[27]); + OFFSET(TASK_THREAD_V28, task_struct, thread.vstate.v[28]); + OFFSET(TASK_THREAD_V29, task_struct, thread.vstate.v[29]); + OFFSET(TASK_THREAD_V30, task_struct, thread.vstate.v[30]); + OFFSET(TASK_THREAD_V31, task_struct, thread.vstate.v[31]); + OFFSET(TASK_THREAD_VSTART, task_struct, thread.vstate.vstart); + OFFSET(TASK_THREAD_VXSAT, task_struct, thread.vstate.vxsat); + OFFSET(TASK_THREAD_VXRM, task_struct, thread.vstate.vxrm); + OFFSET(TASK_THREAD_VL, task_struct, thread.vstate.vl); + OFFSET(TASK_THREAD_VTYPE, task_struct, thread.vstate.vtype); + DEFINE(RISCV_VECTOR_VLENB, sizeof(__uint128_t)); + DEFINE(PT_SIZE, sizeof(struct pt_regs)); OFFSET(PT_EPC, pt_regs, epc); OFFSET(PT_RA, pt_regs, ra); @@ -304,6 +343,155 @@ void asm_offsets(void) - offsetof(struct task_struct, thread.fstate.f[0]) ); + DEFINE(TASK_THREAD_V0_V0, + offsetof(struct task_struct, thread.vstate.v[0]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V1_V0, + offsetof(struct task_struct, thread.vstate.v[1]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V2_V0, + offsetof(struct task_struct, thread.vstate.v[2]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V3_V0, + offsetof(struct task_struct, thread.vstate.v[3]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V4_V0, + offsetof(struct task_struct, thread.vstate.v[4]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V5_V0, + offsetof(struct task_struct, thread.vstate.v[5]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V6_V0, + offsetof(struct task_struct, thread.vstate.v[6]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V7_V0, + offsetof(struct task_struct, thread.vstate.v[7]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V8_V0, + offsetof(struct task_struct, thread.vstate.v[8]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V9_V0, + offsetof(struct task_struct, thread.vstate.v[9]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V10_V0, + offsetof(struct task_struct, thread.vstate.v[10]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V11_V0, + offsetof(struct task_struct, thread.vstate.v[11]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V12_V0, + offsetof(struct task_struct, thread.vstate.v[12]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V13_V0, + offsetof(struct task_struct, thread.vstate.v[13]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V14_V0, + offsetof(struct task_struct, thread.vstate.v[14]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V15_V0, + offsetof(struct task_struct, thread.vstate.v[15]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V16_V0, + offsetof(struct task_struct, thread.vstate.v[16]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V17_V0, + offsetof(struct task_struct, thread.vstate.v[17]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V18_V0, + offsetof(struct task_struct, thread.vstate.v[18]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V19_V0, + offsetof(struct task_struct, thread.vstate.v[19]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V20_V0, + offsetof(struct task_struct, thread.vstate.v[20]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V21_V0, + offsetof(struct task_struct, thread.vstate.v[21]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V22_V0, + offsetof(struct task_struct, thread.vstate.v[22]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V23_V0, + offsetof(struct task_struct, thread.vstate.v[23]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V24_V0, + offsetof(struct task_struct, thread.vstate.v[24]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V25_V0, + offsetof(struct task_struct, thread.vstate.v[25]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V26_V0, + offsetof(struct task_struct, thread.vstate.v[26]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V27_V0, + offsetof(struct task_struct, thread.vstate.v[27]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V28_V0, + offsetof(struct task_struct, thread.vstate.v[28]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V29_V0, + offsetof(struct task_struct, thread.vstate.v[29]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V30_V0, + offsetof(struct task_struct, thread.vstate.v[30]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_V31_V0, + offsetof(struct task_struct, thread.vstate.v[31]) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_VSTART_V0, + offsetof(struct task_struct, thread.vstate.vstart) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_VXSAT_V0, + offsetof(struct task_struct, thread.vstate.vxsat) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_VXRM_V0, + offsetof(struct task_struct, thread.vstate.vxrm) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_VL_V0, + offsetof(struct task_struct, thread.vstate.vl) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + DEFINE(TASK_THREAD_VTYPE_V0, + offsetof(struct task_struct, thread.vstate.vtype) + - offsetof(struct task_struct, thread.vstate.v[0]) + ); + /* * We allocate a pt_regs on the stack when entering the kernel. This * ensures the alignment is sane. diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c8527d770c98..c9ab24e3c79e 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -16,6 +16,9 @@ unsigned long elf_hwcap __read_mostly; #ifdef CONFIG_FPU bool has_fpu __read_mostly; #endif +#ifdef CONFIG_VECTOR +bool has_vector __read_mostly; +#endif void riscv_fill_hwcap(void) { @@ -73,4 +76,9 @@ void riscv_fill_hwcap(void) if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)) has_fpu = true; #endif + +#ifdef CONFIG_VECTOR + if (elf_hwcap & COMPAT_HWCAP_ISA_V) + has_vector = true; +#endif } diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index e163b7b64c86..828f0ebce4b5 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -76,7 +76,7 @@ _save_context: * Disable the FPU to detect illegal usage of floating point in kernel * space. */ - li t0, SR_SUM | SR_FS + li t0, SR_SUM | SR_FS | SR_VS REG_L s0, TASK_TI_USER_SP(tp) csrrc s1, CSR_STATUS, t0 diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 2227db63f895..fab03292284e 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -72,10 +72,10 @@ _start_kernel: .option pop /* - * Disable FPU to detect illegal usage of + * Disable FPU & VECTOR to detect illegal usage of * floating point in kernel space */ - li t0, SR_FS + li t0, SR_FS | SR_VS csrc CSR_STATUS, t0 #ifdef CONFIG_SMP @@ -290,6 +290,51 @@ ENTRY(reset_regs) csrw fcsr, 0 /* note that the caller must clear SR_FS */ #endif /* CONFIG_FPU */ + +#ifdef CONFIG_VECTOR + csrr t0, CSR_MISA + li t1, (COMPAT_HWCAP_ISA_V >> 16) + slli t1, t1, 16 + and t0, t0, t1 + beqz t0, .Lreset_regs_done + + li t1, SR_VS + csrs CSR_STATUS, t1 + vmv.v.i v0, 0 + vmv.v.i v1, 0 + vmv.v.i v2, 0 + vmv.v.i v3, 0 + vmv.v.i v4, 0 + vmv.v.i v5, 0 + vmv.v.i v6, 0 + vmv.v.i v7, 0 + vmv.v.i v8, 0 + vmv.v.i v9, 0 + vmv.v.i v10, 0 + vmv.v.i v11, 0 + vmv.v.i v12, 0 + vmv.v.i v13, 0 + vmv.v.i v14, 0 + vmv.v.i v15, 0 + vmv.v.i v16, 0 + vmv.v.i v17, 0 + vmv.v.i v18, 0 + vmv.v.i v19, 0 + vmv.v.i v20, 0 + vmv.v.i v21, 0 + vmv.v.i v22, 0 + vmv.v.i v23, 0 + vmv.v.i v24, 0 + vmv.v.i v25, 0 + vmv.v.i v26, 0 + vmv.v.i v27, 0 + vmv.v.i v28, 0 + vmv.v.i v29, 0 + vmv.v.i v30, 0 + vmv.v.i v31, 0 + /* note that the caller must clear SR_VS */ +#endif /* CONFIG_VECTOR */ + .Lreset_regs_done: ret END(reset_regs) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 95a3031e5c7c..3665f8c1a2c1 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -74,6 +74,16 @@ void start_thread(struct pt_regs *regs, unsigned long pc, */ fstate_restore(current, regs); } + + if (has_vector) { + regs->status |= SR_VS_INITIAL; + /* + * Restore the initial value to the vector register + * before starting the user program. + */ + vstate_restore(current, regs); + } + regs->epc = pc; regs->sp = sp; set_fs(USER_DS); diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 407464201b91..0e3c3543476c 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -26,6 +26,9 @@ enum riscv_regset { #ifdef CONFIG_FPU REGSET_F, #endif +#ifdef CONFIG_VECTOR + REGSET_V, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -92,6 +95,34 @@ static int riscv_fpr_set(struct task_struct *target, } #endif +#ifdef CONFIG_VECTOR +static int riscv_vr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + struct __riscv_v_state *vstate = &target->thread.vstate; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, vstate, 0, + offsetof(struct __riscv_v_state, vtype)); + return ret; +} + +static int riscv_vr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct __riscv_v_state *vstate = &target->thread.vstate; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0, + offsetof(struct __riscv_v_state, vtype)); + return ret; +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -111,6 +142,16 @@ static const struct user_regset riscv_user_regset[] = { .set = &riscv_fpr_set, }, #endif +#ifdef CONFIG_VECTOR + [REGSET_V] = { + .core_note_type = NT_RISCV_VECTOR, + .n = ELF_NVREG, + .size = sizeof(elf_greg_t), + .align = sizeof(elf_greg_t), + .get = &riscv_vr_get, + .set = &riscv_vr_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 17ba190e84a5..4295c00e8934 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -83,6 +83,40 @@ static long save_fp_state(struct pt_regs *regs, #define restore_fp_state(task, regs) (0) #endif +#ifdef CONFIG_VECTOR +static long restore_v_state(struct pt_regs *regs, + struct __riscv_v_state *sc_vregs) +{ + long err; + struct __riscv_v_state __user *state = sc_vregs; + + err = __copy_from_user(¤t->thread.vstate, state, sizeof(*state)); + if (unlikely(err)) + return err; + + vstate_restore(current, regs); + + return err; +} + +static long save_v_state(struct pt_regs *regs, + struct __riscv_v_state *sc_vregs) +{ + long err; + struct __riscv_v_state __user *state = sc_vregs; + + vstate_save(current, regs); + err = __copy_to_user(state, ¤t->thread.vstate, sizeof(*state)); + if (unlikely(err)) + return err; + + return err; +} +#else +#define save_v_state(task, regs) (0) +#define restore_v_state(task, regs) (0) +#endif + static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { @@ -92,6 +126,9 @@ static long restore_sigcontext(struct pt_regs *regs, /* Restore the floating-point state. */ if (has_fpu) err |= restore_fp_state(regs, &sc->sc_fpregs); + /* Restore the vector state. */ + if (has_vector) + err |= restore_v_state(regs, &sc->sc_vregs); return err; } @@ -145,6 +182,9 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, /* Save the floating-point state. */ if (has_fpu) err |= save_fp_state(regs, &sc->sc_fpregs); + /* Save the vector state. */ + if (has_vector) + err |= save_v_state(regs, &sc->sc_vregs); return err; } diff --git a/arch/riscv/kernel/vector.S b/arch/riscv/kernel/vector.S new file mode 100644 index 000000000000..0afc308a6e02 --- /dev/null +++ b/arch/riscv/kernel/vector.S @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + * Copyright (C) 2019 T-HEAD + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/linkage.h> + +#include <asm/asm.h> +#include <asm/csr.h> +#include <asm/asm-offsets.h> + +ENTRY(__vstate_save) + li a2, TASK_THREAD_V0 + add a0, a0, a2 + + li t1, (SR_VS | SR_FS) + csrs sstatus, t1 + + csrr t0, CSR_VSTART + sd t0, TASK_THREAD_VSTART_V0(a0) + csrr t0, CSR_VXSAT + sd t0, TASK_THREAD_VXSAT_V0(a0) + csrr t0, CSR_VXRM + sd t0, TASK_THREAD_VXRM_V0(a0) + csrr t0, CSR_VL + sd t0, TASK_THREAD_VL_V0(a0) + csrr t0, CSR_VTYPE + sd t0, TASK_THREAD_VTYPE_V0(a0) + + vsetvli t0, x0, e8,m8 + vsb.v v0, (a0) + addi a0, a0, RISCV_VECTOR_VLENB*8 + vsb.v v8, (a0) + addi a0, a0, RISCV_VECTOR_VLENB*8 + vsb.v v16, (a0) + addi a0, a0, RISCV_VECTOR_VLENB*8 + vsb.v v24, (a0) + + csrc sstatus, t1 + ret +ENDPROC(__vstate_save) + +ENTRY(__vstate_restore) + li a2, TASK_THREAD_V0 + add a0, a0, a2 + mv t2, a0 + + li t1, (SR_VS | SR_FS) + csrs sstatus, t1 + + vsetvli t0, x0, e8,m8 + vlb.v v0, (a0) + addi a0, a0, RISCV_VECTOR_VLENB*8 + vlb.v v8, (a0) + addi a0, a0, RISCV_VECTOR_VLENB*8 + vlb.v v16, (a0) + addi a0, a0, RISCV_VECTOR_VLENB*8 + vlb.v v24, (a0) + + mv a0, t2 + ld t0, TASK_THREAD_VSTART_V0(a0) + csrw CSR_VSTART, t0 + ld t0, TASK_THREAD_VXSAT_V0(a0) + csrw CSR_VXSAT, t0 + ld t0, TASK_THREAD_VXRM_V0(a0) + csrw CSR_VXRM, t0 + ld t0, TASK_THREAD_VL_V0(a0) + ld t2, TASK_THREAD_VTYPE_V0(a0) + vsetvl t0, t0, t2 + + csrc sstatus, t1 + ret +ENDPROC(__vstate_restore) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 34c02e4290fe..e428f9e8710a 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -428,6 +428,7 @@ typedef struct elf64_shdr { #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_RISCV_VECTOR 0x900 /* RISC-V vector registers */ /* Note header in a PT_NOTE section */ typedef struct elf32_note { -- 2.17.0