On 11/8/19 8:20 AM, Eugeniy Paltsev wrote:
> Implement jump label patching for ARC. Jump labels provide
> an interface to generate dynamic branches using
> self-modifying code.
>
> This allows us to implement conditional branches where
> changing branch direction is expensive but branch selection
> is basically 'free'.
>
> This implementation uses 32-bit NOP and BRANCH instructions
> which are forced to be aligned by 4 to guarantee that they don't
> cross an L1 cache line boundary / L1 I$ fetch block
> boundary and can be updated atomically.

As confirmed by h/w folks, for ARC cache flush, the L1 fetch block is not
really relevant.

> Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@xxxxxxxxxxxx>

LGTM (I've removed the fetch block mention from the changelog).

Added to for-next.

Thx,
-Vineet
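As an aside for anyone following along who hasn't used this machinery before:
the patch only provides the arch hooks; consumers go through the generic
static-key API. A minimal sketch below is illustrative only and not part of
this patch (my_feature_key and do_slow_debug_work() are made-up names):

	#include <linux/jump_label.h>

	static DEFINE_STATIC_KEY_FALSE(my_feature_key);

	void hot_path(void)
	{
		/* Compiles down to the 4-byte NOP this series patches;
		 * the taken path is selected only after the key is enabled. */
		if (static_branch_unlikely(&my_feature_key))
			do_slow_debug_work();
	}

	void feature_control(bool on)
	{
		/* Slow path: ends up in arch_jump_label_transform() */
		if (on)
			static_branch_enable(&my_feature_key);
		else
			static_branch_disable(&my_feature_key);
	}

So the expensive part (rewriting the instruction and flushing I$) only happens
on enable/disable, while the hot path costs a NOP or an unconditional branch.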

> ---
> Changes v1->v2:
>  * Patched instruction should not cross L1 I$ fetch block boundary and
>    not only L1 I$ line. Fix comments and asserts in code.
>  * Other small comments fix and code cleanup.
> Changes v2->v3:
>  * Code cleanup.
>  * Mark testdata array as __initconst
>  * Drop check about fetch block in code.
>  * Invert ARC_DBG_JUMP_LABEL vs STATIC_KEYS_SELFTEST dependency.
>
>  arch/arc/Kconfig                  |   8 ++
>  arch/arc/include/asm/cache.h      |   2 +
>  arch/arc/include/asm/jump_label.h |  72 +++++++++++++
>  arch/arc/kernel/Makefile          |   1 +
>  arch/arc/kernel/jump_label.c      | 170 ++++++++++++++++++++++++++++++
>  5 files changed, 253 insertions(+)
>  create mode 100644 arch/arc/include/asm/jump_label.h
>  create mode 100644 arch/arc/kernel/jump_label.c
>
> diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
> index 8383155c8c82..375f9d278139 100644
> --- a/arch/arc/Kconfig
> +++ b/arch/arc/Kconfig
> @@ -46,6 +46,7 @@ config ARC
>  	select OF_EARLY_FLATTREE
>  	select PCI_SYSCALL if PCI
>  	select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
> +	select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
>  
>  config ARCH_HAS_CACHE_LINE_SIZE
>  	def_bool y
> @@ -525,6 +526,13 @@ config ARC_DW2_UNWIND
>  config ARC_DBG_TLB_PARANOIA
>  	bool "Paranoia Checks in Low Level TLB Handlers"
>  
> +config ARC_DBG_JUMP_LABEL
> +	bool "Paranoid checks in Static Keys (jump labels) code"
> +	depends on JUMP_LABEL
> +	default y if STATIC_KEYS_SELFTEST
> +	help
> +	  Enable paranoid checks and self-test of both ARC-specific and generic
> +	  parts of static keys (jump labels) related code.
>  endif
>  
>  config ARC_BUILTIN_DTB_NAME
> diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
> index 918804c7c1a4..d8ece4292388 100644
> --- a/arch/arc/include/asm/cache.h
> +++ b/arch/arc/include/asm/cache.h
> @@ -25,6 +25,8 @@
>  
>  #ifndef __ASSEMBLY__
>  
> +#include <linux/build_bug.h>
> +
>  /* Uncached access macros */
>  #define arc_read_uncached_32(ptr)	\
>  ({					\
> diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h
> new file mode 100644
> index 000000000000..9d9618079739
> --- /dev/null
> +++ b/arch/arc/include/asm/jump_label.h
> @@ -0,0 +1,72 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_ARC_JUMP_LABEL_H
> +#define _ASM_ARC_JUMP_LABEL_H
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <linux/stringify.h>
> +#include <linux/types.h>
> +
> +#define JUMP_LABEL_NOP_SIZE 4
> +
> +/*
> + * NOTE about '.balign 4':
> + *
> + * To make atomic update of the patched instruction possible we need to
> + * guarantee that this instruction doesn't cross an L1 cache line boundary.
> + *
> + * As of today we simply align the instruction to be patched to 4 bytes using
> + * the ".balign 4" directive. In that case the patched instruction is aligned
> + * with one 16-bit NOP_S if this is required.
> + * However the 'align by 4' directive is much stricter than actually required:
> + * it's enough that our 32-bit instruction doesn't cross an L1 cache line
> + * boundary / L1 I$ fetch block boundary, which can be achieved by using the
> + * ".bundle_align_mode" assembler directive. That would save us from adding
> + * useless NOP_S padding in most of the cases.
> + *
> + * TODO: switch to the ".bundle_align_mode" directive when it is supported by
> + * the ARC toolchain.
> + */
> +
> +static __always_inline bool arch_static_branch(struct static_key *key,
> +					       bool branch)
> +{
> +	asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"	\n"
> +		"1:							\n"
> +		"nop							\n"
> +		".pushsection __jump_table, \"aw\"			\n"
> +		".word 1b, %l[l_yes], %c0				\n"
> +		".popsection						\n"
> +		: : "i" (&((char *)key)[branch]) : : l_yes);
> +
> +	return false;
> +l_yes:
> +	return true;
> +}
> +
> +static __always_inline bool arch_static_branch_jump(struct static_key *key,
> +						    bool branch)
> +{
> +	asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"	\n"
> +		"1:							\n"
> +		"b %l[l_yes]						\n"
> +		".pushsection __jump_table, \"aw\"			\n"
> +		".word 1b, %l[l_yes], %c0				\n"
> +		".popsection						\n"
> +		: : "i" (&((char *)key)[branch]) : : l_yes);
> +
> +	return false;
> +l_yes:
> +	return true;
> +}
> +
> +typedef u32 jump_label_t;
> +
> +struct jump_entry {
> +	jump_label_t code;
> +	jump_label_t target;
> +	jump_label_t key;
> +};
> +
> +#endif  /* __ASSEMBLY__ */
> +#endif
> diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
> index de6251132310..e784f5396dda 100644
> --- a/arch/arc/kernel/Makefile
> +++ b/arch/arc/kernel/Makefile
> @@ -20,6 +20,7 @@ obj-$(CONFIG_ARC_EMUL_UNALIGNED)	+= unaligned.o
>  obj-$(CONFIG_KGDB)			+= kgdb.o
>  obj-$(CONFIG_ARC_METAWARE_HLINK)	+= arc_hostlink.o
>  obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
> +obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
>  
>  obj-$(CONFIG_ARC_FPU_SAVE_RESTORE)	+= fpu.o
>  CFLAGS_fpu.o += -mdpfp
> diff --git a/arch/arc/kernel/jump_label.c b/arch/arc/kernel/jump_label.c
> new file mode 100644
> index 000000000000..b8600dc325b5
> --- /dev/null
> +++ b/arch/arc/kernel/jump_label.c
> @@ -0,0 +1,170 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/kernel.h>
> +#include <linux/jump_label.h>
> +
> +#include "asm/cacheflush.h"
> +
> +#define JUMPLABEL_ERR	"ARC: jump_label: ERROR: "
> +
> +/* Halt system on fatal error to make debug easier */
> +#define arc_jl_fatal(format...)						\
> +({									\
> +	pr_err(JUMPLABEL_ERR format);					\
> +	BUG();								\
> +})
> +
> +static inline u32 arc_gen_nop(void)
> +{
> +	/* 1x 32bit NOP in middle endian */
> +	return 0x7000264a;
> +}
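A note for anyone puzzled by the "middle endian" comment above, as I read it:
ARC stores a 32-bit instruction as two 16-bit half-words with the most
significant half-word at the lower address, so when the instruction site is
accessed as an ordinary little-endian 32-bit word the halves appear swapped.
Working that through for the NOP value used here:

	instruction encoding  : 0x264a 0x7000   (upper half stored first)
	bytes in memory       : 4a 26 00 70
	read back as LE u32   : 0x7000264a      (the value returned above)

The same half-word swap shows up in arc_gen_branch() further down, and it is
also why the whole feature is only selected for !CPU_ENDIAN_BE32.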
> +
> +/*
> + * Atomic update of the patched instruction is only possible if this
> + * instruction doesn't cross an L1 cache line boundary. You can read about
> + * the way we achieve this in arch/arc/include/asm/jump_label.h
> + */
> +static inline void instruction_align_assert(void *addr, int len)
> +{
> +	unsigned long a = (unsigned long)addr;
> +
> +	if ((a >> L1_CACHE_SHIFT) != ((a + len - 1) >> L1_CACHE_SHIFT))
> +		arc_jl_fatal("instruction (addr %px) cross L1 cache line border",
> +			     addr);
> +}
> +
> +/*
> + * ARCv2 'Branch unconditionally' instruction:
> + * 00000ssssssssss1SSSSSSSSSSNRtttt
> + * s S[n:0]   lower bits of the signed immediate (number is bitfield size)
> + * S S[m:n+1] upper bits of the signed immediate (number is bitfield size)
> + * t S[24:21] upper bits of the signed immediate (branch unconditionally far)
> + * N N        <.d> delay slot mode
> + * R R        Reserved
> + */
> +static inline u32 arc_gen_branch(jump_label_t pc, jump_label_t target)
> +{
> +	u32 instruction_l, instruction_r;
> +	u32 pcl = pc & GENMASK(31, 2);
> +	u32 u_offset = target - pcl;
> +	u32 s, S, t;
> +
> +	/*
> +	 * The offset in a 32-bit branch instruction must fit into s25.
> +	 * Something is terribly broken if we get such a huge offset within one
> +	 * function.
> +	 */
> +	if ((s32)u_offset < -16777216 || (s32)u_offset > 16777214)
> +		arc_jl_fatal("gen branch with offset (%d) not fit in s25",
> +			     (s32)u_offset);
> +
> +	/*
> +	 * All instructions are aligned to 2 bytes, so we should never get an
> +	 * offset here which is not 2-byte aligned.
> +	 */
> +	if (u_offset & 0x1)
> +		arc_jl_fatal("gen branch with offset (%d) unaligned to 2 bytes",
> +			     (s32)u_offset);
> +
> +	s = (u_offset >> 1)  & GENMASK(9, 0);
> +	S = (u_offset >> 11) & GENMASK(9, 0);
> +	t = (u_offset >> 21) & GENMASK(3, 0);
> +
> +	/* 00000ssssssssss1 */
> +	instruction_l = (s << 1) | 0x1;
> +	/* SSSSSSSSSSNRtttt */
> +	instruction_r = (S << 6) | t;
> +
> +	return (instruction_r << 16) | (instruction_l & GENMASK(15, 0));
> +}
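To convince myself the field packing is right, here is one of the selftest
vectors from further down ("tiny (+24) offs") worked through by hand with the
same arithmetic as arc_gen_branch() above:

	pc     = 0x90007514, target = 0x9000752c
	pcl    = pc & ~3        = 0x90007514
	offset = target - pcl   = 0x18 (+24)
	s = (0x18 >> 1)  & 0x3ff = 0x0c
	S = (0x18 >> 11) & 0x3ff = 0
	t = (0x18 >> 21) & 0xf   = 0
	instruction_l = (s << 1) | 1 = 0x0019    /* 00000ssssssssss1 */
	instruction_r = (S << 6) | t = 0x0000    /* SSSSSSSSSSNRtttt */
	patched word  = (instruction_r << 16) | instruction_l = 0x00000019

which matches the expected_instr for that entry in arcgenbr_test_data[].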
> +
> +void arch_jump_label_transform(struct jump_entry *entry,
> +			       enum jump_label_type type)
> +{
> +	jump_label_t *instr_addr = (jump_label_t *)entry->code;
> +	u32 instr;
> +
> +	instruction_align_assert(instr_addr, JUMP_LABEL_NOP_SIZE);
> +
> +	if (type == JUMP_LABEL_JMP)
> +		instr = arc_gen_branch(entry->code, entry->target);
> +	else
> +		instr = arc_gen_nop();
> +
> +	WRITE_ONCE(*instr_addr, instr);
> +	flush_icache_range(entry->code, entry->code + JUMP_LABEL_NOP_SIZE);
> +}
> +
> +void arch_jump_label_transform_static(struct jump_entry *entry,
> +				      enum jump_label_type type)
> +{
> +	/*
> +	 * We use only one NOP type (1x, 4 byte) in arch_static_branch, so
> +	 * there's no need to patch an identical NOP over the top of it here.
> +	 * The generic code calls 'arch_jump_label_transform' if the NOP needs
> +	 * to be replaced by a branch, so 'arch_jump_label_transform_static' is
> +	 * never called with a type other than JUMP_LABEL_NOP.
> +	 */
> +	BUG_ON(type != JUMP_LABEL_NOP);
> +}
> +
> +#ifdef CONFIG_ARC_DBG_JUMP_LABEL
> +#define SELFTEST_MSG	"ARC: instruction generation self-test: "
> +
> +struct arc_gen_branch_testdata {
> +	jump_label_t pc;
> +	jump_label_t target_address;
> +	u32 expected_instr;
> +};
> +
> +static __init int branch_gen_test(const struct arc_gen_branch_testdata *test)
> +{
> +	u32 instr_got;
> +
> +	instr_got = arc_gen_branch(test->pc, test->target_address);
> +	if (instr_got == test->expected_instr)
> +		return 0;
> +
> +	pr_err(SELFTEST_MSG "FAIL:\n arc_gen_branch(0x%08x, 0x%08x) != 0x%08x, got 0x%08x\n",
> +	       test->pc, test->target_address,
> +	       test->expected_instr, instr_got);
> +
> +	return -EFAULT;
> +}
> +
> +/*
> + * The offset field in the branch instruction is not contiguous. Test all
> + * available offset field and sign combinations. Test data is generated
> + * from real working code.
> + */
> +static const struct arc_gen_branch_testdata arcgenbr_test_data[] __initconst = {
> +	{0x90007548, 0x90007514, 0xffcf07cd}, /* tiny (-52) offs */
> +	{0x9000c9c0, 0x9000c782, 0xffcf05c3}, /* tiny (-574) offs */
> +	{0x9000cc1c, 0x9000c782, 0xffcf0367}, /* tiny (-1178) offs */
> +	{0x9009dce0, 0x9009d106, 0xff8f0427}, /* small (-3034) offs */
> +	{0x9000f5de, 0x90007d30, 0xfc0f0755}, /* big (-30892) offs */
> +	{0x900a2444, 0x90035f64, 0xc9cf0321}, /* huge (-443616) offs */
> +	{0x90007514, 0x9000752c, 0x00000019}, /* tiny (+24) offs */
> +	{0x9001a578, 0x9001a77a, 0x00000203}, /* tiny (+514) offs */
> +	{0x90031ed8, 0x90032634, 0x0000075d}, /* tiny (+1884) offs */
> +	{0x9008c7f2, 0x9008d3f0, 0x00400401}, /* small (+3072) offs */
> +	{0x9000bb38, 0x9003b340, 0x17c00009}, /* big (+194568) offs */
> +	{0x90008f44, 0x90578d80, 0xb7c2063d}  /* huge (+5701180) offs */
> +};
> +
> +static __init int instr_gen_test(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(arcgenbr_test_data); i++)
> +		if (branch_gen_test(&arcgenbr_test_data[i]))
> +			return -EFAULT;
> +
> +	pr_info(SELFTEST_MSG "OK\n");
> +
> +	return 0;
> +}
> +early_initcall(instr_gen_test);
> +
> +#endif /* CONFIG_ARC_DBG_JUMP_LABEL */

_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@xxxxxxxxxxxxxxxxxxx
http://lists.infradead.org/mailman/listinfo/linux-snps-arc