These macros will come in handy to keep loads in assembly routines from
generating relocation entries.

Signed-off-by: Ahmad Fatoum <a.fatoum@xxxxxxxxxxxxxx>
---
 arch/arm/include/asm/assembler.h | 211 +++++++++++++++++++++++++++++++
 1 file changed, 211 insertions(+)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 7f9281b9a1a1..5db0f692eec6 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -130,3 +130,214 @@
 #else
 #define CPU_LE(code...)	code
 #endif
+
+#ifdef CONFIG_CPU_64
+/*
+ * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
+ * <symbol> is within the range +/- 4 GB of the PC.
+ */
+	/*
+	 * @dst: destination register (64 bit wide)
+	 * @sym: name of the symbol
+	 */
+	.macro	adr_l, dst, sym
+	adrp	\dst, \sym
+	add	\dst, \dst, :lo12:\sym
+	.endm
+
+	/*
+	 * @dst: destination register (32 or 64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
+	 *       32-bit wide register, in which case it cannot be used to hold
+	 *       the address
+	 */
+	.macro	ldr_l, dst, sym, tmp=
+	.ifb	\tmp
+	adrp	\dst, \sym
+	ldr	\dst, [\dst, :lo12:\sym]
+	.else
+	adrp	\tmp, \sym
+	ldr	\dst, [\tmp, :lo12:\sym]
+	.endif
+	.endm
+
+	/*
+	 * @src: source register (32 or 64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: mandatory 64-bit scratch register to calculate the address
+	 *       while <src> needs to be preserved.
+	 */
+	.macro	str_l, src, sym, tmp
+	adrp	\tmp, \sym
+	str	\src, [\tmp, :lo12:\sym]
+	.endm
+
+#else
+
+	.macro	__adldst_l, op, reg, sym, tmp, c
+	.if	__LINUX_ARM_ARCH__ < 7
+	ldr\c	\tmp, .La\@
+	.subsection	1
+	.align	2
+.La\@:	.long	\sym - .Lpc\@
+	.previous
+	.else
+	.ifnb	\c
+ THUMB(	ittt	\c			)
+	.endif
+	movw\c	\tmp, #:lower16:\sym - .Lpc\@
+	movt\c	\tmp, #:upper16:\sym - .Lpc\@
+	.endif
+
+#ifndef CONFIG_THUMB2_BAREBOX
+	.set	.Lpc\@, . + 8			// PC bias
+	.ifc	\op, add
+	add\c	\reg, \tmp, pc
+	.else
+	\op\c	\reg, [pc, \tmp]
+	.endif
+#else
+.Lb\@:	add\c	\tmp, \tmp, pc
+	/*
+	 * In Thumb-2 builds, the PC bias depends on whether we are currently
+	 * emitting into a .arm or a .thumb section. The size of the add opcode
+	 * above will be 2 bytes when emitting in Thumb mode and 4 bytes when
+	 * emitting in ARM mode, so let's use this to account for the bias.
+	 */
+	.set	.Lpc\@, . + (. - .Lb\@)
+
+	.ifnc	\op, add
+	\op\c	\reg, [\tmp]
+	.endif
+#endif
+	.endm
+
+	/*
+	 * mov_l - move a constant value or [relocated] address into a register
+	 */
+	.macro	mov_l, dst:req, imm:req, cond
+	.if	__LINUX_ARM_ARCH__ < 7
+	ldr\cond	\dst, =\imm
+	.else
+	movw\cond	\dst, #:lower16:\imm
+	movt\cond	\dst, #:upper16:\imm
+	.endif
+	.endm
+
+	/*
+	 * adr_l - adr pseudo-op with unlimited range
+	 *
+	 * @dst: destination register
+	 * @sym: name of the symbol
+	 * @cond: conditional opcode suffix
+	 */
+	.macro	adr_l, dst:req, sym:req, cond
+	__adldst_l	add, \dst, \sym, \dst, \cond
+	.endm
+
+	/*
+	 * ldr_l - ldr <literal> pseudo-op with unlimited range
+	 *
+	 * @dst: destination register
+	 * @sym: name of the symbol
+	 * @cond: conditional opcode suffix
+	 */
+	.macro	ldr_l, dst:req, sym:req, cond
+	__adldst_l	ldr, \dst, \sym, \dst, \cond
+	.endm
+
+	/*
+	 * str_l - str <literal> pseudo-op with unlimited range
+	 *
+	 * @src: source register
+	 * @sym: name of the symbol
+	 * @tmp: mandatory scratch register
+	 * @cond: conditional opcode suffix
+	 */
+	.macro	str_l, src:req, sym:req, tmp:req, cond
+	__adldst_l	str, \src, \sym, \tmp, \cond
+	.endm
+
+	.macro	__ldst_va, op, reg, tmp, sym, cond, offset
+#if __LINUX_ARM_ARCH__ >= 7 || \
+    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
+	mov_l	\tmp, \sym, \cond
+#else
+	/*
+	 * Avoid a literal load, by emitting a sequence of ADD/LDR instructions
+	 * with the appropriate relocations. The combined sequence has a range
+	 * of -/+ 256 MiB, which should be sufficient for the core kernel and
+	 * for modules loaded into the module region.
+	 */
+	.globl		\sym
+	.reloc		.L0_\@, R_ARM_ALU_PC_G0_NC, \sym
+	.reloc		.L1_\@, R_ARM_ALU_PC_G1_NC, \sym
+	.reloc		.L2_\@, R_ARM_LDR_PC_G2, \sym
+.L0_\@: sub\cond	\tmp, pc, #8 - \offset
+.L1_\@: sub\cond	\tmp, \tmp, #4 - \offset
+.L2_\@:
+#endif
+	\op\cond	\reg, [\tmp, #\offset]
+	.endm
+
+	/*
+	 * ldr_va - load a 32-bit word from the virtual address of \sym
+	 */
+	.macro	ldr_va, rd:req, sym:req, cond, tmp, offset=0
+	.ifnb	\tmp
+	__ldst_va	ldr, \rd, \tmp, \sym, \cond, \offset
+	.else
+	__ldst_va	ldr, \rd, \rd, \sym, \cond, \offset
+	.endif
+	.endm
+
+	/*
+	 * str_va - store a 32-bit word to the virtual address of \sym
+	 */
+	.macro	str_va, rn:req, sym:req, tmp:req, cond
+	__ldst_va	str, \rn, \tmp, \sym, \cond, 0
+	.endm
+
+	/*
+	 * ldr_this_cpu - Load a 32-bit word from the per-CPU variable 'sym'
+	 *		  into register 'rd', which may be the stack pointer,
+	 *		  using 't1' and 't2' as general temp registers. These
+	 *		  are permitted to overlap with 'rd' if != sp
+	 */
+	.macro	ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
+	ldr_va	\rd, \sym, tmp=\t1
+	.endm
+
+	/*
+	 * rev_l - byte-swap a 32-bit value
+	 *
+	 * @val: source/destination register
+	 * @tmp: scratch register
+	 */
+	.macro	rev_l, val:req, tmp:req
+	.if	__LINUX_ARM_ARCH__ < 6
+	eor	\tmp, \val, \val, ror #16
+	bic	\tmp, \tmp, #0x00ff0000
+	mov	\val, \val, ror #8
+	eor	\val, \val, \tmp, lsr #8
+	.else
+	rev	\val, \val
+	.endif
+	.endm
+
+	/*
+	 * bl_r - branch and link to register
+	 *
+	 * @dst: target to branch to
+	 * @c: conditional opcode suffix
+	 */
+	.macro	bl_r, dst:req, c
+	.if	__LINUX_ARM_ARCH__ < 6
+	mov\c	lr, pc
+	mov\c	pc, \dst
+	.else
+	blx\c	\dst
+	.endif
+	.endm
+#endif
-- 
2.39.2
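
As a quick usage sketch (not part of the patch; the routine name, the
symbol my_counter and the ENTRY/ENDPROC linkage macros are made up for
illustration), a relocatable ARM32 routine could read, modify and write
back a word without the assembler emitting a literal that needs a
runtime relocation:

	/* hypothetical example, ARMv7, ARM mode */
	ENTRY(increment_counter)
		ldr_l	r0, my_counter		@ PC-relative load of the word at my_counter
		add	r0, r0, #1
		str_l	r0, my_counter, r1	@ write back; r1 is the mandatory scratch register
		adr_l	r0, my_counter		@ return the runtime address in r0
		bx	lr
	ENDPROC(increment_counter)

Under CONFIG_CPU_64 the same adr_l/ldr_l/str_l spellings resolve to the
adrp/:lo12: variants at the top of the hunk, so callers need not care
which of the two implementations is active.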
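
To spell out the PC bias arithmetic in __adldst_l's Thumb-2 path: the
add at .Lb\@ assembles to 4 bytes in ARM mode and 2 bytes in Thumb
mode, so ". - .Lb\@" equals the opcode size and

	.set	.Lpc\@, . + (. - .Lb\@)	@ i.e. .Lb\@ + 2 * opcode size

yields .Lb\@ + 8 in ARM mode and .Lb\@ + 4 in Thumb mode, which is
exactly the value the add reads from pc in the respective execution
state.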