Do we expect to use this more widely? If this only really matters for
d_hash(), it might be better to handle this via the alternatives framework
with callbacks and avoid the need for new infrastructure.
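Completely untested sketch (and hand-waving over how the dcache code would
hand the arch the pointer value -- the d_hash_ptr_val / patch_d_hash_ptr /
d_hash_ptr() names below are made up for illustration), but I'm imagining
something along the lines of the callback patching we already do in
arch/arm64/kvm/va_layout.c:

	#include <linux/init.h>
	#include <asm/alternative.h>
	#include <asm/byteorder.h>
	#include <asm/insn.h>

	/* Hypothetical: stashed by the dcache code before alternatives are applied */
	u64 d_hash_ptr_val;

	void __init patch_d_hash_ptr(struct alt_instr *alt,
				     __le32 *origptr, __le32 *updptr,
				     int nr_inst)
	{
		int i;

		/* One movz/movk per 16-bit chunk of the pointer */
		for (i = 0; i < nr_inst; i++) {
			u32 insn = le32_to_cpu(origptr[i]);

			insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16,
							     insn,
							     d_hash_ptr_val >> (16 * i));
			updptr[i] = cpu_to_le32(insn);
		}
	}

	static __always_inline u64 d_hash_ptr(void)
	{
		u64 ptr;

		asm(ALTERNATIVE_CB("movz %0, #0\n"
				   "movk %0, #0, lsl #16\n"
				   "movk %0, #0, lsl #32\n"
				   "movk %0, #0, lsl #48\n",
				   ARM64_ALWAYS_SYSTEM,
				   patch_d_hash_ptr)
		    : "=r" (ptr));

		return ptr;
	}

That way the D$/I$ maintenance and synchronization come from the usual
alternatives machinery rather than needing to be open-coded, though it would
tie the patching point to when alternatives are applied, which would need
checking against when d_hash() is first used.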
> diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
> new file mode 100644
> index 000000000000..02462b2cb6f9
> --- /dev/null
> +++ b/arch/arm64/include/asm/runtime-const.h
> @@ -0,0 +1,75 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_RUNTIME_CONST_H
> +#define _ASM_RUNTIME_CONST_H
> +
> +#define runtime_const_ptr(sym) ({				\
> +	typeof(sym) __ret;					\
> +	asm_inline("1:\t"					\
> +		"movz %0, #0xcdef\n\t"				\
> +		"movk %0, #0x89ab, lsl #16\n\t"			\
> +		"movk %0, #0x4567, lsl #32\n\t"			\
> +		"movk %0, #0x0123, lsl #48\n\t"			\
> +		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
> +		".long 1b - .\n\t"				\
> +		".popsection"					\
> +		:"=r" (__ret));					\
> +	__ret; })
> +
> +#define runtime_const_shift_right_32(val, sym) ({		\
> +	unsigned long __ret;					\
> +	asm_inline("1:\t"					\
> +		"lsr %w0,%w1,#12\n\t"				\
> +		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
> +		".long 1b - .\n\t"				\
> +		".popsection"					\
> +		:"=r" (__ret)					\
> +		:"r" (0u+(val)));				\
> +	__ret; })
> +
> +#define runtime_const_init(type, sym) do {		\
> +	extern s32 __start_runtime_##type##_##sym[];	\
> +	extern s32 __stop_runtime_##type##_##sym[];	\
> +	runtime_const_fixup(__runtime_fixup_##type,	\
> +		(unsigned long)(sym),			\
> +		__start_runtime_##type##_##sym,		\
> +		__stop_runtime_##type##_##sym);		\
> +} while (0)
> +
> +// 16-bit immediate for wide move (movz and movk) in bits 5..20
> +static inline void __runtime_fixup_16(unsigned int *p, unsigned int val)
> +{
> +	unsigned int insn = *p;
> +	insn &= 0xffe0001f;
> +	insn |= (val & 0xffff) << 5;
> +	*p = insn;
> +}

As-is this will break BE kernels, as instructions are always encoded
little-endian regardless of data endianness. We usually handle that by
using __le32 instruction pointers and using le32_to_cpu()/cpu_to_le32()
when reading/writing, e.g.

	#include <asm/byteorder.h>

	static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
	{
		u32 insn = le32_to_cpu(*p);
		insn &= 0xffe0001f;
		insn |= (val & 0xffff) << 5;
		*p = cpu_to_le32(insn);
	}

We have some helpers for instruction manipulation, and we can use
aarch64_insn_encode_immediate() here, e.g.

	#include <asm/insn.h>

	static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
	{
		u32 insn = le32_to_cpu(*p);
		insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, val);
		*p = cpu_to_le32(insn);
	}

> +static inline void __runtime_fixup_ptr(void *where, unsigned long val)
> +{
> +	unsigned int *p = lm_alias(where);
> +	__runtime_fixup_16(p, val);
> +	__runtime_fixup_16(p+1, val >> 16);
> +	__runtime_fixup_16(p+2, val >> 32);
> +	__runtime_fixup_16(p+3, val >> 48);
> +}

This is missing the necessary cache maintenance and context synchronization
event.

After the new values are written, we need cache maintenance (a D$ clean to
PoU, then an I$ invalidate to PoU) followed by a context synchronization
event (e.g. an ISB) before CPUs are guaranteed to use the new instructions
rather than the old ones.

Depending on how your system has been integrated, you might get away without
that. If you see:

	Data cache clean to the PoU not required for I/D coherence

... in your dmesg, that means you only need the I$ invalidate and context
synchronization event, and you might happen to get those by virtue of
alternative patching running between dcache_init_early() and the use of the
patched instructions.

However, in general, we do need all of that. As long as this runs before
secondaries are brought up, we can handle that with caches_clean_inval_pou().
Assuming the __le32 changes above, I'd expect this to be:

	static inline void __runtime_fixup_ptr(void *where, unsigned long val)
	{
		__le32 *p = lm_alias(where);
		__runtime_fixup_16(p, val);
		__runtime_fixup_16(p + 1, val >> 16);
		__runtime_fixup_16(p + 2, val >> 32);
		__runtime_fixup_16(p + 3, val >> 48);
		caches_clean_inval_pou((unsigned long)p, (unsigned long)(p + 4));
	}

> +// Immediate value is 5 bits starting at bit #16
> +static inline void __runtime_fixup_shift(void *where, unsigned long val)
> +{
> +	unsigned int *p = lm_alias(where);
> +	unsigned int insn = *p;
> +	insn &= 0xffc0ffff;
> +	insn |= (val & 63) << 16;
> +	*p = insn;
> +}

As with the other bits above, I'd expect this to be:

	static inline void __runtime_fixup_shift(void *where, unsigned long val)
	{
		__le32 *p = lm_alias(where);
		u32 insn = le32_to_cpu(*p);
		insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, val);
		*p = cpu_to_le32(insn);
		caches_clean_inval_pou((unsigned long)p, (unsigned long)(p + 1));
	}

Mark.

> +
> +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
> +	unsigned long val, s32 *start, s32 *end)
> +{
> +	while (start < end) {
> +		fn(*start + (void *)start, val);
> +		start++;
> +	}
> +}
> +
> +#endif
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 755a22d4f840..55a8e310ea12 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -264,6 +264,9 @@ SECTIONS
> 		EXIT_DATA
> 	}
> 
> +	RUNTIME_CONST(shift, d_hash_shift)
> +	RUNTIME_CONST(ptr, dentry_hashtable)
> +
> 	PERCPU_SECTION(L1_CACHE_BYTES)
> 	HYPERVISOR_PERCPU_SECTION
> 
> -- 
> 2.45.1.209.gc6f12300df
> 
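P.S. for context: IIRC the only consumer on the generic side of this series
is d_hash() in fs/dcache.c, which ends up roughly as:

	static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
	{
		return runtime_const_ptr(dentry_hashtable) +
			runtime_const_shift_right_32(hashlen, d_hash_shift);
	}

with dcache_init_early()/dcache_init() calling
runtime_const_init(shift, d_hash_shift) and
runtime_const_init(ptr, dentry_hashtable) once the hash table has been
allocated; the two RUNTIME_CONST() entries above are the linker script side
of that.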