Xu Kuohai <xukuohai@xxxxxxxxxxxxxxx> writes: > On 9/8/2023 10:43 PM, Puranjay Mohan wrote: >> This will be used by BPF JIT compiler to dump JITed binary to a RX huge >> page, and thus allow multiple BPF programs to share a huge (2MB) >> page. >> >> The bpf_prog_pack allocator that implements the above feature allocates >> a RX/RW buffer pair. The JITed code is written to the RW buffer and then >> this function will be used to copy the code from RW to RX buffer. >> >> Signed-off-by: Puranjay Mohan <puranjay12@xxxxxxxxx> >> Acked-by: Song Liu <song@xxxxxxxxxx> >> --- >> arch/arm64/include/asm/patching.h | 1 + >> arch/arm64/kernel/patching.c | 41 +++++++++++++++++++++++++++++++ >> 2 files changed, 42 insertions(+) >> >> diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h >> index 68908b82b168..f78a0409cbdb 100644 >> --- a/arch/arm64/include/asm/patching.h >> +++ b/arch/arm64/include/asm/patching.h >> @@ -8,6 +8,7 @@ int aarch64_insn_read(void *addr, u32 *insnp); >> int aarch64_insn_write(void *addr, u32 insn); >> >> int aarch64_insn_write_literal_u64(void *addr, u64 val); >> +void *aarch64_insn_copy(void *dst, const void *src, size_t len); >> >> int aarch64_insn_patch_text_nosync(void *addr, u32 insn); >> int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); >> diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c >> index b4835f6d594b..243d6ae8d2d8 100644 >> --- a/arch/arm64/kernel/patching.c >> +++ b/arch/arm64/kernel/patching.c >> @@ -105,6 +105,47 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val) >> return ret; >> } >> >> +/** >> + * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory >> + * @dst: address to modify >> + * @src: source of the copy >> + * @len: length to copy >> + * >> + * Useful for JITs to dump new code blocks into unused regions of RX memory. 
>> + */ >> +noinstr void *aarch64_insn_copy(void *dst, const void *src, size_t len) >> +{ >> + unsigned long flags; >> + size_t patched = 0; >> + size_t size; >> + void *waddr; >> + void *ptr; >> + int ret; >> + > > check whether the input address and length are aligned to instruction size? Will add a check that dst is aligned to instruction size and len is a multiple of instruction size. > >> + raw_spin_lock_irqsave(&patch_lock, flags); >> + >> + while (patched < len) { >> + ptr = dst + patched; >> + size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr), >> + len - patched); >> + >> + waddr = patch_map(ptr, FIX_TEXT_POKE0); >> + ret = copy_to_kernel_nofault(waddr, src + patched, size); >> + patch_unmap(FIX_TEXT_POKE0); >> + >> + if (ret < 0) { >> + raw_spin_unlock_irqrestore(&patch_lock, flags); >> + return NULL; >> + } >> + patched += size; >> + } >> + raw_spin_unlock_irqrestore(&patch_lock, flags); >> + >> + caches_clean_inval_pou((uintptr_t)dst, (uintptr_t)dst + len); >> + > > seems flush_icache_range() or something like it should be called here to > ensure the other CPUs' pipelines are cleared, otherwise the old instructions > at the dst address might be executed on other CPUs after the copy is complete, > which is not expected. Sure, I will use flush_icache_range() in place of caches_clean_inval_pou() in the next version. > >> + return dst; >> +} >> + >> int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) >> { >> u32 *tp = addr;