Le 28/09/2023 à 21:48, Hari Bathini a écrit : > patch_instruction() entails setting up pte, patching the instruction, > clearing the pte and flushing the tlb. If multiple instructions need > to be patched, every instruction would have to go through the above > drill unnecessarily. Instead, introduce function patch_instructions() > that sets up the pte, clears the pte and flushes the tlb only once per > page range of instructions to be patched. This adds a slight overhead > to patch_instruction() call while improving the patching time for > scenarios where more than one instruction needs to be patched. Not a "slight" but a "significant" overhead on PPC32. Thinking about it once more I don't think it is a good idea to try and merge that into the existing code_patching logic which is really single instruction performance oriented. Anyway, comments below. > > Signed-off-by: Hari Bathini <hbathini@xxxxxxxxxxxxx> > --- > arch/powerpc/include/asm/code-patching.h | 1 + > arch/powerpc/lib/code-patching.c | 93 +++++++++++++++++++++--- > 2 files changed, 85 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h > index 3f881548fb61..43a4aedfa703 100644 > --- a/arch/powerpc/include/asm/code-patching.h > +++ b/arch/powerpc/include/asm/code-patching.h > @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr, > int patch_branch(u32 *addr, unsigned long target, int flags); > int patch_instruction(u32 *addr, ppc_inst_t instr); > int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr); I don't like void *, you can do to much nasty things with that. I think you want u32 * > > static inline unsigned long patch_site_addr(s32 *site) > { > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > index b00112d7ad46..4ff002bc41f6 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -278,7 +278,36 @@ static void unmap_patch_area(unsigned long addr) > flush_tlb_kernel_range(addr, addr + PAGE_SIZE); > } > > -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) > +static int __patch_instructions(u32 *patch_addr, void *code, size_t len, bool repeat_instr) > +{ > + unsigned long start = (unsigned long)patch_addr; > + > + /* Repeat instruction */ > + if (repeat_instr) { > + ppc_inst_t instr = ppc_inst_read(code); > + > + if (ppc_inst_prefixed(instr)) { > + u64 val = ppc_inst_as_ulong(instr); > + > + memset64((uint64_t *)patch_addr, val, len / 8); Use u64 instead of uint64_t. > + } else { > + u32 val = ppc_inst_val(instr); > + > + memset32(patch_addr, val, len / 4); > + } > + } else > + memcpy(patch_addr, code, len); Missing braces, see https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces > + > + smp_wmb(); /* smp write barrier */ > + flush_icache_range(start, start + len); > + return 0; > +} > + > +/* > + * A page is mapped and instructions that fit the page are patched. > + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. > + */ > +static int __do_patch_instructions_mm(u32 *addr, void *code, size_t len, bool repeat_instr) > { > int err; > u32 *patch_addr; > @@ -307,11 +336,15 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) > > orig_mm = start_using_temp_mm(patching_mm); > > - err = __patch_instruction(addr, instr, patch_addr); > + /* Single instruction case. */ > + if (len == 0) { > + err = __patch_instruction(addr, *(ppc_inst_t *)code, patch_addr); Take care, you can't convert u32 * to ppc_inst_t that way, you have to use ppc_inst_read() otherwise you'll get odd result with prefixed instructions depending on endianness. > > - /* hwsync performed by __patch_instruction (sync) if successful */ > - if (err) > - mb(); /* sync */ > + /* hwsync performed by __patch_instruction (sync) if successful */ > + if (err) > + mb(); /* sync */ Get this away, see my patch at https://patchwork.ozlabs.org/project/linuxppc-dev/patch/e88b154eaf2efd9ff177d472d3411dcdec8ff4f5.1696675567.git.christophe.leroy@xxxxxxxxxx/ > + } else > + err = __patch_instructions(patch_addr, code, len, repeat_instr); > > /* context synchronisation performed by __patch_instruction (isync or exception) */ > stop_using_temp_mm(patching_mm, orig_mm); > @@ -328,7 +361,11 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) > return err; > } > > -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > +/* > + * A page is mapped and instructions that fit the page are patched. > + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. > + */ > +static int __do_patch_instructions(u32 *addr, void *code, size_t len, bool repeat_instr) > { > int err; > u32 *patch_addr; > @@ -345,7 +382,11 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > if (radix_enabled()) > asm volatile("ptesync": : :"memory"); > > - err = __patch_instruction(addr, instr, patch_addr); > + /* Single instruction case. */ > + if (len == 0) > + err = __patch_instruction(addr, *(ppc_inst_t *)code, patch_addr); Same, use ppc_inst_read() instead of this nasty casting. > + else > + err = __patch_instructions(patch_addr, code, len, repeat_instr); > > pte_clear(&init_mm, text_poke_addr, pte); > flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); > @@ -369,15 +410,49 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) > > local_irq_save(flags); > if (mm_patch_enabled()) > - err = __do_patch_instruction_mm(addr, instr); > + err = __do_patch_instructions_mm(addr, &instr, 0, false); > else > - err = __do_patch_instruction(addr, instr); > + err = __do_patch_instructions(addr, &instr, 0, false); > local_irq_restore(flags); > > return err; > } > NOKPROBE_SYMBOL(patch_instruction); > > +/* > + * Patch 'addr' with 'len' bytes of instructions from 'code'. > + * > + * If repeat_instr is true, the same instruction is filled for > + * 'len' bytes. > + */ > +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr) I'd like to see code as a u32 * > +{ > + unsigned long flags; > + size_t plen; > + int err; Move those three variables inside the only block in which they are used. > + > + while (len > 0) { > + plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len); > + > + local_irq_save(flags); > + if (mm_patch_enabled()) > + err = __do_patch_instructions_mm(addr, code, plen, repeat_instr); > + else > + err = __do_patch_instructions(addr, code, plen, repeat_instr); > + local_irq_restore(flags); > + if (err) > + break; replace by 'return err' > + > + len -= plen; > + addr = addr + plen; > + if (!repeat_instr) > + code = code + plen; > + } > + > + return err; If len is 0 err will be undefined. Is that expected ? Replace by return 0; > +} > +NOKPROBE_SYMBOL(patch_instructions); > + > int patch_branch(u32 *addr, unsigned long target, int flags) > { > ppc_inst_t instr;