From: Christophe Leroy <christophe.leroy@xxxxxxxxxx> [ Upstream commit 5517d500829c683a358a8de04ecb2e28af629ae5 ] When a static call is updated with __static_call_return0() as target, arch_static_call_transform() set it to use an optimised set of instructions which are meant to lay in the same cacheline. But when initialising a static call with DEFINE_STATIC_CALL_RET0(), we get a branch to the real __static_call_return0() function instead of getting the optimised setup: c00d8120 <__SCT__perf_snapshot_branch_stack>: c00d8120: 4b ff ff f4 b c00d8114 <__static_call_return0> c00d8124: 3d 80 c0 0e lis r12,-16370 c00d8128: 81 8c 81 3c lwz r12,-32452(r12) c00d812c: 7d 89 03 a6 mtctr r12 c00d8130: 4e 80 04 20 bctr c00d8134: 38 60 00 00 li r3,0 c00d8138: 4e 80 00 20 blr c00d813c: 00 00 00 00 .long 0x0 Add ARCH_DEFINE_STATIC_CALL_RET0_TRAMP() defined by each architecture to setup the optimised configuration, and rework DEFINE_STATIC_CALL_RET0() to call it: c00d8120 <__SCT__perf_snapshot_branch_stack>: c00d8120: 48 00 00 14 b c00d8134 <__SCT__perf_snapshot_branch_stack+0x14> c00d8124: 3d 80 c0 0e lis r12,-16370 c00d8128: 81 8c 81 3c lwz r12,-32452(r12) c00d812c: 7d 89 03 a6 mtctr r12 c00d8130: 4e 80 04 20 bctr c00d8134: 38 60 00 00 li r3,0 c00d8138: 4e 80 00 20 blr c00d813c: 00 00 00 00 .long 0x0 Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Acked-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx> Link: https://lore.kernel.org/r/1e0a61a88f52a460f62a58ffc2a5f847d1f7d9d8.1647253456.git.christophe.leroy@xxxxxxxxxx Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> --- arch/powerpc/include/asm/static_call.h | 1 + arch/x86/include/asm/static_call.h | 2 ++ include/linux/static_call.h | 20 +++++++++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h index 0a0bc79bd1fa..de1018cc522b 100644 --- a/arch/powerpc/include/asm/static_call.h +++ b/arch/powerpc/include/asm/static_call.h @@ -24,5 +24,6 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func) #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20") #endif /* _ASM_POWERPC_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index ed4f8bb6c2d9..2455d721503e 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -38,6 +38,8 @@ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) #define ARCH_ADD_TRAMP_KEY(name) \ asm(".pushsection .static_call_tramp_key, \"a\" \n" \ diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 3e56a9751c06..e2d70435988c 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -196,6 +196,14 @@ extern long __static_call_return0(void); }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = __static_call_return0, \ + .type = 1, \ + }; \ + ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) + #define static_call_cond(name) (void)__static_call(name) #define EXPORT_STATIC_CALL(name) \ @@ -231,6 +239,12 @@ static inline int static_call_init(void) { return 0; } }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = __static_call_return0, \ + }; \ + ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) #define static_call_cond(name) (void)__static_call(name) @@ -287,6 +301,9 @@ static inline long __static_call_return0(void) .func = NULL, \ } +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + __DEFINE_STATIC_CALL(name, _func, __static_call_return0) + static inline void __static_call_nop(void) { } /* @@ -330,7 +347,4 @@ static inline int static_call_text_reserved(void *start, void *end) #define DEFINE_STATIC_CALL(name, _func) \ __DEFINE_STATIC_CALL(name, _func, _func) -#define DEFINE_STATIC_CALL_RET0(name, _func) \ - __DEFINE_STATIC_CALL(name, _func, __static_call_return0) - #endif /* _LINUX_STATIC_CALL_H */ -- 2.35.1