On Wed, Oct 28, 2020 at 10:15 AM Ard Biesheuvel <ardb@xxxxxxxxxx> wrote: > > Commit 3193c0836 ("bpf: Disable GCC -fgcse optimization for > ___bpf_prog_run()") introduced a __no_fgcse macro that expands to a > function scope __attribute__((optimize("-fno-gcse"))), to disable a > GCC specific optimization that was causing trouble on x86 builds, and > was not expected to have any positive effect in the first place. > > However, as the GCC manual documents, __attribute__((optimize)) > is not for production use, and results in all other optimization > options to be forgotten for the function in question. This can > cause all kinds of trouble, but in one particular reported case, > it causes -fno-asynchronous-unwind-tables to be disregarded, > resulting in .eh_frame info to be emitted for the function. > > This reverts commit 3193c0836, and instead, it disables the -fgcse > optimization for the entire source file, but only when building for > X86 using GCC with CONFIG_BPF_JIT_ALWAYS_ON disabled. Note that the > original commit states that CONFIG_RETPOLINE=n triggers the issue, > whereas CONFIG_RETPOLINE=y performs better without the optimization, > so it is kept disabled in both cases. 
> > Fixes: 3193c0836 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") > Link: https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=GFHpzoj_hCoBQ@xxxxxxxxxxxxxx/ > Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx> > --- > include/linux/compiler-gcc.h | 2 -- > include/linux/compiler_types.h | 4 ---- > kernel/bpf/Makefile | 6 +++++- > kernel/bpf/core.c | 2 +- > 4 files changed, 6 insertions(+), 8 deletions(-) > > diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h > index d1e3c6896b71..5deb37024574 100644 > --- a/include/linux/compiler-gcc.h > +++ b/include/linux/compiler-gcc.h > @@ -175,5 +175,3 @@ > #else > #define __diag_GCC_8(s) > #endif > - > -#define __no_fgcse __attribute__((optimize("-fno-gcse"))) > diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h > index 6e390d58a9f8..ac3fa37a84f9 100644 > --- a/include/linux/compiler_types.h > +++ b/include/linux/compiler_types.h > @@ -247,10 +247,6 @@ struct ftrace_likely_data { > #define asm_inline asm > #endif > > -#ifndef __no_fgcse > -# define __no_fgcse > -#endif > - > /* Are two types/vars the same type (ignoring qualifiers)? */ > #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) > > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > index bdc8cd1b6767..c1b9f71ee6aa 100644 > --- a/kernel/bpf/Makefile > +++ b/kernel/bpf/Makefile > @@ -1,6 +1,10 @@ > # SPDX-License-Identifier: GPL-2.0 > obj-y := core.o > -CFLAGS_core.o += $(call cc-disable-warning, override-init) > +ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) > +# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details > +cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse > +endif > +CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) Writing multiple conditions in a conditional block in GNU make is painful, hence the double `y` trick. 
I feel like either three nested conditionals (one each for CONFIG_BPF_JIT_ALWAYS_ON, CONFIG_X86, and CONFIG_CC_IS_GCC) or a single check using three `y`s would have been clearer than mixing and matching `if`s with multiple `y`s, but regardless of what color I think we should paint the bikeshed: Reviewed-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx> This also doesn't resolve all issues here, but is a step in the right direction, IMO. > > obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o > obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > index 9268d77898b7..55454d2278b1 100644 > --- a/kernel/bpf/core.c > +++ b/kernel/bpf/core.c > @@ -1369,7 +1369,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) > * > * Decode and execute eBPF instructions. > */ > -static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) > +static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) > { > #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y > #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z > -- > 2.17.1 > -- Thanks, ~Nick Desaulniers