puranjay12@xxxxxxxxx writes: > I would propose the following changes to the compiler that could fix this > issue: > > 1. The kCFI hash should always be generated at func_addr - 4, this would > make the calling code consistent. > > 2. The two(n) nops should be generated before the kCFI hash. We would > modify the ftrace code to look for these nops at (fun_addr - 12) and > (func_addr - 8) when CFI is enabled, and (func_addr - 8), (func_addr - > 4) when CFI is disabled. > > The generated code could then look like: > > ffff80008033e9b0: d503201f nop > ffff80008033e9b4: d503201f nop > ffff80008033e9b8: 16c516ce kCFI hash > ffff80008033e9bc <jump_label_cmp>: > ffff80008033e9bc: d503245f bti c > ffff80008033e9c0: d503201f nop > ffff80008033e9c4: d503201f nop > [.....] > I hacked some patches and tried the above approach: Both CFI and DIRECT CALLS are enabled: [root@localhost ~]# zcat /proc/config.gz | grep DIRECT_CALLS CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y [root@localhost ~]# zcat /proc/config.gz | grep CONFIG_CFI CONFIG_CFI_CLANG=y CONFIG_CFI_PERMISSIVE=y CONFIG_CFI_WITHOUT_STATIC_CALL=y Running some BPF selftests that use this feature. 
./test_progs -a "*fentry*,*fexit*,tracing_struct" -b "fentry_test/fentry_many_args" -b "fexit_test/fexit_many_args" #77 fentry_attach_btf_presence:OK #78 fentry_fexit:OK #79/1 fentry_test/fentry:OK #79 fentry_test:OK #80/1 fexit_bpf2bpf/target_no_callees:OK #80/2 fexit_bpf2bpf/target_yes_callees:OK #80/3 fexit_bpf2bpf/func_replace:OK #80/4 fexit_bpf2bpf/func_replace_verify:OK #80/5 fexit_bpf2bpf/func_sockmap_update:OK #80/6 fexit_bpf2bpf/func_replace_return_code:OK #80/7 fexit_bpf2bpf/func_map_prog_compatibility:OK #80/8 fexit_bpf2bpf/func_replace_unreliable:OK #80/9 fexit_bpf2bpf/func_replace_multi:OK #80/10 fexit_bpf2bpf/fmod_ret_freplace:OK #80/11 fexit_bpf2bpf/func_replace_global_func:OK #80/12 fexit_bpf2bpf/fentry_to_cgroup_bpf:OK #80/13 fexit_bpf2bpf/func_replace_progmap:OK #80 fexit_bpf2bpf:OK #81 fexit_sleep:OK #82 fexit_stress:OK #83/1 fexit_test/fexit:OK #83 fexit_test:OK #158 module_fentry_shadow:OK #193/1 recursive_fentry/attach:OK #193/2 recursive_fentry/load:OK #193/3 recursive_fentry/detach:OK #193 recursive_fentry:OK #385 tracing_struct:OK Summary: 10/18 PASSED, 0 SKIPPED, 0 FAILED Running basic ftrace selftest: [root@localhost ftrace]# ./ftracetest test.d/00basic/ === Ftrace unit tests === [1] Basic trace file check [PASS] [2] Basic test for tracers [PASS] [3] Basic trace clock test [PASS] [4] Basic event tracing check [PASS] [5] Change the ringbuffer size [PASS] [6] Change the ringbuffer sub-buffer size [PASS] [7] Snapshot and tracing setting [PASS] [8] Snapshot and tracing_cpumask [PASS] [9] Test file and directory owership changes for eventfs [PASS] [10] Basic tests on writing to trace_marker [PASS] [11] trace_pipe and trace_marker [PASS] [12] (instance) Basic test for tracers [PASS] [13] (instance) Basic trace clock test [PASS] [14] (instance) Change the ringbuffer size [PASS] [15] (instance) Change the ringbuffer sub-buffer size [PASS] [16] (instance) Snapshot and tracing setting [PASS] [17] (instance) Snapshot and tracing_cpumask [PASS] [18] 
(instance) Basic tests on writing to trace_marker [PASS] [19] (instance) trace_pipe and trace_marker [PASS] # of passed: 19 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 Here are the patches(RFC) that I created: LLVM Patch: --- >8 --- diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index e89a1c26c..f1ca33c26 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -982,9 +982,6 @@ void AsmPrinter::emitFunctionHeader() { } } - // Emit KCFI type information before patchable-function-prefix nops. - emitKCFITypeId(*MF); - // Emit M NOPs for -fpatchable-function-entry=N,M where M>0. We arbitrarily // place prefix data before NOPs. unsigned PatchableFunctionPrefix = 0; @@ -1006,6 +1003,9 @@ void AsmPrinter::emitFunctionHeader() { CurrentPatchableFunctionEntrySym = CurrentFnBegin; } + // Emit KCFI type information after patchable-function-prefix nops. + emitKCFITypeId(*MF); + // Emit the function prologue data for the indirect call sanitizer. if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) { assert(MD->getNumOperands() == 2); diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 4fa719ad6..678a38a6a 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -474,20 +474,11 @@ void AArch64AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { assert(ScratchRegs[0] != AddrReg && ScratchRegs[1] != AddrReg && "Invalid scratch registers for KCFI_CHECK"); - // Adjust the offset for patchable-function-prefix. This assumes that - // patchable-function-prefix is the same for all functions. 
- int64_t PrefixNops = 0; - (void)MI.getMF() - ->getFunction() - .getFnAttribute("patchable-function-prefix") - .getValueAsString() - .getAsInteger(10, PrefixNops); - // Load the target function type hash. EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDURWi) .addReg(ScratchRegs[0]) .addReg(AddrReg) - .addImm(-(PrefixNops * 4 + 4))); + .addImm(-4)); } // Load the expected type hash. --- 8< --- Kernel Patch: --- >8 --- diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index aa7c1d435..979c290e6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -197,7 +197,7 @@ config ARM64 select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \ if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \ - if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG && \ + if (DYNAMIC_FTRACE_WITH_ARGS && \ !CC_OPTIMIZE_FOR_SIZE) select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \ if DYNAMIC_FTRACE_WITH_ARGS diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index f0c16640e..9be421583 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -48,8 +48,15 @@ SYM_CODE_START(ftrace_caller) * alignment first as this allows us to fold the subtraction into the * LDR. 
*/ + +#ifdef CONFIG_CFI_CLANG + sub x11, x30, #20 + bic x11, x11, 0x7 + ldr x11, [x11, 0] // op +#else bic x11, x30, 0x7 ldr x11, [x11, #-(4 * AARCH64_INSN_SIZE)] // op +#endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index a650f5e11..ed7a86b31 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -125,6 +125,10 @@ unsigned long ftrace_call_adjust(unsigned long addr) /* Skip the NOPs placed before the function entry point */ addr += 2 * AARCH64_INSN_SIZE; + /* Skip the hash in case of CLANG_CFI */ + if (IS_ENABLED(CONFIG_CFI_CLANG)) + addr += AARCH64_INSN_SIZE; + /* Skip any BTI */ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) { u32 insn = le32_to_cpu(*(__le32 *)addr); @@ -299,7 +303,11 @@ static const struct ftrace_ops *arm64_rec_get_ops(struct dyn_ftrace *rec) static int ftrace_rec_set_ops(const struct dyn_ftrace *rec, const struct ftrace_ops *ops) { +#ifdef CONFIG_CFI_CLANG + unsigned long literal = ALIGN_DOWN(rec->ip - 16, 8); +#else unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8); +#endif return aarch64_insn_write_literal_u64((void *)literal, (unsigned long)ops); } --- 8< --- I applied the kernel patch over my tree that has patches to fix kCFI with BPF and also has a static call related fix [1]. Thanks, Puranjay [1] https://github.com/puranjaymohan/linux/tree/kcfi_bpf