The patch titled
     Subject: bpf: rework the compat kernel probe handling
has been added to the -mm tree.  Its filename is
     bpf-rework-the-compat-kernel-probe-handling.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/bpf-rework-the-compat-kernel-probe-handling.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/bpf-rework-the-compat-kernel-probe-handling.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Christoph Hellwig <hch@xxxxxx>
Subject: bpf: rework the compat kernel probe handling

Instead of using the dangerous probe_kernel_read and strncpy_from_unsafe
helpers, rework the compat probes to check if an address is a kernel or
userspace one, and then use the low-level kernel or user probe helper
shared by the proper kernel and user probe helpers.  This slightly changes
behavior as the compat probe on a user address doesn't check the lockdown
flags, just as the pure user probes do.

Link: http://lkml.kernel.org/r/20200521152301.2587579-14-hch@xxxxxx
Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 kernel/trace/bpf_trace.c |  109 ++++++++++++++++++++++---------------
 1 file changed, 67 insertions(+), 42 deletions(-)

--- a/kernel/trace/bpf_trace.c~bpf-rework-the-compat-kernel-probe-handling
+++ a/kernel/trace/bpf_trace.c
@@ -136,17 +136,23 @@ static const struct bpf_func_proto bpf_o
 };
 #endif
 
-BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
-	   const void __user *, unsafe_ptr)
+static __always_inline int
+bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
 {
-	int ret = probe_user_read(dst, unsafe_ptr, size);
+	int ret;
 
+	ret = probe_user_read(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
-
 	return ret;
 }
 
+BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
+	   const void __user *, unsafe_ptr)
+{
+	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
+}
+
 static const struct bpf_func_proto bpf_probe_read_user_proto = {
 	.func		= bpf_probe_read_user,
 	.gpl_only	= true,
@@ -156,17 +162,24 @@ static const struct bpf_func_proto bpf_p
 	.arg3_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
-	   const void __user *, unsafe_ptr)
+static __always_inline int
+bpf_probe_read_user_str_common(void *dst, u32 size,
+			       const void __user *unsafe_ptr)
 {
-	int ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
+	int ret;
 
+	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
-
 	return ret;
 }
 
+BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
+	   const void __user *, unsafe_ptr)
+{
+	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
+}
+
 static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
 	.func		= bpf_probe_read_user_str,
 	.gpl_only	= true,
@@ -177,25 +190,25 @@ static const struct bpf_func_proto bpf_p
 };
 
 static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
-			     const bool compat)
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
 {
 	int ret = security_locked_down(LOCKDOWN_BPF_READ);
 
 	if (unlikely(ret < 0))
-		goto out;
-	ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
-	      probe_kernel_read_strict(dst, unsafe_ptr, size);
+		goto fail;
+	ret = probe_kernel_read_strict(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
-out:
-		memset(dst, 0, size);
+		goto fail;
+	return ret;
+fail:
+	memset(dst, 0, size);
 	return ret;
 }
 
 BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
 	   const void *, unsafe_ptr)
 {
-	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
+	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
 }
 
 static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
@@ -207,50 +220,37 @@ static const struct bpf_func_proto bpf_p
 	.arg3_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
-	   const void *, unsafe_ptr)
-{
-	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
-}
-
-static const struct bpf_func_proto bpf_probe_read_compat_proto = {
-	.func		= bpf_probe_read_compat,
-	.gpl_only	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
-	.arg3_type	= ARG_ANYTHING,
-};
-
 static __always_inline int
-bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr,
-				 const bool compat)
+bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
 {
 	int ret = security_locked_down(LOCKDOWN_BPF_READ);
 
 	if (unlikely(ret < 0))
-		goto out;
+		goto fail;
+
 	/*
-	 * The strncpy_from_unsafe_*() call will likely not fill the entire
-	 * buffer, but that's okay in this circumstance as we're probing
+	 * The strncpy_from_kernel_nofault() call will likely not fill the
+	 * entire buffer, but that's okay in this circumstance as we're probing
 	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
 	 * as well probe the stack. Thus, memory is explicitly cleared
 	 * only in error case, so that improper users ignoring return
 	 * code altogether don't copy garbage; otherwise length of string
 	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
-	ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
-	      strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
+	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
-out:
-		memset(dst, 0, size);
+		goto fail;
+
+	return 0;
+fail:
+	memset(dst, 0, size);
 	return ret;
 }
 
 BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
 	   const void *, unsafe_ptr)
 {
-	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
+	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
 }
 
 static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
@@ -262,10 +262,34 @@ static const struct bpf_func_proto bpf_p
 	.arg3_type	= ARG_ANYTHING,
 };
 
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
+	   const void *, unsafe_ptr)
+{
+	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
+		return bpf_probe_read_user_common(dst, size,
+				(__force void __user *)unsafe_ptr);
+	}
+	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
+}
+
+static const struct bpf_func_proto bpf_probe_read_compat_proto = {
+	.func		= bpf_probe_read_compat,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
 	   const void *, unsafe_ptr)
 {
-	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
+	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
+		return bpf_probe_read_user_str_common(dst, size,
+				(__force void __user *)unsafe_ptr);
+	}
+	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
 }
 
 static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
@@ -276,6 +300,7 @@ static const struct bpf_func_proto bpf_p
 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
 	.arg3_type	= ARG_ANYTHING,
 };
+#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
 
 BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr,
 	   const void *, src, u32, size)
_

Patches currently in -mm which might be from hch@xxxxxx are

x86-hyperv-use-vmalloc_exec-for-the-hypercall-page.patch
x86-fix-vmap-arguments-in-map_irq_stack.patch
staging-android-ion-use-vmap-instead-of-vm_map_ram.patch
staging-media-ipu3-use-vmap-instead-of-reimplementing-it.patch
dma-mapping-use-vmap-insted-of-reimplementing-it.patch
powerpc-add-an-ioremap_phb-helper.patch
powerpc-remove-__ioremap_at-and-__iounmap_at.patch
mm-remove-__get_vm_area.patch
mm-unexport-unmap_kernel_range_noflush.patch
mm-rename-config_pgtable_mapping-to-config_zsmalloc_pgtable_mapping.patch
mm-only-allow-page-table-mappings-for-built-in-zsmalloc.patch
mm-pass-addr-as-unsigned-long-to-vb_free.patch
mm-remove-vmap_page_range_noflush-and-vunmap_page_range.patch
mm-rename-vmap_page_range-to-map_kernel_range.patch
mm-dont-return-the-number-of-pages-from-map_kernel_range_noflush.patch
mm-remove-map_vm_range.patch
mm-remove-unmap_vmap_area.patch
mm-remove-the-prot-argument-from-vm_map_ram.patch
mm-enforce-that-vmap-cant-map-pages-executable.patch
gpu-drm-remove-the-powerpc-hack-in-drm_legacy_sg_alloc.patch
mm-remove-the-pgprot-argument-to-__vmalloc.patch
mm-remove-the-prot-argument-to-__vmalloc_node.patch
mm-remove-both-instances-of-__vmalloc_node_flags.patch
mm-remove-__vmalloc_node_flags_caller.patch
mm-switch-the-test_vmalloc-module-to-use-__vmalloc_node.patch
mm-remove-vmalloc_user_node_flags.patch
arm64-use-__vmalloc_node-in-arch_alloc_vmap_stack.patch
powerpc-use-__vmalloc_node-in-alloc_vm_stack.patch
s390-use-__vmalloc_node-in-stack_alloc.patch
exec-simplify-the-copy_strings_kernel-calling-convention.patch
exec-open-code-copy_string_kernel.patch
amdgpu-a-null-mm-does-not-mean-a-thread-is-a-kthread.patch
kernel-move-use_mm-unuse_mm-to-kthreadc.patch
kernel-move-use_mm-unuse_mm-to-kthreadc-v2.patch
kernel-better-document-the-use_mm-unuse_mm-api-contract.patch
kernel-better-document-the-use_mm-unuse_mm-api-contract-v2.patch
kernel-set-user_ds-in-kthread_use_mm.patch
arm-fix-the-flush_icache_range-arguments-in-set_fiq_handler.patch
nds32-unexport-flush_icache_page.patch
powerpc-unexport-flush_icache_user_range.patch
unicore32-remove-flush_cache_user_range.patch
asm-generic-fix-the-inclusion-guards-for-cacheflushh.patch
asm-generic-dont-include-linux-mmh-in-cacheflushh.patch
asm-generic-dont-include-linux-mmh-in-cacheflushh-fix.patch
asm-generic-improve-the-flush_dcache_page-stub.patch
alpha-use-asm-generic-cacheflushh.patch
arm64-use-asm-generic-cacheflushh.patch
c6x-use-asm-generic-cacheflushh.patch
hexagon-use-asm-generic-cacheflushh.patch
ia64-use-asm-generic-cacheflushh.patch
microblaze-use-asm-generic-cacheflushh.patch
m68knommu-use-asm-generic-cacheflushh.patch
openrisc-use-asm-generic-cacheflushh.patch
powerpc-use-asm-generic-cacheflushh.patch
riscv-use-asm-generic-cacheflushh.patch
armsparcunicore32-remove-flush_icache_user_range.patch
mm-rename-flush_icache_user_range-to-flush_icache_user_page.patch
asm-generic-add-a-flush_icache_user_range-stub.patch
sh-implement-flush_icache_user_range.patch
xtensa-implement-flush_icache_user_range.patch
arm-rename-flush_cache_user_range-to-flush_icache_user_range.patch
m68k-implement-flush_icache_user_range.patch
exec-only-build-read_code-when-needed.patch
exec-use-flush_icache_user_range-in-read_code.patch
binfmt_flat-use-flush_icache_user_range.patch
nommu-use-flush_icache_user_range-in-brk-and-mmap.patch
module-move-the-set_fs-hack-for-flush_icache_range-to-m68k.patch
maccess-unexport-probe_kernel_write-and-probe_user_write.patch
maccess-remove-various-unused-weak-aliases.patch
maccess-remove-duplicate-kerneldoc-comments.patch
maccess-clarify-kerneldoc-comments.patch
maccess-update-the-top-of-file-comment.patch
maccess-rename-strncpy_from_unsafe_user-to-strncpy_from_user_nofault.patch
maccess-rename-strncpy_from_unsafe_strict-to-strncpy_from_kernel_nofault.patch
maccess-rename-strnlen_unsafe_user-to-strnlen_user_nofault.patch
maccess-remove-probe_read_common-and-probe_write_common.patch
maccess-unify-the-probe-kernel-arch-hooks.patch
bpf-factor-out-a-bpf_trace_copy_string-helper.patch
bpf-handle-the-compat-string-in-bpf_trace_copy_string-better.patch
bpf-rework-the-compat-kernel-probe-handling.patch
tracing-kprobes-handle-mixed-kernel-userspace-probes-better.patch
maccess-remove-strncpy_from_unsafe.patch
maccess-always-use-strict-semantics-for-probe_kernel_read.patch
maccess-move-user-access-routines-together.patch
maccess-allow-architectures-to-provide-kernel-probing-directly.patch
x86-use-non-set_fs-based-maccess-routines.patch
maccess-return-erange-when-copy_from_kernel_nofault_allowed-fails.patch
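
For context, a minimal sketch of what the reworked compat helpers mean from
the BPF program side.  This is an illustration only, not part of the patch;
it assumes a libbpf CO-RE build (a generated vmlinux.h, clang -target bpf
with the appropriate -D__TARGET_ARCH_* define) and a kernel that provides
do_sys_openat2(); the program and variable names are hypothetical.  The
legacy bpf_probe_read_str() helper is backed by bpf_probe_read_compat_str()
above: on architectures with non-overlapping address spaces it now
dispatches on the pointer value, so the user address below goes through the
user probe path (which skips the lockdown check), while new programs should
use the explicit _user/_kernel variants.

// SPDX-License-Identifier: GPL-2.0
/*
 * Illustrative sketch only; not part of this patch.  Assumes a libbpf
 * CO-RE build and a kernel that has do_sys_openat2().
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";	/* the probe helpers are gpl_only */

char fname_compat[256];	/* filled via the legacy compat helper */
char fname_user[256];	/* filled via the explicit user helper */

SEC("kprobe/do_sys_openat2")
int BPF_KPROBE(trace_openat2, int dfd, const char *filename)
{
	/*
	 * Legacy helper: with this patch the kernel compares the pointer
	 * against TASK_SIZE and, for this user address, takes the user
	 * probe path (no lockdown check on that path).
	 */
	bpf_probe_read_str(fname_compat, sizeof(fname_compat), filename);

	/* Preferred explicit form for new programs: */
	bpf_probe_read_user_str(fname_user, sizeof(fname_user), filename);

	return 0;
}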