We are in the middle of the merge window right now but I expect Kees could take a simplified patch (aka no knob) after the merge window closes and get it into linux-next. Which should give plenty of time to spot any regressions caused by sorting the vmas. Eric > > Please let me know your thoughts! > > Best, > Brian Mak > >> Eric >> >>> Signed-off-by: Brian Mak <makb@xxxxxxxxxxx> >>> --- >>> >>> Hi all, >>> >>> My initial testing with a program that spawns several threads and allocates heap >>> memory shows that this patch does indeed prioritize information such as stacks, >>> which is crucial to forming a backtrace and debugging core dumps. >>> >>> Requesting for comments on the following: >>> >>> Are there cases where this might not necessarily prioritize dumping VMAs >>> needed to obtain a usable backtrace? >>> >>> Thanks, >>> Brian Mak >>> >>> fs/binfmt_elf.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-- >>> 1 file changed, 62 insertions(+), 2 deletions(-) >>> >>> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c >>> index 19fa49cd9907..d45240b0748d 100644 >>> --- a/fs/binfmt_elf.c >>> +++ b/fs/binfmt_elf.c >>> @@ -13,6 +13,7 @@ >>> #include <linux/module.h> >>> #include <linux/kernel.h> >>> #include <linux/fs.h> >>> +#include <linux/debugfs.h> >>> #include <linux/log2.h> >>> #include <linux/mm.h> >>> #include <linux/mman.h> >>> @@ -37,6 +38,7 @@ >>> #include <linux/elf-randomize.h> >>> #include <linux/utsname.h> >>> #include <linux/coredump.h> >>> +#include <linux/sort.h> >>> #include <linux/sched.h> >>> #include <linux/sched/coredump.h> >>> #include <linux/sched/task_stack.h> >>> @@ -1990,6 +1992,22 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, >>> shdr4extnum->sh_info = segs; >>> } >>> >>> +static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr) >>> +{ >>> + const struct core_vma_metadata *vma_meta_lhs = *(const struct core_vma_metadata **) >>> + vma_meta_lhs_ptr; >>> + const struct core_vma_metadata *vma_meta_rhs = *(const struct core_vma_metadata **) >>> + vma_meta_rhs_ptr; >>> + >>> + if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size) >>> + return -1; >>> + if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size) >>> + return 1; >>> + return 0; >>> +} >>> + >>> +static bool sort_elf_core_vmas = true; >>> + >>> /* >>> * Actual dumper >>> * >>> @@ -2008,6 +2026,7 @@ static int elf_core_dump(struct coredump_params *cprm) >>> struct elf_shdr *shdr4extnum = NULL; >>> Elf_Half e_phnum; >>> elf_addr_t e_shoff; >>> + struct core_vma_metadata **sorted_vmas = NULL; >>> >>> /* >>> * The number of segs are recored into ELF header as 16bit value. >>> @@ -2071,11 +2090,27 @@ static int elf_core_dump(struct coredump_params *cprm) >>> if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) >>> goto end_coredump; >>> >>> + /* Allocate memory to sort VMAs and sort if needed. */ >>> + if (sort_elf_core_vmas) >>> + sorted_vmas = kvmalloc_array(cprm->vma_count, sizeof(*sorted_vmas), GFP_KERNEL); >>> + >>> + if (!ZERO_OR_NULL_PTR(sorted_vmas)) { >>> + for (i = 0; i < cprm->vma_count; i++) >>> + sorted_vmas[i] = cprm->vma_meta + i; >>> + >>> + sort(sorted_vmas, cprm->vma_count, sizeof(*sorted_vmas), cmp_vma_size, NULL); >>> + } >>> + >>> /* Write program headers for segments dump */ >>> for (i = 0; i < cprm->vma_count; i++) { >>> - struct core_vma_metadata *meta = cprm->vma_meta + i; >>> + struct core_vma_metadata *meta; >>> struct elf_phdr phdr; >>> >>> + if (ZERO_OR_NULL_PTR(sorted_vmas)) >>> + meta = cprm->vma_meta + i; >>> + else >>> + meta = sorted_vmas[i]; >>> + >>> phdr.p_type = PT_LOAD; >>> phdr.p_offset = offset; >>> phdr.p_vaddr = meta->start; >>> @@ -2111,7 +2146,12 @@ static int elf_core_dump(struct coredump_params *cprm) >>> dump_skip_to(cprm, dataoff); >>> >>> for (i = 0; i < cprm->vma_count; i++) { >>> - struct core_vma_metadata *meta = cprm->vma_meta + i; >>> + struct core_vma_metadata *meta; >>> + >>> + if (ZERO_OR_NULL_PTR(sorted_vmas)) >>> + meta = cprm->vma_meta + i; >>> + else >>> + meta = sorted_vmas[i]; >>> >>> if (!dump_user_range(cprm, meta->start, meta->dump_size)) >>> goto end_coredump; >>> @@ -2128,10 +2168,26 @@ static int elf_core_dump(struct coredump_params *cprm) >>> end_coredump: >>> free_note_info(&info); >>> kfree(shdr4extnum); >>> + kvfree(sorted_vmas); >>> kfree(phdr4note); >>> return has_dumped; >>> } >>> >>> +#ifdef CONFIG_DEBUG_FS >>> + >>> +static struct dentry *elf_core_debugfs; >>> + >>> +static int __init init_elf_core_debugfs(void) >>> +{ >>> + elf_core_debugfs = debugfs_create_dir("elf_core", NULL); >>> + debugfs_create_bool("sort_elf_core_vmas", 0644, elf_core_debugfs, &sort_elf_core_vmas); >>> + return 0; >>> +} >>> + >>> +fs_initcall(init_elf_core_debugfs); >>> + >>> +#endif /* CONFIG_DEBUG_FS */ >>> + >>> #endif /* CONFIG_ELF_CORE */ >>> >>> static int __init init_elf_binfmt(void) >>> @@ -2144,6 +2200,10 @@ static void __exit exit_elf_binfmt(void) >>> { >>> /* Remove the COFF and ELF loaders. */ >>> unregister_binfmt(&elf_format); >>> + >>> +#if defined(CONFIG_ELF_CORE) && defined(CONFIG_DEBUG_FS) >>> + debugfs_remove(elf_core_debugfs); >>> +#endif >>> } >>> >>> core_initcall(init_elf_binfmt); >>> >>> base-commit: 94ede2a3e9135764736221c080ac7c0ad993dc2d