Re: [RFC PATCH] binfmt_elf: Dump smaller VMAs first in ELF cores

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Brian Mak <makb@xxxxxxxxxxx> writes:

> Large cores may be truncated in some scenarios, such as daemons with stop
> timeouts that are not large enough or lack of disk space. This impacts
> debuggability with large core dumps since critical information necessary to
> form a usable backtrace, such as stacks and shared library information, are
> omitted. We can mitigate the impact of core dump truncation by dumping
> smaller VMAs first, which may be more likely to contain memory for stacks
> and shared library information, thus allowing a usable backtrace to be
> formed.

This sounds theoretical.  Do you happen to have a description of a
motivating case?  A situtation that bit someone and resulted in a core
file that wasn't usable?

A concrete situation would help us imagine what possible caveats there
are with sorting vmas this way.

The most common case I am aware of is distributions setting the core
file size to 0 (ulimit -c 0).

One practical concern with this approach is that I think the ELF
specification says that program headers should be written in memory
order.  So a comment on your testing to see if gdb or rr or any of
the other debuggers that read core dumps cares would be appreciated.


> We implement this by sorting the VMAs by dump size and dumping in that
> order.

Since your concern is about stacks, and the kernel has information about
stacks it might be worth using that information explicitly when sorting
vmas, instead of just assuming stacks will be small.

I expect the priorities would look something like jit generated
executable code segments, stacks, and then heap data.

I don't have enough information what is causing your truncated core
dumps, so I can't guess what the actual problem is your are fighting,
so I could be wrong on priorities.

Though I do wonder if this might be a buggy interaction between
core dumps and something like signals, or io_uring.  If it is something
other than a shortage of storage space causing your truncated core
dumps I expect we should first debug why the coredumps are being
truncated rather than proceed directly to working around truncation.

Eric

> Signed-off-by: Brian Mak <makb@xxxxxxxxxxx>
> ---
>
> Hi all,
>
> My initial testing with a program that spawns several threads and allocates heap
> memory shows that this patch does indeed prioritize information such as stacks,
> which is crucial to forming a backtrace and debugging core dumps.
>
> Requesting for comments on the following:
>
> Are there cases where this might not necessarily prioritize dumping VMAs
> needed to obtain a usable backtrace?
>
> Thanks,
> Brian Mak
>
>  fs/binfmt_elf.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 62 insertions(+), 2 deletions(-)
>
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 19fa49cd9907..d45240b0748d 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -13,6 +13,7 @@
>  #include <linux/module.h>
>  #include <linux/kernel.h>
>  #include <linux/fs.h>
> +#include <linux/debugfs.h>
>  #include <linux/log2.h>
>  #include <linux/mm.h>
>  #include <linux/mman.h>
> @@ -37,6 +38,7 @@
>  #include <linux/elf-randomize.h>
>  #include <linux/utsname.h>
>  #include <linux/coredump.h>
> +#include <linux/sort.h>
>  #include <linux/sched.h>
>  #include <linux/sched/coredump.h>
>  #include <linux/sched/task_stack.h>
> @@ -1990,6 +1992,22 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
>  	shdr4extnum->sh_info = segs;
>  }
>  
> +static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr)
> +{
> +	const struct core_vma_metadata *vma_meta_lhs = *(const struct core_vma_metadata **)
> +		vma_meta_lhs_ptr;
> +	const struct core_vma_metadata *vma_meta_rhs = *(const struct core_vma_metadata **)
> +		vma_meta_rhs_ptr;
> +
> +	if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size)
> +		return -1;
> +	if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size)
> +		return 1;
> +	return 0;
> +}
> +
> +static bool sort_elf_core_vmas = true;
> +
>  /*
>   * Actual dumper
>   *
> @@ -2008,6 +2026,7 @@ static int elf_core_dump(struct coredump_params *cprm)
>  	struct elf_shdr *shdr4extnum = NULL;
>  	Elf_Half e_phnum;
>  	elf_addr_t e_shoff;
> +	struct core_vma_metadata **sorted_vmas = NULL;
>  
>  	/*
>  	 * The number of segs are recored into ELF header as 16bit value.
> @@ -2071,11 +2090,27 @@ static int elf_core_dump(struct coredump_params *cprm)
>  	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
>  		goto end_coredump;
>  
> +	/* Allocate memory to sort VMAs and sort if needed. */
> +	if (sort_elf_core_vmas)
> +		sorted_vmas = kvmalloc_array(cprm->vma_count, sizeof(*sorted_vmas), GFP_KERNEL);
> +
> +	if (!ZERO_OR_NULL_PTR(sorted_vmas)) {
> +		for (i = 0; i < cprm->vma_count; i++)
> +			sorted_vmas[i] = cprm->vma_meta + i;
> +
> +		sort(sorted_vmas, cprm->vma_count, sizeof(*sorted_vmas), cmp_vma_size, NULL);
> +	}
> +
>  	/* Write program headers for segments dump */
>  	for (i = 0; i < cprm->vma_count; i++) {
> -		struct core_vma_metadata *meta = cprm->vma_meta + i;
> +		struct core_vma_metadata *meta;
>  		struct elf_phdr phdr;
>  
> +		if (ZERO_OR_NULL_PTR(sorted_vmas))
> +			meta = cprm->vma_meta + i;
> +		else
> +			meta = sorted_vmas[i];
> +
>  		phdr.p_type = PT_LOAD;
>  		phdr.p_offset = offset;
>  		phdr.p_vaddr = meta->start;
> @@ -2111,7 +2146,12 @@ static int elf_core_dump(struct coredump_params *cprm)
>  	dump_skip_to(cprm, dataoff);
>  
>  	for (i = 0; i < cprm->vma_count; i++) {
> -		struct core_vma_metadata *meta = cprm->vma_meta + i;
> +		struct core_vma_metadata *meta;
> +
> +		if (ZERO_OR_NULL_PTR(sorted_vmas))
> +			meta = cprm->vma_meta + i;
> +		else
> +			meta = sorted_vmas[i];
>  
>  		if (!dump_user_range(cprm, meta->start, meta->dump_size))
>  			goto end_coredump;
> @@ -2128,10 +2168,26 @@ static int elf_core_dump(struct coredump_params *cprm)
>  end_coredump:
>  	free_note_info(&info);
>  	kfree(shdr4extnum);
> +	kvfree(sorted_vmas);
>  	kfree(phdr4note);
>  	return has_dumped;
>  }
>  
> +#ifdef CONFIG_DEBUG_FS
> +
> +static struct dentry *elf_core_debugfs;
> +
> +static int __init init_elf_core_debugfs(void)
> +{
> +	elf_core_debugfs = debugfs_create_dir("elf_core", NULL);
> +	debugfs_create_bool("sort_elf_core_vmas", 0644, elf_core_debugfs, &sort_elf_core_vmas);
> +	return 0;
> +}
> +
> +fs_initcall(init_elf_core_debugfs);
> +
> +#endif		/* CONFIG_DEBUG_FS */
> +
>  #endif		/* CONFIG_ELF_CORE */
>  
>  static int __init init_elf_binfmt(void)
> @@ -2144,6 +2200,10 @@ static void __exit exit_elf_binfmt(void)
>  {
>  	/* Remove the COFF and ELF loaders. */
>  	unregister_binfmt(&elf_format);
> +
> +#if defined(CONFIG_ELF_CORE) && defined(CONFIG_DEBUG_FS)
> +	debugfs_remove(elf_core_debugfs);
> +#endif
>  }
>  
>  core_initcall(init_elf_binfmt);
>
> base-commit: 94ede2a3e9135764736221c080ac7c0ad993dc2d




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux