From: Peilin Ye <peilin.ye@xxxxxxxxxxxxx>

Currently, there is a copy for each page when dumping VMAs to pipe handlers using dump_emit_page(). For example:

  fs/binfmt_elf.c:elf_core_dump()
    fs/coredump.c:dump_user_range()
                 :dump_emit_page()
      fs/read_write.c:__kernel_write_iter()
        fs/pipe.c:pipe_write()
          lib/iov_iter.c:copy_page_from_iter()

Use vmsplice_to_pipe() instead of __kernel_write_iter() to avoid this copy for pipe handlers.

Tested by dumping a 32-GByte core into a simple handler that splice()s from stdin to disk in a loop, PIPE_DEF_BUFFERS (16) pages at a time.

                        Before          After           Improved by
  Time to Completion    40.77 seconds   35.49 seconds   12.95%
  CPU Usage             92.27%          86.40%          6.36%

Suggested-by: Cong Wang <cong.wang@xxxxxxxxxxxxx>
Signed-off-by: Peilin Ye <peilin.ye@xxxxxxxxxxxxx>
---
 fs/coredump.c            | 10 +++++++++-
 fs/splice.c              |  4 ++--
 include/linux/coredump.h |  3 +++
 include/linux/splice.h   |  3 +++
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c index f27d734f3102..4078069ede88 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -42,6 +42,7 @@ #include <linux/timekeeping.h> #include <linux/sysctl.h> #include <linux/elf.h> +#include <linux/splice.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -586,6 +587,8 @@ void do_coredump(const kernel_siginfo_t *siginfo) goto fail_unlock; } + set_bit(COREDUMP_USE_PIPE, &cprm.flags); + if (cprm.limit == 1) { /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 
* @@ -861,7 +864,12 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) return 0; pos = file->f_pos; iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); - n = __kernel_write_iter(cprm->file, &iter, &pos); + + if (test_bit(COREDUMP_USE_PIPE, &cprm->flags)) + n = vmsplice_to_pipe(file, &iter, 0); + else + n = __kernel_write_iter(cprm->file, &iter, &pos); + if (n != PAGE_SIZE) return 0; file->f_pos = pos; diff --git a/fs/splice.c b/fs/splice.c index 5969b7a1d353..c9be20f4115e 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1234,8 +1234,8 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter, * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. */ -static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, - unsigned int flags) +long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, + unsigned int flags) { struct pipe_inode_info *pipe; long ret = 0; diff --git a/include/linux/coredump.h b/include/linux/coredump.h index d3eba4360150..3e34009487bf 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,8 +28,11 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + unsigned long flags; }; +#define COREDUMP_USE_PIPE 0 + /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. 
diff --git a/include/linux/splice.h b/include/linux/splice.h index a55179fd60fc..38b3560a318b 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -10,6 +10,7 @@ #define SPLICE_H #include <linux/pipe_fs_i.h> +#include <linux/uio.h> /* * Flags passed in from splice/tee/vmsplice @@ -81,6 +82,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, extern long do_splice(struct file *in, loff_t *off_in, struct file *out, loff_t *off_out, size_t len, unsigned int flags); +extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, + unsigned int flags); extern long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags); -- 2.20.1