Quoting Alex Kelly (alex.page.kelly@xxxxxxxxx):
> This prepares for making core dump functionality optional.
>
> The variable "suid_dumpable" and associated functions are left in fs/exec.c
> because they're used elsewhere, such as in ptrace.
>
> Signed-off-by: Alex Kelly <alex.page.kelly@xxxxxxxxx>
> Reviewed-by: Josh Triplett <josh@xxxxxxxxxxxxxxxx>

Acked-by: Serge Hallyn <serge.hallyn@xxxxxxxxxxxxx>

> ---
> v2: This patch set is a second revision that follows some suggestions from
> Ingo Molnar and Josh Triplett. Specifically, authorship of commits is
> revised for consistency, and two additional patches cleaning up artifacts
> and making headers more sane are added.
>
> v3: This version fixes a few more authorship issues and some problems caused
> by a bad git send-email config. Sorry about the extra mails.
>
> v4: This version fixes some ordering issues pointed out by Kees Cook and Josh
> Triplett, such that the order of the functions moved to fs/coredump.c is now
> consistent with their original order in fs/exec.c. v4 also drops some extra
> blank lines unintentionally introduced in fs/coredump.c, to avoid the need to
> clean them up later. That left the cleanup patch just reformatting a comment,
> so I dropped that patch. Some of the functions moved to coredump.c need a lot
> of cleaning up, but I'm not sure that those formatting changes should be
> folded into this patch series.
>
>  fs/Makefile           |   2 +-
>  fs/coredump.c         | 689 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/exec.c             | 647 +----------------------------------------
>  include/linux/sched.h |   1 +
>  4 files changed, 692 insertions(+), 647 deletions(-)
>  create mode 100644 fs/coredump.c
>
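Looks good to me. One forward-looking note on the "making core dump
functionality optional" goal: with the code isolated like this, the eventual
knob presumably ends up as something like the sketch below. CONFIG_COREDUMP
is my guess at a name -- nothing in this patch introduces it:

    config COREDUMP
            bool "Enable core dump support" if EXPERT
            default y
            help
              Generate core dump files when a process crashes.

plus, in fs/Makefile, building the new file conditionally:

    obj-$(CONFIG_COREDUMP) += coredump.o

together with a do_coredump() stub for the =n case. Nothing for this patch
to change; the split is exactly what makes that follow-up trivial.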
> diff --git a/fs/Makefile b/fs/Makefile
> index 2fb9779..8938f82 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
>  		attr.o bad_inode.o file.o filesystems.o namespace.o \
>  		seq_file.o xattr.o libfs.o fs-writeback.o \
>  		pnode.o drop_caches.o splice.o sync.o utimes.o \
> -		stack.o fs_struct.o statfs.o
> +		stack.o fs_struct.o statfs.o coredump.o
>  
>  ifeq ($(CONFIG_BLOCK),y)
>  obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
> diff --git a/fs/coredump.c b/fs/coredump.c
> new file mode 100644
> index 0000000..9692329
> --- /dev/null
> +++ b/fs/coredump.c
> @@ -0,0 +1,689 @@
> +#include <linux/slab.h>
> +#include <linux/file.h>
> +#include <linux/fdtable.h>
> +#include <linux/mm.h>
> +#include <linux/stat.h>
> +#include <linux/fcntl.h>
> +#include <linux/swap.h>
> +#include <linux/string.h>
> +#include <linux/init.h>
> +#include <linux/pagemap.h>
> +#include <linux/perf_event.h>
> +#include <linux/highmem.h>
> +#include <linux/spinlock.h>
> +#include <linux/key.h>
> +#include <linux/personality.h>
> +#include <linux/binfmts.h>
> +#include <linux/utsname.h>
> +#include <linux/pid_namespace.h>
> +#include <linux/module.h>
> +#include <linux/namei.h>
> +#include <linux/mount.h>
> +#include <linux/security.h>
> +#include <linux/syscalls.h>
> +#include <linux/tsacct_kern.h>
> +#include <linux/cn_proc.h>
> +#include <linux/audit.h>
> +#include <linux/tracehook.h>
> +#include <linux/kmod.h>
> +#include <linux/fsnotify.h>
> +#include <linux/fs_struct.h>
> +#include <linux/pipe_fs_i.h>
> +#include <linux/oom.h>
> +#include <linux/compat.h>
> +
> +#include <asm/uaccess.h>
> +#include <asm/mmu_context.h>
> +#include <asm/tlb.h>
> +#include <asm/exec.h>
> +
> +#include <trace/events/task.h>
> +#include "internal.h"
> +
> +#include <trace/events/sched.h>
> +
> +int core_uses_pid;
> +char core_pattern[CORENAME_MAX_SIZE] = "core";
> +unsigned int core_pipe_limit;
> +
> +struct core_name {
> +	char *corename;
> +	int used, size;
> +};
> +static atomic_t call_count = ATOMIC_INIT(1);
> +
> +/* The maximal length of core_pattern is also specified in sysctl.c */
> +
> +static int expand_corename(struct core_name *cn)
> +{
> +	char *old_corename = cn->corename;
> +
> +	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
> +	cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
> +
> +	if (!cn->corename) {
> +		kfree(old_corename);
> +		return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
> +static int cn_printf(struct core_name *cn, const char *fmt, ...)
> +{
> +	char *cur;
> +	int need;
> +	int ret;
> +	va_list arg;
> +
> +	va_start(arg, fmt);
> +	need = vsnprintf(NULL, 0, fmt, arg);
> +	va_end(arg);
> +
> +	if (likely(need < cn->size - cn->used - 1))
> +		goto out_printf;
> +
> +	ret = expand_corename(cn);
> +	if (ret)
> +		goto expand_fail;
> +
> +out_printf:
> +	cur = cn->corename + cn->used;
> +	va_start(arg, fmt);
> +	vsnprintf(cur, need + 1, fmt, arg);
> +	va_end(arg);
> +	cn->used += need;
> +	return 0;
> +
> +expand_fail:
> +	return ret;
> +}
> +
> +static void cn_escape(char *str)
> +{
> +	for (; *str; str++)
> +		if (*str == '/')
> +			*str = '!';
> +}
> +
> +static int cn_print_exe_file(struct core_name *cn)
> +{
> +	struct file *exe_file;
> +	char *pathbuf, *path;
> +	int ret;
> +
> +	exe_file = get_mm_exe_file(current->mm);
> +	if (!exe_file) {
> +		char *commstart = cn->corename + cn->used;
> +		ret = cn_printf(cn, "%s (path unknown)", current->comm);
> +		cn_escape(commstart);
> +		return ret;
> +	}
> +
> +	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
> +	if (!pathbuf) {
> +		ret = -ENOMEM;
> +		goto put_exe_file;
> +	}
> +
> +	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
> +	if (IS_ERR(path)) {
> +		ret = PTR_ERR(path);
> +		goto free_buf;
> +	}
> +
> +	cn_escape(path);
> +
> +	ret = cn_printf(cn, "%s", path);
> +
> +free_buf:
> +	kfree(pathbuf);
> +put_exe_file:
> +	fput(exe_file);
> +	return ret;
> +}
> +
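For anyone reading the moved code for the first time: expand_corename() and
cn_printf() together implement the usual two-pass vsnprintf() idiom --
measure with a NULL buffer, grow if needed, then print for real. A
self-contained userspace sketch of the same pattern (illustration only;
none of these names exist in the kernel):

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Append formatted text to a growable buffer. */
    static int buf_printf(char **buf, int *used, int *size, const char *fmt, ...)
    {
            va_list ap;
            int need;

            va_start(ap, fmt);
            need = vsnprintf(NULL, 0, fmt, ap);     /* pass 1: measure */
            va_end(ap);

            if (need >= *size - *used) {            /* no room for text + NUL */
                    char *tmp = realloc(*buf, *used + need + 1);

                    if (!tmp)
                            return -1;
                    *buf = tmp;
                    *size = *used + need + 1;
            }
            va_start(ap, fmt);
            vsnprintf(*buf + *used, need + 1, fmt, ap);     /* pass 2: print */
            va_end(ap);
            *used += need;
            return 0;
    }

    int main(void)
    {
            char *buf = NULL;
            int used = 0, size = 0;

            buf_printf(&buf, &used, &size, "core.%s.%d", "myapp", 4242);
            puts(buf);      /* -> core.myapp.4242 */
            free(buf);
            return 0;
    }

The kernel version trades precision for simplicity: it grows in
CORENAME_MAX_SIZE multiples via call_count rather than sizing exactly.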
> +/* format_corename will inspect the pattern parameter, and output a
> + * name into corename, which must have space for at least
> + * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
> + */
> +static int format_corename(struct core_name *cn, long signr)
> +{
> +	const struct cred *cred = current_cred();
> +	const char *pat_ptr = core_pattern;
> +	int ispipe = (*pat_ptr == '|');
> +	int pid_in_pattern = 0;
> +	int err = 0;
> +
> +	cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
> +	cn->corename = kmalloc(cn->size, GFP_KERNEL);
> +	cn->used = 0;
> +
> +	if (!cn->corename)
> +		return -ENOMEM;
> +
> +	/* Repeat as long as we have more pattern to process and more output
> +	   space */
> +	while (*pat_ptr) {
> +		if (*pat_ptr != '%') {
> +			if (*pat_ptr == 0)
> +				goto out;
> +			err = cn_printf(cn, "%c", *pat_ptr++);
> +		} else {
> +			switch (*++pat_ptr) {
> +			/* single % at the end, drop that */
> +			case 0:
> +				goto out;
> +			/* Double percent, output one percent */
> +			case '%':
> +				err = cn_printf(cn, "%c", '%');
> +				break;
> +			/* pid */
> +			case 'p':
> +				pid_in_pattern = 1;
> +				err = cn_printf(cn, "%d",
> +					      task_tgid_vnr(current));
> +				break;
> +			/* uid */
> +			case 'u':
> +				err = cn_printf(cn, "%d", cred->uid);
> +				break;
> +			/* gid */
> +			case 'g':
> +				err = cn_printf(cn, "%d", cred->gid);
> +				break;
> +			/* signal that caused the coredump */
> +			case 's':
> +				err = cn_printf(cn, "%ld", signr);
> +				break;
> +			/* UNIX time of coredump */
> +			case 't': {
> +				struct timeval tv;
> +				do_gettimeofday(&tv);
> +				err = cn_printf(cn, "%lu", tv.tv_sec);
> +				break;
> +			}
> +			/* hostname */
> +			case 'h': {
> +				char *namestart = cn->corename + cn->used;
> +				down_read(&uts_sem);
> +				err = cn_printf(cn, "%s",
> +					      utsname()->nodename);
> +				up_read(&uts_sem);
> +				cn_escape(namestart);
> +				break;
> +			}
> +			/* executable */
> +			case 'e': {
> +				char *commstart = cn->corename + cn->used;
> +				err = cn_printf(cn, "%s", current->comm);
> +				cn_escape(commstart);
> +				break;
> +			}
> +			case 'E':
> +				err = cn_print_exe_file(cn);
> +				break;
> +			/* core limit size */
> +			case 'c':
> +				err = cn_printf(cn, "%lu",
> +					      rlimit(RLIMIT_CORE));
> +				break;
> +			default:
> +				break;
> +			}
> +			++pat_ptr;
> +		}
> +
> +		if (err)
> +			return err;
> +	}
> +
> +	/* Backward compatibility with core_uses_pid:
> +	 *
> +	 * If core_pattern does not include a %p (as is the default)
> +	 * and core_uses_pid is set, then .%pid will be appended to
> +	 * the filename. Do not do this for piped commands. */
> +	if (!ispipe && !pid_in_pattern && core_uses_pid) {
> +		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
> +		if (err)
> +			return err;
> +	}
> +out:
> +	return ispipe;
> +}
> +
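A concrete example of what this function produces, with made-up values: if
an admin sets core_pattern to "/tmp/core.%e.%p.%s", then a SIGSEGV (signal
11) in a process named "myapp" with tgid 4242 expands to
"/tmp/core.myapp.4242.11". With the default pattern "core" plus
core_uses_pid set, the backward-compatibility branch above appends the pid
instead, giving "core.4242". And a leading '|' skips file naming entirely:
the rest of the string becomes the helper command line that do_coredump()
hands to the usermode helper further down.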
> +static int zap_process(struct task_struct *start, int exit_code)
> +{
> +	struct task_struct *t;
> +	int nr = 0;
> +
> +	start->signal->flags = SIGNAL_GROUP_EXIT;
> +	start->signal->group_exit_code = exit_code;
> +	start->signal->group_stop_count = 0;
> +
> +	t = start;
> +	do {
> +		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
> +		if (t != current && t->mm) {
> +			sigaddset(&t->pending.signal, SIGKILL);
> +			signal_wake_up(t, 1);
> +			nr++;
> +		}
> +	} while_each_thread(start, t);
> +
> +	return nr;
> +}
> +
> +static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
> +				struct core_state *core_state, int exit_code)
> +{
> +	struct task_struct *g, *p;
> +	unsigned long flags;
> +	int nr = -EAGAIN;
> +
> +	spin_lock_irq(&tsk->sighand->siglock);
> +	if (!signal_group_exit(tsk->signal)) {
> +		mm->core_state = core_state;
> +		nr = zap_process(tsk, exit_code);
> +	}
> +	spin_unlock_irq(&tsk->sighand->siglock);
> +	if (unlikely(nr < 0))
> +		return nr;
> +
> +	if (atomic_read(&mm->mm_users) == nr + 1)
> +		goto done;
> +	/*
> +	 * We should find and kill all tasks which use this mm, and we should
> +	 * count them correctly into ->nr_threads. We don't take tasklist
> +	 * lock, but this is safe wrt:
> +	 *
> +	 * fork:
> +	 *	None of sub-threads can fork after zap_process(leader). All
> +	 *	processes which were created before this point should be
> +	 *	visible to zap_threads() because copy_process() adds the new
> +	 *	process to the tail of init_task.tasks list, and lock/unlock
> +	 *	of ->siglock provides a memory barrier.
> +	 *
> +	 * do_exit:
> +	 *	The caller holds mm->mmap_sem. This means that the task which
> +	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
> +	 *	its ->mm.
> +	 *
> +	 * de_thread:
> +	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
> +	 *	we must see either old or new leader, this does not matter.
> +	 *	However, it can change p->sighand, so lock_task_sighand(p)
> +	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
> +	 *	it can't fail.
> +	 *
> +	 *	Note also that "g" can be the old leader with ->mm == NULL
> +	 *	and already unhashed and thus removed from ->thread_group.
> +	 *	This is OK, __unhash_process()->list_del_rcu() does not
> +	 *	clear the ->next pointer, we will find the new leader via
> +	 *	next_thread().
> +	 */
> +	rcu_read_lock();
> +	for_each_process(g) {
> +		if (g == tsk->group_leader)
> +			continue;
> +		if (g->flags & PF_KTHREAD)
> +			continue;
> +		p = g;
> +		do {
> +			if (p->mm) {
> +				if (unlikely(p->mm == mm)) {
> +					lock_task_sighand(p, &flags);
> +					nr += zap_process(p, exit_code);
> +					unlock_task_sighand(p, &flags);
> +				}
> +				break;
> +			}
> +		} while_each_thread(g, p);
> +	}
> +	rcu_read_unlock();
> +done:
> +	atomic_set(&core_state->nr_threads, nr);
> +	return nr;
> +}
> +
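The mm_users arithmetic above deserves a worked example: for one process
with three threads sharing a single mm, where one thread crashes,
zap_process() queues SIGKILL for the other two, so nr == 2; each thread
holds one mm_users reference, so mm_users == 3 == nr + 1 and the whole
tasklist walk is skipped. Only when some task outside the thread group
(e.g. a CLONE_VM child) still holds the mm do we fall into the
for_each_process() loop to hunt it down.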
> +static int coredump_wait(int exit_code, struct core_state *core_state)
> +{
> +	struct task_struct *tsk = current;
> +	struct mm_struct *mm = tsk->mm;
> +	int core_waiters = -EBUSY;
> +
> +	init_completion(&core_state->startup);
> +	core_state->dumper.task = tsk;
> +	core_state->dumper.next = NULL;
> +
> +	down_write(&mm->mmap_sem);
> +	if (!mm->core_state)
> +		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
> +	up_write(&mm->mmap_sem);
> +
> +	if (core_waiters > 0) {
> +		struct core_thread *ptr;
> +
> +		wait_for_completion(&core_state->startup);
> +		/*
> +		 * Wait for all the threads to become inactive, so that
> +		 * all the thread context (extended register state, like
> +		 * fpu etc) gets copied to the memory.
> +		 */
> +		ptr = core_state->dumper.next;
> +		while (ptr != NULL) {
> +			wait_task_inactive(ptr->task, 0);
> +			ptr = ptr->next;
> +		}
> +	}
> +
> +	return core_waiters;
> +}
> +
> +static void coredump_finish(struct mm_struct *mm)
> +{
> +	struct core_thread *curr, *next;
> +	struct task_struct *task;
> +
> +	next = mm->core_state->dumper.next;
> +	while ((curr = next) != NULL) {
> +		next = curr->next;
> +		task = curr->task;
> +		/*
> +		 * see exit_mm(), curr->task must not see
> +		 * ->task == NULL before we read ->next.
> +		 */
> +		smp_mb();
> +		curr->task = NULL;
> +		wake_up_process(task);
> +	}
> +
> +	mm->core_state = NULL;
> +}
> +
> +static void wait_for_dump_helpers(struct file *file)
> +{
> +	struct pipe_inode_info *pipe;
> +
> +	pipe = file->f_path.dentry->d_inode->i_pipe;
> +
> +	pipe_lock(pipe);
> +	pipe->readers++;
> +	pipe->writers--;
> +
> +	while ((pipe->readers > 1) && (!signal_pending(current))) {
> +		wake_up_interruptible_sync(&pipe->wait);
> +		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> +		pipe_wait(pipe);
> +	}
> +
> +	pipe->readers--;
> +	pipe->writers++;
> +	pipe_unlock(pipe);
> +
> +}
> +
> +
> +/*
> + * umh_pipe_setup
> + * helper function to customize the process used
> + * to collect the core in userspace. Specifically
> + * it sets up a pipe and installs it as fd 0 (stdin)
> + * for the process. Returns 0 on success, or
> + * PTR_ERR on failure.
> + * Note that it also sets the core limit to 1. This
> + * is a special value that we use to trap recursive
> + * core dumps
> + */
> +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
> +{
> +	struct file *files[2];
> +	struct fdtable *fdt;
> +	struct coredump_params *cp = (struct coredump_params *)info->data;
> +	struct files_struct *cf = current->files;
> +	int err = create_pipe_files(files, 0);
> +	if (err)
> +		return err;
> +
> +	cp->file = files[1];
> +
> +	sys_close(0);
> +	fd_install(0, files[0]);
> +	spin_lock(&cf->file_lock);
> +	fdt = files_fdtable(cf);
> +	__set_open_fd(0, fdt);
> +	__clear_close_on_exec(0, fdt);
> +	spin_unlock(&cf->file_lock);
> +
> +	/* and disallow core files too */
> +	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
> +
> +	return 0;
> +}
> +
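umh_pipe_setup() is the kernel half of the pipe protocol; for readers who
haven't written one, the userspace half is just a program that reads the
core image from stdin (fd 0, as installed above). A minimal sketch -- the
path and the %p argument convention here are illustrative, only the
stdin-carries-the-core part comes from the code above:

    /*
     * core-catcher.c: register with
     *   echo '|/usr/local/bin/core-catcher %p' > /proc/sys/kernel/core_pattern
     */
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char *argv[])
    {
            char path[64], buf[4096];
            ssize_t n;
            FILE *out;

            /* %p from core_pattern arrives as argv[1] */
            snprintf(path, sizeof(path), "/tmp/core.%s",
                     argc > 1 ? argv[1] : "unknown");
            out = fopen(path, "w");
            if (!out)
                    return 1;
            while ((n = read(0, buf, sizeof(buf))) > 0)
                    fwrite(buf, 1, n, out);
            fclose(out);
            return 0;
    }

Note how the RLIMIT_CORE = {1, 1} trick above means a crash of the helper
itself cannot recursively trigger another pipe dump.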
> +void do_coredump(long signr, int exit_code, struct pt_regs *regs)
> +{
> +	struct core_state core_state;
> +	struct core_name cn;
> +	struct mm_struct *mm = current->mm;
> +	struct linux_binfmt * binfmt;
> +	const struct cred *old_cred;
> +	struct cred *cred;
> +	int retval = 0;
> +	int flag = 0;
> +	int ispipe;
> +	bool need_nonrelative = false;
> +	static atomic_t core_dump_count = ATOMIC_INIT(0);
> +	struct coredump_params cprm = {
> +		.signr = signr,
> +		.regs = regs,
> +		.limit = rlimit(RLIMIT_CORE),
> +		/*
> +		 * We must use the same mm->flags while dumping core to avoid
> +		 * inconsistency of bit flags, since this flag is not protected
> +		 * by any locks.
> +		 */
> +		.mm_flags = mm->flags,
> +	};
> +
> +	audit_core_dumps(signr);
> +
> +	binfmt = mm->binfmt;
> +	if (!binfmt || !binfmt->core_dump)
> +		goto fail;
> +	if (!__get_dumpable(cprm.mm_flags))
> +		goto fail;
> +
> +	cred = prepare_creds();
> +	if (!cred)
> +		goto fail;
> +	/*
> +	 * We cannot trust fsuid as being the "true" uid of the process
> +	 * nor do we know its entire history. We only know it was tainted
> +	 * so we dump it as root in mode 2, and only into a controlled
> +	 * environment (pipe handler or fully qualified path).
> +	 */
> +	if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
> +		/* Setuid core dump mode */
> +		flag = O_EXCL;		/* Stop rewrite attacks */
> +		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
> +		need_nonrelative = true;
> +	}
> +
> +	retval = coredump_wait(exit_code, &core_state);
> +	if (retval < 0)
> +		goto fail_creds;
> +
> +	old_cred = override_creds(cred);
> +
> +	/*
> +	 * Clear any false indication of pending signals that might
> +	 * be seen by the filesystem code called to write the core file.
> +	 */
> +	clear_thread_flag(TIF_SIGPENDING);
> +
> +	ispipe = format_corename(&cn, signr);
> +
> +	if (ispipe) {
> +		int dump_count;
> +		char **helper_argv;
> +
> +		if (ispipe < 0) {
> +			printk(KERN_WARNING "format_corename failed\n");
> +			printk(KERN_WARNING "Aborting core\n");
> +			goto fail_corename;
> +		}
> +
> +		if (cprm.limit == 1) {
> +			/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
> +			 *
> +			 * Normally core limits are irrelevant to pipes, since
> +			 * we're not writing to the file system, but we use
> +			 * cprm.limit of 1 here as a special value, this is a
> +			 * consistent way to catch recursive crashes.
> +			 * We can still crash if the core_pattern binary sets
> +			 * RLIM_CORE = !1, but it runs as root, and can do
> +			 * lots of stupid things.
> +			 *
> +			 * Note that we use task_tgid_vnr here to grab the pid
> +			 * of the process group leader. That way we get the
> +			 * right pid if a thread in a multi-threaded
> +			 * core_pattern process dies.
> +			 */
> +			printk(KERN_WARNING
> +				"Process %d(%s) has RLIMIT_CORE set to 1\n",
> +				task_tgid_vnr(current), current->comm);
> +			printk(KERN_WARNING "Aborting core\n");
> +			goto fail_unlock;
> +		}
> +		cprm.limit = RLIM_INFINITY;
> +
> +		dump_count = atomic_inc_return(&core_dump_count);
> +		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
> +			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
> +			       task_tgid_vnr(current), current->comm);
> +			printk(KERN_WARNING "Skipping core dump\n");
> +			goto fail_dropcount;
> +		}
> +
> +		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
> +		if (!helper_argv) {
> +			printk(KERN_WARNING "%s failed to allocate memory\n",
> +			       __func__);
> +			goto fail_dropcount;
> +		}
> +
> +		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
> +					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
> +					NULL, &cprm);
> +		argv_free(helper_argv);
> +		if (retval) {
> +			printk(KERN_INFO "Core dump to %s pipe failed\n",
> +			       cn.corename);
> +			goto close_fail;
> +		}
> +	} else {
> +		struct inode *inode;
> +
> +		if (cprm.limit < binfmt->min_coredump)
> +			goto fail_unlock;
> +
> +		if (need_nonrelative && cn.corename[0] != '/') {
> +			printk(KERN_WARNING "Pid %d(%s) can only dump core "\
> +				"to fully qualified path!\n",
> +				task_tgid_vnr(current), current->comm);
> +			printk(KERN_WARNING "Skipping core dump\n");
> +			goto fail_unlock;
> +		}
> +
> +		cprm.file = filp_open(cn.corename,
> +				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
> +				 0600);
> +		if (IS_ERR(cprm.file))
> +			goto fail_unlock;
> +
> +		inode = cprm.file->f_path.dentry->d_inode;
> +		if (inode->i_nlink > 1)
> +			goto close_fail;
> +		if (d_unhashed(cprm.file->f_path.dentry))
> +			goto close_fail;
> +		/*
> +		 * AK: actually i see no reason to not allow this for named
> +		 * pipes etc, but keep the previous behaviour for now.
> +		 */
> +		if (!S_ISREG(inode->i_mode))
> +			goto close_fail;
> +		/*
> +		 * Don't allow local users to get cute and trick others to
> +		 * coredump into their pre-created files.
> +		 */
> +		if (!uid_eq(inode->i_uid, current_fsuid()))
> +			goto close_fail;
> +		if (!cprm.file->f_op || !cprm.file->f_op->write)
> +			goto close_fail;
> +		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
> +			goto close_fail;
> +	}
> +
> +	retval = binfmt->core_dump(&cprm);
> +	if (retval)
> +		current->signal->group_exit_code |= 0x80;
> +
> +	if (ispipe && core_pipe_limit)
> +		wait_for_dump_helpers(cprm.file);
> +close_fail:
> +	if (cprm.file)
> +		filp_close(cprm.file, NULL);
> +fail_dropcount:
> +	if (ispipe)
> +		atomic_dec(&core_dump_count);
> +fail_unlock:
> +	kfree(cn.corename);
> +fail_corename:
> +	coredump_finish(mm);
> +	revert_creds(old_cred);
> +fail_creds:
> +	put_cred(cred);
> +fail:
> +	return;
> +}
> +
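One nit worth folding in while the code is moving anyway: the bare "2" in
the filp_open() flags is O_RDWR, and spelling it out would save the next
reader a trip to the header, i.e.

    		cprm.file = filp_open(cn.corename,
    				 O_CREAT | O_RDWR | O_NOFOLLOW | O_LARGEFILE | flag,
    				 0600);

Behaviour is identical; O_RDWR is defined as 2.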
> +/*
> + * Core dumping helper functions. These are the only things you should
> + * do on a core-file: use only these functions to write out all the
> + * necessary info.
> + */
> +int dump_write(struct file *file, const void *addr, int nr)
> +{
> +	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
> +}
> +EXPORT_SYMBOL(dump_write);
> +
> +int dump_seek(struct file *file, loff_t off)
> +{
> +	int ret = 1;
> +
> +	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
> +		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
> +			return 0;
> +	} else {
> +		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
> +
> +		if (!buf)
> +			return 0;
> +		while (off > 0) {
> +			unsigned long n = off;
> +
> +			if (n > PAGE_SIZE)
> +				n = PAGE_SIZE;
> +			if (!dump_write(file, buf, n)) {
> +				ret = 0;
> +				break;
> +			}
> +			off -= n;
> +		}
> +		free_page((unsigned long)buf);
> +	}
> +	return ret;
> +}
> +EXPORT_SYMBOL(dump_seek);
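For context on how these two exports are consumed: the binfmt core dumpers
emit the image as interleaved writes and seeks, using dump_seek() to leave
holes for pages not worth storing. A hypothetical dumper fragment, not
taken from any real binfmt handler:

    	/* skip a page not worth storing, then emit a mapped one */
    	if (!dump_seek(cprm->file, PAGE_SIZE))
    		goto end_coredump;
    	if (!dump_write(cprm->file, kaddr, PAGE_SIZE))
    		goto end_coredump;

Note the boolean convention: both helpers return 1 on success and 0 on
failure, the opposite of the usual 0/-errno kernel style.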
> diff --git a/fs/exec.c b/fs/exec.c
> index 574cf4d..b604050 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -66,19 +66,8 @@
>  
>  #include <trace/events/sched.h>
>  
> -int core_uses_pid;
> -char core_pattern[CORENAME_MAX_SIZE] = "core";
> -unsigned int core_pipe_limit;
>  int suid_dumpable = 0;
>  
> -struct core_name {
> -	char *corename;
> -	int used, size;
> -};
> -static atomic_t call_count = ATOMIC_INIT(1);
> -
> -/* The maximal length of core_pattern is also specified in sysctl.c */
> -
>  static LIST_HEAD(formats);
>  static DEFINE_RWLOCK(binfmt_lock);
>  
> @@ -1632,353 +1621,6 @@ void set_binfmt(struct linux_binfmt *new)
>  
>  EXPORT_SYMBOL(set_binfmt);
>  
> -static int expand_corename(struct core_name *cn)
> -{
> -	char *old_corename = cn->corename;
> -
> -	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
> -	cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
> -
> -	if (!cn->corename) {
> -		kfree(old_corename);
> -		return -ENOMEM;
> -	}
> -
> -	return 0;
> -}
> -
> -static int cn_printf(struct core_name *cn, const char *fmt, ...)
> -{
> -	char *cur;
> -	int need;
> -	int ret;
> -	va_list arg;
> -
> -	va_start(arg, fmt);
> -	need = vsnprintf(NULL, 0, fmt, arg);
> -	va_end(arg);
> -
> -	if (likely(need < cn->size - cn->used - 1))
> -		goto out_printf;
> -
> -	ret = expand_corename(cn);
> -	if (ret)
> -		goto expand_fail;
> -
> -out_printf:
> -	cur = cn->corename + cn->used;
> -	va_start(arg, fmt);
> -	vsnprintf(cur, need + 1, fmt, arg);
> -	va_end(arg);
> -	cn->used += need;
> -	return 0;
> -
> -expand_fail:
> -	return ret;
> -}
> -
> -static void cn_escape(char *str)
> -{
> -	for (; *str; str++)
> -		if (*str == '/')
> -			*str = '!';
> -}
> -
> -static int cn_print_exe_file(struct core_name *cn)
> -{
> -	struct file *exe_file;
> -	char *pathbuf, *path;
> -	int ret;
> -
> -	exe_file = get_mm_exe_file(current->mm);
> -	if (!exe_file) {
> -		char *commstart = cn->corename + cn->used;
> -		ret = cn_printf(cn, "%s (path unknown)", current->comm);
> -		cn_escape(commstart);
> -		return ret;
> -	}
> -
> -	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
> -	if (!pathbuf) {
> -		ret = -ENOMEM;
> -		goto put_exe_file;
> -	}
> -
> -	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
> -	if (IS_ERR(path)) {
> -		ret = PTR_ERR(path);
> -		goto free_buf;
> -	}
> -
> -	cn_escape(path);
> -
> -	ret = cn_printf(cn, "%s", path);
> -
> -free_buf:
> -	kfree(pathbuf);
> -put_exe_file:
> -	fput(exe_file);
> -	return ret;
> -}
> -
> -/* format_corename will inspect the pattern parameter, and output a
> - * name into corename, which must have space for at least
> - * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
> - */
> -static int format_corename(struct core_name *cn, long signr)
> -{
> -	const struct cred *cred = current_cred();
> -	const char *pat_ptr = core_pattern;
> -	int ispipe = (*pat_ptr == '|');
> -	int pid_in_pattern = 0;
> -	int err = 0;
> -
> -	cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
> -	cn->corename = kmalloc(cn->size, GFP_KERNEL);
> -	cn->used = 0;
> -
> -	if (!cn->corename)
> -		return -ENOMEM;
> -
> -	/* Repeat as long as we have more pattern to process and more output
> -	   space */
> -	while (*pat_ptr) {
> -		if (*pat_ptr != '%') {
> -			if (*pat_ptr == 0)
> -				goto out;
> -			err = cn_printf(cn, "%c", *pat_ptr++);
> -		} else {
> -			switch (*++pat_ptr) {
> -			/* single % at the end, drop that */
> -			case 0:
> -				goto out;
> -			/* Double percent, output one percent */
> -			case '%':
> -				err = cn_printf(cn, "%c", '%');
> -				break;
> -			/* pid */
> -			case 'p':
> -				pid_in_pattern = 1;
> -				err = cn_printf(cn, "%d",
> -					      task_tgid_vnr(current));
> -				break;
> -			/* uid */
> -			case 'u':
> -				err = cn_printf(cn, "%d", cred->uid);
> -				break;
> -			/* gid */
> -			case 'g':
> -				err = cn_printf(cn, "%d", cred->gid);
> -				break;
> -			/* signal that caused the coredump */
> -			case 's':
> -				err = cn_printf(cn, "%ld", signr);
> -				break;
> -			/* UNIX time of coredump */
> -			case 't': {
> -				struct timeval tv;
> -				do_gettimeofday(&tv);
> -				err = cn_printf(cn, "%lu", tv.tv_sec);
> -				break;
> -			}
> -			/* hostname */
> -			case 'h': {
> -				char *namestart = cn->corename + cn->used;
> -				down_read(&uts_sem);
> -				err = cn_printf(cn, "%s",
> -					      utsname()->nodename);
> -				up_read(&uts_sem);
> -				cn_escape(namestart);
> -				break;
> -			}
> -			/* executable */
> -			case 'e': {
> -				char *commstart = cn->corename + cn->used;
> -				err = cn_printf(cn, "%s", current->comm);
> -				cn_escape(commstart);
> -				break;
> -			}
> -			case 'E':
> -				err = cn_print_exe_file(cn);
> -				break;
> -			/* core limit size */
> -			case 'c':
> -				err = cn_printf(cn, "%lu",
> -					      rlimit(RLIMIT_CORE));
> -				break;
> -			default:
> -				break;
> -			}
> -			++pat_ptr;
> -		}
> -
> -		if (err)
> -			return err;
> -	}
> -
> -	/* Backward compatibility with core_uses_pid:
> -	 *
> -	 * If core_pattern does not include a %p (as is the default)
> -	 * and core_uses_pid is set, then .%pid will be appended to
> -	 * the filename. Do not do this for piped commands. */
> -	if (!ispipe && !pid_in_pattern && core_uses_pid) {
> -		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
> -		if (err)
> -			return err;
> -	}
> -out:
> -	return ispipe;
> -}
> -
> -static int zap_process(struct task_struct *start, int exit_code)
> -{
> -	struct task_struct *t;
> -	int nr = 0;
> -
> -	start->signal->flags = SIGNAL_GROUP_EXIT;
> -	start->signal->group_exit_code = exit_code;
> -	start->signal->group_stop_count = 0;
> -
> -	t = start;
> -	do {
> -		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
> -		if (t != current && t->mm) {
> -			sigaddset(&t->pending.signal, SIGKILL);
> -			signal_wake_up(t, 1);
> -			nr++;
> -		}
> -	} while_each_thread(start, t);
> -
> -	return nr;
> -}
> -
> -static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
> -				struct core_state *core_state, int exit_code)
> -{
> -	struct task_struct *g, *p;
> -	unsigned long flags;
> -	int nr = -EAGAIN;
> -
> -	spin_lock_irq(&tsk->sighand->siglock);
> -	if (!signal_group_exit(tsk->signal)) {
> -		mm->core_state = core_state;
> -		nr = zap_process(tsk, exit_code);
> -	}
> -	spin_unlock_irq(&tsk->sighand->siglock);
> -	if (unlikely(nr < 0))
> -		return nr;
> -
> -	if (atomic_read(&mm->mm_users) == nr + 1)
> -		goto done;
> -	/*
> -	 * We should find and kill all tasks which use this mm, and we should
> -	 * count them correctly into ->nr_threads. We don't take tasklist
> -	 * lock, but this is safe wrt:
> -	 *
> -	 * fork:
> -	 *	None of sub-threads can fork after zap_process(leader). All
> -	 *	processes which were created before this point should be
> -	 *	visible to zap_threads() because copy_process() adds the new
> -	 *	process to the tail of init_task.tasks list, and lock/unlock
> -	 *	of ->siglock provides a memory barrier.
> -	 *
> -	 * do_exit:
> -	 *	The caller holds mm->mmap_sem. This means that the task which
> -	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
> -	 *	its ->mm.
> -	 *
> -	 * de_thread:
> -	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
> -	 *	we must see either old or new leader, this does not matter.
> -	 *	However, it can change p->sighand, so lock_task_sighand(p)
> -	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
> -	 *	it can't fail.
> -	 *
> -	 *	Note also that "g" can be the old leader with ->mm == NULL
> -	 *	and already unhashed and thus removed from ->thread_group.
> -	 *	This is OK, __unhash_process()->list_del_rcu() does not
> -	 *	clear the ->next pointer, we will find the new leader via
> -	 *	next_thread().
> -	 */
> -	rcu_read_lock();
> -	for_each_process(g) {
> -		if (g == tsk->group_leader)
> -			continue;
> -		if (g->flags & PF_KTHREAD)
> -			continue;
> -		p = g;
> -		do {
> -			if (p->mm) {
> -				if (unlikely(p->mm == mm)) {
> -					lock_task_sighand(p, &flags);
> -					nr += zap_process(p, exit_code);
> -					unlock_task_sighand(p, &flags);
> -				}
> -				break;
> -			}
> -		} while_each_thread(g, p);
> -	}
> -	rcu_read_unlock();
> -done:
> -	atomic_set(&core_state->nr_threads, nr);
> -	return nr;
> -}
> -
> -static int coredump_wait(int exit_code, struct core_state *core_state)
> -{
> -	struct task_struct *tsk = current;
> -	struct mm_struct *mm = tsk->mm;
> -	int core_waiters = -EBUSY;
> -
> -	init_completion(&core_state->startup);
> -	core_state->dumper.task = tsk;
> -	core_state->dumper.next = NULL;
> -
> -	down_write(&mm->mmap_sem);
> -	if (!mm->core_state)
> -		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
> -	up_write(&mm->mmap_sem);
> -
> -	if (core_waiters > 0) {
> -		struct core_thread *ptr;
> -
> -		wait_for_completion(&core_state->startup);
> -		/*
> -		 * Wait for all the threads to become inactive, so that
> -		 * all the thread context (extended register state, like
> -		 * fpu etc) gets copied to the memory.
> -		 */
> -		ptr = core_state->dumper.next;
> -		while (ptr != NULL) {
> -			wait_task_inactive(ptr->task, 0);
> -			ptr = ptr->next;
> -		}
> -	}
> -
> -	return core_waiters;
> -}
> -
> -static void coredump_finish(struct mm_struct *mm)
> -{
> -	struct core_thread *curr, *next;
> -	struct task_struct *task;
> -
> -	next = mm->core_state->dumper.next;
> -	while ((curr = next) != NULL) {
> -		next = curr->next;
> -		task = curr->task;
> -		/*
> -		 * see exit_mm(), curr->task must not see
> -		 * ->task == NULL before we read ->next.
> -		 */
> -		smp_mb();
> -		curr->task = NULL;
> -		wake_up_process(task);
> -	}
> -
> -	mm->core_state = NULL;
> -}
> -
>  /*
>   * set_dumpable converts traditional three-value dumpable to two flags and
>   * stores them into mm->flags. It modifies lower two bits of mm->flags, but
> @@ -2020,7 +1662,7 @@ void set_dumpable(struct mm_struct *mm, int value)
>  	}
>  }
>  
> -static int __get_dumpable(unsigned long mm_flags)
> +int __get_dumpable(unsigned long mm_flags)
>  {
>  	int ret;
>  
> @@ -2032,290 +1674,3 @@ int get_dumpable(struct mm_struct *mm)
>  {
>  	return __get_dumpable(mm->flags);
>  }
> -
> -static void wait_for_dump_helpers(struct file *file)
> -{
> -	struct pipe_inode_info *pipe;
> -
> -	pipe = file->f_path.dentry->d_inode->i_pipe;
> -
> -	pipe_lock(pipe);
> -	pipe->readers++;
> -	pipe->writers--;
> -
> -	while ((pipe->readers > 1) && (!signal_pending(current))) {
> -		wake_up_interruptible_sync(&pipe->wait);
> -		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> -		pipe_wait(pipe);
> -	}
> -
> -	pipe->readers--;
> -	pipe->writers++;
> -	pipe_unlock(pipe);
> -
> -}
> -
> -
> -/*
> - * umh_pipe_setup
> - * helper function to customize the process used
> - * to collect the core in userspace. Specifically
> - * it sets up a pipe and installs it as fd 0 (stdin)
> - * for the process. Returns 0 on success, or
> - * PTR_ERR on failure.
> - * Note that it also sets the core limit to 1. This
> - * is a special value that we use to trap recursive
> - * core dumps
> - */
> -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
> -{
> -	struct file *files[2];
> -	struct fdtable *fdt;
> -	struct coredump_params *cp = (struct coredump_params *)info->data;
> -	struct files_struct *cf = current->files;
> -	int err = create_pipe_files(files, 0);
> -	if (err)
> -		return err;
> -
> -	cp->file = files[1];
> -
> -	sys_close(0);
> -	fd_install(0, files[0]);
> -	spin_lock(&cf->file_lock);
> -	fdt = files_fdtable(cf);
> -	__set_open_fd(0, fdt);
> -	__clear_close_on_exec(0, fdt);
> -	spin_unlock(&cf->file_lock);
> -
> -	/* and disallow core files too */
> -	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
> -
> -	return 0;
> -}
> -
> -void do_coredump(long signr, int exit_code, struct pt_regs *regs)
> -{
> -	struct core_state core_state;
> -	struct core_name cn;
> -	struct mm_struct *mm = current->mm;
> -	struct linux_binfmt * binfmt;
> -	const struct cred *old_cred;
> -	struct cred *cred;
> -	int retval = 0;
> -	int flag = 0;
> -	int ispipe;
> -	bool need_nonrelative = false;
> -	static atomic_t core_dump_count = ATOMIC_INIT(0);
> -	struct coredump_params cprm = {
> -		.signr = signr,
> -		.regs = regs,
> -		.limit = rlimit(RLIMIT_CORE),
> -		/*
> -		 * We must use the same mm->flags while dumping core to avoid
> -		 * inconsistency of bit flags, since this flag is not protected
> -		 * by any locks.
> -		 */
> -		.mm_flags = mm->flags,
> -	};
> -
> -	audit_core_dumps(signr);
> -
> -	binfmt = mm->binfmt;
> -	if (!binfmt || !binfmt->core_dump)
> -		goto fail;
> -	if (!__get_dumpable(cprm.mm_flags))
> -		goto fail;
> -
> -	cred = prepare_creds();
> -	if (!cred)
> -		goto fail;
> -	/*
> -	 * We cannot trust fsuid as being the "true" uid of the process
> -	 * nor do we know its entire history. We only know it was tainted
> -	 * so we dump it as root in mode 2, and only into a controlled
> -	 * environment (pipe handler or fully qualified path).
> -	 */
> -	if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
> -		/* Setuid core dump mode */
> -		flag = O_EXCL;		/* Stop rewrite attacks */
> -		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
> -		need_nonrelative = true;
> -	}
> -
> -	retval = coredump_wait(exit_code, &core_state);
> -	if (retval < 0)
> -		goto fail_creds;
> -
> -	old_cred = override_creds(cred);
> -
> -	/*
> -	 * Clear any false indication of pending signals that might
> -	 * be seen by the filesystem code called to write the core file.
> -	 */
> -	clear_thread_flag(TIF_SIGPENDING);
> -
> -	ispipe = format_corename(&cn, signr);
> -
> -	if (ispipe) {
> -		int dump_count;
> -		char **helper_argv;
> -
> -		if (ispipe < 0) {
> -			printk(KERN_WARNING "format_corename failed\n");
> -			printk(KERN_WARNING "Aborting core\n");
> -			goto fail_corename;
> -		}
> -
> -		if (cprm.limit == 1) {
> -			/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
> -			 *
> -			 * Normally core limits are irrelevant to pipes, since
> -			 * we're not writing to the file system, but we use
> -			 * cprm.limit of 1 here as a speacial value, this is a
> -			 * consistent way to catch recursive crashes.
> -			 * We can still crash if the core_pattern binary sets
> -			 * RLIM_CORE = !1, but it runs as root, and can do
> -			 * lots of stupid things.
> -			 *
> -			 * Note that we use task_tgid_vnr here to grab the pid
> -			 * of the process group leader. That way we get the
> -			 * right pid if a thread in a multi-threaded
> -			 * core_pattern process dies.
> -			 */
> -			printk(KERN_WARNING
> -				"Process %d(%s) has RLIMIT_CORE set to 1\n",
> -				task_tgid_vnr(current), current->comm);
> -			printk(KERN_WARNING "Aborting core\n");
> -			goto fail_unlock;
> -		}
> -		cprm.limit = RLIM_INFINITY;
> -
> -		dump_count = atomic_inc_return(&core_dump_count);
> -		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
> -			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
> -			       task_tgid_vnr(current), current->comm);
> -			printk(KERN_WARNING "Skipping core dump\n");
> -			goto fail_dropcount;
> -		}
> -
> -		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
> -		if (!helper_argv) {
> -			printk(KERN_WARNING "%s failed to allocate memory\n",
> -			       __func__);
> -			goto fail_dropcount;
> -		}
> -
> -		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
> -					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
> -					NULL, &cprm);
> -		argv_free(helper_argv);
> -		if (retval) {
> -			printk(KERN_INFO "Core dump to %s pipe failed\n",
> -			       cn.corename);
> -			goto close_fail;
> -		}
> -	} else {
> -		struct inode *inode;
> -
> -		if (cprm.limit < binfmt->min_coredump)
> -			goto fail_unlock;
> -
> -		if (need_nonrelative && cn.corename[0] != '/') {
> -			printk(KERN_WARNING "Pid %d(%s) can only dump core "\
> -				"to fully qualified path!\n",
> -				task_tgid_vnr(current), current->comm);
> -			printk(KERN_WARNING "Skipping core dump\n");
> -			goto fail_unlock;
> -		}
> -
> -		cprm.file = filp_open(cn.corename,
> -				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
> -				 0600);
> -		if (IS_ERR(cprm.file))
> -			goto fail_unlock;
> -
> -		inode = cprm.file->f_path.dentry->d_inode;
> -		if (inode->i_nlink > 1)
> -			goto close_fail;
> -		if (d_unhashed(cprm.file->f_path.dentry))
> -			goto close_fail;
> -		/*
> -		 * AK: actually i see no reason to not allow this for named
> -		 * pipes etc, but keep the previous behaviour for now.
> -		 */
> -		if (!S_ISREG(inode->i_mode))
> -			goto close_fail;
> -		/*
> -		 * Dont allow local users get cute and trick others to coredump
> -		 * into their pre-created files.
> -		 */
> -		if (!uid_eq(inode->i_uid, current_fsuid()))
> -			goto close_fail;
> -		if (!cprm.file->f_op || !cprm.file->f_op->write)
> -			goto close_fail;
> -		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
> -			goto close_fail;
> -	}
> -
> -	retval = binfmt->core_dump(&cprm);
> -	if (retval)
> -		current->signal->group_exit_code |= 0x80;
> -
> -	if (ispipe && core_pipe_limit)
> -		wait_for_dump_helpers(cprm.file);
> -close_fail:
> -	if (cprm.file)
> -		filp_close(cprm.file, NULL);
> -fail_dropcount:
> -	if (ispipe)
> -		atomic_dec(&core_dump_count);
> -fail_unlock:
> -	kfree(cn.corename);
> -fail_corename:
> -	coredump_finish(mm);
> -	revert_creds(old_cred);
> -fail_creds:
> -	put_cred(cred);
> -fail:
> -	return;
> -}
> -
> -/*
> - * Core dumping helper functions. These are the only things you should
> - * do on a core-file: use only these functions to write out all the
> - * necessary info.
> - */
> -int dump_write(struct file *file, const void *addr, int nr)
> -{
> -	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
> -}
> -EXPORT_SYMBOL(dump_write);
> -
> -int dump_seek(struct file *file, loff_t off)
> -{
> -	int ret = 1;
> -
> -	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
> -		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
> -			return 0;
> -	} else {
> -		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
> -
> -		if (!buf)
> -			return 0;
> -		while (off > 0) {
> -			unsigned long n = off;
> -
> -			if (n > PAGE_SIZE)
> -				n = PAGE_SIZE;
> -			if (!dump_write(file, buf, n)) {
> -				ret = 0;
> -				break;
> -			}
> -			off -= n;
> -		}
> -		free_page((unsigned long)buf);
> -	}
> -	return ret;
> -}
> -EXPORT_SYMBOL(dump_seek);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index c147e70..7bb5047 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -413,6 +413,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
>  
>  extern void set_dumpable(struct mm_struct *mm, int value);
>  extern int get_dumpable(struct mm_struct *mm);
> +extern int __get_dumpable(unsigned long mm_flags);
>  
>  /* get/set_dumpable() values */
>  #define SUID_DUMPABLE_DISABLED	0
> -- 
> 1.7.11.2

-- 
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html