On Sun, May 31, 2009 at 03:19:53PM -0700, Andrew Morton wrote: > On Mon, 1 Jun 2009 01:54:27 +0400 Alexey Dobriyan <adobriyan@xxxxxxxxx> wrote: > > > And BTW, there is something unnatural when executable path is attached > > to mm_struct(!) not task_struct, > > mm_struct is the central object for a heavyweight process. All threads > within that process share the same executable path (don't they?) so > attaching the executable path to the mm seems OK to me. OK, let's try this: [PATCH 1/9] exec_path 1/9: introduce ->exec_path and switch /proc/*/exe ->exec_path marks the executable which is associated with a running task. The binfmt loader decides which executable that is and can, in theory, assign anything. This is unlike the current status quo, where the first VM_EXECUTABLE mapping sort of marks the running executable. If an executable unmaps all of its VM_EXECUTABLE mappings, /proc/*/exe ceases to exist, ick! And userspace can't even use MAP_EXECUTABLE. Tasks which aren't created by running clone(2) and execve(2) (read: kernel threads) get an empty ->exec_path; ->exec_path is copied on clone(2) and put at do_exit() time. ->exec_path is going to replace struct mm_struct::exe_file et al and allows removing the VM_EXECUTABLE flag while keeping readlink("/proc/*/exe") working without a loop over all VMAs. 
Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx> --- fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 1 + fs/binfmt_flat.c | 1 + fs/binfmt_som.c | 1 + fs/proc/base.c | 38 ++++++++++++++------------------------ include/linux/sched.h | 25 +++++++++++++++++++++++++ kernel/exit.c | 1 + kernel/fork.c | 2 ++ 9 files changed, 47 insertions(+), 24 deletions(-) diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index b639dcf..a19b185 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -379,6 +379,7 @@ beyond_if: regs->gp = ex.a_gpvalue; #endif start_thread(regs, ex.a_entry, current->mm->start_stack); + set_task_exec_path(current, &bprm->file->f_path); return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 40381df..b815bfc 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -999,6 +999,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) #endif start_thread(regs, elf_entry, bprm->p); + set_task_exec_path(current, &bprm->file->f_path); retval = 0; out: kfree(loc); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index fdb66fa..f545504 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1185,6 +1185,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, seg++; } + set_task_exec_path(current, &file->f_path); return 0; } diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 697f6b5..a16f977 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -798,6 +798,7 @@ static int load_flat_file(struct linux_binprm * bprm, libinfo->lib_list[id].start_brk) + /* start brk */ stack_len); + set_task_exec_path(current, &bprm->file->f_path); return 0; err: return ret; diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index eff74b9..6c56262 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -174,6 +174,7 @@ static int map_som_binary(struct file *file, up_write(&current->mm->mmap_sem); if (retval > 0 || retval < -1024) retval = 0; + set_task_exec_path(current, &bprm->file->f_path); 
out: set_fs(old_fs); return retval; diff --git a/fs/proc/base.c b/fs/proc/base.c index 3326bbf..dc4ee6a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -201,6 +201,20 @@ static int proc_root_link(struct inode *inode, struct path *path) return result; } +static int proc_exe_link(struct inode *inode, struct path *path) +{ + struct task_struct *tsk; + + tsk = get_proc_task(inode); + if (!tsk) + return -ENOENT; + get_task_exec_path(tsk, path); + put_task_struct(tsk); + if (!path->mnt || !path->dentry) + return -ENOENT; + return 0; +} + /* * Return zero if current may access user memory in @task, -error if not. */ @@ -1302,30 +1316,6 @@ void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) newmm->exe_file = get_mm_exe_file(oldmm); } -static int proc_exe_link(struct inode *inode, struct path *exe_path) -{ - struct task_struct *task; - struct mm_struct *mm; - struct file *exe_file; - - task = get_proc_task(inode); - if (!task) - return -ENOENT; - mm = get_task_mm(task); - put_task_struct(task); - if (!mm) - return -ENOENT; - exe_file = get_mm_exe_file(mm); - mmput(mm); - if (exe_file) { - *exe_path = exe_file->f_path; - path_get(&exe_file->f_path); - fput(exe_file); - return 0; - } else - return -ENOENT; -} - static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc..6b2dd01 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1265,6 +1265,12 @@ struct task_struct { #endif /* CPU-specific state of this task */ struct thread_struct thread; + /* + * Executable, binfmt loader wants to associate with task + * (read: execve(2) argument). + * Empty, if concept isn't applicable, e. g. kernel thread. 
+ */ + struct path exec_path; /* filesystem information */ struct fs_struct *fs; /* open file information */ @@ -2403,6 +2409,25 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #define TASK_STATE_TO_CHAR_STR "RSDTtZX" +static inline void get_task_exec_path(struct task_struct *tsk, struct path *path) +{ + task_lock(tsk); + path_get(&tsk->exec_path); + *path = tsk->exec_path; + task_unlock(tsk); +} + +static inline void set_task_exec_path(struct task_struct *tsk, struct path *path) +{ + struct path old_path; + + path_get(path); + task_lock(tsk); + old_path = tsk->exec_path; + tsk->exec_path = *path; + task_unlock(tsk); + path_put(&old_path); +} #endif /* __KERNEL__ */ #endif diff --git a/kernel/exit.c b/kernel/exit.c index abf9cf3..8e70b54 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -962,6 +962,7 @@ NORET_TYPE void do_exit(long code) exit_sem(tsk); exit_files(tsk); + set_task_exec_path(tsk, &(struct path){ .mnt = NULL, .dentry = NULL }); exit_fs(tsk); check_stack_usage(); exit_thread(); diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd..c0ee931 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1191,6 +1191,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; + get_task_exec_path(current, &p->exec_path); + /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html