On Sun, Mar 15, 2015 at 8:00 AM, Josh Triplett <josh@xxxxxxxxxxxxxxxx> wrote: > diff --git a/include/linux/compat.h b/include/linux/compat.h > index 6c4a68d..c90df5a 100644 > --- a/include/linux/compat.h > +++ b/include/linux/compat.h > @@ -299,6 +299,8 @@ struct compat_clone4_args { > compat_ulong_t stack_start; > compat_ulong_t stack_size; > compat_ulong_t tls; > + compat_uptr_t clonefd; > + u32 clonefd_flags; > }; > > struct compat_statfs; > diff --git a/include/linux/sched.h b/include/linux/sched.h > index 9daa017..1dc680b 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1374,6 +1374,11 @@ struct task_struct { > > unsigned autoreap:1; /* Do not become a zombie on exit */ > > +#ifdef CONFIG_CLONEFD > + unsigned clonefd:1; /* Notify clonefd_wqh on exit */ > + wait_queue_head_t clonefd_wqh; > +#endif > + > unsigned long atomic_flags; /* Flags needing atomic access. */ > > struct restart_block restart_block; Idle thought: are there any concerns about the occupancy impact of adding a wait_queue_head to every task_struct, whether it has a clonefd or not? I guess we could reduce the size somewhat by just storing a struct file *clonefd_file in the task, and then have a separate structure (with the wqh and a task_struct*) referenced by file->private_data. Not sure whether the added complication would be worthwhile, though. > diff --git a/kernel/clonefd.c b/kernel/clonefd.c > new file mode 100644 > index 0000000..eac560c > --- /dev/null > +++ b/kernel/clonefd.c > @@ -0,0 +1,121 @@ > +/* > + * Support functions for CLONE_FD > + * > + * Copyright (c) 2015 Intel Corporation > + * Original authors: Josh Triplett <josh@xxxxxxxxxxxxxxxx> > + * Thiago Macieira <thiago@xxxxxxxxxxxx> > + */ > +#include <linux/anon_inodes.h> > +#include <linux/file.h> > +#include <linux/fs.h> > +#include <linux/poll.h> > +#include <linux/slab.h> > +#include "clonefd.h" > + > +static int clonefd_release(struct inode *inode, struct file *file) > +{ > + put_task_struct(file->private_data); > + return 0; > +} > + > +static unsigned int clonefd_poll(struct file *file, poll_table *wait) > +{ > + struct task_struct *p = file->private_data; > + poll_wait(file, &p->clonefd_wqh, wait); > + return p->exit_state ? (POLLIN | POLLRDNORM | POLLHUP) : 0; > +} > + > +static ssize_t clonefd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) > +{ > + struct task_struct *p = file->private_data; > + int ret = 0; > + > + /* EOF after first read */ > + if (*ppos) > + return 0; > + > + if (file->f_flags & O_NONBLOCK) > + ret = -EAGAIN; > + else > + ret = wait_event_interruptible(p->clonefd_wqh, p->exit_state); > + > + if (p->exit_state) { > + struct clonefd_info info = {}; > + cputime_t utime, stime; > + task_exit_code_status(p->exit_code, &info.code, &info.status); > + info.code &= ~__SI_MASK; > + task_cputime(p, &utime, &stime); > + info.utime = cputime_to_clock_t(utime + p->signal->utime); > + info.stime = cputime_to_clock_t(stime + p->signal->stime); > + ret = simple_read_from_buffer(buf, count, ppos, &info, sizeof(info)); > + } > + return ret; > +} > + > +static struct file_operations clonefd_fops = { > + .release = clonefd_release, > + .poll = clonefd_poll, > + .read = clonefd_read, > + .llseek = no_llseek, > +}; It might be nice to include a show_fdinfo() implementation that shows (say) the pid that the clonefd refers to. E.g. something like: static void clonefd_show_fdinfo(struct seq_file *m, struct file *file) { struct task_struct *p = file->private_data; seq_printf(m, "tid:\t%d\n", task_tgid_vnr(p)); } > + > +/* Do process exit notification for clonefd. */ > +void clonefd_do_notify(struct task_struct *p) > +{ > + if (p->clonefd) > + wake_up_all(&p->clonefd_wqh); > +} > + > +/* Handle the CLONE_FD case for copy_process. */ > +int clonefd_do_clone(u64 clone_flags, struct task_struct *p, > + struct clone4_args *args, struct clonefd_setup *setup) > +{ > + int flags; > + struct file *file; > + int fd; > + > + p->clonefd = !!(clone_flags & CLONE_FD); > + if (!p->clonefd) > + return 0; > + > + if (args->clonefd_flags & ~(O_CLOEXEC | O_NONBLOCK)) > + return -EINVAL; > + Maybe also check for (args->clonefd == NULL) in advance, and return -EINVAL or -EFAULT? > + init_waitqueue_head(&p->clonefd_wqh); > + > + get_task_struct(p); > + flags = O_RDONLY | FMODE_ATOMIC_POS | args->clonefd_flags; > + file = anon_inode_getfile("[process]", &clonefd_fops, p, flags); > + if (IS_ERR(file)) { > + put_task_struct(p); > + return PTR_ERR(file); > + } > + > + fd = get_unused_fd_flags(flags); > + if (fd < 0) { > + fput(file); > + return fd; > + } > + > + setup->fd = fd; > + setup->file = file; > + return 0; > +} > + > +/* Clean up clonefd information after a partially complete clone */ > +void clonefd_cleanup_failed_clone(struct clonefd_setup *setup) > +{ > + if (setup->file) { > + put_unused_fd(setup->fd); > + fput(setup->file); > + } > +} > + > +/* Finish setting up the clonefd */ > +void clonefd_install_fd(struct clone4_args *args, struct clonefd_setup *setup) > +{ > + if (setup->file) { > + fd_install(setup->fd, setup->file); > + put_user(setup->fd, args->clonefd); > + } > +} -- To unsubscribe from this list: send the line "unsubscribe linux-api" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html