Hi Jann, Thank you for the review. Here are a few comments inline. On Thu, Apr 25, 2019 at 08:16:41PM +0200, Jann Horn wrote: > On Thu, Apr 25, 2019 at 6:15 PM Dmitry Safonov <dima@xxxxxxxxxx> wrote: > > API to set time namespace offsets for children processes, i.e.: > > echo "clockid off_ses off_nsec" > /proc/self/timens_offsets > [...] > > diff --git a/fs/proc/base.c b/fs/proc/base.c > > index 6a803a0b75df..76d58e9b5178 100644 > > --- a/fs/proc/base.c > > +++ b/fs/proc/base.c > [...] > > @@ -1521,6 +1522,103 @@ static const struct file_operations proc_pid_sched_autogroup_operations = { > > > > #endif /* CONFIG_SCHED_AUTOGROUP */ > > > > +#ifdef CONFIG_TIME_NS > > +static int timens_offsets_show(struct seq_file *m, void *v) > > +{ > > + struct inode *inode = m->private; > > + struct task_struct *p; > > + > > + p = get_proc_task(inode); > > (FYI, this could also be "p = get_proc_task(file_inode(m->file));". > But this works, too.) > > > + if (!p) > > + return -ESRCH; > > + proc_timens_show_offsets(p, m); > > + > > + put_task_struct(p); > > + > > + return 0; > > +} > > + > > +static ssize_t > > +timens_offsets_write(struct file *file, const char __user *buf, > > + size_t count, loff_t *ppos) > > +{ > > + struct inode *inode = file_inode(file); > > + struct proc_timens_offset offsets[2]; > > + char *kbuf = NULL, *pos, *next_line; > > + struct task_struct *p; > > + int ret, noffsets; > > + > > + /* Only allow < page size writes at the beginning of the file */ > > + if ((*ppos != 0) || (count >= PAGE_SIZE)) > > + return -EINVAL; > > + > > + /* Slurp in the user data */ > > + kbuf = memdup_user_nul(buf, count); > > + if (IS_ERR(kbuf)) > > + return PTR_ERR(kbuf); > > + > > + /* Parse the user data */ > > + ret = -EINVAL; > > + noffsets = 0; > > + pos = kbuf; > > + for (; pos; pos = next_line) { > > + struct proc_timens_offset *off = &offsets[noffsets]; > > + int err; > > + > > + /* Find the end of line and ensure I don't look past it */ > > + next_line = strchr(pos, '\n'); > > + if (next_line) { > > + *next_line = '\0'; > > + next_line++; > > + if (*next_line == '\0') > > + next_line = NULL; > > + } > > + > > + err = sscanf(pos, "%u %lld %lu", &off->clockid, > > + &off->val.tv_sec, &off->val.tv_nsec); > > + if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) > > + goto out; > > + if (noffsets++ == ARRAY_SIZE(offsets)) > > + break; > > This is equivalent to: > > if (noffsets == ARRAY_SIZE(offsets)) { noffsets++; > break; } > noffsets++; > > So we can reach the start of the loop with > noffsets==ARRAY_SIZE(offsets), right? Which means that an > out-of-bounds write can happen? good catch. I will add a test for this case. > > I think that for code like this, it makes sense to write the increment > and the check out separately so that it's easier to spot problems; > e.g. like this: > > noffsets++; > if (noffsets == ARRAY_SIZE(offsets)) > break; will rewrite this way. Thanks! > > > + } > > + > > + ret = -ESRCH; > > + p = get_proc_task(inode); > > + if (!p) > > + goto out; > > + ret = proc_timens_set_offset(p, offsets, noffsets); > > + put_task_struct(p); > > + if (ret) > > + goto out; > > + > > + ret = count; > > +out: > > + kfree(kbuf); > > + return ret; > > +} > > + > > +static int timens_offsets_open(struct inode *inode, struct file *filp) > > +{ > > + int ret; > > + > > + ret = single_open(filp, timens_offsets_show, NULL); > > + if (!ret) { > > + struct seq_file *m = filp->private_data; > > + > > + m->private = inode; > > + } > > + return ret; > > +} > > Why did you write it like this? Wouldn't the following be equivalent? Probably I looked at sched_autogroup_open > > static int timens_offsets_open(struct inode *inode, struct file *file) > { > return single_open(file, timens_offsets_show, inode); > } > > (But also, you can reach the inode of a seq_file as file_inode(m->file).) > > [...] > > diff --git a/kernel/time_namespace.c b/kernel/time_namespace.c > > index e806accc4eaf..9ad4b63c4ed2 100644 > > --- a/kernel/time_namespace.c > > +++ b/kernel/time_namespace.c > [...] > > + > > +int proc_timens_set_offset(struct task_struct *p, > > + struct proc_timens_offset *offsets, int noffsets) > > +{ > > + struct ns_common *ns; > > + struct time_namespace *time_ns; > > + struct timens_offsets *ns_offsets; > > + struct timespec64 *offset; > > + struct timespec64 tp; > > + int i, err; > > + > > + ns = timens_for_children_get(p); > > + if (!ns) > > + return -ESRCH; > > + time_ns = to_time_ns(ns); > > + > > + if (!time_ns->offsets || time_ns->initialized || > > + !ns_capable(time_ns->user_ns, CAP_SYS_TIME)) { > > Capability checks in VFS read/write handlers are bad. Please pass > through the file pointer to this function and replace the call with > "file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)". Will fix. Thanks! > > > + put_time_ns(time_ns); > > + return -EPERM; > > + } > [...] > > +}