On Fri, Apr 24, 2020 at 08:43:38AM +0200, Christoph Hellwig wrote: > Instead of having all the sysctl handlers deal with user pointers, which > is rather hairy in terms of the BPF interaction, copy the input to and > from userspace in common code. This also means that the strings are > always NUL-terminated by the common code, making the API a little bit > safer. > > As most handlers just pass through the data to one of the common handlers > a lot of the changes are mechanical. This is a lovely cleanup; thank you! Tiny notes below... > diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c > index b6f5d459b087d..df2143e05c571 100644 > --- a/fs/proc/proc_sysctl.c > +++ b/fs/proc/proc_sysctl.c > @@ -539,13 +539,13 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, > return err; > } > > -static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, > +static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, > size_t count, loff_t *ppos, int write) > { > struct inode *inode = file_inode(filp); > struct ctl_table_header *head = grab_header(inode); > struct ctl_table *table = PROC_I(inode)->sysctl_entry; > - void *new_buf = NULL; > + void *kbuf; > ssize_t error; > > if (IS_ERR(head)) > @@ -564,27 +564,38 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, > if (!table->proc_handler) > goto out; > > - error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count, > - ppos, &new_buf); > + if (write) { > + kbuf = memdup_user_nul(ubuf, count); > + if (IS_ERR(kbuf)) { > + error = PTR_ERR(kbuf); > + goto out; > + } > + } else { > + error = -ENOMEM; > + kbuf = kzalloc(count, GFP_KERNEL); > + if (!kbuf) > + goto out; > + } > + > + error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count, > + ppos); > if (error) > - goto out; > + goto out_free_buf; > > /* careful: calling conventions are nasty here */ Is this comment still valid after doing these cleanups?
> - if (new_buf) { > - mm_segment_t old_fs; > - > - old_fs = get_fs(); > - set_fs(KERNEL_DS); > - error = table->proc_handler(table, write, (void __user *)new_buf, > - &count, ppos); > - set_fs(old_fs); > - kfree(new_buf); > - } else { > - error = table->proc_handler(table, write, buf, &count, ppos); > + error = table->proc_handler(table, write, kbuf, &count, ppos); > + if (error) > + goto out_free_buf; > + > + if (!write) { > + error = -EFAULT; > + if (copy_to_user(ubuf, kbuf, count)) > + goto out_free_buf; > } Something I noticed here that existed in the original code, but which might be nice to improve while we're here: we should make sure that the "count" returned from proc_handler() cannot grow _larger_, since otherwise we might expose heap memory beyond the end of the allocation. I'll send a patch for this... > > - if (!error) > - error = count; > + error = count; > +out_free_buf: > + kfree(kbuf); > out: > sysctl_head_finish(head); > > [...] > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index 511543d238794..e26fe7e8e19d7 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > [...] > @@ -682,7 +661,6 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, > left -= proc_skip_spaces(&p); > > out_free: > - kfree(kbuf); > if (err) > return -EINVAL; This label name isn't accurate any more, since nothing is freed here now... *shrug* -- Kees Cook