Christoph Hellwig <hch@xxxxxx> [Tue, 2020-04-21 10:17 -0700]: > Instead of having all the sysctl handlers deal with user pointers, which > is rather hairy in terms of the BPF interaction, copy the input to and > from userspace in common code. This also means that the strings are > always NUL-terminated by the common code, making the API a little bit > safer. > > As most handler just pass through the data to one of the common handlers > a lot of the changes are mechnical. > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> ... > @@ -1172,36 +1168,28 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, > .new_updated = 0, > }; > struct cgroup *cgrp; > + loff_t pos = 0; > int ret; > > ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL); > - if (ctx.cur_val) { > - mm_segment_t old_fs; > - loff_t pos = 0; > - > - old_fs = get_fs(); > - set_fs(KERNEL_DS); > - if (table->proc_handler(table, 0, (void __user *)ctx.cur_val, > - &ctx.cur_len, &pos)) { > - /* Let BPF program decide how to proceed. */ > - ctx.cur_len = 0; > - } > - set_fs(old_fs); > - } else { > + if (!ctx.cur_val || > + table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) { > /* Let BPF program decide how to proceed. */ > ctx.cur_len = 0; > } > > - if (write && buf && *pcount) { > + if (write && *buf && *pcount) { > /* BPF program should be able to override new value with a > * buffer bigger than provided by user. > */ > ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); > ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount); > - if (!ctx.new_val || > - copy_from_user(ctx.new_val, buf, ctx.new_len)) > + if (ctx.new_val) { > + memcpy(ctx.new_val, *buf, ctx.new_len); > + } else { > /* Let BPF program decide how to proceed. */ > ctx.new_len = 0; > + } > } > > rcu_read_lock(); > @@ -1212,7 +1200,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, > kfree(ctx.cur_val); > > if (ret == 1 && ctx.new_updated) { > - *new_buf = ctx.new_val; > + *buf = ctx.new_val; Original value of *buf should be freed before overriding it here otherwise it's lost/leaked unless I missed something. Other than this BPF part of this patch looks good to me. Feel free to add my Ack on the next iteration with this fix. > *pcount = ctx.new_len; > } else { > kfree(ctx.new_val); -- Andrey Ignatov