Re: [PATCH v2 bpf-next 05/21] bpf: Introduce bpf_sysctl_{get,set}_new_value helpers

Daniel Borkmann <daniel@xxxxxxxxxxxxx> · Thu, 4 Apr 2019 16:37:55 +0200



On 03/26/2019 01:43 AM, Andrey Ignatov wrote:
> Add helpers to work with new value being written to sysctl by user
> space.
> 
> bpf_sysctl_get_new_value() copies value being written to sysctl into
> provided buffer.
> 
> bpf_sysctl_set_new_value() overrides new value being written by user
> space with a one from provided buffer. Buffer should contain string
> representation of the value, similar to what can be seen in /proc/sys/.
> 
> Both helpers can be used only on sysctl write.
> 
> File position matters and can be managed by an interface that will be
> introduced separately. E.g. if user space calls sys_write to a file in
> /proc/sys/ at file position = X, where X > 0, then the value set by
> bpf_sysctl_set_new_value() will be written starting from X. If program
> wants to override whole value with specified buffer, file position has
> to be set to zero.
> 
> Documentation for the new helpers is provided in bpf.h UAPI.
> 
> Signed-off-by: Andrey Ignatov <rdna@xxxxxx>
> ---
>  fs/proc/proc_sysctl.c      | 22 ++++++++---
>  include/linux/bpf-cgroup.h |  8 ++--
>  include/linux/filter.h     |  3 ++
>  include/uapi/linux/bpf.h   | 38 +++++++++++++++++-
>  kernel/bpf/cgroup.c        | 81 +++++++++++++++++++++++++++++++++++++-
>  5 files changed, 142 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index 72f4a096c146..4d1ab22774f7 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -570,8 +570,8 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
>  	struct inode *inode = file_inode(filp);
>  	struct ctl_table_header *head = grab_header(inode);
>  	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
> +	void *new_buf = NULL;
>  	ssize_t error;
> -	size_t res;
>  
>  	if (IS_ERR(head))
>  		return PTR_ERR(head);
> @@ -589,15 +589,27 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
>  	if (!table->proc_handler)
>  		goto out;
>  
> -	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write);
> +	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
> +					   &new_buf);
>  	if (error)
>  		goto out;
>  
>  	/* careful: calling conventions are nasty here */
> -	res = count;
> -	error = table->proc_handler(table, write, buf, &res, ppos);
> +	if (new_buf) {
> +		mm_segment_t old_fs;
> +
> +		old_fs = get_fs();
> +		set_fs(KERNEL_DS);
> +		error = table->proc_handler(table, write, (void __user *)new_buf,
> +					    &count, ppos);
> +		set_fs(old_fs);

From a quick glance at the set, the above stood out. AFAIK, there is an ongoing
effort by Al and other fs/core folks (as visible in the git log) to get rid of
set_fs() calls in the tree with the goal of eliminating this interface /entirely/
(more context on 'why' here: https://lwn.net/Articles/722267/). Is there a better
way to achieve the above w/o needing it?

> +		kfree(new_buf);
> +	} else {
> +		error = table->proc_handler(table, write, buf, &count, ppos);
> +	}
> +
>  	if (!error)
> -		error = res;
> +		error = count;
>  out:
>  	sysctl_head_finish(head);
>  
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index b1c45da20a26..1e97271f9a10 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -113,7 +113,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
>  
>  int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
>  				   struct ctl_table *table, int write,
> -				   enum bpf_attach_type type);
> +				   void __user *buf, size_t *pcount,
> +				   void **new_buf, enum bpf_attach_type type);
>  
>  static inline enum bpf_cgroup_storage_type cgroup_storage_type(
>  	struct bpf_map *map)
> @@ -261,11 +262,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>  })
>  
>  
> -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write)			       \
> +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, nbuf)       \
>  ({									       \
>  	int __ret = 0;							       \
>  	if (cgroup_bpf_enabled)						       \
>  		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write,     \
> +						       buf, count, nbuf,       \
>  						       BPF_CGROUP_SYSCTL);     \
>  	__ret;								       \
>  })
> @@ -338,7 +340,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
>  #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
> -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,nbuf) ({ 0; })
>  
>  #define for_each_cgroup_storage_type(stype) for (; false; )
>