On 08/15, thinker.li@xxxxxxxxx wrote: > From: Kui-Feng Lee <thinker.li@xxxxxxxxx> > > Enable sleepable cgroup/{get,set}sockopt hooks. > > The sleepable BPF programs attached to cgroup/{get,set}sockopt hooks may > receive a pointer to the optval in user space instead of a kernel > copy. ctx->user_optval and ctx->user_optval_end are the pointers to the > begin and end of the user space buffer if receiving a user space > buffer. ctx->optval and ctx->optval_end will be a kernel copy if receiving > a kernel space buffer. > > A program receives a user space buffer if ctx->flags & > BPF_SOCKOPT_FLAG_OPTVAL_USER is true, otherwise it receives a kernel space > buffer. The BPF programs should not read/write from/to a user space buffer > directly. They should access the buffer through bpf_copy_from_user() and > bpf_copy_to_user() provided in the following patches. > > Signed-off-by: Kui-Feng Lee <thinker.li@xxxxxxxxx> > --- > include/linux/bpf.h | 6 ++ > include/linux/filter.h | 6 ++ > kernel/bpf/cgroup.c | 207 ++++++++++++++++++++++++++++++++--------- > kernel/bpf/verifier.c | 5 +- > 4 files changed, 177 insertions(+), 47 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index cfabbcf47bdb..edb35bcfa548 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1769,9 +1769,15 @@ struct bpf_prog_array_item { > > struct bpf_prog_array { > struct rcu_head rcu; > + u32 flags; > struct bpf_prog_array_item items[]; > }; > > +enum bpf_prog_array_flags { > + BPF_PROG_ARRAY_F_SLEEPABLE = 1 << 0, > + BPF_PROG_ARRAY_F_NON_SLEEPABLE = 1 << 1, > +}; > + > struct bpf_empty_prog_array { > struct bpf_prog_array hdr; > struct bpf_prog *null_prog; > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 761af6b3cf2b..2aa2a96526de 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -1337,12 +1337,18 @@ struct bpf_sockopt_kern { > s32 level; > s32 optname; > s32 optlen; > + u32 flags; > /* for retval in struct bpf_cg_run_ctx 
*/ > struct task_struct *current_task; > /* Temporary "register" for indirect stores to ppos. */ > u64 tmp_reg; > }; > > +enum bpf_sockopt_kern_flags { > + /* optval is a pointer to user space memory */ > + BPF_SOCKOPT_FLAG_OPTVAL_USER = (1U << 0), > +}; > + > int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); > > struct bpf_sk_lookup_kern { > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > index 5b2741aa0d9b..b977768a28e5 100644 > --- a/kernel/bpf/cgroup.c > +++ b/kernel/bpf/cgroup.c > @@ -28,25 +28,46 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key); > * function pointer. > */ > static __always_inline int > -bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp, > - enum cgroup_bpf_attach_type atype, > - const void *ctx, bpf_prog_run_fn run_prog, > - int retval, u32 *ret_flags) > +bpf_prog_run_array_cg_cb(const struct cgroup_bpf *cgrp, > + enum cgroup_bpf_attach_type atype, > + const void *ctx, bpf_prog_run_fn run_prog, > + int retval, u32 *ret_flags, > + int (*progs_cb)(void *, const struct bpf_prog_array *), > + void *progs_cb_arg) > { > const struct bpf_prog_array_item *item; > const struct bpf_prog *prog; > const struct bpf_prog_array *array; > struct bpf_run_ctx *old_run_ctx; > struct bpf_cg_run_ctx run_ctx; > + bool do_sleepable; > u32 func_ret; > + int err; > + > + do_sleepable = > + atype == CGROUP_SETSOCKOPT || atype == CGROUP_GETSOCKOPT; > > run_ctx.retval = retval; > migrate_disable(); > - rcu_read_lock(); > + if (do_sleepable) { > + might_fault(); > + rcu_read_lock_trace(); > + } else > + rcu_read_lock(); nit: wrap 'else' branch with {} braces as well