Adding a few more emails from get_maintainer.pl and bumping this thread since there hasn't been any comments so far. Is this too crazy? Am I missing something fundamental? Regards, Kenny On Wed, Oct 7, 2020 at 11:24 AM Kenny Ho <Kenny.Ho@xxxxxxx> wrote: > > This is a skeleton implementation to invite comments and generate > discussion around the idea of introducing a bpf-cgroup program type to > control ioctl access. This is modelled after > BPF_PROG_TYPE_CGROUP_DEVICE. The premise is to allow system admins to > write bpf programs to block some ioctl access, potentially in conjunction > with data collected by other bpf programs stored in some bpf maps and > with bpf_spin_lock. > > For example, a bpf program has been accumulating resource usaging > statistic and a second bpf program of BPF_PROG_TYPE_CGROUP_IOCTL would > block access to previously mentioned resource via ioctl when the stats > stored in a bpf map reaches certain threshold. > > Like BPF_PROG_TYPE_CGROUP_DEVICE, the default is permissive (i.e., > ioctls are not blocked if no bpf program is present for the cgroup.) to > maintain current interface behaviour when this functionality is unused. > > Performance impact to ioctl calls is minimal as bpf's in-kernel verifier > ensure attached bpf programs cannot crash and always terminate quickly. > > TODOs: > - correct usage of the verifier > - toolings > - samples > - device driver may provide helper functions that take > bpf_cgroup_ioctl_ctx and return something more useful for specific > device > > Signed-off-by: Kenny Ho <Kenny.Ho@xxxxxxx> > --- > fs/ioctl.c | 5 +++ > include/linux/bpf-cgroup.h | 14 ++++++++ > include/linux/bpf_types.h | 2 ++ > include/uapi/linux/bpf.h | 8 +++++ > kernel/bpf/cgroup.c | 66 ++++++++++++++++++++++++++++++++++++++ > kernel/bpf/syscall.c | 7 ++++ > kernel/bpf/verifier.c | 1 + > 7 files changed, 103 insertions(+) > > diff --git a/fs/ioctl.c b/fs/ioctl.c > index 4e6cc0a7d69c..a3925486d417 100644 > --- a/fs/ioctl.c > +++ b/fs/ioctl.c > @@ -19,6 +19,7 @@ > #include <linux/falloc.h> > #include <linux/sched/signal.h> > #include <linux/fiemap.h> > +#include <linux/cgroup.h> > > #include "internal.h" > > @@ -45,6 +46,10 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > if (!filp->f_op->unlocked_ioctl) > goto out; > > + error = BPF_CGROUP_RUN_PROG_IOCTL(filp, cmd, arg); > + if (error) > + goto out; > + > error = filp->f_op->unlocked_ioctl(filp, cmd, arg); > if (error == -ENOIOCTLCMD) > error = -ENOTTY; > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h > index 64f367044e25..a5f0b0a8f82b 100644 > --- a/include/linux/bpf-cgroup.h > +++ b/include/linux/bpf-cgroup.h > @@ -134,6 +134,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, > int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, > short access, enum bpf_attach_type type); > > +int __cgroup_bpf_check_ioctl_permission(struct file *filp, unsigned int cmd, unsigned long arg, > + enum bpf_attach_type type); > + > int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, > struct ctl_table *table, int write, > void **buf, size_t *pcount, loff_t *ppos, > @@ -346,6 +349,16 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, > __ret; \ > }) > > +#define BPF_CGROUP_RUN_PROG_IOCTL(filp, cmd, arg) \ > +({ \ > + int __ret = 0; \ > + if (cgroup_bpf_enabled) \ > + __ret = __cgroup_bpf_check_ioctl_permission(filp, cmd, arg, \ > + BPF_CGROUP_IOCTL);\ > + \ > + __ret; \ > +}) > + > int cgroup_bpf_prog_attach(const union bpf_attr *attr, > enum bpf_prog_type ptype, struct bpf_prog *prog); > int cgroup_bpf_prog_detach(const union bpf_attr *attr, > @@ -429,6 +442,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, > optlen, max_optlen, retval) ({ retval; }) > #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ > kernel_optval) ({ 0; }) > +#define BPF_CGROUP_RUN_PROG_IOCTL(type,major,minor,access) ({ 0; }) > > #define for_each_cgroup_storage_type(stype) for (; false; ) > > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h > index a52a5688418e..3055e7e4918c 100644 > --- a/include/linux/bpf_types.h > +++ b/include/linux/bpf_types.h > @@ -56,6 +56,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl, > struct bpf_sysctl, struct bpf_sysctl_kern) > BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt, > struct bpf_sockopt, struct bpf_sockopt_kern) > +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_IOCTL, cg_ioctl, > + struct bpf_cgroup_ioctl_ctx, struct bpf_cgroup_ioctl_ctx) > #endif > #ifdef CONFIG_BPF_LIRC_MODE2 > BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2, > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index b6238b2209b7..6a908e13d3a3 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -197,6 +197,7 @@ enum bpf_prog_type { > BPF_PROG_TYPE_EXT, > BPF_PROG_TYPE_LSM, > BPF_PROG_TYPE_SK_LOOKUP, > + BPF_PROG_TYPE_CGROUP_IOCTL, > }; > > enum bpf_attach_type { > @@ -238,6 +239,7 @@ enum bpf_attach_type { > BPF_XDP_CPUMAP, > BPF_SK_LOOKUP, > BPF_XDP, > + BPF_CGROUP_IOCTL, > __MAX_BPF_ATTACH_TYPE > }; > > @@ -4276,6 +4278,12 @@ struct bpf_cgroup_dev_ctx { > __u32 minor; > }; > > +struct bpf_cgroup_ioctl_ctx { > + __u64 filp; > + __u32 cmd; > + __u32 arg; > +}; > + > struct bpf_raw_tracepoint_args { > __u64 args[0]; > }; > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > index 83ff127ef7ae..0958bae3b0b7 100644 > --- a/kernel/bpf/cgroup.c > +++ b/kernel/bpf/cgroup.c > @@ -1203,6 +1203,72 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { > .is_valid_access = cgroup_dev_is_valid_access, > }; > > +int __cgroup_bpf_check_ioctl_permission(struct file *filp, unsigned int cmd, unsigned long arg, > + enum bpf_attach_type type) > +{ > + struct cgroup *cgrp; > + struct bpf_cgroup_ioctl_ctx ctx = { > + .filp = filp, > + .cmd = cmd, > + .arg = arg, > + }; > + int allow = 1; > + > + rcu_read_lock(); > + cgrp = task_dfl_cgroup(current); > + allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, > + BPF_PROG_RUN); > + rcu_read_unlock(); > + > + return !allow; > +} > + > +static const struct bpf_func_proto * > +cgroup_ioctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > +{ > + return cgroup_base_func_proto(func_id, prog); > +} > + > +static bool cgroup_ioctl_is_valid_access(int off, int size, > + enum bpf_access_type type, > + const struct bpf_prog *prog, > + struct bpf_insn_access_aux *info) > +{ > + const int size_default = sizeof(__u32); > + > + if (type == BPF_WRITE) > + return false; > + > + if (off < 0 || off + size > sizeof(struct bpf_cgroup_ioctl_ctx)) > + return false; > + /* The verifier guarantees that size > 0. */ > + if (off % size != 0) > + return false; > + > + switch (off) { > + case bpf_ctx_range(struct bpf_cgroup_ioctl_ctx, filp): > + bpf_ctx_record_field_size(info, size_default); > + if (!bpf_ctx_narrow_access_ok(off, size, size_default)) > + return false; > + break; > + case bpf_ctx_range(struct bpf_cgroup_ioctl_ctx, cmd): > + case bpf_ctx_range(struct bpf_cgroup_ioctl_ctx, arg): > + default: > + if (size != size_default) > + return false; > + } > + > + return true; > +} > + > +const struct bpf_prog_ops cg_ioctl_prog_ops = { > +}; > + > +const struct bpf_verifier_ops cg_ioctl_verifier_ops = { > + .get_func_proto = cgroup_ioctl_func_proto, > + .is_valid_access = cgroup_ioctl_is_valid_access, > +}; > + > /** > * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl > * > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 86299a292214..6984a62c96f4 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -2054,6 +2054,7 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) > case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: > case BPF_PROG_TYPE_CGROUP_SOCKOPT: > case BPF_PROG_TYPE_CGROUP_SYSCTL: > + case BPF_PROG_TYPE_CGROUP_IOCTL: > case BPF_PROG_TYPE_SOCK_OPS: > case BPF_PROG_TYPE_EXT: /* extends any prog */ > return true; > @@ -2806,6 +2807,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) > return BPF_PROG_TYPE_SOCK_OPS; > case BPF_CGROUP_DEVICE: > return BPF_PROG_TYPE_CGROUP_DEVICE; > + case BPF_CGROUP_IOCTL: > + return BPF_PROG_TYPE_CGROUP_IOCTL; > case BPF_SK_MSG_VERDICT: > return BPF_PROG_TYPE_SK_MSG; > case BPF_SK_SKB_STREAM_PARSER: > @@ -2878,6 +2881,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) > case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: > case BPF_PROG_TYPE_CGROUP_SOCKOPT: > case BPF_PROG_TYPE_CGROUP_SYSCTL: > + case BPF_PROG_TYPE_CGROUP_IOCTL: > case BPF_PROG_TYPE_SOCK_OPS: > ret = cgroup_bpf_prog_attach(attr, ptype, prog); > break; > @@ -2915,6 +2919,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) > case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: > case BPF_PROG_TYPE_CGROUP_SOCKOPT: > case BPF_PROG_TYPE_CGROUP_SYSCTL: > + case BPF_PROG_TYPE_CGROUP_IOCTL: > case BPF_PROG_TYPE_SOCK_OPS: > return cgroup_bpf_prog_detach(attr, ptype); > default: > @@ -2958,6 +2963,7 @@ static int bpf_prog_query(const union bpf_attr *attr, > case BPF_CGROUP_SYSCTL: > case BPF_CGROUP_GETSOCKOPT: > case BPF_CGROUP_SETSOCKOPT: > + case BPF_CGROUP_IOCTL: > return cgroup_bpf_prog_query(attr, uattr); > case BPF_LIRC_MODE2: > return lirc_prog_query(attr, uattr); > @@ -3914,6 +3920,7 @@ static int link_create(union bpf_attr *attr) > case BPF_PROG_TYPE_CGROUP_DEVICE: > case BPF_PROG_TYPE_CGROUP_SYSCTL: > case BPF_PROG_TYPE_CGROUP_SOCKOPT: > + case BPF_PROG_TYPE_CGROUP_IOCTL: > ret = cgroup_bpf_link_attach(attr, prog); > break; > case BPF_PROG_TYPE_TRACING: > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index ef938f17b944..af68f463e828 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -7419,6 +7419,7 @@ static int check_return_code(struct bpf_verifier_env *env) > case BPF_PROG_TYPE_CGROUP_DEVICE: > case BPF_PROG_TYPE_CGROUP_SYSCTL: > case BPF_PROG_TYPE_CGROUP_SOCKOPT: > + case BPF_PROG_TYPE_CGROUP_IOCTL: > break; > case BPF_PROG_TYPE_RAW_TRACEPOINT: > if (!env->prog->aux->attach_btf_id) > -- > 2.25.1 >