Re: [RFC] Add BPF_PROG_TYPE_CGROUP_IOCTL

Kenny Ho <y2kenny@xxxxxxxxx> · Mon, 2 Nov 2020 14:23:02 -0500

Adding a few more emails from get_maintainer.pl and bumping this
thread since there hasn't been any comments so far.  Is this too
crazy?  Am I missing something fundamental?

Regards,
Kenny

On Wed, Oct 7, 2020 at 11:24 AM Kenny Ho <Kenny.Ho@xxxxxxx> wrote:
>
> This is a skeleton implementation to invite comments and generate
> discussion around the idea of introducing a bpf-cgroup program type to
> control ioctl access.  This is modelled after
> BPF_PROG_TYPE_CGROUP_DEVICE.  The premise is to allow system admins to
> write bpf programs to block some ioctl access, potentially in conjunction
> with data collected by other bpf programs stored in some bpf maps and
> with bpf_spin_lock.
>
> For example, a bpf program has been accumulating resource usaging
> statistic and a second bpf program of BPF_PROG_TYPE_CGROUP_IOCTL would
> block access to previously mentioned resource via ioctl when the stats
> stored in a bpf map reaches certain threshold.
>
> Like BPF_PROG_TYPE_CGROUP_DEVICE, the default is permissive (i.e.,
> ioctls are not blocked if no bpf program is present for the cgroup.) to
> maintain current interface behaviour when this functionality is unused.
>
> Performance impact to ioctl calls is minimal as bpf's in-kernel verifier
> ensure attached bpf programs cannot crash and always terminate quickly.
>
> TODOs:
> - correct usage of the verifier
> - toolings
> - samples
> - device driver may provide helper functions that take
> bpf_cgroup_ioctl_ctx and return something more useful for specific
> device
>
> Signed-off-by: Kenny Ho <Kenny.Ho@xxxxxxx>
> ---
>  fs/ioctl.c                 |  5 +++
>  include/linux/bpf-cgroup.h | 14 ++++++++
>  include/linux/bpf_types.h  |  2 ++
>  include/uapi/linux/bpf.h   |  8 +++++
>  kernel/bpf/cgroup.c        | 66 ++++++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c       |  7 ++++
>  kernel/bpf/verifier.c      |  1 +
>  7 files changed, 103 insertions(+)
>
> diff --git a/fs/ioctl.c b/fs/ioctl.c
> index 4e6cc0a7d69c..a3925486d417 100644
> --- a/fs/ioctl.c
> +++ b/fs/ioctl.c
> @@ -19,6 +19,7 @@
>  #include <linux/falloc.h>
>  #include <linux/sched/signal.h>
>  #include <linux/fiemap.h>
> +#include <linux/cgroup.h>
>
>  #include "internal.h"
>
> @@ -45,6 +46,10 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>         if (!filp->f_op->unlocked_ioctl)
>                 goto out;
>
> +       error = BPF_CGROUP_RUN_PROG_IOCTL(filp, cmd, arg);
> +       if (error)
> +               goto out;
> +
>         error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
>         if (error == -ENOIOCTLCMD)
>                 error = -ENOTTY;
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index 64f367044e25..a5f0b0a8f82b 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -134,6 +134,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
>  int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
>                                       short access, enum bpf_attach_type type);
>
> +int __cgroup_bpf_check_ioctl_permission(struct file *filp, unsigned int cmd, unsigned long arg,
> +                                       enum bpf_attach_type type);
> +
>  int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
>                                    struct ctl_table *table, int write,
>                                    void **buf, size_t *pcount, loff_t *ppos,
> @@ -346,6 +349,16 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>         __ret;                                                                 \
>  })
>
> +#define BPF_CGROUP_RUN_PROG_IOCTL(filp, cmd, arg)                            \
> +({                                                                           \
> +       int __ret = 0;                                                        \
> +       if (cgroup_bpf_enabled)                                               \
> +               __ret = __cgroup_bpf_check_ioctl_permission(filp, cmd, arg,   \
> +                                                           BPF_CGROUP_IOCTL);\
> +                                                                             \
> +       __ret;                                                                \
> +})
> +
>  int cgroup_bpf_prog_attach(const union bpf_attr *attr,
>                            enum bpf_prog_type ptype, struct bpf_prog *prog);
>  int cgroup_bpf_prog_detach(const union bpf_attr *attr,
> @@ -429,6 +442,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
>                                        optlen, max_optlen, retval) ({ retval; })
>  #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
>                                        kernel_optval) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_IOCTL(type,major,minor,access) ({ 0; })
>
>  #define for_each_cgroup_storage_type(stype) for (; false; )
>
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index a52a5688418e..3055e7e4918c 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -56,6 +56,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl,
>               struct bpf_sysctl, struct bpf_sysctl_kern)
>  BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt,
>               struct bpf_sockopt, struct bpf_sockopt_kern)
> +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_IOCTL, cg_ioctl,
> +             struct bpf_cgroup_ioctl_ctx, struct bpf_cgroup_ioctl_ctx)
>  #endif
>  #ifdef CONFIG_BPF_LIRC_MODE2
>  BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2,
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index b6238b2209b7..6a908e13d3a3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -197,6 +197,7 @@ enum bpf_prog_type {
>         BPF_PROG_TYPE_EXT,
>         BPF_PROG_TYPE_LSM,
>         BPF_PROG_TYPE_SK_LOOKUP,
> +       BPF_PROG_TYPE_CGROUP_IOCTL,
>  };
>
>  enum bpf_attach_type {
> @@ -238,6 +239,7 @@ enum bpf_attach_type {
>         BPF_XDP_CPUMAP,
>         BPF_SK_LOOKUP,
>         BPF_XDP,
> +       BPF_CGROUP_IOCTL,
>         __MAX_BPF_ATTACH_TYPE
>  };
>
> @@ -4276,6 +4278,12 @@ struct bpf_cgroup_dev_ctx {
>         __u32 minor;
>  };
>
> +struct bpf_cgroup_ioctl_ctx {
> +       __u64 filp;
> +       __u32 cmd;
> +       __u32 arg;
> +};
> +
>  struct bpf_raw_tracepoint_args {
>         __u64 args[0];
>  };
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 83ff127ef7ae..0958bae3b0b7 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -1203,6 +1203,72 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
>         .is_valid_access        = cgroup_dev_is_valid_access,
>  };
>
> +int __cgroup_bpf_check_ioctl_permission(struct file *filp, unsigned int cmd, unsigned long arg,
> +                                     enum bpf_attach_type type)
> +{
> +       struct cgroup *cgrp;
> +       struct bpf_cgroup_ioctl_ctx ctx = {
> +               .filp = filp,
> +               .cmd = cmd,
> +               .arg = arg,
> +       };
> +       int allow = 1;
> +
> +       rcu_read_lock();
> +       cgrp = task_dfl_cgroup(current);
> +       allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
> +                                  BPF_PROG_RUN);
> +       rcu_read_unlock();
> +
> +       return !allow;
> +}
> +
> +static const struct bpf_func_proto *
> +cgroup_ioctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> +       return cgroup_base_func_proto(func_id, prog);
> +}
> +
> +static bool cgroup_ioctl_is_valid_access(int off, int size,
> +                                      enum bpf_access_type type,
> +                                      const struct bpf_prog *prog,
> +                                      struct bpf_insn_access_aux *info)
> +{
> +       const int size_default = sizeof(__u32);
> +
> +       if (type == BPF_WRITE)
> +               return false;
> +
> +       if (off < 0 || off + size > sizeof(struct bpf_cgroup_ioctl_ctx))
> +               return false;
> +       /* The verifier guarantees that size > 0. */
> +       if (off % size != 0)
> +               return false;
> +
> +       switch (off) {
> +       case bpf_ctx_range(struct bpf_cgroup_ioctl_ctx, filp):
> +               bpf_ctx_record_field_size(info, size_default);
> +               if (!bpf_ctx_narrow_access_ok(off, size, size_default))
> +                       return false;
> +               break;
> +       case bpf_ctx_range(struct bpf_cgroup_ioctl_ctx, cmd):
> +       case bpf_ctx_range(struct bpf_cgroup_ioctl_ctx, arg):
> +       default:
> +               if (size != size_default)
> +                       return false;
> +       }
> +
> +       return true;
> +}
> +
> +const struct bpf_prog_ops cg_ioctl_prog_ops = {
> +};
> +
> +const struct bpf_verifier_ops cg_ioctl_verifier_ops = {
> +       .get_func_proto         = cgroup_ioctl_func_proto,
> +       .is_valid_access        = cgroup_ioctl_is_valid_access,
> +};
> +
>  /**
>   * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
>   *
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 86299a292214..6984a62c96f4 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2054,6 +2054,7 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
>         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
>         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
>         case BPF_PROG_TYPE_CGROUP_SYSCTL:
> +       case BPF_PROG_TYPE_CGROUP_IOCTL:
>         case BPF_PROG_TYPE_SOCK_OPS:
>         case BPF_PROG_TYPE_EXT: /* extends any prog */
>                 return true;
> @@ -2806,6 +2807,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
>                 return BPF_PROG_TYPE_SOCK_OPS;
>         case BPF_CGROUP_DEVICE:
>                 return BPF_PROG_TYPE_CGROUP_DEVICE;
> +       case BPF_CGROUP_IOCTL:
> +               return BPF_PROG_TYPE_CGROUP_IOCTL;
>         case BPF_SK_MSG_VERDICT:
>                 return BPF_PROG_TYPE_SK_MSG;
>         case BPF_SK_SKB_STREAM_PARSER:
> @@ -2878,6 +2881,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
>         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
>         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
>         case BPF_PROG_TYPE_CGROUP_SYSCTL:
> +       case BPF_PROG_TYPE_CGROUP_IOCTL:
>         case BPF_PROG_TYPE_SOCK_OPS:
>                 ret = cgroup_bpf_prog_attach(attr, ptype, prog);
>                 break;
> @@ -2915,6 +2919,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
>         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
>         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
>         case BPF_PROG_TYPE_CGROUP_SYSCTL:
> +       case BPF_PROG_TYPE_CGROUP_IOCTL:
>         case BPF_PROG_TYPE_SOCK_OPS:
>                 return cgroup_bpf_prog_detach(attr, ptype);
>         default:
> @@ -2958,6 +2963,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
>         case BPF_CGROUP_SYSCTL:
>         case BPF_CGROUP_GETSOCKOPT:
>         case BPF_CGROUP_SETSOCKOPT:
> +       case BPF_CGROUP_IOCTL:
>                 return cgroup_bpf_prog_query(attr, uattr);
>         case BPF_LIRC_MODE2:
>                 return lirc_prog_query(attr, uattr);
> @@ -3914,6 +3920,7 @@ static int link_create(union bpf_attr *attr)
>         case BPF_PROG_TYPE_CGROUP_DEVICE:
>         case BPF_PROG_TYPE_CGROUP_SYSCTL:
>         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
> +       case BPF_PROG_TYPE_CGROUP_IOCTL:
>                 ret = cgroup_bpf_link_attach(attr, prog);
>                 break;
>         case BPF_PROG_TYPE_TRACING:
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index ef938f17b944..af68f463e828 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -7419,6 +7419,7 @@ static int check_return_code(struct bpf_verifier_env *env)
>         case BPF_PROG_TYPE_CGROUP_DEVICE:
>         case BPF_PROG_TYPE_CGROUP_SYSCTL:
>         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
> +       case BPF_PROG_TYPE_CGROUP_IOCTL:
>                 break;
>         case BPF_PROG_TYPE_RAW_TRACEPOINT:
>                 if (!env->prog->aux->attach_btf_id)
> --
> 2.25.1
>