On Fri, May 5, 2023 at 11:45 AM Stanislav Fomichev <sdf@xxxxxxxxxx> wrote:
>
> We're observing some stalls on the heavily loaded machines
> in the cgroup_bpf_prog_query path. This is likely due to
> being blocked on cgroup_mutex.
>
> IIUC, the cgroup_mutex is there mostly to protect the non-effective
> fields (cgrp->bpf.progs) which might be changed by the update path.
> For the BPF_F_QUERY_EFFECTIVE case, all we need is to rcu_dereference
> a bunch of pointers (and keep them around for consistency), so
> let's do it.
>
> Sending out as an RFC because it looks a bit ugly. It would also
> be nice to handle non-effective case locklessly as well, but it
> might require a larger rework.
>
> Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
> ---
>  kernel/bpf/cgroup.c | 30 ++++++++++++++++++++----------
>  1 file changed, 20 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index a06e118a9be5..c9d4b66e2c15 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -1022,10 +1022,10 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
>          __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
>          bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
>          __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> +        struct bpf_prog_array *effective[MAX_CGROUP_BPF_ATTACH_TYPE];
>          enum bpf_attach_type type = attr->query.attach_type;
>          enum cgroup_bpf_attach_type from_atype, to_atype;
>          enum cgroup_bpf_attach_type atype;
> -        struct bpf_prog_array *effective;
>          int cnt, ret = 0, i;
>          int total_cnt = 0;
>          u32 flags;
> @@ -1051,9 +1051,9 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
>
>          for (atype = from_atype; atype <= to_atype; atype++) {
>                  if (effective_query) {
> -                        effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
> -                                                              lockdep_is_held(&cgroup_mutex));
> -                        total_cnt += bpf_prog_array_length(effective);
> +                        effective[atype] = rcu_dereference_protected(cgrp->bpf.effective[atype],
> +                                                                     lockdep_is_held(&cgroup_mutex));
> +                        total_cnt += bpf_prog_array_length(effective[atype]);
>                  } else {
>                          total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
>                  }
> @@ -1076,10 +1076,8 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
>
>          for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
>                  if (effective_query) {
> -                        effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
> -                                                              lockdep_is_held(&cgroup_mutex));
> -                        cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
> -                        ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
> +                        cnt = min_t(int, bpf_prog_array_length(effective[atype]), total_cnt);
> +                        ret = bpf_prog_array_copy_to_user(effective[atype], prog_ids, cnt);
>                  } else {
>                          struct hlist_head *progs;
>                          struct bpf_prog_list *pl;
> @@ -1118,11 +1116,23 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
>  static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
>                              union bpf_attr __user *uattr)
>  {
> +        __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
> +        bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
> +        bool need_mutex = false;

Oops, this has to be true, but you get the idea...
>          int ret;
>
> -        mutex_lock(&cgroup_mutex);
> +        if (effective_query && !prog_attach_flags)
> +                need_mutex = false;
> +
> +        if (need_mutex)
> +                mutex_lock(&cgroup_mutex);
> +        else
> +                rcu_read_lock();
>          ret = __cgroup_bpf_query(cgrp, attr, uattr);
> -        mutex_unlock(&cgroup_mutex);
> +        if (need_mutex)
> +                mutex_unlock(&cgroup_mutex);
> +        else
> +                rcu_read_unlock();
>          return ret;
>  }
>
> --
> 2.40.1.521.gf1e218fcd8-goog
>
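To spell out that fix: need_mutex should start out as true, so we keep
taking cgroup_mutex by default and only drop down to rcu_read_lock() for
the effective-only query where prog_attach_flags isn't requested. Roughly
(an untested sketch, everything else stays as in the patch above):

static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                            union bpf_attr __user *uattr)
{
        __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
        bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
        bool need_mutex = true;         /* default to the existing locked path */
        int ret;

        /* Effective-only queries that don't ask for per-prog attach flags
         * only read the RCU-protected effective arrays, so they can skip
         * cgroup_mutex.
         */
        if (effective_query && !prog_attach_flags)
                need_mutex = false;

        if (need_mutex)
                mutex_lock(&cgroup_mutex);
        else
                rcu_read_lock();
        ret = __cgroup_bpf_query(cgrp, attr, uattr);
        if (need_mutex)
                mutex_unlock(&cgroup_mutex);
        else
                rcu_read_unlock();
        return ret;
}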