Add a bpf helper, bpf_skb_in_cgroup(), to decide if a skb->sk belongs to a descendant of a cgroup2. It is similar to the feature added in netfilter: commit c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match"). The user is expected to populate a BPF_MAP_TYPE_CGROUP_ARRAY, which will be used by bpf_skb_in_cgroup(). The modifications to the bpf verifier ensure that BPF_MAP_TYPE_CGROUP_ARRAY and bpf_skb_in_cgroup() are always used together. Signed-off-by: Martin KaFai Lau <kafai@xxxxxx> Cc: Alexei Starovoitov <ast@xxxxxx> Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx> Cc: Tejun Heo <tj@xxxxxxxxxx> --- include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 8 ++++++++ net/core/filter.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ef4e386..a91714bd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -314,6 +314,7 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt, + BPF_FUNC_skb_in_cgroup, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 668e079..68753e0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1062,6 +1062,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (func_id != BPF_FUNC_get_stackid) goto error; break; + case BPF_MAP_TYPE_CGROUP_ARRAY: + if (func_id != BPF_FUNC_skb_in_cgroup) + goto error; + break; default: break; } @@ -1081,6 +1085,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; break; + case BPF_FUNC_skb_in_cgroup: + if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) + goto error; + break; default: break; } diff --git a/net/core/filter.c b/net/core/filter.c index df6860c..410da89 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2024,6 +2024,40 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } +static u64 
bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long)r1; + struct bpf_map *map = (struct bpf_map *)(long)r2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 i = (u32)r3; + struct cgroup *cgrp; + struct sock *sk; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + sk = skb->sk; + if (!sk || !sk_fullsock(sk)) + return -ENOENT; + + if (unlikely(i >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[i]); + if (unlikely(!cgrp)) + return -ENOENT; + + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); +} + +static const struct bpf_func_proto bpf_skb_in_cgroup_proto = { + .func = bpf_skb_in_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -2086,6 +2120,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_route_realm_proto; case BPF_FUNC_perf_event_output: return bpf_get_event_output_proto(); + case BPF_FUNC_skb_in_cgroup: + return &bpf_skb_in_cgroup_proto; default: return sk_filter_func_proto(func_id); } -- 2.5.1 -- To unsubscribe from this list: send the line "unsubscribe cgroups" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html