On Wed, Dec 18, 2024 at 11:37 PM Martin KaFai Lau <martin.lau@xxxxxxxxx> wrote: > > On 12/13/24 3:29 PM, Amery Hung wrote: > > Add basic kfuncs for working on skb in qdisc. > > > > Both bpf_qdisc_skb_drop() and bpf_kfree_skb() can be used to release > > a reference to an skb. However, bpf_qdisc_skb_drop() can only be called > > in .enqueue where a to_free skb list is available from kernel to defer > > the release. bpf_kfree_skb() should be used elsewhere. It is also used > > in bpf_obj_free_fields() when cleaning up skb in maps and collections. > > > > bpf_skb_get_hash() returns the flow hash of an skb, which can be used > > to build flow-based queueing algorithms. > > > > Finally, allow users to create read-only dynptr via bpf_dynptr_from_skb(). > > > > Signed-off-by: Amery Hung <amery.hung@xxxxxxxxxxxxx> > > --- > > net/sched/bpf_qdisc.c | 77 ++++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 76 insertions(+), 1 deletion(-) > > > > diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c > > index a2e2db29e5fc..28959424eab0 100644 > > --- a/net/sched/bpf_qdisc.c > > +++ b/net/sched/bpf_qdisc.c > > @@ -106,6 +106,67 @@ static int bpf_qdisc_btf_struct_access(struct bpf_verifier_log *log, > > return 0; > > } > > > > +__bpf_kfunc_start_defs(); > > + > > +/* bpf_skb_get_hash - Get the flow hash of an skb. > > + * @skb: The skb to get the flow hash from. > > + */ > > +__bpf_kfunc u32 bpf_skb_get_hash(struct sk_buff *skb) > > +{ > > + return skb_get_hash(skb); > > +} > > + > > +/* bpf_kfree_skb - Release an skb's reference and drop it immediately. > > + * @skb: The skb whose reference to be released and dropped. > > + */ > > +__bpf_kfunc void bpf_kfree_skb(struct sk_buff *skb) > > +{ > > + kfree_skb(skb); > > +} > > + > > +/* bpf_qdisc_skb_drop - Drop an skb by adding it to a deferred free list. > > + * @skb: The skb whose reference to be released and dropped. > > + * @to_free_list: The list of skbs to be dropped. > > + */ > > +__bpf_kfunc void bpf_qdisc_skb_drop(struct sk_buff *skb, > > + struct bpf_sk_buff_ptr *to_free_list) > > +{ > > + __qdisc_drop(skb, (struct sk_buff **)to_free_list); > > +} > > + > > +__bpf_kfunc_end_defs(); > > + > > +#define BPF_QDISC_KFUNC_xxx \ > > + BPF_QDISC_KFUNC(bpf_skb_get_hash, KF_TRUSTED_ARGS) \ > > + BPF_QDISC_KFUNC(bpf_kfree_skb, KF_RELEASE) \ > > + BPF_QDISC_KFUNC(bpf_qdisc_skb_drop, KF_RELEASE) \ > > + > > +BTF_KFUNCS_START(bpf_qdisc_kfunc_ids) > > +#define BPF_QDISC_KFUNC(name, flag) BTF_ID_FLAGS(func, name, flag) > > +BPF_QDISC_KFUNC_xxx > > +#undef BPF_QDISC_KFUNC > > +BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS) > > +BTF_KFUNCS_END(bpf_qdisc_kfunc_ids) > > + > > +#define BPF_QDISC_KFUNC(name, _) BTF_ID_LIST_SINGLE(name##_ids, func, name) > > > > +BPF_QDISC_KFUNC_xxx > > +#undef BPF_QDISC_KFUNC > > + > > +static int bpf_qdisc_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id) > > +{ > > + if (kfunc_id == bpf_qdisc_skb_drop_ids[0]) > > + if (strcmp(prog->aux->attach_func_name, "enqueue")) > > The kfunc is registered for all BPF_PROG_TYPE_STRUCT_OPS. Checking func_name > alone is not enough, e.g. another future struct_ops may have the "enqueue" ops. > > Checking the btf type of "struct Qdisc_ops" is better. Something like the > following (untested): > Got it. I will add a structp_ops type check in the filter. > diff --git i/include/linux/bpf.h w/include/linux/bpf.h > index c81ac98db439..cf3133f81e7f 100644 > --- i/include/linux/bpf.h > +++ w/include/linux/bpf.h > @@ -1809,6 +1809,7 @@ struct bpf_struct_ops { > void *cfi_stubs; > struct module *owner; > const char *name; > + const struct btf_type *type; > struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; > }; > > diff --git i/kernel/bpf/bpf_struct_ops.c w/kernel/bpf/bpf_struct_ops.c > index d9e0af00580b..5c2ca5a84384 100644 > --- i/kernel/bpf/bpf_struct_ops.c > +++ w/kernel/bpf/bpf_struct_ops.c > @@ -432,6 +432,8 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc > *st_ops_desc, > goto errout; > } > > + st_ops->type = t; > + > return 0; > > errout: > diff --git i/net/sched/bpf_qdisc.c w/net/sched/bpf_qdisc.c > index 1caa9f696d2d..94e45ea59fef 100644 > --- i/net/sched/bpf_qdisc.c > +++ w/net/sched/bpf_qdisc.c > @@ -250,6 +250,11 @@ BPF_QDISC_KFUNC_xxx > > static int bpf_qdisc_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id) > { > + > + if (bpf_Qdisc_ops.type != btf_type_by_id(prog->aux->attach_btf, > + prog->aux->attach_btf_id)) > + return -EACCES; > + > if (kfunc_id == bpf_qdisc_skb_drop_ids[0]) { > if (strcmp(prog->aux->attach_func_name, "enqueue")) > return -EACCES; > > > st_ops->type (and a few others) was refactored to bpf_struct_ops_desc when > adding the kernel module support. I think adding st_ops->type back should be enough. > > Also, a bike shedding here, from looking at patch 7 and patch 8 which limit a > set of kfuncs to a particular ops. I think using btf_id_set_contains() is more > inline to other verifier usages. > > BTF_SET_START(qdisc_enqueue_kfunc_set) > BTF_ID(func, bpf_qdisc_skb_drop) > BTF_ID(func, bpf_qdisc_watchdog_schedule) > BTF_SET_END(qdisc_enqueue_kfunc_set) > > BTF_SET_START(qdisc_dequeue_kfunc_set) > BTF_ID(func, bpf_qdisc_bstats_update) > BTF_ID(func, bpf_qdisc_watchdog_schedule) > BTF_SET_END(qdisc_dequeue_kfunc_set) > > BTF_SET_START(qdisc_common_kfunc_set) > BTF_ID(func, bpf_skb_get_hash) > BTF_ID(func, bpf_kfree_skb) > BTF_SET_END(qdisc_common_kfunc_set) > I will change the style of kfunc ops availability check to the one you suggested. Thanks, Amery > > + return -EACCES; > > + > > + return 0; > > +} > > + > > +static const struct btf_kfunc_id_set bpf_qdisc_kfunc_set = { > > + .owner = THIS_MODULE, > > + .set = &bpf_qdisc_kfunc_ids, > > + .filter = bpf_qdisc_kfunc_filter, > > +}; > > + > > static const struct bpf_verifier_ops bpf_qdisc_verifier_ops = { > > .get_func_proto = bpf_qdisc_get_func_proto, > > .is_valid_access = bpf_qdisc_is_valid_access, > > @@ -209,6 +270,20 @@ static struct bpf_struct_ops bpf_Qdisc_ops = { > > > > static int __init bpf_qdisc_kfunc_init(void) > > { > > - return register_bpf_struct_ops(&bpf_Qdisc_ops, Qdisc_ops); > > + int ret; > > + const struct btf_id_dtor_kfunc skb_kfunc_dtors[] = { > > + { > > + .btf_id = bpf_sk_buff_ids[0], > > + .kfunc_btf_id = bpf_kfree_skb_ids[0] > > + }, > > + }; > > + > > + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_qdisc_kfunc_set); > > + ret = ret ?: register_btf_id_dtor_kfuncs(skb_kfunc_dtors, > > + ARRAY_SIZE(skb_kfunc_dtors), > > + THIS_MODULE); > > + ret = ret ?: register_bpf_struct_ops(&bpf_Qdisc_ops, Qdisc_ops); > > + > > + return ret; > > } > > late_initcall(bpf_qdisc_kfunc_init); >