Re: [PATCH net-next v12 14/15] p4tc: add set of P4TC table kfuncs

Martin KaFai Lau <martin.lau@xxxxxxxxx> · Sat, 2 Mar 2024 17:32:42 -0800

On 3/1/24 4:31 AM, Jamal Hadi Salim wrote:
On Fri, Mar 1, 2024 at 1:53 AM Martin KaFai Lau <martin.lau@xxxxxxxxx> wrote:

On 2/25/24 8:54 AM, Jamal Hadi Salim wrote:
+struct p4tc_table_entry_act_bpf_params {

Will this struct be extended in the future?

+     u32 pipeid;
+     u32 tblid;
+};
+

Not that i can think of. We probably want to have the option to do so
if needed. Do you see any harm if we were to make changes for whatever
reason in the future?

It will be useful to add an argument named with "__sz" suffix to the kfunc.
Take a look at how the kfunc in nf_conntrack_bpf.c is handling the "opts" and 
"opts__sz" argument in its kfunc.


+struct p4tc_table_entry_create_bpf_params {
+     u32 profile_id;
+     u32 pipeid;
+     u32 tblid;
+};
+

[ ... ]

diff --git a/include/net/tc_act/p4tc.h b/include/net/tc_act/p4tc.h
index c5256d821..155068de0 100644
--- a/include/net/tc_act/p4tc.h
+++ b/include/net/tc_act/p4tc.h
@@ -13,10 +13,26 @@ struct tcf_p4act_params {
       u32 tot_params_sz;
   };

+#define P4TC_MAX_PARAM_DATA_SIZE 124
+
+struct p4tc_table_entry_act_bpf {
+     u32 act_id;
+     u32 hit:1,
+         is_default_miss_act:1,
+         is_default_hit_act:1;
+     u8 params[P4TC_MAX_PARAM_DATA_SIZE];
+} __packed;
+
+struct p4tc_table_entry_act_bpf_kern {
+     struct rcu_head rcu;
+     struct p4tc_table_entry_act_bpf act_bpf;
+};
+
   struct tcf_p4act {
       struct tc_action common;
       /* Params IDR reference passed during runtime */
       struct tcf_p4act_params __rcu *params;
+     struct p4tc_table_entry_act_bpf_kern __rcu *act_bpf;
       u32 p_id;
       u32 act_id;
       struct list_head node;
@@ -24,4 +40,39 @@ struct tcf_p4act {

   #define to_p4act(a) ((struct tcf_p4act *)a)

+static inline struct p4tc_table_entry_act_bpf *
+p4tc_table_entry_act_bpf(struct tc_action *action)
+{
+     struct p4tc_table_entry_act_bpf_kern *act_bpf;
+     struct tcf_p4act *p4act = to_p4act(action);
+
+     act_bpf = rcu_dereference(p4act->act_bpf);
+
+     return &act_bpf->act_bpf;
+}
+
+static inline int
+p4tc_table_entry_act_bpf_change_flags(struct tc_action *action, u32 hit,
+                                   u32 dflt_miss, u32 dflt_hit)
+{
+     struct p4tc_table_entry_act_bpf_kern *act_bpf, *act_bpf_old;
+     struct tcf_p4act *p4act = to_p4act(action);
+
+     act_bpf = kzalloc(sizeof(*act_bpf), GFP_KERNEL);


[ ... ]

+__bpf_kfunc static struct p4tc_table_entry_act_bpf *
+bpf_p4tc_tbl_read(struct __sk_buff *skb_ctx,

The argument could be "struct sk_buff *skb" instead of __sk_buff. Take a look at
commit 2f4643934670.

We'll make that change.


+               struct p4tc_table_entry_act_bpf_params *params,
+               void *key, const u32 key__sz)
+{
+     struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+     struct net *caller_net;
+
+     caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+
+     return __bpf_p4tc_tbl_read(caller_net, params, key, key__sz);
+}
+
+__bpf_kfunc static struct p4tc_table_entry_act_bpf *
+xdp_p4tc_tbl_read(struct xdp_md *xdp_ctx,
+               struct p4tc_table_entry_act_bpf_params *params,
+               void *key, const u32 key__sz)
+{
+     struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+     struct net *caller_net;
+
+     caller_net = dev_net(ctx->rxq->dev);
+
+     return __bpf_p4tc_tbl_read(caller_net, params, key, key__sz);
+}
+
+static int
+__bpf_p4tc_entry_create(struct net *net,
+                     struct p4tc_table_entry_create_bpf_params *params,
+                     void *key, const u32 key__sz,
+                     struct p4tc_table_entry_act_bpf *act_bpf)
+{
+     struct p4tc_table_entry_key *entry_key = key;
+     struct p4tc_pipeline *pipeline;
+     struct p4tc_table *table;
+
+     if (!params || !key)
+             return -EINVAL;
+     if (key__sz != P4TC_ENTRY_KEY_SZ_BYTES(entry_key->keysz))
+             return -EINVAL;
+
+     pipeline = p4tc_pipeline_find_byid(net, params->pipeid);
+     if (!pipeline)
+             return -ENOENT;
+
+     table = p4tc_tbl_cache_lookup(net, params->pipeid, params->tblid);
+     if (!table)
+             return -ENOENT;
+
+     if (entry_key->keysz != table->tbl_keysz)
+             return -EINVAL;
+
+     return p4tc_table_entry_create_bpf(pipeline, table, entry_key, act_bpf,
+                                        params->profile_id);

My understanding is this kfunc will allocate a "struct
p4tc_table_entry_act_bpf_kern" object. If the bpf_p4tc_entry_delete() kfunc is
never called and the bpf prog is unloaded, how the act_bpf object will be
cleaned up?


The TC code takes care of this. Unloading the bpf prog does not affect
the deletion, it is the TC control side that will take care of it. If
we delete the pipeline otoh then not just this entry but all entries
will be flushed.

It looks like the "struct p4tc_table_entry_act_bpf_kern" object is allocated by 
the bpf prog through kfunc and will only be useful for the bpf prog but not 
other parts of the kernel. However, if the bpf prog is unloaded, these bpf 
specific objects will be left over in the kernel until the tc pipeline (where 
the act_bpf_kern object resided) is gone.

It is the expectation on bpf prog (not only tc/xdp bpf prog) about resources 
clean up that these bpf objects will be gone after unloading the bpf prog and 
unpinning its bpf map.

[ ... ]

+BTF_SET8_START(p4tc_kfunc_check_tbl_set_skb)

This soon will be broken with the latest change in bpf-next. It is replaced by
BTF_KFUNCS_START. commit a05e90427ef6.

It has already been included in the latest bpf-next pull-request, so should 
reach net-next soon.



Ok, this wasnt in net-next when we pushed. We base our changes on
net-next. When do you plan to merge that into net-next?

What is the plan on the selftest ?


We may need some guidance. How do you see us writing a selftest for this?
We have extensive testing on the control side which is netlink (not
part of the current series).

There are examples in tools/testing/selftests/bpf, e.g. the test_bpf_nf.c to 
test the kfuncs in nf_conntrack_bpf mentioned above. There are also selftests 
doing netlink to setup the test. The bpf/test_progs tries to avoid external 
dependency as much as possible, so linking to an extra external library and 
using an extra tool/binary will be unacceptable.
and only the bpf/test_progs binary will be run by bpf CI.

The selftest does not have to be complicated. It can exercise the kfunc and show 
how the new struct (e.g. struct p4tc_table_entry_bpf_*) will be used. There is 
BPF_PROG_RUN for the tc and xdp prog, so should be quite doable.