On Mon, Oct 9, 2023 at 5:27 AM Daniel Borkmann <daniel@xxxxxxxxxxxxx> wrote: > > Currently, the kfree_skb_reason() in sch_handle_{ingress,egress}() can only > express a basic SKB_DROP_REASON_TC_INGRESS or SKB_DROP_REASON_TC_EGRESS reason. > > Victor kicked-off an initial proposal to make this more flexible by disambiguating > verdict from return code by moving the verdict into struct tcf_result and > letting tcf_classify() return a negative error. If hit, then two new drop > reasons were added in the proposal, that is SKB_DROP_REASON_TC_INGRESS_ERROR > as well as SKB_DROP_REASON_TC_EGRESS_ERROR. Further analysis of the actual > error codes would have required to attach to tcf_classify via kprobe/kretprobe > to more deeply debug skb and the returned error. > > In order to make the kfree_skb_reason() in sch_handle_{ingress,egress}() more > extensible, it can be addressed in a more straight forward way, that is: Instead > of placing the verdict into struct tcf_result, we can just put the drop reason > in there, which does not require changes throughout various classful schedulers > given the existing verdict logic can stay as is. > > Then, SKB_DROP_REASON_TC_ERROR{,_*} can be added to the enum skb_drop_reason > to disambiguate between an error or an intentional drop. New drop reason error > codes can be added successively to the tc code base. > > For internal error locations which have not yet been annotated with a > SKB_DROP_REASON_TC_ERROR{,_*}, the fallback is SKB_DROP_REASON_TC_INGRESS and > SKB_DROP_REASON_TC_EGRESS, respectively. Generic errors could be marked with a > SKB_DROP_REASON_TC_ERROR code until they are converted to more specific ones > if it is found that they would be useful for troubleshooting. > > While drop reasons have infrastructure for subsystem specific error codes which > are currently used by mac80211 and ovs, Jakub mentioned that it is preferred > for tc to use the enum skb_drop_reason core codes given it is a better fit and > currently the tooling support is better, too. Daniel - one of us will get to this sometime this week (kind of loaded on some other stuff atm). cheers, jamal > With regards to the latter: > > [...] I think Alastair (bpftrace) is working on auto-prettifying enums when > bpftrace outputs maps. So we can do something like: > > $ bpftrace -e 'tracepoint:skb:kfree_skb { @[args->reason] = count(); }' > Attaching 1 probe... > ^C > > @[SKB_DROP_REASON_TC_INGRESS]: 2 > @[SKB_CONSUMED]: 34 > > ^^^^^^^^^^^^ names!! > > Auto-magically. [...] > > Add a small helper tcf_set_drop_reason() which can be used to set the drop reason > into the tcf_result. > > Signed-off-by: Daniel Borkmann <daniel@xxxxxxxxxxxxx> > Cc: Jamal Hadi Salim <jhs@xxxxxxxxxxxx> > Cc: Victor Nogueira <victor@xxxxxxxxxxxx> > Cc: Jakub Kicinski <kuba@xxxxxxxxxx> > Link: https://lore.kernel.org/netdev/20231006063233.74345d36@xxxxxxxxxx > --- > v1 -> v2: > - Renamed tc_set_drop_reason -> tcf_set_drop_reason > - Moved tcf_set_drop_reason into pkt_cls.h (Cong) > > include/net/pkt_cls.h | 6 ++++++ > include/net/sch_generic.h | 3 +-- > net/core/dev.c | 15 ++++++++++----- > 3 files changed, 17 insertions(+), 7 deletions(-) > > diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h > index f308e8268651..a76c9171db0e 100644 > --- a/include/net/pkt_cls.h > +++ b/include/net/pkt_cls.h > @@ -154,6 +154,12 @@ __cls_set_class(unsigned long *clp, unsigned long cl) > return xchg(clp, cl); > } > > +static inline void tcf_set_drop_reason(struct tcf_result *res, > + enum skb_drop_reason reason) > +{ > + res->drop_reason = reason; > +} > + > static inline void > __tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) > { > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h > index c7318c73cfd6..dcb9160e6467 100644 > --- a/include/net/sch_generic.h > +++ b/include/net/sch_generic.h > @@ -324,7 +324,6 @@ struct Qdisc_ops { > struct module *owner; > }; > > - > struct tcf_result { > union { > struct { > @@ -332,8 +331,8 @@ struct tcf_result { > u32 classid; > }; > const struct tcf_proto *goto_tp; > - > }; > + enum skb_drop_reason drop_reason; > }; > > struct tcf_chain; > diff --git a/net/core/dev.c b/net/core/dev.c > index 606a366cc209..664426285fa3 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -3910,7 +3910,8 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); > #endif /* CONFIG_NET_EGRESS */ > > #ifdef CONFIG_NET_XGRESS > -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) > +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, > + enum skb_drop_reason *drop_reason) > { > int ret = TC_ACT_UNSPEC; > #ifdef CONFIG_NET_CLS_ACT > @@ -3922,12 +3923,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) > > tc_skb_cb(skb)->mru = 0; > tc_skb_cb(skb)->post_ct = false; > + res.drop_reason = *drop_reason; > > mini_qdisc_bstats_cpu_update(miniq, skb); > ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); > /* Only tcf related quirks below. */ > switch (ret) { > case TC_ACT_SHOT: > + *drop_reason = res.drop_reason; > mini_qdisc_qstats_cpu_drop(miniq); > break; > case TC_ACT_OK: > @@ -3977,6 +3980,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, > struct net_device *orig_dev, bool *another) > { > struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); > + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS; > int sch_ret; > > if (!entry) > @@ -3994,7 +3998,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, > if (sch_ret != TC_ACT_UNSPEC) > goto ingress_verdict; > } > - sch_ret = tc_run(tcx_entry(entry), skb); > + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); > ingress_verdict: > switch (sch_ret) { > case TC_ACT_REDIRECT: > @@ -4011,7 +4015,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, > *ret = NET_RX_SUCCESS; > return NULL; > case TC_ACT_SHOT: > - kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); > + kfree_skb_reason(skb, drop_reason); > *ret = NET_RX_DROP; > return NULL; > /* used by tc_run */ > @@ -4032,6 +4036,7 @@ static __always_inline struct sk_buff * > sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) > { > struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); > + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS; > int sch_ret; > > if (!entry) > @@ -4045,7 +4050,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) > if (sch_ret != TC_ACT_UNSPEC) > goto egress_verdict; > } > - sch_ret = tc_run(tcx_entry(entry), skb); > + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); > egress_verdict: > switch (sch_ret) { > case TC_ACT_REDIRECT: > @@ -4054,7 +4059,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) > *ret = NET_XMIT_SUCCESS; > return NULL; > case TC_ACT_SHOT: > - kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS); > + kfree_skb_reason(skb, drop_reason); > *ret = NET_XMIT_DROP; > return NULL; > /* used by tc_run */ > -- > 2.34.1 >