On Tue, Sep 19, 2023 at 6:15 PM Daniel Borkmann <daniel@xxxxxxxxxxxxx> wrote: > > [ +Martin, bpf ] > > On 9/19/23 4:59 PM, Victor Nogueira wrote: > > Currently there is no way to distinguish between an error and a > > classification verdict. This patch adds the verdict field as a part of > > struct tcf_result. That way, tcf_classify can return a proper > > error number when it fails, and we keep the classification result > > information encapsulated in struct tcf_result. > > > > Also add values SKB_DROP_REASON_TC_EGRESS_ERROR and > > SKB_DROP_REASON_TC_INGRESS_ERROR to enum skb_drop_reason. > > With that we can distinguish between a drop from a processing error versus > > a drop from classification. > > > > Signed-off-by: Victor Nogueira <victor@xxxxxxxxxxxx> > > --- > > include/net/dropreason-core.h | 6 +++++ > > include/net/sch_generic.h | 7 ++++++ > > net/core/dev.c | 42 ++++++++++++++++++++++++++--------- > > net/sched/cls_api.c | 38 ++++++++++++++++++++----------- > > net/sched/sch_cake.c | 32 +++++++++++++------------- > > net/sched/sch_drr.c | 33 +++++++++++++-------------- > > net/sched/sch_ets.c | 6 +++-- > > net/sched/sch_fq_codel.c | 29 ++++++++++++------------ > > net/sched/sch_fq_pie.c | 28 +++++++++++------------ > > net/sched/sch_hfsc.c | 6 +++-- > > net/sched/sch_htb.c | 6 +++-- > > net/sched/sch_multiq.c | 6 +++-- > > net/sched/sch_prio.c | 7 ++++-- > > net/sched/sch_qfq.c | 34 +++++++++++++--------------- > > net/sched/sch_sfb.c | 29 ++++++++++++------------ > > net/sched/sch_sfq.c | 28 +++++++++++------------ > > 16 files changed, 195 insertions(+), 142 deletions(-) > > > > diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h > > index a587e83fc169..b1c069c8e7f2 100644 > > --- a/include/net/dropreason-core.h > > +++ b/include/net/dropreason-core.h > > @@ -80,6 +80,8 @@ > > FN(IPV6_NDISC_BAD_OPTIONS) \ > > FN(IPV6_NDISC_NS_OTHERHOST) \ > > FN(QUEUE_PURGE) \ > > + FN(TC_EGRESS_ERROR) \ > > + FN(TC_INGRESS_ERROR) \ > > FNe(MAX) > > > > /** > > @@ -345,6 +347,10 @@ enum skb_drop_reason { > > SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST, > > /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */ > > SKB_DROP_REASON_QUEUE_PURGE, > > + /** @SKB_DROP_REASON_TC_EGRESS_ERROR: dropped in TC egress HOOK due to error */ > > + SKB_DROP_REASON_TC_EGRESS_ERROR, > > + /** @SKB_DROP_REASON_TC_INGRESS_ERROR: dropped in TC ingress HOOK due to error */ > > + SKB_DROP_REASON_TC_INGRESS_ERROR, > > /** > > * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which > > * shouldn't be used as a real 'reason' - only for tracing code gen > > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h > > index f232512505f8..9a3f71d2545e 100644 > > --- a/include/net/sch_generic.h > > +++ b/include/net/sch_generic.h > > @@ -326,6 +326,7 @@ struct Qdisc_ops { > > > > > > struct tcf_result { > > + u32 verdict; > > union { > > struct { > > unsigned long class; > > @@ -336,6 +337,12 @@ struct tcf_result { > > }; > > }; > > > > +static inline void tcf_result_set_verdict(struct tcf_result *res, > > + const u32 verdict) > > +{ > > + res->verdict = verdict; > > +} > > + > > struct tcf_chain; > > > > struct tcf_proto_ops { > > diff --git a/net/core/dev.c b/net/core/dev.c > > index ccff2b6ef958..1450f4741d9b 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -3910,31 +3910,39 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); > > #endif /* CONFIG_NET_EGRESS */ > > > > #ifdef CONFIG_NET_XGRESS > > -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) > > +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, > > + struct tcf_result *res) > > { > > - int ret = TC_ACT_UNSPEC; > > + int ret = 0; > > #ifdef CONFIG_NET_CLS_ACT > > struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq); > > - struct tcf_result res; > > > > - if (!miniq) > > + if (!miniq) { > > + tcf_result_set_verdict(res, TC_ACT_UNSPEC); > > return ret; > > + } > > > > tc_skb_cb(skb)->mru = 0; > > tc_skb_cb(skb)->post_ct = false; > > > > mini_qdisc_bstats_cpu_update(miniq, skb); > > - ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); > > + ret = tcf_classify(skb, miniq->block, miniq->filter_list, res, false); > > + if (ret < 0) { > > + mini_qdisc_qstats_cpu_drop(miniq); > > + return ret; > > + } > > /* Only tcf related quirks below. */ > > - switch (ret) { > > + switch (res->verdict) { > > case TC_ACT_SHOT: > > mini_qdisc_qstats_cpu_drop(miniq); > > break; > > case TC_ACT_OK: > > case TC_ACT_RECLASSIFY: > > - skb->tc_index = TC_H_MIN(res.classid); > > + skb->tc_index = TC_H_MIN(res->classid); > > break; > > } > > +#else > > + tcf_result_set_verdict(res, TC_ACT_UNSPEC); > > #endif /* CONFIG_NET_CLS_ACT */ > > return ret; > > } > > @@ -3977,6 +3985,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, > > struct net_device *orig_dev, bool *another) > > { > > struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); > > + struct tcf_result res = {0}; > > int sch_ret; > > > > if (!entry) > > @@ -3994,9 +4003,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, > > if (sch_ret != TC_ACT_UNSPEC) > > goto ingress_verdict; > > } > > - sch_ret = tc_run(tcx_entry(entry), skb); > > + sch_ret = tc_run(tcx_entry(entry), skb, &res); > > + if (sch_ret < 0) { > > + kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS_ERROR); > > + *ret = NET_RX_DROP; > > + return NULL; > > + } > > ingress_verdict: > > - switch (sch_ret) { > > + switch (res.verdict) { > > This breaks tcx, please move all this logic into tc_run(). No changes to sch_handle_ingress() > or sch_handle_egress should be necessary, you can then just remap the return code to TC_ACT_SHOT > in such case. > I think it is valuable to have a good reason code like SKB_DROP_REASON_TC_XXX_ERROR to disambiguate between errors vs verdicts in the case of tc_run() variant. For tcx_run(), does this look ok (for consistency)?: if (static_branch_unlikely(&tcx_needed_key)) { sch_ret = tcx_run(entry, skb, true); if (sch_ret != TC_ACT_UNSPEC) { res.verdict = sch_ret; goto ingress_verdict; } } cheers, jamal > > case TC_ACT_REDIRECT: > > /* skb_mac_header check was done by BPF, so we can safely > > * push the L2 header back before redirecting to another > > @@ -4032,6 +4046,7 @@ static __always_inline struct sk_buff * > > sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) > > { > > struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); > > + struct tcf_result res = {0}; > > int sch_ret; > > > > if (!entry) > > @@ -4045,9 +4060,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) > > if (sch_ret != TC_ACT_UNSPEC) > > goto egress_verdict; > > } > > - sch_ret = tc_run(tcx_entry(entry), skb); > > + sch_ret = tc_run(tcx_entry(entry), skb, &res); > > + if (sch_ret < 0) { > > + kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS_ERROR); > > + *ret = NET_XMIT_DROP; > > + return NULL; > > + } > > egress_verdict: > > - switch (sch_ret) { > > + switch (res.verdict) { > > case TC_ACT_REDIRECT: > > /* No need to push/pop skb's mac_header here on egress! */ > > skb_do_redirect(skb);