The current tc-bpf@ingress reads and writes the __sk_buff->tstamp as a (rcv) timestamp. This patch is to be backward compatible with the (rcv) timestamp expectation when the skb->tstamp has a mono delivery_time. If needed, the patch first saves the mono delivery_time. Depending on the static key "netstamp_needed_key", it then resets the skb->tstamp to either 0 or ktime_get_real() before running the tc-bpf@ingress. After the tc-bpf prog returns, if the (rcv) timestamp in skb->tstamp has not been changed, it will restore the earlier saved mono delivery_time. The current logic to run tc-bpf@ingress is refactored to a new bpf_prog_run_at_ingress() function and shared between cls_bpf and act_bpf. The above new delivery_time save/restore logic is also done together in this function. Signed-off-by: Martin KaFai Lau <kafai@xxxxxx> --- include/linux/filter.h | 28 ++++++++++++++++++++++++++++ net/sched/act_bpf.c | 5 +---- net/sched/cls_bpf.c | 6 +----- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index d23e999dc032..e43e1701a80e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -699,6 +699,34 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) cb->data_end = skb->data + skb_headlen(skb); } +static __always_inline u32 bpf_prog_run_at_ingress(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + ktime_t tstamp, saved_mono_dtime = 0; + int filter_res; + + if (unlikely(skb->mono_delivery_time)) { + saved_mono_dtime = skb->tstamp; + skb->mono_delivery_time = 0; + if (static_branch_unlikely(&netstamp_needed_key)) + skb->tstamp = tstamp = ktime_get_real(); + else + skb->tstamp = tstamp = 0; + } + + /* It is safe to push/pull even if skb_shared() */ + __skb_push(skb, skb->mac_len); + bpf_compute_data_pointers(skb); + filter_res = bpf_prog_run(prog, skb); + __skb_pull(skb, skb->mac_len); + + /* __sk_buff->tstamp was not changed, restore the delivery_time */ + if 
(unlikely(saved_mono_dtime) && skb_tstamp(skb) == tstamp) + skb_set_delivery_time(skb, saved_mono_dtime, true); + + return filter_res; +} + /* Similar to bpf_compute_data_pointers(), except that save orginal * data in cb->data and cb->meta_data for restore. */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index a77d8908e737..14c3bd0a5088 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -45,10 +45,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, filter = rcu_dereference(prog->filter); if (at_ingress) { - __skb_push(skb, skb->mac_len); - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(filter, skb); - __skb_pull(skb, skb->mac_len); + filter_res = bpf_prog_run_at_ingress(filter, skb); } else { bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index df19a847829e..036b2e1f74af 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -93,11 +93,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (tc_skip_sw(prog->gen_flags)) { filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0; } else if (at_ingress) { - /* It is safe to push/pull even if skb_shared() */ - __skb_push(skb, skb->mac_len); - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(prog->filter, skb); - __skb_pull(skb, skb->mac_len); + filter_res = bpf_prog_run_at_ingress(prog->filter, skb); } else { bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(prog->filter, skb); -- 2.30.2