If sockmap enable strparser, there are lose offset info in sk_psock_skb_ingress. If the length determined by parse_msg function is not skb->len, the skb will be converted to sk_msg multiple times, and userspace app will get the data multiple times. Fix this by get the offset and length from strp_msg. And as Cong suggestion, add one bit in skb->_sk_redir to distinguish enable or disable strparser. Signed-off-by: Liu Jian <liujian56@xxxxxxxxxx> Acked-by: John Fastabend <john.fastabend@xxxxxxxxx> --- v1->v2: fix build error when disable CONFIG_BPF_STREAM_PARSER v2->v3: Add one bit in skb->_sk_redir to distinguish enable or disable strparser include/linux/skmsg.h | 18 ++++++++++++++-- net/core/skmsg.c | 48 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14ab0c0bc924..74eb0e97b4bd 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -508,8 +508,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) #if IS_ENABLED(CONFIG_NET_SOCK_MSG) -/* We only have one bit so far. */ -#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) +#define BPF_F_STRPARSER (1UL << 1) + +/* We only have two bit so far. */ +#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER) + +static inline bool skb_bpf_strparser(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return sk_redir & BPF_F_STRPARSER; +} + +static inline void skb_bpf_set_strparser(struct sk_buff *skb) +{ + skb->_sk_redir |= BPF_F_STRPARSER; +} static inline bool skb_bpf_ingress(const struct sk_buff *skb) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 2d6249b28928..2faf2fa110f2 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -494,6 +494,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, + u32 off, u32 len, struct sk_psock *psock, struct sock *sk, struct sk_msg *msg) @@ -507,11 +508,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, */ if (skb_linearize(skb)) return -EAGAIN; - num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); + num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); if (unlikely(num_sge < 0)) return num_sge; - copied = skb->len; + copied = len; msg->sg.start = 0; msg->sg.size = copied; msg->sg.end = num_sge; @@ -522,9 +523,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, return copied; } -static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb); +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, + u32 off, u32 len); -static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, + u32 off, u32 len) { struct sock *sk = psock->sk; struct sk_msg *msg; @@ -535,7 +538,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * correctly. */ if (unlikely(skb->sk == sk)) - return sk_psock_skb_ingress_self(psock, skb); + return sk_psock_skb_ingress_self(psock, skb, off, len); msg = sk_psock_create_ingress_msg(sk, skb); if (!msg) return -EAGAIN; @@ -547,7 +550,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * into user buffers. */ skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) kfree(msg); return err; @@ -557,7 +560,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * skb. In this case we do not need to check memory limits or skb_set_owner_r * because the skb is already accounted for here. */ -static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb) +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, + u32 off, u32 len) { struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); struct sock *sk = psock->sk; @@ -567,7 +571,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb return -EAGAIN; sk_msg_init(msg); skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) kfree(msg); return err; @@ -581,7 +585,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return -EAGAIN; return skb_send_sock(psock->sk, skb, off, len); } - return sk_psock_skb_ingress(psock, skb); + return sk_psock_skb_ingress(psock, skb, off, len); } static void sk_psock_skb_state(struct sk_psock *psock, @@ -604,6 +608,9 @@ static void sk_psock_backlog(struct work_struct *work) { struct sk_psock *psock = container_of(work, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + struct strp_msg *stm = NULL; +#endif struct sk_buff *skb = NULL; bool ingress; u32 len, off; @@ -624,6 +631,13 @@ static void sk_psock_backlog(struct work_struct *work) while ((skb = skb_dequeue(&psock->ingress_skb))) { len = skb->len; off = 0; +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + if (skb_bpf_strparser(skb)) { + stm = strp_msg(skb); + off = stm->offset; + len = stm->full_len; + } +#endif start: ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); @@ -930,6 +944,10 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, { struct sock *sk_other; int err = 0; +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + struct strp_msg *stm = NULL; +#endif + u32 len, off; switch (verdict) { case __SK_PASS: @@ -949,7 +967,16 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, * retrying later from workqueue. */ if (skb_queue_empty(&psock->ingress_skb)) { - err = sk_psock_skb_ingress_self(psock, skb); + len = skb->len; + off = 0; +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + if (skb_bpf_strparser(skb)) { + stm = strp_msg(skb); + off = stm->offset; + len = stm->full_len; + } +#endif + err = sk_psock_skb_ingress_self(psock, skb, off, len); } if (err < 0) { spin_lock_bh(&psock->ingress_lock); @@ -1018,6 +1045,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } + skb_bpf_set_strparser(skb); sk_psock_verdict_apply(psock, skb, ret); out: rcu_read_unlock(); -- 2.17.1