> > Lorenzo Bianconi wrote: > > For XDP frames split over multiple buffers, the xdp_md->data and > > xdp_md->data_end pointers will point to the start and end of the first > > fragment only. bpf_xdp_adjust_data can be used to access subsequent > > fragments by moving the data pointers. To use, an XDP program can call > > this helper with the byte offset of the packet payload that > > it wants to access; the helper will move xdp_md->data and xdp_md->data_end > > so they point to the requested payload offset and to the end of the > > fragment containing this byte offset, and return the byte offset of the > > start of the fragment. > > To move back to the beginning of the packet, simply call the > > helper with an offset of '0'. > > Note also that the helpers that modify the packet boundaries > > (bpf_xdp_adjust_head(), bpf_xdp_adjust_tail() and > > bpf_xdp_adjust_meta()) will fail if the pointers have been > > moved; it is the responsibility of the BPF program to move them > > back before using these helpers. > > I'm ok with this for a first iteration; I guess with more work we > can make the helpers use the updated pointers, though. > > > > > Suggested-by: John Fastabend <john.fastabend@xxxxxxxxx> > > Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx> > > Overall looks good; a couple of small nits/questions below. Thanks! 
> > > --- > > include/net/xdp.h | 8 +++++ > > include/uapi/linux/bpf.h | 32 ++++++++++++++++++ > > net/bpf/test_run.c | 8 +++++ > > net/core/filter.c | 62 +++++++++++++++++++++++++++++++++- > > tools/include/uapi/linux/bpf.h | 32 ++++++++++++++++++ > > 5 files changed, 141 insertions(+), 1 deletion(-) > > > > diff --git a/include/net/xdp.h b/include/net/xdp.h > > index cdaecf8d4d61..ce4764c7cd40 100644 > > --- a/include/net/xdp.h > > +++ b/include/net/xdp.h > > @@ -82,6 +82,11 @@ struct xdp_buff { > > struct xdp_txq_info *txq; > > u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ > > u16 flags; /* supported values defined in xdp_flags */ > > + /* xdp multi-buff metadata used for frags iteration */ > > + struct { > > + u16 headroom; /* frame headroom: data - data_hard_start */ > > + u16 headlen; /* first buffer length: data_end - data */ > > + } mb; > > }; > > > > static __always_inline bool xdp_buff_is_mb(struct xdp_buff *xdp) > > @@ -127,6 +132,9 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, > > xdp->data = data; > > xdp->data_end = data + data_len; > > xdp->data_meta = meta_valid ? data : data + 1; > > + /* mb metadata for frags iteration */ > > + xdp->mb.headroom = headroom; > > + xdp->mb.headlen = data_len; > > } > > > > /* Reserve memory area at end-of data area. > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index 9e2c3b12ea49..a7b5185a718a 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -4877,6 +4877,37 @@ union bpf_attr { > > * Get the total size of a given xdp buff (linear and paged area) > > * Return > > * The total size of a given xdp buffer. > > + * > > + * long bpf_xdp_adjust_data(struct xdp_buff *xdp_md, u32 offset) > > + * Description > > + * For XDP frames split over multiple buffers, the > > + * *xdp_md*\ **->data** and*xdp_md *\ **->data_end** pointers > ^^^^ > missing space? ack, right. I will fix it. 
> > > + * will point to the start and end of the first fragment only. > > + * This helper can be used to access subsequent fragments by > > + * moving the data pointers. To use, an XDP program can call > > + * this helper with the byte offset of the packet payload that > > + * it wants to access; the helper will move *xdp_md*\ **->data** > > + * and *xdp_md *\ **->data_end** so they point to the requested > > + * payload offset and to the end of the fragment containing this > > + * byte offset, and return the byte offset of the start of the > > + * fragment. > > + * To move back to the beginning of the packet, simply call the > > + * helper with an offset of '0'. > > + * Note also that the helpers that modify the packet boundaries > > + * (*bpf_xdp_adjust_head()*, *bpf_xdp_adjust_tail()* and > > + * *bpf_xdp_adjust_meta()*) will fail if the pointers have been > > + * moved; it is the responsibility of the BPF program to move them > > + * back before using these helpers. > > + * > > + * A call to this helper is susceptible to change the underlying > > + * packet buffer. Therefore, at load time, all checks on pointers > > + * previously done by the verifier are invalidated and must be > > + * performed again, if the helper is used in combination with > > + * direct packet access. > > + * Return > > + * offset between the beginning of the current fragment and > > + * original *xdp_md*\ **->data** on success, or a negative error > > + * in case of failure. 
> > */ > > #define __BPF_FUNC_MAPPER(FN) \ > > FN(unspec), \ > > @@ -5055,6 +5086,7 @@ union bpf_attr { > > FN(get_func_ip), \ > > FN(get_attach_cookie), \ > > FN(xdp_get_buff_len), \ > > + FN(xdp_adjust_data), \ > > /* */ > > > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c > > index 869dcf23a1ca..f09c2c8c0d6c 100644 > > --- a/net/bpf/test_run.c > > +++ b/net/bpf/test_run.c > > @@ -757,6 +757,8 @@ static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) > > } > > > > xdp->data = xdp->data_meta + xdp_md->data; > > + xdp->mb.headroom = xdp->data - xdp->data_hard_start; > > + xdp->mb.headlen = xdp->data_end - xdp->data; > > return 0; > > > > free_dev: > > @@ -871,6 +873,12 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, > > if (ret) > > goto out; > > > > + /* data pointers need to be reset after frag iteration */ > > + if (unlikely(xdp.data_hard_start + xdp.mb.headroom != xdp.data)) { > > + ret = -EFAULT; > > + goto out; > > + } > > + > > size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size; > > ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, > > retval, duration); > > diff --git a/net/core/filter.c b/net/core/filter.c > > index 2122c00c680f..ed2a6632adce 100644 > > --- a/net/core/filter.c > > +++ b/net/core/filter.c > > @@ -3827,6 +3827,10 @@ BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) > > void *data_start = xdp_frame_end + metalen; > > void *data = xdp->data + offset; > > > > + /* data pointers need to be reset after frag iteration */ > > + if (unlikely(xdp->data_hard_start + xdp->mb.headroom != xdp->data)) > > + return -EINVAL; > > -EFAULT? It might be nice if error code is different from below > for debugging? 
ack, I will fix it in v13 > > > + > > if (unlikely(data < data_start || > > data > xdp->data_end - ETH_HLEN)) > > return -EINVAL; > > @@ -3836,6 +3840,9 @@ BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) > > xdp->data_meta, metalen); > > xdp->data_meta += offset; > > xdp->data = data; > > + /* update metada for multi-buff frag iteration */ > > + xdp->mb.headroom = xdp->data - xdp->data_hard_start; > > + xdp->mb.headlen = xdp->data_end - xdp->data; > > > > return 0; > > } > > @@ -3910,6 +3917,10 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) > > void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ > > void *data_end = xdp->data_end + offset; > > > > + /* data pointer needs to be reset after frag iteration */ > > + if (unlikely(xdp->data + xdp->mb.headlen != xdp->data_end)) > > + return -EINVAL; > > EFAULT? ack, I will fix it in v13 > > > + > > if (unlikely(xdp_buff_is_mb(xdp))) > > return bpf_xdp_mb_adjust_tail(xdp, offset); > > > > @@ -3949,6 +3960,10 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) > > void *meta = xdp->data_meta + offset; > > unsigned long metalen = xdp->data - meta; > > > > + /* data pointer needs to be reset after frag iteration */ > > + if (unlikely(xdp->data_hard_start + xdp->mb.headroom != xdp->data)) > > + return -EINVAL; > > same comment. ack, I will fix it in v13 > > > if (xdp_data_meta_unsupported(xdp)) > > return -ENOTSUPP; > > if (unlikely(meta < xdp_frame_end || > > @@ -3970,6 +3985,48 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { > > .arg2_type = ARG_ANYTHING, > > }; > > > > +BPF_CALL_2(bpf_xdp_adjust_data, struct xdp_buff *, xdp, u32, offset) > > +{ > > + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); > > + u32 base_offset = xdp->mb.headlen; > > + int i; > > + > > + if (!xdp_buff_is_mb(xdp) || offset > sinfo->xdp_frags_size) > > + return -EINVAL; > > Do we need to error this? 
If it's not mb we can just return the same > as offset==0? ack, we can do something like: u32 max_offset = xdp->mb.headlen; if (xdp_buff_is_mb(xdp)) max_offset += sinfo->xdp_frags_size; if (offset > max_offset) return -EINVAL; What do you think? Regards, Lorenzo > > > + > > + if (offset < xdp->mb.headlen) { > > + /* linear area */ > > + xdp->data = xdp->data_hard_start + xdp->mb.headroom + offset; > > + xdp->data_end = xdp->data_hard_start + xdp->mb.headroom + > > + xdp->mb.headlen; > > + return 0; > > + } > > + > > + for (i = 0; i < sinfo->nr_frags; i++) { > > + /* paged area */ > > + skb_frag_t *frag = &sinfo->frags[i]; > > + unsigned int size = skb_frag_size(frag); > > + > > + if (offset < base_offset + size) { > > + u8 *addr = skb_frag_address(frag); > > + > > + xdp->data = addr + offset - base_offset; > > + xdp->data_end = addr + size; > > + break; > > + } > > + base_offset += size; > > + } > > + return base_offset; > > +} >