Lorenzo Bianconi wrote: > For XDP frames split over multiple buffers, the xdp_md->data and > xdp_md->data_end pointers will point to the start and end of the first > fragment only. bpf_xdp_adjust_data can be used to access subsequent > fragments by moving the data pointers. To use, an XDP program can call > this helper with the byte offset of the packet payload that > it wants to access; the helper will move xdp_md->data and xdp_md ->data_end > so they point to the requested payload offset and to the end of the > fragment containing this byte offset, and return the byte offset of the > start of the fragment. > To move back to the beginning of the packet, simply call the > helper with an offset of '0'. > Note also that the helpers that modify the packet boundaries > (bpf_xdp_adjust_head(), bpf_xdp_adjust_tail() and > bpf_xdp_adjust_meta()) will fail if the pointers have been > moved; it is the responsibility of the BPF program to move them > back before using these helpers. I'm OK with this for a first iteration; I guess with more work we can make the helpers use the updated pointers, though. > > Suggested-by: John Fastabend <john.fastabend@xxxxxxxxx> > Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx> Overall this looks good; a couple of small nits/questions below. Thanks! 
> --- > include/net/xdp.h | 8 +++++ > include/uapi/linux/bpf.h | 32 ++++++++++++++++++ > net/bpf/test_run.c | 8 +++++ > net/core/filter.c | 62 +++++++++++++++++++++++++++++++++- > tools/include/uapi/linux/bpf.h | 32 ++++++++++++++++++ > 5 files changed, 141 insertions(+), 1 deletion(-) > > diff --git a/include/net/xdp.h b/include/net/xdp.h > index cdaecf8d4d61..ce4764c7cd40 100644 > --- a/include/net/xdp.h > +++ b/include/net/xdp.h > @@ -82,6 +82,11 @@ struct xdp_buff { > struct xdp_txq_info *txq; > u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ > u16 flags; /* supported values defined in xdp_flags */ > + /* xdp multi-buff metadata used for frags iteration */ > + struct { > + u16 headroom; /* frame headroom: data - data_hard_start */ > + u16 headlen; /* first buffer length: data_end - data */ > + } mb; > }; > > static __always_inline bool xdp_buff_is_mb(struct xdp_buff *xdp) > @@ -127,6 +132,9 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, > xdp->data = data; > xdp->data_end = data + data_len; > xdp->data_meta = meta_valid ? data : data + 1; > + /* mb metadata for frags iteration */ > + xdp->mb.headroom = headroom; > + xdp->mb.headlen = data_len; > } > > /* Reserve memory area at end-of data area. > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 9e2c3b12ea49..a7b5185a718a 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -4877,6 +4877,37 @@ union bpf_attr { > * Get the total size of a given xdp buff (linear and paged area) > * Return > * The total size of a given xdp buffer. > + * > + * long bpf_xdp_adjust_data(struct xdp_buff *xdp_md, u32 offset) > + * Description > + * For XDP frames split over multiple buffers, the > + * *xdp_md*\ **->data** and*xdp_md *\ **->data_end** pointers ^^^^ missing space? > + * will point to the start and end of the first fragment only. > + * This helper can be used to access subsequent fragments by > + * moving the data pointers. 
To use, an XDP program can call > + * this helper with the byte offset of the packet payload that > + * it wants to access; the helper will move *xdp_md*\ **->data** > + * and *xdp_md *\ **->data_end** so they point to the requested > + * payload offset and to the end of the fragment containing this > + * byte offset, and return the byte offset of the start of the > + * fragment. > + * To move back to the beginning of the packet, simply call the > + * helper with an offset of '0'. > + * Note also that the helpers that modify the packet boundaries > + * (*bpf_xdp_adjust_head()*, *bpf_xdp_adjust_tail()* and > + * *bpf_xdp_adjust_meta()*) will fail if the pointers have been > + * moved; it is the responsibility of the BPF program to move them > + * back before using these helpers. > + * > + * A call to this helper is susceptible to change the underlying > + * packet buffer. Therefore, at load time, all checks on pointers > + * previously done by the verifier are invalidated and must be > + * performed again, if the helper is used in combination with > + * direct packet access. > + * Return > + * offset between the beginning of the current fragment and > + * original *xdp_md*\ **->data** on success, or a negative error > + * in case of failure. 
> */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -5055,6 +5086,7 @@ union bpf_attr { > FN(get_func_ip), \ > FN(get_attach_cookie), \ > FN(xdp_get_buff_len), \ > + FN(xdp_adjust_data), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c > index 869dcf23a1ca..f09c2c8c0d6c 100644 > --- a/net/bpf/test_run.c > +++ b/net/bpf/test_run.c > @@ -757,6 +757,8 @@ static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) > } > > xdp->data = xdp->data_meta + xdp_md->data; > + xdp->mb.headroom = xdp->data - xdp->data_hard_start; > + xdp->mb.headlen = xdp->data_end - xdp->data; > return 0; > > free_dev: > @@ -871,6 +873,12 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, > if (ret) > goto out; > > + /* data pointers need to be reset after frag iteration */ > + if (unlikely(xdp.data_hard_start + xdp.mb.headroom != xdp.data)) { > + ret = -EFAULT; > + goto out; > + } > + > size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size; > ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, > retval, duration); > diff --git a/net/core/filter.c b/net/core/filter.c > index 2122c00c680f..ed2a6632adce 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3827,6 +3827,10 @@ BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) > void *data_start = xdp_frame_end + metalen; > void *data = xdp->data + offset; > > + /* data pointers need to be reset after frag iteration */ > + if (unlikely(xdp->data_hard_start + xdp->mb.headroom != xdp->data)) > + return -EINVAL; -EFAULT? It might be nice if this error code were different from the -EINVAL returned by the check below, to make debugging easier. 
> + > if (unlikely(data < data_start || > data > xdp->data_end - ETH_HLEN)) > return -EINVAL; > @@ -3836,6 +3840,9 @@ BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) > xdp->data_meta, metalen); > xdp->data_meta += offset; > xdp->data = data; > + /* update metada for multi-buff frag iteration */ > + xdp->mb.headroom = xdp->data - xdp->data_hard_start; > + xdp->mb.headlen = xdp->data_end - xdp->data; > > return 0; > } > @@ -3910,6 +3917,10 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) > void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ > void *data_end = xdp->data_end + offset; > > + /* data pointer needs to be reset after frag iteration */ > + if (unlikely(xdp->data + xdp->mb.headlen != xdp->data_end)) > + return -EINVAL; -EFAULT here as well? > + > if (unlikely(xdp_buff_is_mb(xdp))) > return bpf_xdp_mb_adjust_tail(xdp, offset); > > @@ -3949,6 +3960,10 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) > void *meta = xdp->data_meta + offset; > unsigned long metalen = xdp->data - meta; > > + /* data pointer needs to be reset after frag iteration */ > + if (unlikely(xdp->data_hard_start + xdp->mb.headroom != xdp->data)) > + return -EINVAL; Same comment here (-EFAULT?). > if (xdp_data_meta_unsupported(xdp)) > return -ENOTSUPP; > if (unlikely(meta < xdp_frame_end || > @@ -3970,6 +3985,48 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { > .arg2_type = ARG_ANYTHING, > }; > > +BPF_CALL_2(bpf_xdp_adjust_data, struct xdp_buff *, xdp, u32, offset) > +{ > + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); > + u32 base_offset = xdp->mb.headlen; > + int i; > + > + if (!xdp_buff_is_mb(xdp) || offset > sinfo->xdp_frags_size) > + return -EINVAL; Do we need to return an error here? If it's not mb, we can just return the same as for offset==0? 
> + > + if (offset < xdp->mb.headlen) { > + /* linear area */ > + xdp->data = xdp->data_hard_start + xdp->mb.headroom + offset; > + xdp->data_end = xdp->data_hard_start + xdp->mb.headroom + > + xdp->mb.headlen; > + return 0; > + } > + > + for (i = 0; i < sinfo->nr_frags; i++) { > + /* paged area */ > + skb_frag_t *frag = &sinfo->frags[i]; > + unsigned int size = skb_frag_size(frag); > + > + if (offset < base_offset + size) { > + u8 *addr = skb_frag_address(frag); > + > + xdp->data = addr + offset - base_offset; > + xdp->data_end = addr + size; > + break; > + } > + base_offset += size; > + } > + return base_offset; > +}