On 23 Jun 2021, at 1:49, John Fastabend wrote: > Lorenzo Bianconi wrote: >> From: Eelco Chaudron <echaudro@xxxxxxxxxx> >> >> This patch adds support for multi-buffer for the following helpers: >> - bpf_xdp_output() >> - bpf_perf_event_output() >> >> Signed-off-by: Eelco Chaudron <echaudro@xxxxxxxxxx> >> Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx> >> --- > > Ah ok so at least xdp_output will work with all bytes. But this is > getting close to having access into the frags so I think doing > the last bit shouldn't be too hard? Guess you are talking about multi-buffer access in the XDP program? I did suggest an API a while back, https://lore.kernel.org/bpf/FD3E6E08-DE78-4FBA-96F6-646C93E88631@xxxxxxxxxx/ but I had/have not time to work on it. Guess the difficult part is to convince the verifier to allow the data to be accessed. > >> kernel/trace/bpf_trace.c | 3 + >> net/core/filter.c | 72 +++++++++- >> .../selftests/bpf/prog_tests/xdp_bpf2bpf.c | 127 ++++++++++++------ >> .../selftests/bpf/progs/test_xdp_bpf2bpf.c | 2 +- >> 4 files changed, 160 insertions(+), 44 deletions(-) >> >> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c >> index d2d7cf6cfe83..ee926ec64f78 100644 >> --- a/kernel/trace/bpf_trace.c >> +++ b/kernel/trace/bpf_trace.c >> @@ -1365,6 +1365,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { >> >> extern const struct bpf_func_proto bpf_skb_output_proto; >> extern const struct bpf_func_proto bpf_xdp_output_proto; >> +extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto; >> >> BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, >> struct bpf_map *, map, u64, flags) >> @@ -1460,6 +1461,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) >> return &bpf_sock_from_file_proto; >> case BPF_FUNC_get_socket_cookie: >> return &bpf_get_socket_ptr_cookie_proto; >> + case BPF_FUNC_xdp_get_buff_len: >> + return &bpf_xdp_get_buff_len_trace_proto; >> #endif >> case BPF_FUNC_seq_printf: >> return prog->expected_attach_type == BPF_TRACE_ITER ? >> diff --git a/net/core/filter.c b/net/core/filter.c >> index b0855f2d4726..f7211b7908a9 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -3939,6 +3939,15 @@ const struct bpf_func_proto bpf_xdp_get_buff_len_proto = { >> .arg1_type = ARG_PTR_TO_CTX, >> }; >> >> +BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff) >> + >> +const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = { >> + .func = bpf_xdp_get_buff_len, >> + .gpl_only = false, >> + .arg1_type = ARG_PTR_TO_BTF_ID, >> + .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0], >> +}; >> + >> BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) >> { >> void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ >> @@ -4606,10 +4615,56 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = { >> }; >> #endif >> >> -static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, >> +static unsigned long bpf_xdp_copy(void *dst_buff, const void *ctx, >> unsigned long off, unsigned long len) >> { >> - memcpy(dst_buff, src_buff + off, len); >> + struct xdp_buff *xdp = (struct xdp_buff *)ctx; >> + struct skb_shared_info *sinfo; >> + unsigned long base_len; >> + >> + if (likely(!xdp_buff_is_mb(xdp))) { >> + memcpy(dst_buff, xdp->data + off, len); >> + return 0; >> + } >> + >> + base_len = xdp->data_end - xdp->data; >> + sinfo = xdp_get_shared_info_from_buff(xdp); >> + do { >> + const void *src_buff = NULL; >> + unsigned long copy_len = 0; >> + >> + if (off < base_len) { >> + src_buff = xdp->data + off; >> + copy_len = min(len, base_len - off); >> + } else { >> + unsigned long frag_off_total = base_len; >> + int i; >> + >> + for (i = 0; i < sinfo->nr_frags; i++) { >> + skb_frag_t *frag = &sinfo->frags[i]; >> + unsigned long frag_len, frag_off; >> + >> + frag_len = skb_frag_size(frag); >> + frag_off = off - frag_off_total; >> + if (frag_off < frag_len) { >> + src_buff = skb_frag_address(frag) + >> + frag_off; >> + copy_len = min(len, >> + frag_len - frag_off); >> + break; >> + } >> + frag_off_total += frag_len; >> + } >> + } >> + if (!src_buff) >> + break; >> + >> + memcpy(dst_buff, src_buff, copy_len); >> + off += copy_len; >> + len -= copy_len; >> + dst_buff += copy_len; > > This block reads odd to be because it requires looping over the frags > multiple times? Why not something like this, > > if (off < base_len) { > src_buff = xdp->data + off > copy_len = min... > memcpy(dst_buff, src_buff, copy_len) > off += copylen > len -= copylen > dst_buff += copylen; > } > > for (i = 0; i , nr_frags; i++) { > frag = ... > ... > if frag_off < fraglen > ... > memcpy() > update(off, len, dst_buff) > } > > > Maybe use a helper to set off,len and dst_buff if worried about the > duplication. Seems cleaner than walking through 0..n-1 frags for > each copy. You are right it looks odd, will re-write this in the next iteration. >> + } while (len); >> + >> return 0; >> }