On 2/21/20 1:23 PM, Jason Wang wrote: > > On 2020/2/20 下午4:55, Yuya Kusakabe wrote: >> Implement support for transferring XDP meta data into skb for >> virtio_net driver; before calling into the program, xdp.data_meta points >> to xdp.data, where on program return with pass verdict, we call >> into skb_metadata_set(). >> >> Tested with the script at >> https://github.com/higebu/virtio_net-xdp-metadata-test. >> >> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") > > > I'm not sure this is correct since virtio-net claims to not support metadata by calling xdp_set_data_meta_invalid()? virtio_net doesn't support by calling xdp_set_data_meta_invalid() for now. https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/drivers/net/virtio_net.c?id=e42da4c62abb547d9c9138e0e7fcd1f36057b5e8#n686 https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/drivers/net/virtio_net.c?id=e42da4c62abb547d9c9138e0e7fcd1f36057b5e8#n842 And xdp_set_data_meta_invalid() are added by de8f3a83b0a0. $ git blame ./drivers/net/virtio_net.c | grep xdp_set_data_meta_invalid de8f3a83b0a0f (Daniel Borkmann 2017-09-25 02:25:51 +0200 686) xdp_set_data_meta_invalid(&xdp); de8f3a83b0a0f (Daniel Borkmann 2017-09-25 02:25:51 +0200 842) xdp_set_data_meta_invalid(&xdp); So I added `Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")` to the comment. > > >> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@xxxxxxxxx> >> --- >> v5: >> - page_to_skb(): copy vnet header if hdr_valid without checking metasize. >> - receive_small(): do not copy vnet header if xdp_prog is availavle. >> - __virtnet_xdp_xmit_one(): remove the xdp_set_data_meta_invalid(). >> - improve comments. >> v4: >> - improve commit message >> v3: >> - fix preserve the vnet header in receive_small(). >> v2: >> - keep copy untouched in page_to_skb(). >> - preserve the vnet header in receive_small(). >> - fix indentation. >> --- >> drivers/net/virtio_net.c | 54 ++++++++++++++++++++++++---------------- >> 1 file changed, 33 insertions(+), 21 deletions(-) >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >> index 2fe7a3188282..4ea0ae60c000 100644 >> --- a/drivers/net/virtio_net.c >> +++ b/drivers/net/virtio_net.c >> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> struct receive_queue *rq, >> struct page *page, unsigned int offset, >> unsigned int len, unsigned int truesize, >> - bool hdr_valid) >> + bool hdr_valid, unsigned int metasize) >> { >> struct sk_buff *skb; >> struct virtio_net_hdr_mrg_rxbuf *hdr; >> @@ -393,6 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> else >> hdr_padded_len = sizeof(struct padded_vnet_hdr); >> + /* hdr_valid means no XDP, so we can copy the vnet header */ >> if (hdr_valid) >> memcpy(hdr, p, hdr_len); >> @@ -405,6 +406,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> copy = skb_tailroom(skb); >> skb_put_data(skb, p, copy); >> + if (metasize) { >> + __skb_pull(skb, metasize); >> + skb_metadata_set(skb, metasize); >> + } >> + >> len -= copy; >> offset += copy; >> @@ -450,10 +456,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, >> struct virtio_net_hdr_mrg_rxbuf *hdr; >> int err; >> - /* virtqueue want to use data area in-front of packet */ >> - if (unlikely(xdpf->metasize > 0)) >> - return -EOPNOTSUPP; >> - >> if (unlikely(xdpf->headroom < vi->hdr_len)) >> return -EOVERFLOW; >> @@ -644,6 +646,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >> unsigned int delta = 0; >> struct page *xdp_page; >> int err; >> + unsigned int metasize = 0; >> len -= vi->hdr_len; >> stats->bytes += len; >> @@ -683,8 +686,8 @@ static struct sk_buff *receive_small(struct net_device *dev, >> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; >> xdp.data = xdp.data_hard_start + xdp_headroom; >> - xdp_set_data_meta_invalid(&xdp); >> xdp.data_end = xdp.data + len; >> + xdp.data_meta = xdp.data; >> xdp.rxq = &rq->xdp_rxq; >> orig_data = xdp.data; >> act = bpf_prog_run_xdp(xdp_prog, &xdp); >> @@ -695,6 +698,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >> /* Recalculate length in case bpf program changed it */ >> delta = orig_data - xdp.data; >> len = xdp.data_end - xdp.data; >> + metasize = xdp.data - xdp.data_meta; >> break; >> case XDP_TX: >> stats->xdp_tx++; >> @@ -735,11 +739,14 @@ static struct sk_buff *receive_small(struct net_device *dev, >> } >> skb_reserve(skb, headroom - delta); >> skb_put(skb, len); >> - if (!delta) { >> + if (!xdp_prog) { >> buf += header_offset; >> memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); >> } /* keep zeroed vnet hdr since packet was changed by bpf */ > > > I prefer to make this an independent patch and cc stable. > > Other looks good. > > Thanks I see. So I need to revert to delta from xdp_prog? Thank you. > >> + if (metasize) >> + skb_metadata_set(skb, metasize); >> + >> err: >> return skb; >> @@ -760,8 +767,8 @@ static struct sk_buff *receive_big(struct net_device *dev, >> struct virtnet_rq_stats *stats) >> { >> struct page *page = buf; >> - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, >> - PAGE_SIZE, true); >> + struct sk_buff *skb = >> + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); >> stats->bytes += len - vi->hdr_len; >> if (unlikely(!skb)) >> @@ -793,6 +800,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> unsigned int truesize; >> unsigned int headroom = mergeable_ctx_to_headroom(ctx); >> int err; >> + unsigned int metasize = 0; >> head_skb = NULL; >> stats->bytes += len - vi->hdr_len; >> @@ -839,8 +847,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> data = page_address(xdp_page) + offset; >> xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; >> xdp.data = data + vi->hdr_len; >> - xdp_set_data_meta_invalid(&xdp); >> xdp.data_end = xdp.data + (len - vi->hdr_len); >> + xdp.data_meta = xdp.data; >> xdp.rxq = &rq->xdp_rxq; >> act = bpf_prog_run_xdp(xdp_prog, &xdp); >> @@ -848,24 +856,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> switch (act) { >> case XDP_PASS: >> + metasize = xdp.data - xdp.data_meta; >> + >> /* recalculate offset to account for any header >> - * adjustments. Note other cases do not build an >> - * skb and avoid using offset >> + * adjustments and minus the metasize to copy the >> + * metadata in page_to_skb(). Note other cases do not >> + * build an skb and avoid using offset >> */ >> - offset = xdp.data - >> - page_address(xdp_page) - vi->hdr_len; >> + offset = xdp.data - page_address(xdp_page) - >> + vi->hdr_len - metasize; >> - /* recalculate len if xdp.data or xdp.data_end were >> - * adjusted >> + /* recalculate len if xdp.data, xdp.data_end or >> + * xdp.data_meta were adjusted >> */ >> - len = xdp.data_end - xdp.data + vi->hdr_len; >> + len = xdp.data_end - xdp.data + vi->hdr_len + metasize; >> /* We can only create skb based on xdp_page. */ >> if (unlikely(xdp_page != page)) { >> rcu_read_unlock(); >> put_page(page); >> - head_skb = page_to_skb(vi, rq, xdp_page, >> - offset, len, >> - PAGE_SIZE, false); >> + head_skb = page_to_skb(vi, rq, xdp_page, offset, >> + len, PAGE_SIZE, false, >> + metasize); >> return head_skb; >> } >> break; >> @@ -921,7 +932,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> goto err_skb; >> } >> - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); >> + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, >> + metasize); >> curr_skb = head_skb; >> if (unlikely(!curr_skb)) >