On Fri, Feb 21, 2020 at 05:36:08PM +0900, Yuya Kusakabe wrote: > On 2/21/20 1:23 PM, Jason Wang wrote: > > > > On 2020/2/20 下午4:55, Yuya Kusakabe wrote: > >> Implement support for transferring XDP meta data into skb for > >> virtio_net driver; before calling into the program, xdp.data_meta points > >> to xdp.data, where on program return with pass verdict, we call > >> into skb_metadata_set(). > >> > >> Tested with the script at > >> https://github.com/higebu/virtio_net-xdp-metadata-test. > >> > >> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") > > > > > > I'm not sure this is correct since virtio-net claims to not support metadata by calling xdp_set_data_meta_invalid()? > > virtio_net doesn't support by calling xdp_set_data_meta_invalid() for now. > > https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/drivers/net/virtio_net.c?id=e42da4c62abb547d9c9138e0e7fcd1f36057b5e8#n686 > https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/drivers/net/virtio_net.c?id=e42da4c62abb547d9c9138e0e7fcd1f36057b5e8#n842 > > And xdp_set_data_meta_invalid() are added by de8f3a83b0a0. > > $ git blame ./drivers/net/virtio_net.c | grep xdp_set_data_meta_invalid > de8f3a83b0a0f (Daniel Borkmann 2017-09-25 02:25:51 +0200 686) xdp_set_data_meta_invalid(&xdp); > de8f3a83b0a0f (Daniel Borkmann 2017-09-25 02:25:51 +0200 842) xdp_set_data_meta_invalid(&xdp); > > So I added `Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")` to the comment. Fixes basically means "must be backported to any kernel that has de8f3a83b0a0 in order to fix a bug". This looks more like a feature than a bug though, so I'm not sure Fixes is approproate. Correct me if I'm wrong. > > > > > >> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@xxxxxxxxx> > >> --- > >> v5: > >> - page_to_skb(): copy vnet header if hdr_valid without checking metasize. > >> - receive_small(): do not copy vnet header if xdp_prog is availavle. > >> - __virtnet_xdp_xmit_one(): remove the xdp_set_data_meta_invalid(). > >> - improve comments. > >> v4: > >> - improve commit message > >> v3: > >> - fix preserve the vnet header in receive_small(). > >> v2: > >> - keep copy untouched in page_to_skb(). > >> - preserve the vnet header in receive_small(). > >> - fix indentation. > >> --- > >> drivers/net/virtio_net.c | 54 ++++++++++++++++++++++++---------------- > >> 1 file changed, 33 insertions(+), 21 deletions(-) > >> > >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > >> index 2fe7a3188282..4ea0ae60c000 100644 > >> --- a/drivers/net/virtio_net.c > >> +++ b/drivers/net/virtio_net.c > >> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > >> struct receive_queue *rq, > >> struct page *page, unsigned int offset, > >> unsigned int len, unsigned int truesize, > >> - bool hdr_valid) > >> + bool hdr_valid, unsigned int metasize) > >> { > >> struct sk_buff *skb; > >> struct virtio_net_hdr_mrg_rxbuf *hdr; > >> @@ -393,6 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > >> else > >> hdr_padded_len = sizeof(struct padded_vnet_hdr); > >> + /* hdr_valid means no XDP, so we can copy the vnet header */ > >> if (hdr_valid) > >> memcpy(hdr, p, hdr_len); > >> @@ -405,6 +406,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > >> copy = skb_tailroom(skb); > >> skb_put_data(skb, p, copy); > >> + if (metasize) { > >> + __skb_pull(skb, metasize); > >> + skb_metadata_set(skb, metasize); > >> + } > >> + > >> len -= copy; > >> offset += copy; > >> @@ -450,10 +456,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, > >> struct virtio_net_hdr_mrg_rxbuf *hdr; > >> int err; > >> - /* virtqueue want to use data area in-front of packet */ > >> - if (unlikely(xdpf->metasize > 0)) > >> - return -EOPNOTSUPP; > >> - > >> if (unlikely(xdpf->headroom < vi->hdr_len)) > >> return -EOVERFLOW; > >> @@ -644,6 +646,7 @@ static struct sk_buff *receive_small(struct net_device *dev, > >> unsigned int delta = 0; > >> struct page *xdp_page; > >> int err; > >> + unsigned int metasize = 0; > >> len -= vi->hdr_len; > >> stats->bytes += len; > >> @@ -683,8 +686,8 @@ static struct sk_buff *receive_small(struct net_device *dev, > >> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; > >> xdp.data = xdp.data_hard_start + xdp_headroom; > >> - xdp_set_data_meta_invalid(&xdp); > >> xdp.data_end = xdp.data + len; > >> + xdp.data_meta = xdp.data; > >> xdp.rxq = &rq->xdp_rxq; > >> orig_data = xdp.data; > >> act = bpf_prog_run_xdp(xdp_prog, &xdp); > >> @@ -695,6 +698,7 @@ static struct sk_buff *receive_small(struct net_device *dev, > >> /* Recalculate length in case bpf program changed it */ > >> delta = orig_data - xdp.data; > >> len = xdp.data_end - xdp.data; > >> + metasize = xdp.data - xdp.data_meta; > >> break; > >> case XDP_TX: > >> stats->xdp_tx++; > >> @@ -735,11 +739,14 @@ static struct sk_buff *receive_small(struct net_device *dev, > >> } > >> skb_reserve(skb, headroom - delta); > >> skb_put(skb, len); > >> - if (!delta) { > >> + if (!xdp_prog) { > >> buf += header_offset; > >> memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); > >> } /* keep zeroed vnet hdr since packet was changed by bpf */ > > > > > > I prefer to make this an independent patch and cc stable. > > > > Other looks good. > > > > Thanks > > I see. So I need to revert to delta from xdp_prog? > > Thank you. > > > > >> + if (metasize) > >> + skb_metadata_set(skb, metasize); > >> + > >> err: > >> return skb; > >> @@ -760,8 +767,8 @@ static struct sk_buff *receive_big(struct net_device *dev, > >> struct virtnet_rq_stats *stats) > >> { > >> struct page *page = buf; > >> - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, > >> - PAGE_SIZE, true); > >> + struct sk_buff *skb = > >> + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); > >> stats->bytes += len - vi->hdr_len; > >> if (unlikely(!skb)) > >> @@ -793,6 +800,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > >> unsigned int truesize; > >> unsigned int headroom = mergeable_ctx_to_headroom(ctx); > >> int err; > >> + unsigned int metasize = 0; > >> head_skb = NULL; > >> stats->bytes += len - vi->hdr_len; > >> @@ -839,8 +847,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > >> data = page_address(xdp_page) + offset; > >> xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; > >> xdp.data = data + vi->hdr_len; > >> - xdp_set_data_meta_invalid(&xdp); > >> xdp.data_end = xdp.data + (len - vi->hdr_len); > >> + xdp.data_meta = xdp.data; > >> xdp.rxq = &rq->xdp_rxq; > >> act = bpf_prog_run_xdp(xdp_prog, &xdp); > >> @@ -848,24 +856,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > >> switch (act) { > >> case XDP_PASS: > >> + metasize = xdp.data - xdp.data_meta; > >> + > >> /* recalculate offset to account for any header > >> - * adjustments. Note other cases do not build an > >> - * skb and avoid using offset > >> + * adjustments and minus the metasize to copy the > >> + * metadata in page_to_skb(). Note other cases do not > >> + * build an skb and avoid using offset > >> */ > >> - offset = xdp.data - > >> - page_address(xdp_page) - vi->hdr_len; > >> + offset = xdp.data - page_address(xdp_page) - > >> + vi->hdr_len - metasize; > >> - /* recalculate len if xdp.data or xdp.data_end were > >> - * adjusted > >> + /* recalculate len if xdp.data, xdp.data_end or > >> + * xdp.data_meta were adjusted > >> */ > >> - len = xdp.data_end - xdp.data + vi->hdr_len; > >> + len = xdp.data_end - xdp.data + vi->hdr_len + metasize; > >> /* We can only create skb based on xdp_page. */ > >> if (unlikely(xdp_page != page)) { > >> rcu_read_unlock(); > >> put_page(page); > >> - head_skb = page_to_skb(vi, rq, xdp_page, > >> - offset, len, > >> - PAGE_SIZE, false); > >> + head_skb = page_to_skb(vi, rq, xdp_page, offset, > >> + len, PAGE_SIZE, false, > >> + metasize); > >> return head_skb; > >> } > >> break; > >> @@ -921,7 +932,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > >> goto err_skb; > >> } > >> - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); > >> + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, > >> + metasize); > >> curr_skb = head_skb; > >> if (unlikely(!curr_skb)) > >