On 2/5/20 1:10 PM, Jason Wang wrote: > > On 2020/2/4 下午3:16, Yuya Kusakabe wrote: >> Implement support for transferring XDP meta data into skb for >> virtio_net driver; before calling into the program, xdp.data_meta points >> to xdp.data and copy vnet header to the front of xdp.data_hard_start >> to avoid overwriting it, where on program return with pass verdict, >> we call into skb_metadata_set(). >> >> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") >> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@xxxxxxxxx> >> --- >> drivers/net/virtio_net.c | 47 ++++++++++++++++++++++++++++------------ >> 1 file changed, 33 insertions(+), 14 deletions(-) >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >> index 2fe7a3188282..5fdd6ea0e3f1 100644 >> --- a/drivers/net/virtio_net.c >> +++ b/drivers/net/virtio_net.c >> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> struct receive_queue *rq, >> struct page *page, unsigned int offset, >> unsigned int len, unsigned int truesize, >> - bool hdr_valid) >> + bool hdr_valid, unsigned int metasize) >> { >> struct sk_buff *skb; >> struct virtio_net_hdr_mrg_rxbuf *hdr; >> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> else >> hdr_padded_len = sizeof(struct padded_vnet_hdr); >> - if (hdr_valid) >> + if (hdr_valid && !metasize) > > > hdr_valid means no XDP, so I think we can remove the check for metasize here and add a comment instead? I will fix it on next patch. 
> >> memcpy(hdr, p, hdr_len); >> len -= hdr_len; >> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> copy = skb_tailroom(skb); >> skb_put_data(skb, p, copy); >> + if (metasize) { >> + __skb_pull(skb, metasize); >> + skb_metadata_set(skb, metasize); >> + } >> + >> len -= copy; >> offset += copy; >> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >> unsigned int delta = 0; >> struct page *xdp_page; >> int err; >> + unsigned int metasize = 0; >> len -= vi->hdr_len; >> stats->bytes += len; >> @@ -683,10 +689,15 @@ static struct sk_buff *receive_small(struct net_device *dev, >> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; >> xdp.data = xdp.data_hard_start + xdp_headroom; >> - xdp_set_data_meta_invalid(&xdp); >> xdp.data_end = xdp.data + len; >> + xdp.data_meta = xdp.data; >> xdp.rxq = &rq->xdp_rxq; >> orig_data = xdp.data; >> + /* Copy the vnet header to the front of data_hard_start to avoid >> + * overwriting it by XDP meta data. >> + */ >> + memcpy(xdp.data_hard_start - vi->hdr_len, >> + xdp.data - vi->hdr_len, vi->hdr_len); > > > I think we don't need this. And it looks to me there's a bug in the current code. > > Commit 436c9453a1ac0 ("virtio-net: keep vnet header zeroed after processing XDP") leaves a corner case in receive_small(), which still uses: > > if (!delta) { > buf += header_offset; > memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); > } /* keep zeroed vnet hdr since packet was changed by bpf */ > > This seems wrong; we need to check xdp_prog instead of delta. > > With this fixed, there's no need to care about the vnet header here since we don't know whether or not the packet is modified by XDP. I missed this commit. I understand this is the reason for "Awaiting Upstream". 
> >> act = bpf_prog_run_xdp(xdp_prog, &xdp); >> stats->xdp_packets++; >> @@ -695,9 +706,11 @@ static struct sk_buff *receive_small(struct net_device *dev, >> /* Recalculate length in case bpf program changed it */ >> delta = orig_data - xdp.data; >> len = xdp.data_end - xdp.data; >> + metasize = xdp.data - xdp.data_meta; >> break; >> case XDP_TX: >> stats->xdp_tx++; >> + xdp.data_meta = xdp.data; > > > I think we should remove the xdp_set_data_meta_invalid() at least? And move this initialization just after xdp.data is initialized. > > Testing receive_small() requires disabling mrg_rxbuf, guest_tso4, guest_tso6 and guest_ufo on the qemu command line. > > >> xdpf = convert_to_xdp_frame(&xdp); >> if (unlikely(!xdpf)) >> goto err_xdp; >> @@ -736,10 +749,12 @@ static struct sk_buff *receive_small(struct net_device *dev, >> skb_reserve(skb, headroom - delta); >> skb_put(skb, len); >> if (!delta) { > > > Need to check xdp_prog (need another patch). I will fix it in the next patch. > > >> - buf += header_offset; >> - memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); >> + memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len); >> } /* keep zeroed vnet hdr since packet was changed by bpf */ >> + if (metasize) >> + skb_metadata_set(skb, metasize); >> + >> err: >> return skb; >> @@ -760,8 +775,8 @@ static struct sk_buff *receive_big(struct net_device *dev, >> struct virtnet_rq_stats *stats) >> { >> struct page *page = buf; >> - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, >> - PAGE_SIZE, true); >> + struct sk_buff *skb = >> + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); >> stats->bytes += len - vi->hdr_len; >> if (unlikely(!skb)) >> @@ -793,6 +808,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> unsigned int truesize; >> unsigned int headroom = mergeable_ctx_to_headroom(ctx); >> int err; >> + unsigned int metasize = 0; >> head_skb = NULL; >> stats->bytes += len - vi->hdr_len; >> @@ -839,8 +855,8 @@ static struct sk_buff 
*receive_mergeable(struct net_device *dev, >> data = page_address(xdp_page) + offset; >> xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; >> xdp.data = data + vi->hdr_len; >> - xdp_set_data_meta_invalid(&xdp); >> xdp.data_end = xdp.data + (len - vi->hdr_len); >> + xdp.data_meta = xdp.data; >> xdp.rxq = &rq->xdp_rxq; >> act = bpf_prog_run_xdp(xdp_prog, &xdp); >> @@ -852,8 +868,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> * adjustments. Note other cases do not build an >> * skb and avoid using offset >> */ >> - offset = xdp.data - >> - page_address(xdp_page) - vi->hdr_len; >> + metasize = xdp.data - xdp.data_meta; >> + offset = xdp.data - page_address(xdp_page) - >> + vi->hdr_len - metasize; >> /* recalculate len if xdp.data or xdp.data_end were >> * adjusted >> @@ -863,14 +880,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> if (unlikely(xdp_page != page)) { >> rcu_read_unlock(); >> put_page(page); >> - head_skb = page_to_skb(vi, rq, xdp_page, >> - offset, len, >> - PAGE_SIZE, false); >> + head_skb = page_to_skb(vi, rq, xdp_page, offset, >> + len, PAGE_SIZE, false, >> + metasize); >> return head_skb; >> } >> break; >> case XDP_TX: >> stats->xdp_tx++; >> + xdp.data_meta = xdp.data; > > > Any reason for doing this? XDP_TX can not support metadata for now, because if metasize > 0, __virtnet_xdp_xmit_one() returns EOPNOTSUPP. 
static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct send_queue *sq, struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; int err; /* virtqueue want to use data area in-front of packet */ if (unlikely(xdpf->metasize > 0)) return -EOPNOTSUPP; > > Thanks > > >> xdpf = convert_to_xdp_frame(&xdp); >> if (unlikely(!xdpf)) >> goto err_xdp; >> @@ -921,7 +939,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> goto err_skb; >> } >> - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); >> + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, >> + metasize); >> curr_skb = head_skb; >> if (unlikely(!curr_skb)) > Thank you for your kind review.