> On Mon, 4 Dec 2023 16:43:56 +0100 Lorenzo Bianconi wrote:
> > yes, I was thinking about it actually.
> > I ran some preliminary tests to check if we are introducing any
> > performance penalties.
> > My setup relies on a couple of veth pairs and an eBPF program to perform
> > XDP_REDIRECT from one pair to the other one. I am running the program in
> > xdp driver mode (not generic mode).
> >
> > v00 (NS:ns0 - 192.168.0.1/24) <---> (NS:ns1 - 192.168.0.2/24) v01
> > v10 (NS:ns1 - 192.168.1.1/24) <---> (NS:ns2 - 192.168.1.2/24) v11
> >
> > v00: iperf3 client
> > v11: iperf3 server
> >
> > I ran the test with different MTU values (1500B, 8KB, 64KB).
> >
> > net-next veth codebase:
> > =======================
> > - MTU 1500:  iperf3 ~  4.37 Gbps
> > - MTU 8000:  iperf3 ~  9.75 Gbps
> > - MTU 64000: iperf3 ~ 11.24 Gbps
> >
> > net-next veth codebase + page_frag_cache instead of page_pool:
> > ==============================================================
> > - MTU 1500:  iperf3 ~  4.99 Gbps (+14%)
> > - MTU 8000:  iperf3 ~  8.5  Gbps (-12%)
> > - MTU 64000: iperf3 ~ 11.9  Gbps ( +6%)
> >
> > It seems there is no clear winner between page_pool and page_frag_cache
> > here. What do you think?
>
> Hm, interesting. Are the iperf processes running on different cores?
> May be worth pinning (both same and different) to make sure the cache
> effects are isolated.

Hi Jakub,

I carried out some more tests today, based on your suggestion, on both the
veth driver and the xdp_generic codebase (on a more powerful system).

Test setup:

v00 (NS:ns0 - 192.168.0.1/24) <---> (NS:ns1 - 192.168.0.2/24) v01 ==(XDP_REDIRECT)==>
v10 (NS:ns1 - 192.168.1.1/24) <---> (NS:ns2 - 192.168.1.2/24) v11

- v00: iperf3 client (pinned on core 0)
- v11: iperf3 server (pinned on core 7)

The XDP program on v01 just redirects frames to v10; a minimal sketch of the
kind of program assumed here is included below for reference.

net-next veth codebase (page_pool APIs):
========================================
- MTU 1500:  ~  5.42 Gbps
- MTU 8000:  ~ 14.1  Gbps
- MTU 64000: ~ 18.4  Gbps

net-next veth codebase + page_frag_cache APIs [0]:
==================================================
- MTU 1500:  ~  6.62 Gbps
- MTU 8000:  ~ 14.7  Gbps
- MTU 64000: ~ 19.7  Gbps

xdp_generic codebase + page_frag_cache APIs (current proposed patch):
=====================================================================
- MTU 1500:  ~  6.41 Gbps
- MTU 8000:  ~ 14.2  Gbps
- MTU 64000: ~ 19.8  Gbps

xdp_generic codebase + page_pool APIs [1]:
==========================================
- MTU 1500:  ~  5.75 Gbps
- MTU 8000:  ~ 15.3  Gbps
- MTU 64000: ~ 21.2  Gbps

It seems the page_pool APIs work better for the xdp_generic codebase (except
in the MTU 1500 case), while the page_frag_cache APIs are better for the veth
driver. What do you think? Am I missing something?
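For reference, here is a minimal sketch of the kind of XDP_REDIRECT program
assumed above: a devmap-based redirect where userspace stores the egress
ifindex (v10 in this setup) in the map and attaches the program to v01 in
native mode. This is an illustrative reconstruction, not the exact program
used for the numbers above; map, section and function names are arbitrary.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch only: redirect every frame received on the ingress
 * veth (e.g. v01) to the interface whose ifindex userspace stored in
 * tx_port[0] (e.g. v10).
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u32);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_redirect_prog(struct xdp_md *ctx)
{
	__u32 key = 0;

	/* XDP_REDIRECT on success, XDP_ABORTED if tx_port[0] is not set. */
	return bpf_redirect_map(&tx_port, key, 0);
}

char _license[] SEC("license") = "GPL";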
Regards,
Lorenzo

[0] Here I have just used napi_alloc_frag() instead of
    page_pool_dev_alloc_va()/page_pool_dev_alloc() in
    veth_convert_skb_to_xdp_buff(); a rough sketch of this substitution is
    appended after the [1] diff below.

[1] I developed this PoC to use the page_pool APIs for the xdp_generic code:

diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index cdcafb30d437..5115b61f38f1 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -21,6 +21,7 @@ struct netdev_rx_queue {
 #ifdef CONFIG_XDP_SOCKETS
	struct xsk_buff_pool		*pool;
 #endif
+	struct page_pool		*page_pool;
 } ____cacheline_aligned_in_smp;
 
 /*
diff --git a/net/core/dev.c b/net/core/dev.c
index ed827b443d48..06fb568427c4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,6 +153,8 @@
 #include <linux/prandom.h>
 #include <linux/once_lite.h>
 #include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
 
 #include "dev.h"
 #include "net-sysfs.h"
@@ -4964,6 +4966,7 @@ static int netif_skb_check_for_generic_xdp(struct sk_buff **pskb,
	 */
	if (skb_cloned(skb) || skb_shinfo(skb)->nr_frags ||
	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+		struct netdev_rx_queue *rxq = netif_get_rxqueue(skb);
		u32 mac_len = skb->data - skb_mac_header(skb);
		u32 size, truesize, len, max_head_size, off;
		struct sk_buff *nskb;
@@ -4978,18 +4981,19 @@ static int netif_skb_check_for_generic_xdp(struct sk_buff **pskb,
		size = min_t(u32, skb->len, max_head_size);
		truesize = SKB_HEAD_ALIGN(size) + XDP_PACKET_HEADROOM;
-		data = napi_alloc_frag(truesize);
+		data = page_pool_dev_alloc_va(rxq->page_pool, &truesize);
		if (!data)
			return -ENOMEM;
 
		nskb = napi_build_skb(data, truesize);
		if (!nskb) {
-			skb_free_frag(data);
+			page_pool_free_va(rxq->page_pool, data, true);
			return -ENOMEM;
		}
 
		skb_reserve(nskb, XDP_PACKET_HEADROOM);
		skb_copy_header(nskb, skb);
+		skb_mark_for_recycle(nskb);
 
		err = skb_copy_bits(skb, 0, nskb->data, size);
		if (err) {
@@ -5005,18 +5009,21 @@ static int netif_skb_check_for_generic_xdp(struct sk_buff **pskb,
		len = skb->len - off;
		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
			struct page *page;
+			u32 page_off;
 
			size = min_t(u32, len, PAGE_SIZE);
-			data = napi_alloc_frag(size);
-			if (!data) {
+			truesize = size;
+			page = page_pool_dev_alloc(rxq->page_pool, &page_off,
+						   &truesize);
+			if (!page) {
				consume_skb(nskb);
				return -ENOMEM;
			}
 
-			page = virt_to_head_page(data);
-			skb_add_rx_frag(nskb, i, page,
-					data - page_address(page), size, size);
-			err = skb_copy_bits(skb, off, data, size);
+			skb_add_rx_frag(nskb, i, page, page_off, size, truesize);
+			err = skb_copy_bits(skb, off,
+					    page_address(page) + page_off,
+					    size);
			if (err) {
				consume_skb(nskb);
				return err;
@@ -10057,6 +10064,11 @@ EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
	unsigned int i, count = dev->num_rx_queues;
+	struct page_pool_params page_pool_params = {
+		.pool_size = 256,
+		.nid = NUMA_NO_NODE,
+		.dev = &dev->dev,
+	};
	struct netdev_rx_queue *rx;
	size_t sz = count * sizeof(*rx);
	int err = 0;
@@ -10075,14 +10087,25 @@ static int netif_alloc_rx_queues(struct net_device *dev)
		/* XDP RX-queue setup */
		err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i, 0);
		if (err < 0)
-			goto err_rxq_info;
+			goto err_rxq;
+
+		/* rx queue page pool allocator */
+		rx[i].page_pool = page_pool_create(&page_pool_params);
+		if (IS_ERR(rx[i].page_pool)) {
+			rx[i].page_pool = NULL;
+			goto err_rxq;
+		}
	}
	return 0;
 
-err_rxq_info:
+err_rxq:
	/* Rollback successful reg's and free other resources */
-	while (i--)
+	while (i--) {
		xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+		if (rx[i].page_pool)
+			page_pool_destroy(rx[i].page_pool);
+	}
+
	kvfree(dev->_rx);
	dev->_rx = NULL;
	return err;
@@ -10096,8 +10119,11 @@ static void netif_free_rx_queues(struct net_device *dev)
	if (!dev->_rx)
		return;
 
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
		xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
+		if (dev->_rx[i].page_pool)
+			page_pool_destroy(dev->_rx[i].page_pool);
+	}
 
	kvfree(dev->_rx);
 }
-- 
2.43.0
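For completeness, the [0] change referenced above is roughly the opposite
substitution on the veth side: the buffers built in
veth_convert_skb_to_xdp_buff() come from the per-CPU page_frag_cache instead
of the per-queue page_pool. The fragment below is only a sketch of its shape
(variable names and error paths are approximate, not the exact diff):

-		va = page_pool_dev_alloc_va(rq->page_pool, &truesize);
+		va = napi_alloc_frag(truesize);
		if (!va)
			goto drop;

		nskb = napi_build_skb(va, truesize);
		if (!nskb) {
-			page_pool_free_va(rq->page_pool, va, true);
+			skb_free_frag(va);
			goto drop;
		}

with the analogous napi_alloc_frag()/virt_to_head_page() swap for the
page_pool_dev_alloc() calls in the frag-filling loop.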