Introduce a batched version of xsk_rcv called xsk_rcv_batch, which takes
an array of xdp_buffs and pushes them to the Rx ring. Also introduce a
batched version of xsk_buff_dma_sync_for_cpu.

Signed-off-by: Ciara Loftus <ciara.loftus@xxxxxxxxx>
---
 include/net/xdp_sock_drv.h  | 28 ++++++++++++++++++++++++++++
 include/net/xsk_buff_pool.h | 22 ++++++++++++++++++++++
 net/xdp/xsk.c               | 29 +++++++++++++++++++++++++++++
 net/xdp/xsk_queue.h         | 31 +++++++++++++++++++++++++++++++
 4 files changed, 110 insertions(+)
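For context, here is a rough sketch of how a zero-copy driver's Rx path
could chain the new helpers: gather a batch of xdp_buffs backed by one
xsk_buff_pool, sync them for the CPU in one call, push them to the Rx
ring in one call, then flush. It is illustrative only and not part of
the patch: struct my_rx_ring, my_rx_desc_done(), my_rx_next_buff() and
RX_BATCH_SIZE are invented placeholders, and resolving the destination
xdp_sock (normally done via the XDP program and the XSKMAP) is out of
scope here.

#include <net/xdp_sock_drv.h>

#define RX_BATCH_SIZE 64

/* Hypothetical driver state and helpers. */
struct my_rx_ring {
	struct xsk_buff_pool *xsk_pool;
	/* ... hardware descriptor ring state ... */
};
static bool my_rx_desc_done(struct my_rx_ring *rxq);
static struct xdp_buff *my_rx_next_buff(struct my_rx_ring *rxq);

static void my_clean_rx_irq_zc(struct my_rx_ring *rxq, struct xdp_sock *xs)
{
	struct xdp_buff *bufs[RX_BATCH_SIZE];
	int n = 0;

	/* Collect completed descriptors into an array of xdp_buffs, all
	 * backed by the same xsk_buff_pool.
	 */
	while (n < RX_BATCH_SIZE && my_rx_desc_done(rxq))
		bufs[n++] = my_rx_next_buff(rxq);

	if (!n)
		return;

	/* One call syncs the whole batch for the CPU; it is a no-op if
	 * the pool does not need syncing.
	 */
	xsk_buff_dma_sync_for_cpu_batch(bufs, rxq->xsk_pool, n);

	/* Reserve and fill n Rx descriptors in one pass. On failure the
	 * whole batch is rejected and the driver would drop or recycle
	 * the buffers.
	 */
	if (xsk_rcv_batch(xs, bufs, n))
		return;

	xsk_flush(xs);
}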
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index e923f5d1adb6..0b352d7a34af 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -23,6 +23,7 @@ void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool);
 void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool);
 bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool);
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+int xsk_rcv_batch(struct xdp_sock *xs, struct xdp_buff **bufs, int batch_size);
 void xsk_flush(struct xdp_sock *xs);
 
 static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
@@ -125,6 +126,22 @@ static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_bu
 	xp_dma_sync_for_cpu(xskb);
 }
 
+static inline void xsk_buff_dma_sync_for_cpu_batch(struct xdp_buff **bufs,
+						   struct xsk_buff_pool *pool,
+						   int batch_size)
+{
+	struct xdp_buff_xsk *xskb;
+	int i;
+
+	if (!pool->dma_need_sync)
+		return;
+
+	for (i = 0; i < batch_size; i++) {
+		xskb = container_of(*(bufs + i), struct xdp_buff_xsk, xdp);
+		xp_dma_sync_for_cpu(xskb);
+	}
+}
+
 static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
 						    dma_addr_t dma,
 						    size_t size)
@@ -191,6 +208,11 @@ static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return 0;
 }
 
+static inline int xsk_rcv_batch(struct xdp_sock *xs, struct xdp_buff **bufs, int batch_size)
+{
+	return 0;
+}
+
 static inline void xsk_flush(struct xdp_sock *xs)
 {
 }
@@ -274,6 +296,12 @@ static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_bu
 {
 }
 
+static inline void xsk_buff_dma_sync_for_cpu_batch(struct xdp_buff **bufs,
+						   struct xsk_buff_pool *pool,
+						   int batch_size)
+{
+}
+
 static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
 						    dma_addr_t dma,
 						    size_t size)
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index ddeefc4a1040..f6d76c7eaf6b 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -214,6 +214,28 @@ static inline void xp_release(struct xdp_buff_xsk *xskb)
 	xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
 }
 
+/* Release a batch of xdp_buffs back to an xsk_buff_pool.
+ * The buffs in the batch must all come from the same xsk_buff_pool. This way
+ * it is safe to push the batch to the top of the free_heads stack, because
+ * at least the same amount will have been popped from the stack earlier in
+ * the datapath.
+ */
+static inline void xp_release_batch(struct xdp_buff **bufs, int batch_size)
+{
+	struct xdp_buff_xsk *xskb = container_of(*bufs, struct xdp_buff_xsk, xdp);
+	struct xsk_buff_pool *pool = xskb->pool;
+	u32 tail = pool->free_heads_cnt;
+	u32 i;
+
+	if (pool->unaligned) {
+		for (i = 0; i < batch_size; i++) {
+			xskb = container_of(*(bufs + i), struct xdp_buff_xsk, xdp);
+			pool->free_heads[tail + i] = xskb;
+		}
+		pool->free_heads_cnt += batch_size;
+	}
+}
+
 static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
 {
 	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ce004f5fae64..22d00173a96f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -151,6 +151,20 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	return 0;
 }
 
+static int __xsk_rcv_zc_batch(struct xdp_sock *xs, struct xdp_buff **bufs, int batch_size)
+{
+	int err;
+
+	err = xskq_prod_reserve_desc_batch(xs->rx, bufs, batch_size);
+	if (err) {
+		xs->rx_queue_full++;
+		return -1;
+	}
+
+	xp_release_batch(bufs, batch_size);
+	return 0;
+}
+
 static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
 {
 	void *from_buf, *to_buf;
@@ -269,6 +283,21 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 }
 EXPORT_SYMBOL(xsk_rcv);
 
+int xsk_rcv_batch(struct xdp_sock *xs, struct xdp_buff **bufs, int batch_size)
+{
+	int err;
+
+	err = xsk_rcv_check(xs, *bufs);
+	if (err)
+		return err;
+
+	if ((*bufs)->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
+		return -1;
+
+	return __xsk_rcv_zc_batch(xs, bufs, batch_size);
+}
+EXPORT_SYMBOL(xsk_rcv_batch);
+
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index e9aa2c236356..3be9f4a01d77 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -338,6 +338,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
 	return xskq_prod_nb_free(q, 1) ? false : true;
 }
 
+static inline bool xskq_prod_is_full_n(struct xsk_queue *q, u32 n)
+{
+	return xskq_prod_nb_free(q, n) ? false : true;
+}
+
 static inline void xskq_prod_cancel(struct xsk_queue *q)
 {
 	q->cached_prod--;
@@ -399,6 +404,32 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
 	return 0;
 }
 
+static inline int xskq_prod_reserve_desc_batch(struct xsk_queue *q, struct xdp_buff **bufs,
+					       int batch_size)
+{
+	struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+	struct xdp_buff_xsk *xskb;
+	u64 addr;
+	u32 len;
+	u32 i;
+
+	if (xskq_prod_is_full_n(q, batch_size))
+		return -ENOSPC;
+
+	/* A, matches D */
+	for (i = 0; i < batch_size; i++) {
+		len = (*(bufs + i))->data_end - (*(bufs + i))->data;
+		xskb = container_of(*(bufs + i), struct xdp_buff_xsk, xdp);
+		addr = xp_get_handle(xskb);
+		ring->desc[(q->cached_prod + i) & q->ring_mask].addr = addr;
+		ring->desc[(q->cached_prod + i) & q->ring_mask].len = len;
+	}
+
+	q->cached_prod += batch_size;
+
+	return 0;
+}
+
 static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
 {
 	smp_store_release(&q->ring->producer, idx); /* B, matches C */
-- 
2.17.1