Re: [net-next PATCH v6 3/6] octeontx2-pf: AF_XDP zero copy receive support

On Thu, Feb 13, 2025 at 11:01:38AM +0530, Suman Ghosh wrote:
> This patch adds AF_XDP zero copy support for CN10K.
> This patch specifically adds receive side support. In this approach, once
> an XDP program with zero copy support is enabled on a specific rx queue,
> that receive queue is disabled/detached from the existing kernel
> queue and re-assigned to the umem memory.
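
(For anyone wanting to try this out: the RX zero-copy path is typically
exercised with something along the lines of the xdpsock sample, e.g.
"xdpsock -i <iface> -q <qid> -z -r" - the interface, queue id and flags here
are only an example, adjust to your setup and kernel tree.)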
> 
> Signed-off-by: Suman Ghosh <sumang@xxxxxxxxxxx>
> ---
>  .../ethernet/marvell/octeontx2/nic/Makefile   |   2 +-
>  .../ethernet/marvell/octeontx2/nic/cn10k.c    |   7 +-
>  .../marvell/octeontx2/nic/otx2_common.c       | 114 ++++++++---
>  .../marvell/octeontx2/nic/otx2_common.h       |   6 +-
>  .../ethernet/marvell/octeontx2/nic/otx2_pf.c  |  25 ++-
>  .../marvell/octeontx2/nic/otx2_txrx.c         |  73 +++++--
>  .../marvell/octeontx2/nic/otx2_txrx.h         |   6 +
>  .../ethernet/marvell/octeontx2/nic/otx2_vf.c  |  12 +-
>  .../ethernet/marvell/octeontx2/nic/otx2_xsk.c | 182 ++++++++++++++++++
>  .../ethernet/marvell/octeontx2/nic/otx2_xsk.h |  21 ++
>  .../ethernet/marvell/octeontx2/nic/qos_sq.c   |   2 +-
>  11 files changed, 389 insertions(+), 61 deletions(-)
>  create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c
>  create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h
> 
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
> index cb6513ab35e7..69e0778f9ac1 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
> @@ -9,7 +9,7 @@ obj-$(CONFIG_RVU_ESWITCH) += rvu_rep.o
>  
>  rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
>                 otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \
> -               otx2_devlink.o qos_sq.o qos.o
> +               otx2_devlink.o qos_sq.o qos.o otx2_xsk.o
>  rvu_nicvf-y := otx2_vf.o
>  rvu_rep-y := rep.o
>  
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
> index a15cc86635d6..c3b6e0f60a79 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
> @@ -112,9 +112,12 @@ int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
>  	struct otx2_nic *pfvf = dev;
>  	int cnt = cq->pool_ptrs;
>  	u64 ptrs[NPA_MAX_BURST];
> +	struct otx2_pool *pool;
>  	dma_addr_t bufptr;
>  	int num_ptrs = 1;
>  
> +	pool = &pfvf->qset.pool[cq->cq_idx];
> +
>  	/* Refill pool with new buffers */
>  	while (cq->pool_ptrs) {
>  		if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
> @@ -124,7 +127,9 @@ int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
>  			break;
>  		}
>  		cq->pool_ptrs--;
> -		ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM;
> +		ptrs[num_ptrs] = pool->xsk_pool ?
> +				 (u64)bufptr : (u64)bufptr + OTX2_HEAD_ROOM;
> +
>  		num_ptrs++;
>  		if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
>  			__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> index 161cf33ef89e..92b0dba07853 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> @@ -17,6 +17,7 @@
>  #include "otx2_common.h"
>  #include "otx2_struct.h"
>  #include "cn10k.h"
> +#include "otx2_xsk.h"
>  
>  static bool otx2_is_pfc_enabled(struct otx2_nic *pfvf)
>  {
> @@ -549,10 +550,13 @@ static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
>  }
>  
>  static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
> -			     dma_addr_t *dma)
> +			     dma_addr_t *dma, int qidx, int idx)
>  {
>  	u8 *buf;
>  
> +	if (pool->xsk_pool)
> +		return otx2_xsk_pool_alloc_buf(pfvf, pool, dma, idx);
> +
>  	if (pool->page_pool)
>  		return otx2_alloc_pool_buf(pfvf, pool, dma);
>  
> @@ -571,12 +575,12 @@ static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
>  }
>  
>  int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
> -		    dma_addr_t *dma)
> +		    dma_addr_t *dma, int qidx, int idx)
>  {
>  	int ret;
>  
>  	local_bh_disable();
> -	ret = __otx2_alloc_rbuf(pfvf, pool, dma);
> +	ret = __otx2_alloc_rbuf(pfvf, pool, dma, qidx, idx);
>  	local_bh_enable();
>  	return ret;
>  }
> @@ -584,7 +588,8 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
>  int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
>  		      dma_addr_t *dma)
>  {
> -	if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma)))
> +	if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma,
> +				       cq->cq_idx, cq->pool_ptrs - 1)))
>  		return -ENOMEM;
>  	return 0;
>  }
> @@ -884,7 +889,7 @@ void otx2_sqb_flush(struct otx2_nic *pfvf)
>  #define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */
>  #define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */
>  
> -static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
> +int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
>  {
>  	struct otx2_qset *qset = &pfvf->qset;
>  	struct nix_aq_enq_req *aq;
> @@ -1041,7 +1046,7 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
>  
>  }
>  
> -static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
> +int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
>  {
>  	struct otx2_qset *qset = &pfvf->qset;
>  	int err, pool_id, non_xdp_queues;
> @@ -1057,11 +1062,18 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
>  		cq->cint_idx = qidx;
>  		cq->cqe_cnt = qset->rqe_cnt;
>  		if (pfvf->xdp_prog) {
> -			pool = &qset->pool[qidx];
>  			xdp_rxq_info_reg(&cq->xdp_rxq, pfvf->netdev, qidx, 0);
> -			xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
> -						   MEM_TYPE_PAGE_POOL,
> -						   pool->page_pool);
> +			pool = &qset->pool[qidx];
> +			if (pool->xsk_pool) {
> +				xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
> +							   MEM_TYPE_XSK_BUFF_POOL,
> +							   NULL);
> +				xsk_pool_set_rxq_info(pool->xsk_pool, &cq->xdp_rxq);
> +			} else if (pool->page_pool) {
> +				xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
> +							   MEM_TYPE_PAGE_POOL,
> +							   pool->page_pool);
> +			}
>  		}
>  	} else if (qidx < non_xdp_queues) {
>  		cq->cq_type = CQ_TX;
> @@ -1281,9 +1293,10 @@ void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool,
>  
>  	pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
>  	page = virt_to_head_page(phys_to_virt(pa));
> -
>  	if (pool->page_pool) {
>  		page_pool_put_full_page(pool->page_pool, page, true);
> +	} else if (pool->xsk_pool) {
> +		/* Note: No way of identifying xdp_buff */
>  	} else {
>  		dma_unmap_page_attrs(pfvf->dev, iova, size,
>  				     DMA_FROM_DEVICE,
> @@ -1298,6 +1311,7 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
>  	int pool_id, pool_start = 0, pool_end = 0, size = 0;
>  	struct otx2_pool *pool;
>  	u64 iova;
> +	int idx;
>  
>  	if (type == AURA_NIX_SQ) {
>  		pool_start = otx2_get_pool_idx(pfvf, type, 0);
> @@ -1312,16 +1326,21 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
>  
>  	/* Free SQB and RQB pointers from the aura pool */
>  	for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
> -		iova = otx2_aura_allocptr(pfvf, pool_id);
>  		pool = &pfvf->qset.pool[pool_id];
> +		iova = otx2_aura_allocptr(pfvf, pool_id);
>  		while (iova) {
>  			if (type == AURA_NIX_RQ)
>  				iova -= OTX2_HEAD_ROOM;
> -
>  			otx2_free_bufs(pfvf, pool, iova, size);
> -
>  			iova = otx2_aura_allocptr(pfvf, pool_id);
>  		}
> +
> +		for (idx = 0 ; idx < pool->xdp_cnt; idx++) {
> +			if (!pool->xdp[idx])
> +				continue;
> +
> +			xsk_buff_free(pool->xdp[idx]);
> +		}
>  	}
>  }
>  
> @@ -1338,7 +1357,8 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf)
>  		qmem_free(pfvf->dev, pool->stack);
>  		qmem_free(pfvf->dev, pool->fc_addr);
>  		page_pool_destroy(pool->page_pool);
> -		pool->page_pool = NULL;
> +		devm_kfree(pfvf->dev, pool->xdp);
> +		pool->xsk_pool = NULL;
>  	}
>  	devm_kfree(pfvf->dev, pfvf->qset.pool);
>  	pfvf->qset.pool = NULL;
> @@ -1425,6 +1445,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
>  		   int stack_pages, int numptrs, int buf_size, int type)
>  {
>  	struct page_pool_params pp_params = { 0 };
> +	struct xsk_buff_pool *xsk_pool;
>  	struct npa_aq_enq_req *aq;
>  	struct otx2_pool *pool;
>  	int err;
> @@ -1468,21 +1489,35 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
>  	aq->ctype = NPA_AQ_CTYPE_POOL;
>  	aq->op = NPA_AQ_INSTOP_INIT;
>  
> -	if (type != AURA_NIX_RQ) {
> -		pool->page_pool = NULL;
> +	if (type != AURA_NIX_RQ)
> +		return 0;
> +
> +	if (!test_bit(pool_id, pfvf->af_xdp_zc_qidx)) {
> +		pp_params.order = get_order(buf_size);
> +		pp_params.flags = PP_FLAG_DMA_MAP;
> +		pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs);
> +		pp_params.nid = NUMA_NO_NODE;
> +		pp_params.dev = pfvf->dev;
> +		pp_params.dma_dir = DMA_FROM_DEVICE;
> +		pool->page_pool = page_pool_create(&pp_params);
> +		if (IS_ERR(pool->page_pool)) {
> +			netdev_err(pfvf->netdev, "Creation of page pool failed\n");
> +			return PTR_ERR(pool->page_pool);
> +		}
>  		return 0;
>  	}
>  
> -	pp_params.order = get_order(buf_size);
> -	pp_params.flags = PP_FLAG_DMA_MAP;
> -	pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs);
> -	pp_params.nid = NUMA_NO_NODE;
> -	pp_params.dev = pfvf->dev;
> -	pp_params.dma_dir = DMA_FROM_DEVICE;
> -	pool->page_pool = page_pool_create(&pp_params);
> -	if (IS_ERR(pool->page_pool)) {
> -		netdev_err(pfvf->netdev, "Creation of page pool failed\n");
> -		return PTR_ERR(pool->page_pool);
> +	/* Set XSK pool to support AF_XDP zero-copy */
> +	xsk_pool = xsk_get_pool_from_qid(pfvf->netdev, pool_id);
> +	if (xsk_pool) {
> +		pool->xsk_pool = xsk_pool;
> +		pool->xdp_cnt = numptrs;
> +		pool->xdp = devm_kcalloc(pfvf->dev,
> +					 numptrs, sizeof(struct xdp_buff *), GFP_KERNEL);

What is the rationale behind having a buffer pool within your driver while
you have this very same thing within xsk_buff_pool?

You're doubling your work. Just use xsk_buff_alloc_batch() and have a
simpler ZC implementation in your driver.
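
To illustrate, a rough, untested sketch of what the RX refill could look like
when batching straight out of the xsk pool (the otx2_zc_refill() name and the
per-pointer aura_freeptr call are just illustrative, not taken from your
patch):

static void otx2_zc_refill(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
			   struct xsk_buff_pool *xsk_pool)
{
	struct xdp_buff *bufs[NPA_MAX_BURST];
	u32 i, nb, want;

	want = min_t(u32, cq->pool_ptrs, NPA_MAX_BURST);
	/* xsk core hands out already DMA-mapped frames in one call,
	 * no driver-side xdp_buff array needed for the refill path.
	 */
	nb = xsk_buff_alloc_batch(xsk_pool, bufs, want);
	for (i = 0; i < nb; i++)
		pfvf->hw_ops->aura_freeptr(pfvf, cq->cq_idx,
					   xsk_buff_xdp_get_dma(bufs[i]));
	cq->pool_ptrs -= nb;
}

How you map completions back to frames is a separate question, but at least
the refill side would not need to maintain its own pointer array.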

> +		if (IS_ERR(pool->xdp)) {
> +			netdev_err(pfvf->netdev, "Creation of xsk pool failed\n");
> +			return PTR_ERR(pool->xdp);
> +		}
>  	}
>  
>  	return 0;
> @@ -1543,9 +1578,18 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
>  		}
>  
>  		for (ptr = 0; ptr < num_sqbs; ptr++) {
> -			err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
> -			if (err)
> +			err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr);
> +			if (err) {
> +				if (pool->xsk_pool) {
> +					ptr--;
> +					while (ptr >= 0) {
> +						xsk_buff_free(pool->xdp[ptr]);
> +						ptr--;
> +					}
> +				}
>  				goto err_mem;
> +			}
> +
>  			pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
>  			sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
>  		}
> @@ -1595,11 +1639,19 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
>  	/* Allocate pointers and free them to aura/pool */
>  	for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
>  		pool = &pfvf->qset.pool[pool_id];
> +
>  		for (ptr = 0; ptr < num_ptrs; ptr++) {
> -			err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
> -			if (err)
> +			err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr);
> +			if (err) {
> +				if (pool->xsk_pool) {
> +					while (ptr)
> +						xsk_buff_free(pool->xdp[--ptr]);
> +				}
>  				return -ENOMEM;
> +			}
> +
>  			pfvf->hw_ops->aura_freeptr(pfvf, pool_id,
> +						   pool->xsk_pool ? bufptr :
>  						   bufptr + OTX2_HEAD_ROOM);
>  		}
>  	}
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> index d5fbccb289df..60508971b62f 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> @@ -532,6 +532,8 @@ struct otx2_nic {
>  
>  	/* Inline ipsec */
>  	struct cn10k_ipsec	ipsec;
> +	/* af_xdp zero-copy */
> +	unsigned long		*af_xdp_zc_qidx;
>  };
>  
>  static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
> @@ -1003,7 +1005,7 @@ void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq);
>  void otx2_free_pending_sqe(struct otx2_nic *pfvf);
>  void otx2_sqb_flush(struct otx2_nic *pfvf);
>  int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
> -		    dma_addr_t *dma);
> +		    dma_addr_t *dma, int qidx, int idx);
>  int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
>  void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
>  int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
> @@ -1033,6 +1035,8 @@ void otx2_pfaf_mbox_destroy(struct otx2_nic *pf);
>  void otx2_disable_mbox_intr(struct otx2_nic *pf);
>  void otx2_disable_napi(struct otx2_nic *pf);
>  irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq);
> +int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura);
> +int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx);
>  
>  /* RSS configuration APIs*/
>  int otx2_rss_init(struct otx2_nic *pfvf);
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> index 4347a3c95350..50a42cd5d50a 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> @@ -27,6 +27,7 @@
>  #include "qos.h"
>  #include <rvu_trace.h>
>  #include "cn10k_ipsec.h"
> +#include "otx2_xsk.h"
>  
>  #define DRV_NAME	"rvu_nicpf"
>  #define DRV_STRING	"Marvell RVU NIC Physical Function Driver"
> @@ -1662,9 +1663,7 @@ void otx2_free_hw_resources(struct otx2_nic *pf)
>  	struct nix_lf_free_req *free_req;
>  	struct mbox *mbox = &pf->mbox;
>  	struct otx2_cq_queue *cq;
> -	struct otx2_pool *pool;
>  	struct msg_req *req;
> -	int pool_id;
>  	int qidx;
>  
>  	/* Ensure all SQE are processed */
> @@ -1705,13 +1704,6 @@ void otx2_free_hw_resources(struct otx2_nic *pf)
>  	/* Free RQ buffer pointers*/
>  	otx2_free_aura_ptr(pf, AURA_NIX_RQ);
>  
> -	for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) {
> -		pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx);
> -		pool = &pf->qset.pool[pool_id];
> -		page_pool_destroy(pool->page_pool);
> -		pool->page_pool = NULL;
> -	}
> -
>  	otx2_free_cq_res(pf);
>  
>  	/* Free all ingress bandwidth profiles allocated */
> @@ -2788,6 +2780,8 @@ static int otx2_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
>  	switch (xdp->command) {
>  	case XDP_SETUP_PROG:
>  		return otx2_xdp_setup(pf, xdp->prog);
> +	case XDP_SETUP_XSK_POOL:
> +		return otx2_xsk_pool_setup(pf, xdp->xsk.pool, xdp->xsk.queue_id);
>  	default:
>  		return -EINVAL;
>  	}
> @@ -2865,6 +2859,7 @@ static const struct net_device_ops otx2_netdev_ops = {
>  	.ndo_set_vf_vlan	= otx2_set_vf_vlan,
>  	.ndo_get_vf_config	= otx2_get_vf_config,
>  	.ndo_bpf		= otx2_xdp,
> +	.ndo_xsk_wakeup		= otx2_xsk_wakeup,
>  	.ndo_xdp_xmit           = otx2_xdp_xmit,
>  	.ndo_setup_tc		= otx2_setup_tc,
>  	.ndo_set_vf_trust	= otx2_ndo_set_vf_trust,
> @@ -3203,16 +3198,26 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	/* Enable link notifications */
>  	otx2_cgx_config_linkevents(pf, true);
>  
> +	pf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL);

If this is taken from the ice driver, be aware that we got rid of the bitmap
tracking ZC-enabled queues; see adbf5a42341f ("ice: remove af_xdp_zc_qps
bitmap").

In case you still need it after going through the referenced commit, please
provide us some justification why.
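
For reference, a rough sketch of how the pool init could key off the xsk core
directly instead of the bitmap (the otx2_pool_init_page_pool() helper below is
hypothetical, only there to show the fallback):

	struct xsk_buff_pool *xsk_pool;

	/* Ask the xsk core instead of a driver-side bitmap: a pool is
	 * returned only when an AF_XDP socket has been bound to this
	 * queue id with zero-copy.
	 */
	xsk_pool = xsk_get_pool_from_qid(pfvf->netdev, pool_id);
	if (!xsk_pool)
		return otx2_pool_init_page_pool(pfvf, pool, buf_size, numptrs);

	pool->xsk_pool = xsk_pool;
	pool->xdp_cnt = numptrs;
	return 0;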

> +	if (!pf->af_xdp_zc_qidx) {
> +		err = -ENOMEM;
> +		goto err_sriov_cleannup;
> +	}
> +
>  #ifdef CONFIG_DCB
>  	err = otx2_dcbnl_set_ops(netdev);
>  	if (err)
> -		goto err_pf_sriov_init;
> +		goto err_free_zc_bmap;
>  #endif
>  
>  	otx2_qos_init(pf, qos_txqs);
>  
>  	return 0;
>  
> +err_free_zc_bmap:
> +	bitmap_free(pf->af_xdp_zc_qidx);
> +err_sriov_cleannup:
> +	otx2_sriov_vfcfg_cleanup(pf);
>  err_pf_sriov_init:
>  	otx2_shutdown_tc(pf);
>  err_mcam_flow_del:

[...]



