RE: [PATCH V3 net-next] net: fec: add XDP_TX feature support

Wei Fang <wei.fang@xxxxxxx> · Wed, 2 Aug 2023 12:33:34 +0000

Sorry, I missed some comments before.

> > @@ -1482,7 +1486,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16
> queue_id, int budget)
> >   			/* Free the sk buffer associated with this last transmit */
> >   			dev_kfree_skb_any(skb);
> >   		} else {
> > -			xdp_return_frame(xdpf);
> > +			xdp_return_frame_rx_napi(xdpf);
> >
> >   			txq->tx_buf[index].xdp = NULL;
> >   			/* restore default tx buffer type: FEC_TXBUF_T_SKB */
> > @@ -1573,11 +1577,18 @@ fec_enet_run_xdp(struct fec_enet_private
> *fep, struct bpf_prog *prog,
> >   		}
> >   		break;
> >
> > -	default:
> > -		bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
> > -		fallthrough;
> > -
> >   	case XDP_TX:
> > +		err = fec_enet_xdp_tx_xmit(fep->netdev, xdp);
> 
> You should pass along the "sync" length value to fec_enet_xdp_tx_xmit().
> Because we know DMA comes from same device (it is already DMA mapped
> to), then we can do a DMA sync "to_device" with only the sync length.
> 
I think it's okay if the frame length does not change, but if we increase the
length of the received frame, such as by adding a VLAN header to the frame, I
think the "sync" length value would no longer be correct.

> > +		if (err) {
> 
> Add an unlikely(err) or do like above case XDP_REDIRECT, where it takes
> the likely case "if (!err)" first.
Yes, you are right, I will improve it.

> 
> > +			ret = FEC_ENET_XDP_CONSUMED;
> > +			page = virt_to_head_page(xdp->data);
> > +			page_pool_put_page(rxq->page_pool, page, sync, true);
> > +		} else {
> > +			ret = FEC_ENET_XDP_TX;
> > +		}
> > +		break;
> > +
> > +	default:
> >   		bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
> >   		fallthrough;
> >
> > @@ -3793,7 +3804,8 @@ fec_enet_xdp_get_tx_queue(struct
> fec_enet_private *fep, int index)
> >
> >   static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
> >   				   struct fec_enet_priv_tx_q *txq,
> > -				   struct xdp_frame *frame)
> > +				   struct xdp_frame *frame,
> > +				   bool ndo_xmit)
> 
> E.g add parameter dma_sync_len.
> 
> >   {
> >   	unsigned int index, status, estatus;
> >   	struct bufdesc *bdp;
> > @@ -3813,10 +3825,24 @@ static int fec_enet_txq_xmit_frame(struct
> fec_enet_private *fep,
> >
> >   	index = fec_enet_get_bd_index(bdp, &txq->bd);
> >
> > -	dma_addr = dma_map_single(&fep->pdev->dev, frame->data,
> > -				  frame->len, DMA_TO_DEVICE);
> > -	if (dma_mapping_error(&fep->pdev->dev, dma_addr))
> > -		return -ENOMEM;
> > +	if (ndo_xmit) {
> > +		dma_addr = dma_map_single(&fep->pdev->dev, frame->data,
> > +					  frame->len, DMA_TO_DEVICE);
> > +		if (dma_mapping_error(&fep->pdev->dev, dma_addr))
> > +			return -ENOMEM;
> > +
> > +		txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
> > +	} else {
> > +		struct page *page = virt_to_page(frame->data);
> > +
> > +		dma_addr = page_pool_get_dma_addr(page) + sizeof(*frame) +
> > +			   frame->headroom;
> > +		dma_sync_single_for_device(&fep->pdev->dev, dma_addr,
> > +					   frame->len, DMA_BIDIRECTIONAL);
> 
> Optimization: use dma_sync_len here instead of frame->len.
> 
> > +		txq->tx_buf[index].type = FEC_TXBUF_T_XDP_TX;
> > +	}
> > +
> > +	txq->tx_buf[index].xdp = frame;
> >
> >   	status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
> >   	if (fep->bufdesc_ex)
> > @@ -3835,9 +3861,6 @@ static int fec_enet_txq_xmit_frame(struct
> fec_enet_private *fep,
> >   		ebdp->cbd_esc = cpu_to_fec32(estatus);
> >   	}
> >
> > -	txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
> > -	txq->tx_buf[index].xdp = frame;
> > -
> >   	/* Make sure the updates to rest of the descriptor are performed
> before
> >   	 * transferring ownership.
> >   	 */
> > @@ -3863,6 +3886,31 @@ static int fec_enet_txq_xmit_frame(struct
> fec_enet_private *fep,
> >   	return 0;
> >   }
> >
> > +static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
> > +				struct xdp_buff *xdp)
> > +{
> 
> E.g add parameter dma_sync_len.
> 
> > +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> 
> XDP_TX can avoid this conversion to xdp_frame.
> It would requires some refactor of fec_enet_txq_xmit_frame().
> 
Yes, but I don't intend to change it; using the existing interface is enough.

> > +	struct fec_enet_private *fep = netdev_priv(ndev);
> > +	struct fec_enet_priv_tx_q *txq;
> > +	int cpu = smp_processor_id();
> > +	struct netdev_queue *nq;
> > +	int queue, ret;
> > +
> > +	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
> > +	txq = fep->tx_queue[queue];
> > +	nq = netdev_get_tx_queue(fep->netdev, queue);
> > +
> > +	__netif_tx_lock(nq, cpu);
> 
> It is sad that XDP_TX takes a lock for each frame.
> 
Yes, but the XDP path shares the queue with the kernel network stack, so
we need a lock here, unless there is a dedicated queue for the XDP path. Do
you have a better solution?

> > +
> > +	/* Avoid tx timeout as XDP shares the queue with kernel stack */
> > +	txq_trans_cond_update(nq);
> > +	ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
> 
> Add/pass parameter dma_sync_len to fec_enet_txq_xmit_frame().
> 
> 
> > +
> > +	__netif_tx_unlock(nq);
> > +
> > +	return ret;
> > +}
> > +
> >   static int fec_enet_xdp_xmit(struct net_device *dev,
> >   			     int num_frames,
> >   			     struct xdp_frame **frames,
> > @@ -3885,7 +3933,7 @@ static int fec_enet_xdp_xmit(struct net_device
> *dev,
> >   	/* Avoid tx timeout as XDP shares the queue with kernel stack */
> >   	txq_trans_cond_update(nq);
> >   	for (i = 0; i < num_frames; i++) {
> > -		if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
> > +		if (fec_enet_txq_xmit_frame(fep, txq, frames[i], true) < 0)
> >   			break;
> >   		sent_frames++;
> >   	}