>> +static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
>> +				struct xdp_buff *xdp)
>> +{
>> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
>> +	int cpu = smp_processor_id();
>> +	struct netdev_queue *nq;
>> +	int queue;
>> +	int res;
>> +
>> +	if (unlikely(!xdpf))
>> +		return -EFAULT;
>
> Can you return -EFAULT here? Looks like the function is otherwise
> returning positive STMMAC_XDP_* return codes/masks.

Good catch. Thanks. It should return STMMAC_XDP_CONSUMED (see the sketch
at the bottom of this mail).

>
>> +	queue = stmmac_xdp_get_tx_queue(priv, cpu);
>> +	nq = netdev_get_tx_queue(priv->dev, queue);
>> +
>> +	__netif_tx_lock(nq, cpu);
>> +	/* Avoids TX time-out as we are sharing with slow path */
>> +	nq->trans_start = jiffies;
>> +	res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf);
>> +	if (res == STMMAC_XDP_TX) {
>> +		stmmac_flush_tx_descriptors(priv, queue);
>> +		stmmac_tx_timer_arm(priv, queue);
>
> Would it make sense to arm the timer and flush descriptors at the end
> of the NAPI poll cycle? Instead of after every TX frame?

Agreed. The descriptor flush and the Tx clean timer arming can be done
once at the end of the NAPI poll cycle instead of after every TX frame
(also sketched below).

>
>> +	}
>> +	__netif_tx_unlock(nq);
>> +
>> +	return res;
>> +}
>
>> @@ -4365,16 +4538,26 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>>  		xdp.data_hard_start = page_address(buf->page);
>>  		xdp_set_data_meta_invalid(&xdp);
>>  		xdp.frame_sz = buf_sz;
>> +		xdp.rxq = &rx_q->xdp_rxq;
>>
>> +		pre_len = xdp.data_end - xdp.data_hard_start -
>> +			  buf->page_offset;
>>  		skb = stmmac_xdp_run_prog(priv, &xdp);
>> +		/* Due xdp_adjust_tail: DMA sync for_device
>> +		 * cover max len CPU touch
>> +		 */
>> +		sync_len = xdp.data_end - xdp.data_hard_start -
>> +			   buf->page_offset;
>> +		sync_len = max(sync_len, pre_len);
>>
>>  		/* For Not XDP_PASS verdict */
>>  		if (IS_ERR(skb)) {
>>  			unsigned int xdp_res = -PTR_ERR(skb);
>>
>>  			if (xdp_res & STMMAC_XDP_CONSUMED) {
>> -				page_pool_recycle_direct(rx_q->page_pool,
>> -							 buf->page);
>> +				page_pool_put_page(rx_q->page_pool,
>> +						   virt_to_head_page(xdp.data),
>> +						   sync_len, true);
>
> IMHO the dma_sync_size logic is a little questionable, but it's not
> really related to your patch, others are already doing the same thing,
> so it's fine, I guess.

OK. We may leave it as it is now until a better/cleaner solution is found.
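
To make the two changes above concrete, here is roughly what I have in
mind (untested sketches against this series, not the final patch). For
the return code, the early-return path just reports the frame as consumed:

	/* xdp_convert_buff_to_frame() failure means we cannot transmit
	 * this frame; report it as consumed so the caller keeps seeing
	 * a consistent mask of STMMAC_XDP_* verdicts.
	 */
	if (unlikely(!xdpf))
		return STMMAC_XDP_CONSUMED;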
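
For deferring the flush and timer arming, something along these lines,
with stmmac_xdp_xmit_back() only queueing the descriptor and stmmac_rx()
OR-ing each frame's verdict into a local xdp_status that is handed to a
helper once the poll loop ends (helper name and plumbing are placeholders,
not final):

	/* Runs once at the end of the stmmac_rx() NAPI poll instead of
	 * after every XDP_TX frame; xdp_status accumulates all the
	 * STMMAC_XDP_* verdicts seen during this poll cycle.
	 */
	static void stmmac_finalize_xdp_rx(struct stmmac_priv *priv,
					   int xdp_status)
	{
		int cpu = smp_processor_id();
		int queue;

		queue = stmmac_xdp_get_tx_queue(priv, cpu);

		if (xdp_status & STMMAC_XDP_TX) {
			stmmac_flush_tx_descriptors(priv, queue);
			stmmac_tx_timer_arm(priv, queue);
		}
	}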
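
On the dma_sync_size point, for completeness, the intent of the quoted
hunk as I understand it: bpf_xdp_adjust_tail() may have moved
xdp.data_end after the program ran, so the recycle path syncs for device
over the larger of the pre- and post-program lengths, covering everything
the CPU may have dirtied:

	/* pre_len: frame length before the XDP program ran;
	 * sync_len: length after a possible bpf_xdp_adjust_tail().
	 * Sync the larger of the two so every byte the CPU may have
	 * touched is covered before the page goes back to the pool.
	 */
	sync_len = max(sync_len, pre_len);
	page_pool_put_page(rx_q->page_pool,
			   virt_to_head_page(xdp.data),
			   sync_len, true);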