On Mon, 12 Apr 2021 12:17:13 +0200 Kurt Kanzenbach <kurt@xxxxxxxxxxxxx> wrote: > When using native XDP with the igb driver, the XDP frame data doesn't point to > the beginning of the packet. It's off by 16 bytes. Everything works as expected > with XDP skb mode. > > Actually these 16 bytes are used to store the packet timestamps. Therefore, pull > the timestamp before executing any XDP operations and adjust all other code > accordingly. The igc driver does it like that as well. (Cc. Alexander Duyck) Do we have enough room for the packet page-split tricks when these 16 bytes are added? AFAIK this driver like ixgbe+i40e split the page in two 2048 bytes packets. The XDP headroom is reduced to 192 bytes. The skb_shared_info is 320 bytes in size. 2048-192-320 = 1536 bytes MTU(L3) 1500 Ethernet (L2) headers 14 bytes VLAN 4 bytes, but Q-in-Q vlan 8 bytes. Single VLAN: 1536-1500-14-4 = 18 bytes left Q-in-q VLAN: 1536-1500-14-8 = 14 bytes left > Tested with Intel i210 card and AF_XDP sockets. > > Fixes: 9cbc948b5a20 ("igb: add XDP support") > Signed-off-by: Kurt Kanzenbach <kurt@xxxxxxxxxxxxx> > --- > drivers/net/ethernet/intel/igb/igb.h | 3 +- > drivers/net/ethernet/intel/igb/igb_main.c | 45 ++++++++++++----------- > drivers/net/ethernet/intel/igb/igb_ptp.c | 18 ++++----- > 3 files changed, 32 insertions(+), 34 deletions(-) (no comments on code below, but kept it if Alex need to see it) > diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h > index 7bda8c5edea5..72cf967c1a00 100644 > --- a/drivers/net/ethernet/intel/igb/igb.h > +++ b/drivers/net/ethernet/intel/igb/igb.h > @@ -748,8 +748,7 @@ void igb_ptp_suspend(struct igb_adapter *adapter); > void igb_ptp_rx_hang(struct igb_adapter *adapter); > void igb_ptp_tx_hang(struct igb_adapter *adapter); > void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb); > -int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, > - struct sk_buff *skb); > +ktime_t igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va); > int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); > int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); > void igb_set_flag_queue_pairs(struct igb_adapter *, const u32); > diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c > index a45cd2b416c8..8fab55fd18fc 100644 > --- a/drivers/net/ethernet/intel/igb/igb_main.c > +++ b/drivers/net/ethernet/intel/igb/igb_main.c > @@ -8281,7 +8281,7 @@ static void igb_add_rx_frag(struct igb_ring *rx_ring, > static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, > struct igb_rx_buffer *rx_buffer, > struct xdp_buff *xdp, > - union e1000_adv_rx_desc *rx_desc) > + ktime_t timestamp) > { > #if (PAGE_SIZE < 8192) > unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; > @@ -8301,12 +8301,8 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, > if (unlikely(!skb)) > return NULL; > > - if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) { > - if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) { > - xdp->data += IGB_TS_HDR_LEN; > - size -= IGB_TS_HDR_LEN; > - } > - } > + if (timestamp) > + skb_hwtstamps(skb)->hwtstamp = timestamp; > > /* Determine available headroom for copy */ > headlen = size; > @@ -8337,7 +8333,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, > static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, > struct igb_rx_buffer *rx_buffer, > struct xdp_buff *xdp, > - union e1000_adv_rx_desc *rx_desc) > + ktime_t timestamp) > { > #if (PAGE_SIZE < 8192) > unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; > @@ -8364,11 +8360,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, > if (metasize) > skb_metadata_set(skb, metasize); > > - /* pull timestamp out of packet data */ > - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { > - if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb)) > - __skb_pull(skb, IGB_TS_HDR_LEN); > - } > + if (timestamp) > + skb_hwtstamps(skb)->hwtstamp = timestamp; > > /* update buffer offset */ > #if (PAGE_SIZE < 8192) > @@ -8683,7 +8676,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) > while (likely(total_packets < budget)) { > union e1000_adv_rx_desc *rx_desc; > struct igb_rx_buffer *rx_buffer; > + ktime_t timestamp = 0; > + int pkt_offset = 0; > unsigned int size; > + void *pktbuf; > > /* return some buffers to hardware, one at a time is too slow */ > if (cleaned_count >= IGB_RX_BUFFER_WRITE) { > @@ -8703,15 +8699,21 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) > dma_rmb(); > > rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt); > + pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; > + > + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { > + timestamp = igb_ptp_rx_pktstamp(rx_ring->q_vector, > + pktbuf); > + pkt_offset += IGB_TS_HDR_LEN; > + size -= IGB_TS_HDR_LEN; > + } > > /* retrieve a buffer from the ring */ > if (!skb) { > - unsigned int offset = igb_rx_offset(rx_ring); > - unsigned char *hard_start; > - > - hard_start = page_address(rx_buffer->page) + > - rx_buffer->page_offset - offset; > - xdp_prepare_buff(&xdp, hard_start, offset, size, true); > + xdp.data = pktbuf + pkt_offset; > + xdp.data_end = xdp.data + size; > + xdp.data_meta = xdp.data; > + xdp.data_hard_start = pktbuf - igb_rx_offset(rx_ring); > #if (PAGE_SIZE > 4096) > /* At larger PAGE_SIZE, frame_sz depend on len size */ > xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size); > @@ -8733,10 +8735,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) > } else if (skb) > igb_add_rx_frag(rx_ring, rx_buffer, skb, size); > else if (ring_uses_build_skb(rx_ring)) > - skb = igb_build_skb(rx_ring, rx_buffer, &xdp, rx_desc); > + skb = igb_build_skb(rx_ring, rx_buffer, &xdp, > + timestamp); > else > skb = igb_construct_skb(rx_ring, rx_buffer, > - &xdp, rx_desc); > + &xdp, timestamp); > > /* exit if we failed to retrieve a buffer */ > if (!skb) { > diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c > index 86a576201f5f..0cbdf48285d3 100644 > --- a/drivers/net/ethernet/intel/igb/igb_ptp.c > +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c > @@ -863,23 +863,22 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) > * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp > * @q_vector: Pointer to interrupt specific structure > * @va: Pointer to address containing Rx buffer > - * @skb: Buffer containing timestamp and packet > * > * This function is meant to retrieve a timestamp from the first buffer of an > * incoming frame. The value is stored in little endian format starting on > * byte 8 > * > - * Returns: 0 if success, nonzero if failure > + * Returns: 0 on failure, timestamp on success > **/ > -int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, > - struct sk_buff *skb) > +ktime_t igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va) > { > struct igb_adapter *adapter = q_vector->adapter; > + struct skb_shared_hwtstamps ts; > __le64 *regval = (__le64 *)va; > int adjust = 0; > > if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) > - return IGB_RET_PTP_DISABLED; > + return 0; > > /* The timestamp is recorded in little endian format. > * DWORD: 0 1 2 3 > @@ -888,10 +887,9 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, > > /* check reserved dwords are zero, be/le doesn't matter for zero */ > if (regval[0]) > - return IGB_RET_PTP_INVALID; > + return 0; > > - igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), > - le64_to_cpu(regval[1])); > + igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); > > /* adjust timestamp for the RX latency based on link speed */ > if (adapter->hw.mac.type == e1000_i210) { > @@ -907,10 +905,8 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, > break; > } > } > - skb_hwtstamps(skb)->hwtstamp = > - ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); > > - return 0; > + return ktime_sub_ns(ts.hwtstamp, adjust); > } > > /** -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat LinkedIn: http://www.linkedin.com/in/brouer