On Tue, Feb 14, 2023 at 06:00:18PM +0530, Tirthendu Sarkar wrote:
> This patch adds multi-buffer support for the i40e driver.
>
> i40e_clean_rx_irq() is modified to collate all the buffers of a packet
> before calling the XDP program. xdp_buff is built for the first frag of
> the packet and subsequent frags are added to it. 'next_to_process' is
> incremented for all non-EOP frags while 'next_to_clean' stays at the
> first descriptor of the packet. The XDP program is called only on
> receiving the EOP frag.
>
> New functions are added for adding frags to xdp_buff and for post
> processing of the buffers once the xdp prog has run. For XDP_PASS this
> results in an skb with multiple fragments.
>
> i40e_build_skb() builds the skb around the xdp buffer that already
> contains the frags data, so the i40e_add_rx_frag() helper function is
> now removed. Since fields before 'dataref' in skb_shared_info are
> cleared during napi_build_skb(), xdp_update_skb_shared_info() is called
> to set those.
>
> For i40e_construct_skb(), all the frags data needs to be copied from
> the xdp_buff's skb_shared_info to the newly constructed skb's
> skb_shared_info.
>
> This also means 'skb' does not need to be preserved across
> i40e_napi_poll() calls and hence is removed from the i40e_ring
> structure.
>
> Previously i40e_alloc_rx_buffers() was called for every 32 cleaned
> buffers. For multi-buffer this may not be optimal as there may be more
> cleaned buffers in each i40e_clean_rx_irq() call. So this is now called
> when at least half of the ring size has been cleaned.

Please align this patch with the xdp_features update (see the sketch
below the diffstat).

> Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@xxxxxxxxx>
> ---
>  drivers/net/ethernet/intel/i40e/i40e_main.c |   4 +-
>  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 314 +++++++++++++-------
>  drivers/net/ethernet/intel/i40e/i40e_txrx.h |   8 -
>  3 files changed, 209 insertions(+), 117 deletions(-)

(...)
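Regarding the xdp_features request above: what I mean is advertising
multi-buffer (frag-aware) XDP Rx through the new netdev->xdp_features
bits. Roughly along the lines below, next to wherever i40e sets (or will
set) its other XDP feature flags -- this is only an untested sketch, and
which flags i40e actually needs besides NETDEV_XDP_ACT_RX_SG is an
assumption on my part:

        /* sketch: advertise frag-aware XDP Rx alongside the base flags */
        netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
                               NETDEV_XDP_ACT_REDIRECT |
                               NETDEV_XDP_ACT_RX_SG;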
> }
>
> +/**
> + * i40e_add_xdp_frag: Add a frag to xdp_buff
> + * @xdp: xdp_buff pointing to the data
> + * @nr_frags: return number of buffers for the packet
> + * @rx_buffer: rx_buffer holding data of the current frag
> + * @size: size of data of current frag
> + */
> +static int i40e_add_xdp_frag(struct xdp_buff *xdp, u32 *nr_frags,
> +                             struct i40e_rx_buffer *rx_buffer, u32 size)
> +{
> +        struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
> +
> +        if (!xdp_buff_has_frags(xdp)) {
> +                sinfo->nr_frags = 0;
> +                sinfo->xdp_frags_size = 0;
> +                xdp_buff_set_frags_flag(xdp);
> +        } else if (unlikely(sinfo->nr_frags >= MAX_SKB_FRAGS)) {
> +                /* Overflowing packet: All frags need to be dropped */
> +                return  -ENOMEM;

nit: double space

> +        }
> +
> +        __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buffer->page,
> +                                   rx_buffer->page_offset, size);
> +
> +        sinfo->xdp_frags_size += size;
> +
> +        if (page_is_pfmemalloc(rx_buffer->page))
> +                xdp_buff_set_frag_pfmemalloc(xdp);
> +        *nr_frags = sinfo->nr_frags;
> +
> +        return 0;
> +}
> +
> +/**
> + * i40e_consume_xdp_buff - Consume all the buffers of the packet and update ntc
> + * @rx_ring: rx descriptor ring to transact packets on
> + * @xdp: xdp_buff pointing to the data
> + * @rx_buffer: rx_buffer of eop desc
> + */
> +static void i40e_consume_xdp_buff(struct i40e_ring *rx_ring,
> +                                  struct xdp_buff *xdp,
> +                                  struct i40e_rx_buffer *rx_buffer)
> +{
> +        i40e_process_rx_buffs(rx_ring, I40E_XDP_CONSUMED, xdp);
> +        i40e_put_rx_buffer(rx_ring, rx_buffer);
> +        rx_ring->next_to_clean = rx_ring->next_to_process;
> +        xdp->data = NULL;
> +}
> +
>  /**
>   * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
>   * @rx_ring: rx descriptor ring to transact packets on
> @@ -2405,9 +2495,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
>  {
>          unsigned int total_rx_bytes = 0, total_rx_packets = 0;
>          u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
> +        u16 clean_threshold = rx_ring->count / 2;
>          unsigned int offset = rx_ring->rx_offset;
>          struct xdp_buff *xdp = &rx_ring->xdp;
> -        struct sk_buff *skb = rx_ring->skb;
>          unsigned int xdp_xmit = 0;
>          struct bpf_prog *xdp_prog;
>          bool failure = false;
> @@ -2419,11 +2509,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
>                  u16 ntp = rx_ring->next_to_process;
>                  struct i40e_rx_buffer *rx_buffer;
>                  union i40e_rx_desc *rx_desc;
> +                struct sk_buff *skb;
>                  unsigned int size;
> +                u32 nfrags = 0;
> +                bool neop;
>                  u64 qword;
>
>                  /* return some buffers to hardware, one at a time is too slow */
> -                if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
> +                if (cleaned_count >= clean_threshold) {
>                          failure = failure ||
>                                    i40e_alloc_rx_buffers(rx_ring, cleaned_count);
>                          cleaned_count = 0;
> @@ -2461,76 +2554,83 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
>                          break;
>
>                  i40e_trace(clean_rx_irq, rx_ring, rx_desc, xdp);
> +                /* retrieve a buffer from the ring */
>                  rx_buffer = i40e_get_rx_buffer(rx_ring, size);
>
> -                /* retrieve a buffer from the ring */
> -                if (!skb) {
> +                neop = i40e_is_non_eop(rx_ring, rx_desc);
> +                i40e_inc_ntp(rx_ring);
> +
> +                if (!xdp->data) {
>                          unsigned char *hard_start;
>
>                          hard_start = page_address(rx_buffer->page) +
>                                       rx_buffer->page_offset - offset;
>                          xdp_prepare_buff(xdp, hard_start, offset, size, true);
> -                        xdp_buff_clear_frags_flag(xdp);
>  #if (PAGE_SIZE > 4096)
>                          /* At larger PAGE_SIZE, frame_sz depend on len size */
>                          xdp->frame_sz = i40e_rx_frame_truesize(rx_ring, size);
>  #endif
> -                        xdp_res = i40e_run_xdp(rx_ring, xdp, xdp_prog);
> +                } else if (i40e_add_xdp_frag(xdp, &nfrags, rx_buffer, size) &&
> +                           !neop) {
> +                        /* Overflowing packet: Drop all frags on EOP */
> +                        i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer);
> +                        break;
>                  }
>
> +                if (neop)
> +                        continue;
> +
> +                xdp_res = i40e_run_xdp(rx_ring, xdp, xdp_prog);
> +
>                  if (xdp_res) {
> -                        if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
> -                                xdp_xmit |= xdp_res;
> +                        xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);

What was wrong with having the above included in the
} else if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
branch?

> +
> +                        if (unlikely(xdp_buff_has_frags(xdp))) {
> +                                i40e_process_rx_buffs(rx_ring, xdp_res, xdp);
> +                                size = xdp_get_buff_len(xdp);
> +                        } else if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
>                                  i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
>                          } else {
>                                  rx_buffer->pagecnt_bias++;
>                          }
>                          total_rx_bytes += size;
> -                        total_rx_packets++;
> -                } else if (skb) {
> -                        i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
> -                } else if (ring_uses_build_skb(rx_ring)) {
> -                        skb = i40e_build_skb(rx_ring, rx_buffer, xdp);
>                  } else {
> -                        skb = i40e_construct_skb(rx_ring, rx_buffer, xdp);
> -                }
> +                        if (ring_uses_build_skb(rx_ring))
> +                                skb = i40e_build_skb(rx_ring, xdp, nfrags);
> +                        else
> +                                skb = i40e_construct_skb(rx_ring, xdp, nfrags);
> +
> +                        /* drop if we failed to retrieve a buffer */
> +                        if (!skb) {
> +                                rx_ring->rx_stats.alloc_buff_failed++;
> +                                i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer);
> +                                break;
> +                        }
>
> -                /* exit if we failed to retrieve a buffer */
> -                if (!xdp_res && !skb) {
> -                        rx_ring->rx_stats.alloc_buff_failed++;
> -                        rx_buffer->pagecnt_bias++;
> -                        break;
> -                }
> +                        if (i40e_cleanup_headers(rx_ring, skb, rx_desc))
> +                                goto process_next;
>
> -                i40e_put_rx_buffer(rx_ring, rx_buffer);
> -                cleaned_count++;
> +                        /* probably a little skewed due to removing CRC */
> +                        total_rx_bytes += skb->len;
>
> -                i40e_inc_ntp(rx_ring);
> -                rx_ring->next_to_clean = rx_ring->next_to_process;
> -                if (i40e_is_non_eop(rx_ring, rx_desc))
> -                        continue;
> +                        /* populate checksum, VLAN, and protocol */
> +                        i40e_process_skb_fields(rx_ring, rx_desc, skb);
>
> -                if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
> -                        skb = NULL;
> -                        continue;
> +                        i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, xdp);
> +                        napi_gro_receive(&rx_ring->q_vector->napi, skb);
>                  }
>
> -                /* probably a little skewed due to removing CRC */
> -                total_rx_bytes += skb->len;
> -
> -                /* populate checksum, VLAN, and protocol */
> -                i40e_process_skb_fields(rx_ring, rx_desc, skb);
> -
> -                i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, xdp);
> -                napi_gro_receive(&rx_ring->q_vector->napi, skb);
> -                skb = NULL;
> -
>                  /* update budget accounting */
>                  total_rx_packets++;
> +process_next:
> +                cleaned_count += nfrags + 1;
> +                i40e_put_rx_buffer(rx_ring, rx_buffer);
> +                rx_ring->next_to_clean = rx_ring->next_to_process;
> +
> +                xdp->data = NULL;
>          }
>
>          i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
> -        rx_ring->skb = skb;
>
>          i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> index e86abc25bb5e..14ad074639ab 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> @@ -393,14 +393,6 @@ struct i40e_ring {
>
>          struct rcu_head rcu;            /* to avoid race on free */
>          u16 next_to_alloc;
> -        struct sk_buff *skb;            /* When i40e_clean_rx_ring_irq() must
> -                                         * return before it sees the EOP for
> -                                         * the current packet, we save that skb
> -                                         * here and resume receiving this
> -                                         * packet the next time
> -                                         * i40e_clean_rx_ring_irq() is called
> -                                         * for this ring.
> -                                         */

This comment was valuable to me back when I was getting started with
i40e, so maybe we could have something equivalent around xdp_buff now?
(rough sketch at the bottom of this mail)

>
>          struct i40e_channel *ch;
>          u16 rx_offset;
> --
> 2.34.1
>
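To be concrete, something along these lines around the xdp_buff member
(rx_ring->xdp) would carry that information over. The declaration and
wording below are only my sketch of how I read this patch, so adjust as
you see fit:

        struct xdp_buff xdp;    /* When i40e_clean_rx_irq() must return
                                 * before it sees the EOP descriptor for
                                 * the current packet, the partially built
                                 * xdp_buff is kept here ('next_to_clean'
                                 * stays at the packet's first descriptor)
                                 * and receive resumes from it the next
                                 * time this ring is polled.
                                 */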