On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote: > In order to use XDP hints via kfuncs we need to put > RX descriptor and ring pointers just next to xdp_buff. > Same as in hints implementations in other drivers, we achieve > this through putting xdp_buff into a child structure. Don't you mean a parent struct? xdp_buff will be a 'child' of ice_xdp_buff, if I'm reading this right. > > Currently, xdp_buff is stored in the ring structure, > so replace it with union that includes child structure. > This way enough memory is available while existing XDP code > remains isolated from hints. > > Minimum size of the new child structure (ice_xdp_buff) is exactly > 64 bytes (single cache line). To place it at the start of a cache line, > move 'next' field from CL1 to CL3, as it isn't used often. This still > leaves 128 bits available in CL3 for packet context extensions. I believe ice_xdp_buff will be beefed up in later patches, so what is the point of moving 'next'? We won't be able to keep ice_xdp_buff in a single CL anyway. 
> > Signed-off-by: Larysa Zaremba <larysa.zaremba@xxxxxxxxx> > --- > drivers/net/ethernet/intel/ice/ice_txrx.c | 7 +++-- > drivers/net/ethernet/intel/ice/ice_txrx.h | 26 ++++++++++++++++--- > drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++ > 3 files changed, 38 insertions(+), 5 deletions(-) > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c > index 40f2f6dabb81..4e6546d9cf85 100644 > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c > @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) > * @xdp_prog: XDP program to run > * @xdp_ring: ring to be used for XDP_TX action > * @rx_buf: Rx buffer to store the XDP action > + * @eop_desc: Last descriptor in packet to read metadata from > * > * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} > */ > static void > ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, > struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, > - struct ice_rx_buf *rx_buf) > + struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) > { > unsigned int ret = ICE_XDP_PASS; > u32 act; > @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, > if (!xdp_prog) > goto exit; > > + ice_xdp_meta_set_desc(xdp, eop_desc); I am currently not sure whether, in the multi-buffer case, HW repeats all the necessary info within each descriptor for every frag. IOW, shouldn't you be using ice_rx_ring::first_desc? It would be good to test hints for the multi-buffer case for sure. 
> + > act = bpf_prog_run_xdp(xdp_prog, xdp); > switch (act) { > case XDP_PASS: > @@ -1240,7 +1243,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) > if (ice_is_non_eop(rx_ring, rx_desc)) > continue; > > - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf); > + ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); > if (rx_buf->act == ICE_XDP_PASS) > goto construct_skb; > total_rx_bytes += xdp_get_buff_len(xdp); > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h > index 166413fc33f4..d0ab2c4c0c91 100644 > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h > @@ -257,6 +257,18 @@ enum ice_rx_dtype { > ICE_RX_DTYPE_SPLIT_ALWAYS = 2, > }; > > +struct ice_pkt_ctx { > + const union ice_32b_rx_flex_desc *eop_desc; > +}; > + > +struct ice_xdp_buff { > + struct xdp_buff xdp_buff; > + struct ice_pkt_ctx pkt_ctx; > +}; > + > +/* Required for compatibility with xdp_buffs from xsk_pool */ > +static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0); > + > /* indices into GLINT_ITR registers */ > #define ICE_RX_ITR ICE_IDX_ITR0 > #define ICE_TX_ITR ICE_IDX_ITR1 > @@ -298,7 +310,6 @@ enum ice_dynamic_itr { > /* descriptor ring, associated with a VSI */ > struct ice_rx_ring { > /* CL1 - 1st cacheline starts here */ > - struct ice_rx_ring *next; /* pointer to next ring in q_vector */ > void *desc; /* Descriptor ring memory */ > struct device *dev; /* Used for DMA mapping */ > struct net_device *netdev; /* netdev ring maps to */ > @@ -310,12 +321,19 @@ struct ice_rx_ring { > u16 count; /* Number of descriptors */ > u16 reg_idx; /* HW register index of the ring */ > u16 next_to_alloc; > - /* CL2 - 2nd cacheline starts here */ > + > union { > struct ice_rx_buf *rx_buf; > struct xdp_buff **xdp_buf; > }; > - struct xdp_buff xdp; > + /* CL2 - 2nd cacheline starts here */ > + union { > + struct ice_xdp_buff xdp_ext; > + struct { > + struct xdp_buff xdp; > + 
struct ice_pkt_ctx pkt_ctx; > + }; > + }; > /* CL3 - 3rd cacheline starts here */ > struct bpf_prog *xdp_prog; > u16 rx_offset; > @@ -325,6 +343,8 @@ struct ice_rx_ring { > u16 next_to_clean; > u16 first_desc; > > + struct ice_rx_ring *next; /* pointer to next ring in q_vector */ > + > /* stats structs */ > struct ice_ring_stats *ring_stats; > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h > index e1d49e1235b3..145883eec129 100644 > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h > @@ -151,4 +151,14 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, > struct sk_buff *skb); > void > ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); > + > +static inline void > +ice_xdp_meta_set_desc(struct xdp_buff *xdp, > + union ice_32b_rx_flex_desc *eop_desc) > +{ > + struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff, > + xdp_buff); > + > + xdp_ext->pkt_ctx.eop_desc = eop_desc; > +} > #endif /* !_ICE_TXRX_LIB_H_ */ > -- > 2.41.0 >