On Mon, Aug 14, 2023 at 11:05 AM Stanislav Fomichev <sdf@xxxxxxxxxx> wrote: > > On Mon, Aug 14, 2023 at 3:57 AM Maciej Fijalkowski > <maciej.fijalkowski@xxxxxxxxx> wrote: > > > > On Wed, Aug 09, 2023 at 09:54:10AM -0700, Stanislav Fomichev wrote: > > > For zerocopy mode, tx_desc->addr can point to the arbitrary offset > > > and carry some TX metadata in the headroom. For copy mode, there > > > is no way currently to populate skb metadata. > > > > > > Introduce new XDP_TX_METADATA_LEN that indicates how many bytes > > > to treat as metadata. Metadata bytes come prior to tx_desc address > > > (same as in RX case). > > > > > > The size of the metadata has the same constraints as XDP: > > > - less than 256 bytes > > > - 4-byte aligned > > > - non-zero > > > > > > This data is not interpreted in any way right now. > > > > > > Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx> > > > --- > > > include/net/xdp_sock.h | 1 + > > > include/net/xsk_buff_pool.h | 1 + > > > include/uapi/linux/if_xdp.h | 1 + > > > net/xdp/xsk.c | 20 ++++++++++++++++++++ > > > net/xdp/xsk_buff_pool.c | 1 + > > > net/xdp/xsk_queue.h | 17 ++++++++++------- > > > 6 files changed, 34 insertions(+), 7 deletions(-) > > > > > > diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h > > > index 1617af380162..467b9fb56827 100644 > > > --- a/include/net/xdp_sock.h > > > +++ b/include/net/xdp_sock.h > > > @@ -51,6 +51,7 @@ struct xdp_sock { > > > struct list_head flush_node; > > > struct xsk_buff_pool *pool; > > > u16 queue_id; > > > + u8 tx_metadata_len; > > > bool zc; > > > bool sg; > > > enum { > > > diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h > > > index b0bdff26fc88..9c31e8d1e198 100644 > > > --- a/include/net/xsk_buff_pool.h > > > +++ b/include/net/xsk_buff_pool.h > > > @@ -77,6 +77,7 @@ struct xsk_buff_pool { > > > u32 chunk_size; > > > u32 chunk_shift; > > > u32 frame_len; > > > + u8 tx_metadata_len; /* inherited from xsk_sock */ > > > u8 cached_need_wakeup; > > > bool uses_need_wakeup; > > > bool dma_need_sync; > > > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h > > > index 8d48863472b9..b37b50102e1c 100644 > > > --- a/include/uapi/linux/if_xdp.h > > > +++ b/include/uapi/linux/if_xdp.h > > > @@ -69,6 +69,7 @@ struct xdp_mmap_offsets { > > > #define XDP_UMEM_COMPLETION_RING 6 > > > #define XDP_STATISTICS 7 > > > #define XDP_OPTIONS 8 > > > +#define XDP_TX_METADATA_LEN 9 > > > > > > struct xdp_umem_reg { > > > __u64 addr; /* Start of packet data area */ > > > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c > > > index 47796a5a79b3..28df3280501d 100644 > > > --- a/net/xdp/xsk.c > > > +++ b/net/xdp/xsk.c > > > @@ -1338,6 +1338,26 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, > > > mutex_unlock(&xs->mutex); > > > return err; > > > } > > > + case XDP_TX_METADATA_LEN: > > > + { > > > + int val; > > > + > > > + if (optlen < sizeof(val)) > > > + return -EINVAL; > > > + if (copy_from_sockptr(&val, optval, sizeof(val))) > > > + return -EFAULT; > > > + if (!val || val > 256 || val % 4) > > > + return -EINVAL; > > > + > > > + mutex_lock(&xs->mutex); > > > + if (xs->state != XSK_READY) { > > > + mutex_unlock(&xs->mutex); > > > + return -EBUSY; > > > + } > > > + xs->tx_metadata_len = val; > > > + mutex_unlock(&xs->mutex); > > > + return 0; > > > + } > > > default: > > > break; > > > } > > > diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c > > > index b3f7b310811e..b351732f1032 100644 > > > --- a/net/xdp/xsk_buff_pool.c > > > +++ b/net/xdp/xsk_buff_pool.c > > > @@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, > > > XDP_PACKET_HEADROOM; > > > pool->umem = umem; > > > pool->addrs = umem->addrs; > > > + pool->tx_metadata_len = xs->tx_metadata_len; > > > > Hey Stan, > > > > what would happen in case when one socket sets pool->tx_metadata_len say > > to 16 and the other one that is sharing the pool to 24? If sockets should > > and can have different padding, then this will not work unless the > > metadata_len is on a per socket level. > > Hmm, good point. I didn't think about umem sharing :-/ > Maybe they all have to agree on the size? And once the size has been > size by one socket, the same size should be set on the others? (or at > least be implied that the other sockets will use the same metadata > layout) Thinking about it a bit more: should that tx_metadata_len be part of a umem then? Users can provide it as part of xdp_umem_reg which is probably a better place to pass it compared to the setsockopt?