Re: [xdp-hints] [RFC net-next v4 2/8] xsk: add TX timestamp and TX checksum offload support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 25/07/2023 01.59, Stanislav Fomichev wrote:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 11652e464f5d..8b40c80557aa 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1660,6 +1660,31 @@ struct xdp_metadata_ops {
  			       enum xdp_rss_hash_type *rss_type);
  };
+/*
+ * This structure defines the AF_XDP TX metadata hooks for network devices.
+ * The following hooks can be defined; unless noted otherwise, they are
+ * optional and can be filled with a null pointer.
+ *
+ * int (*tmo_request_timestamp)(void *priv)
+ *     This function is called when AF_XDP frame requested egress timestamp.
+ *
+ * int (*tmo_fill_timestamp)(void *priv)
+ *     This function is called when AF_XDP frame, that had requested
+ *     egress timestamp, received a completion. The hook needs to return
+ *     the actual HW timestamp.
+ *
+ * int (*tmo_request_timestamp)(u16 csum_start, u16 csum_offset, void *priv)
+ *     This function is called when AF_XDP frame requested HW checksum
+ *     offload. csum_start indicates position where checksumming should start.
+ *     csum_offset indicates position where checksum should be stored.
+ *
+ */
+struct xsk_tx_metadata_ops {
+	void	(*tmo_request_timestamp)(void *priv);
+	u64	(*tmo_fill_timestamp)(void *priv);
+	void	(*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
+};
+
  /**
   * enum netdev_priv_flags - &struct net_device priv_flags
   *
@@ -1844,6 +1869,7 @@ enum netdev_ml_priv_type {
   *	@netdev_ops:	Includes several pointers to callbacks,
   *			if one wants to override the ndo_*() functions
   *	@xdp_metadata_ops:	Includes pointers to XDP metadata callbacks.
+ *	@xsk_tx_metadata_ops:	Includes pointers to AF_XDP TX metadata callbacks.
   *	@ethtool_ops:	Management operations
   *	@l3mdev_ops:	Layer 3 master device operations
   *	@ndisc_ops:	Includes callbacks for different IPv6 neighbour
@@ -2100,6 +2126,7 @@ struct net_device {
  	unsigned long long	priv_flags;
  	const struct net_device_ops *netdev_ops;
  	const struct xdp_metadata_ops *xdp_metadata_ops;
+	const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops;
  	int			ifindex;
  	unsigned short		gflags;
  	unsigned short		hard_header_len;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index faaba050f843..5febc1a5131e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -581,7 +581,10 @@ struct skb_shared_info {
  	/* Warning: this field is not always filled in (UFO)! */
  	unsigned short	gso_segs;
  	struct sk_buff	*frag_list;
-	struct skb_shared_hwtstamps hwtstamps;
+	union {
+		struct skb_shared_hwtstamps hwtstamps;
+		struct xsk_tx_metadata *xsk_meta;
+	};
  	unsigned int	gso_type;
  	u32		tskey;
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 467b9fb56827..288fa58c4665 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -90,6 +90,54 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
  int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
  void __xsk_map_flush(void);
+/**
+ *  xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission
+ *  and call appropriate xsk_tx_metadata_ops operation.
+ *  @meta: pointer to AF_XDP metadata area
+ *  @ops: pointer to struct xsk_tx_metadata_ops
+ *  @priv: pointer to driver-private aread
+ *
+ *  This function should be called by the networking device when
+ *  it prepares AF_XDP egress packet.
+ */
+static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
+					   const struct xsk_tx_metadata_ops *ops,
+					   void *priv)

(As you mentioned) this gets inlined in drivers for performance.

+{
+	if (!meta)
+		return;
+
+	if (ops->tmo_request_timestamp)
+		if (meta->flags & XDP_TX_METADATA_TIMESTAMP)
+			ops->tmo_request_timestamp(priv);

We still have the overhead of function pointer call.
With RETPOLINE this is costly.

Measured on my testlab CPU E5-1650 v4 @ 3.60GHz
 Type:function_call_cost:  3 cycles(tsc) 1.010 ns
 Type:func_ptr_call_cost: 30 cycles(tsc) 8.341 ns

Given this get inlined in drivers, perhaps we can add some
INDIRECT_CALL_1 macros to avoid these indirect calls?

+
+	if (ops->tmo_request_checksum)
+		if (meta->flags & XDP_TX_METADATA_CHECKSUM)
+			ops->tmo_request_checksum(meta->csum_start, meta->csum_offset, priv);
+}
+





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux