Re: [PATCH net-next v9 2/4] ethtool: provide customized dim profile management

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 17 Apr 2024 23:55:44 +0800 Heng Qi wrote:
> $ ethtool -c ethx
> ...
> rx-eqe-profile:
> {.usec =   1, .pkts = 256, .comps =   0,},
> {.usec =   8, .pkts = 256, .comps =   0,},
> {.usec =  64, .pkts = 256, .comps =   0,},
> {.usec = 128, .pkts = 256, .comps =   0,},
> {.usec = 256, .pkts = 256, .comps =   0,}
> rx-cqe-profile:   n/a
> tx-eqe-profile:   n/a
> tx-cqe-profile:   n/a

I don't think that exposing CQE vs EQE mode makes much sense here.
We enable the right mode via:

struct kernel_ethtool_coalesce {
	u8 use_cqe_mode_tx;
	u8 use_cqe_mode_rx;

the user needs to set the packets and usecs in an educated way 
(matching the mode). The kernel never changes the mode.

Am I missing something?

> +  -
> +    name: moderation

irq-moderation ?

> +    attributes:
> +      -
> +        name: usec
> +        type: u32
> +      -
> +        name: pkts
> +        type: u32
> +      -
> +        name: comps
> +        type: u32

> +++ b/include/linux/ethtool.h
> @@ -284,7 +284,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
>  #define ETHTOOL_COALESCE_TX_AGGR_MAX_BYTES	BIT(24)
>  #define ETHTOOL_COALESCE_TX_AGGR_MAX_FRAMES	BIT(25)
>  #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS	BIT(26)
> -#define ETHTOOL_COALESCE_ALL_PARAMS		GENMASK(26, 0)
> +#define ETHTOOL_COALESCE_RX_EQE_PROFILE         BIT(27)
> +#define ETHTOOL_COALESCE_RX_CQE_PROFILE         BIT(28)
> +#define ETHTOOL_COALESCE_TX_EQE_PROFILE         BIT(29)
> +#define ETHTOOL_COALESCE_TX_CQE_PROFILE         BIT(30)
> +#define ETHTOOL_COALESCE_ALL_PARAMS		GENMASK(30, 0)

I think it's better to add these to netdev_ops, see below.

> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index d45f330d083d..a1c7e9c2be86 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -80,6 +80,25 @@ struct xdp_frame;
>  struct xdp_metadata_ops;
>  struct xdp_md;
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)

Don't wrap type definitions in an ifdef.

> +struct dim_cq_moder;

Unnecessary.

> +#define NETDEV_PROFILE_USEC	BIT(0)	/* device supports usec field modification */
> +#define NETDEV_PROFILE_PKTS	BIT(1)	/* device supports pkts field modification */
> +#define NETDEV_PROFILE_COMPS	BIT(2)	/* device supports comps field modification */
> +
> +struct netdev_profile_moder {
> +	/* See NETDEV_PROFILE_* */
> +	unsigned int flags;
> +
> +	/* DIM profile lists for different dim cq modes */
> +	struct dim_cq_moder *rx_eqe_profile;
> +	struct dim_cq_moder *rx_cqe_profile;
> +	struct dim_cq_moder *tx_eqe_profile;
> +	struct dim_cq_moder *tx_cqe_profile;
> +};
> +#endif

All of this can move to the dim header. No need to grow netdevice.h

>  typedef u32 xdp_features_t;
>  
>  void synchronize_net(void);


> +static int dev_dim_profile_init(struct net_device *dev)
> +{
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +	u32 supported = dev->ethtool_ops->supported_coalesce_params;
> +
> +	struct netdev_profile_moder *moder;
> +	int length;
> +
> +	dev->moderation = kzalloc(sizeof(*dev->moderation), GFP_KERNEL);
> +	if (!dev->moderation)
> +		goto err_moder;

if the device has no DIM we should allocate nothing

> +	moder = dev->moderation;
> +	length = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_eqe_profile);
> +
> +	if (supported & ETHTOOL_COALESCE_RX_EQE_PROFILE) {
> +		moder->rx_eqe_profile = kmemdup(dim_rx_profile[0], length, GFP_KERNEL);
> +		if (!moder->rx_eqe_profile)
> +			goto err_rx_eqe;
> +	}
> +	if (supported & ETHTOOL_COALESCE_RX_CQE_PROFILE) {
> +		moder->rx_cqe_profile = kmemdup(dim_rx_profile[1], length, GFP_KERNEL);
> +		if (!moder->rx_cqe_profile)
> +			goto err_rx_cqe;
> +	}
> +	if (supported & ETHTOOL_COALESCE_TX_EQE_PROFILE) {
> +		moder->tx_eqe_profile = kmemdup(dim_tx_profile[0], length, GFP_KERNEL);
> +		if (!moder->tx_eqe_profile)
> +			goto err_tx_eqe;
> +	}
> +	if (supported & ETHTOOL_COALESCE_TX_CQE_PROFILE) {
> +		moder->tx_cqe_profile = kmemdup(dim_tx_profile[1], length, GFP_KERNEL);
> +		if (!moder->tx_cqe_profile)
> +			goto err_tx_cqe;
> +	}

This code should also live in dim rather than dev.c.

> +#endif
> +	return 0;
> +
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +err_tx_cqe:
> +	kfree(moder->tx_eqe_profile);
> +err_tx_eqe:
> +	kfree(moder->rx_cqe_profile);
> +err_rx_cqe:
> +	kfree(moder->rx_eqe_profile);
> +err_rx_eqe:
> +	kfree(moder);
> +err_moder:
> +	return -ENOMEM;
> +#endif
> +}
> +
>  /**
>   * register_netdevice() - register a network device
>   * @dev: device to register
> @@ -10258,6 +10310,10 @@ int register_netdevice(struct net_device *dev)
>  	if (ret)
>  		return ret;
>  
> +	ret = dev_dim_profile_init(dev);
> +	if (ret)
> +		return ret;

This is fine but the driver still has to manually do bunch of init:

		INIT_WORK(&vi->rq[i].dim.work, virtnet_rx_dim_work);
		vi->rq[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;

It'd be better to collect all this static info (flags, mode, work func)
in one place / struct, attached to netdev or netdev_ops. Then the
driver can call a helper like which only needs to take netdev and dim
as arguments.

>  	spin_lock_init(&dev->addr_list_lock);
>  	netdev_set_addr_lockdep_class(dev);
>  
> @@ -11011,6 +11067,27 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
>  }
>  EXPORT_SYMBOL(alloc_netdev_mqs);
>  
> +static void netif_free_profile(struct net_device *dev)
> +{
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +	u32 supported = dev->ethtool_ops->supported_coalesce_params;
> +
> +	if (supported & ETHTOOL_COALESCE_RX_EQE_PROFILE)
> +		kfree(dev->moderation->rx_eqe_profile);

kfree(NULL) is valid, you don't have to check the flags

> +	if (supported & ETHTOOL_COALESCE_RX_CQE_PROFILE)
> +		kfree(dev->moderation->rx_cqe_profile);
> +
> +	if (supported & ETHTOOL_COALESCE_TX_EQE_PROFILE)
> +		kfree(dev->moderation->tx_eqe_profile);
> +
> +	if (supported & ETHTOOL_COALESCE_TX_CQE_PROFILE)
> +		kfree(dev->moderation->tx_cqe_profile);
> +
> +	kfree(dev->moderation);
> +#endif
> +}
> +
>  /**
>   * free_netdev - free network device
>   * @dev: device
> @@ -11036,6 +11113,8 @@ void free_netdev(struct net_device *dev)
>  		return;
>  	}
>  
> +	netif_free_profile(dev);
> +
>  	netif_free_tx_queues(dev);
>  	netif_free_rx_queues(dev);
>  
> diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
> index 83112c1a71ae..3a41840fbcc7 100644
> --- a/net/ethtool/coalesce.c
> +++ b/net/ethtool/coalesce.c
> @@ -1,5 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  
> +#include <linux/dim.h>
>  #include "netlink.h"
>  #include "common.h"
>  
> @@ -51,6 +52,10 @@ __CHECK_SUPPORTED_OFFSET(COALESCE_RX_MAX_FRAMES_HIGH);
>  __CHECK_SUPPORTED_OFFSET(COALESCE_TX_USECS_HIGH);
>  __CHECK_SUPPORTED_OFFSET(COALESCE_TX_MAX_FRAMES_HIGH);
>  __CHECK_SUPPORTED_OFFSET(COALESCE_RATE_SAMPLE_INTERVAL);
> +__CHECK_SUPPORTED_OFFSET(COALESCE_RX_EQE_PROFILE);
> +__CHECK_SUPPORTED_OFFSET(COALESCE_RX_CQE_PROFILE);
> +__CHECK_SUPPORTED_OFFSET(COALESCE_TX_EQE_PROFILE);
> +__CHECK_SUPPORTED_OFFSET(COALESCE_TX_CQE_PROFILE);
>  
>  const struct nla_policy ethnl_coalesce_get_policy[] = {
>  	[ETHTOOL_A_COALESCE_HEADER]		=
> @@ -82,6 +87,14 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
>  static int coalesce_reply_size(const struct ethnl_req_info *req_base,
>  			       const struct ethnl_reply_data *reply_base)
>  {
> +	int modersz = nla_total_size(0) + /* _MODERATIONS_MODERATION, nest */
> +		      nla_total_size(sizeof(u32)) + /* _MODERATION_USEC */
> +		      nla_total_size(sizeof(u32)) + /* _MODERATION_PKTS */
> +		      nla_total_size(sizeof(u32));  /* _MODERATION_COMPS */
> +
> +	int total_modersz = nla_total_size(0) +  /* _{R,T}X_{E,C}QE_PROFILE, nest */
> +			modersz * NET_DIM_PARAMS_NUM_PROFILES;
> +
>  	return nla_total_size(sizeof(u32)) +	/* _RX_USECS */
>  	       nla_total_size(sizeof(u32)) +	/* _RX_MAX_FRAMES */
>  	       nla_total_size(sizeof(u32)) +	/* _RX_USECS_IRQ */
> @@ -108,7 +121,8 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base,
>  	       nla_total_size(sizeof(u8)) +	/* _USE_CQE_MODE_RX */
>  	       nla_total_size(sizeof(u32)) +	/* _TX_AGGR_MAX_BYTES */
>  	       nla_total_size(sizeof(u32)) +	/* _TX_AGGR_MAX_FRAMES */
> -	       nla_total_size(sizeof(u32));	/* _TX_AGGR_TIME_USECS */
> +	       nla_total_size(sizeof(u32)) +	/* _TX_AGGR_TIME_USECS */
> +	       total_modersz * 4;		/* _{R,T}X_{E,C}QE_PROFILE */
>  }
>  
>  static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val,
> @@ -127,6 +141,62 @@ static bool coalesce_put_bool(struct sk_buff *skb, u16 attr_type, u32 val,
>  	return nla_put_u8(skb, attr_type, !!val);
>  }
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)

prefer using

	if (IS_ENABLED(CONFIG_DIMLIB))
		return -EOPNOTSUPP;

rather than hiding code from the compiler, where possible 

> +/**
> + * coalesce_put_profile - fill reply with a nla nest with four child nla nests.
> + * @skb: socket buffer the message is stored in
> + * @attr_type: nest attr type ETHTOOL_A_COALESCE_*X_*QE_PROFILE
> + * @profile: data passed to userspace
> + * @supported_params: modifiable parameters supported by the driver
> + *
> + * Put a dim profile nest attribute. Refer to ETHTOOL_A_MODERATIONS_MODERATION.
> + *
> + * Return: false to indicate successful placement or no placement, and
> + * true to pass the -EMSGSIZE error to the wrapper.

Why the bool? Doesn't most of the similar code return the error?

> + */
> +static bool coalesce_put_profile(struct sk_buff *skb, u16 attr_type,
> +				 const struct dim_cq_moder *profile,
> +				 u32 supported_params)
> +{
> +	struct nlattr *profile_attr, *moder_attr;
> +	bool emsg = !!-EMSGSIZE;
> +	int i;
> +
> +	if (!profile)
> +		return false;
> +
> +	if (!(supported_params & attr_to_mask(attr_type)))
> +		return false;
> +
> +	profile_attr = nla_nest_start(skb, attr_type);
> +	if (!profile_attr)
> +		return emsg;
> +
> +	for (i = 0; i < NET_DIM_PARAMS_NUM_PROFILES; i++) {
> +		moder_attr = nla_nest_start(skb, ETHTOOL_A_MODERATIONS_MODERATION);
> +		if (!moder_attr)
> +			goto nla_cancel_profile;
> +
> +		if (nla_put_u32(skb, ETHTOOL_A_MODERATION_USEC, profile[i].usec) ||
> +		    nla_put_u32(skb, ETHTOOL_A_MODERATION_PKTS, profile[i].pkts) ||
> +		    nla_put_u32(skb, ETHTOOL_A_MODERATION_COMPS, profile[i].comps))

Only put attrs for supported fields

> +			goto nla_cancel_moder;
> +
> +		nla_nest_end(skb, moder_attr);
> +	}
> +
> +	nla_nest_end(skb, profile_attr);
> +
> +	return 0;
> +
> +nla_cancel_moder:
> +	nla_nest_cancel(skb, moder_attr);
> +nla_cancel_profile:
> +	nla_nest_cancel(skb, profile_attr);
> +	return emsg;
> +}
> +#endif
> +
>  static int coalesce_fill_reply(struct sk_buff *skb,
>  			       const struct ethnl_req_info *req_base,
>  			       const struct ethnl_reply_data *reply_base)
> @@ -134,6 +204,9 @@ static int coalesce_fill_reply(struct sk_buff *skb,
>  	const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
>  	const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce;
>  	const struct ethtool_coalesce *coal = &data->coalesce;
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +	struct net_device *dev = req_base->dev;
> +#endif
>  	u32 supported = data->supported_params;
>  
>  	if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS,
> @@ -192,6 +265,21 @@ static int coalesce_fill_reply(struct sk_buff *skb,
>  			     kcoal->tx_aggr_time_usecs, supported))
>  		return -EMSGSIZE;
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +	if (!(dev->moderation->flags & (NETDEV_PROFILE_USEC | NETDEV_PROFILE_PKTS |
> +					NETDEV_PROFILE_COMPS)))
> +		return 0;
> +
> +	if (coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_EQE_PROFILE,
> +				 dev->moderation->rx_eqe_profile, supported) ||
> +	    coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_CQE_PROFILE,
> +				 dev->moderation->rx_cqe_profile, supported) ||
> +	    coalesce_put_profile(skb, ETHTOOL_A_COALESCE_TX_EQE_PROFILE,
> +				 dev->moderation->tx_eqe_profile, supported) ||
> +	    coalesce_put_profile(skb, ETHTOOL_A_COALESCE_TX_CQE_PROFILE,
> +				 dev->moderation->tx_cqe_profile, supported))
> +		return -EMSGSIZE;
> +#endif
>  	return 0;
>  }
>  
> @@ -227,7 +315,19 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
>  	[ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 },
>  	[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 },
>  	[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 },
> +	[ETHTOOL_A_COALESCE_RX_EQE_PROFILE]     = { .type = NLA_NESTED },
> +	[ETHTOOL_A_COALESCE_RX_CQE_PROFILE]     = { .type = NLA_NESTED },
> +	[ETHTOOL_A_COALESCE_TX_EQE_PROFILE]     = { .type = NLA_NESTED },
> +	[ETHTOOL_A_COALESCE_TX_CQE_PROFILE]     = { .type = NLA_NESTED },

NLA_POLICY_NESTED(coalesce_set_profile_policy)

> +};
> +
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +static const struct nla_policy coalesce_set_profile_policy[] = {
> +	[ETHTOOL_A_MODERATION_USEC]	= {.type = NLA_U32},
> +	[ETHTOOL_A_MODERATION_PKTS]	= {.type = NLA_U32},
> +	[ETHTOOL_A_MODERATION_COMPS]	= {.type = NLA_U32},
>  };
> +#endif
>  
>  static int
>  ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
> @@ -253,6 +353,76 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
>  	return 1;
>  }
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +/**
> + * ethnl_update_profile - get a nla nest with four child nla nests from userspace.
> + * @dev: netdevice to update the profile
> + * @dst: data get from the driver and modified by ethnl_update_profile.
> + * @nests: nest attr ETHTOOL_A_COALESCE_*X_*QE_PROFILE to set driver's profile.
> + * @extack: Netlink extended ack
> + *
> + * Layout of nests:
> + *   Nested ETHTOOL_A_COALESCE_*X_*QE_PROFILE attr
> + *     Nested ETHTOOL_A_MODERATIONS_MODERATION attr
> + *       ETHTOOL_A_MODERATION_USEC attr
> + *       ETHTOOL_A_MODERATION_PKTS attr
> + *       ETHTOOL_A_MODERATION_COMPS attr
> + *     ...
> + *     Nested ETHTOOL_A_MODERATIONS_MODERATION attr
> + *       ETHTOOL_A_MODERATION_USEC attr
> + *       ETHTOOL_A_MODERATION_PKTS attr
> + *       ETHTOOL_A_MODERATION_COMPS attr
> + *
> + * Return: 0 on success or a negative error code.
> + */
> +static int ethnl_update_profile(struct net_device *dev,
> +				struct dim_cq_moder *dst,
> +				const struct nlattr *nests,
> +				struct netlink_ext_ack *extack)
> +{
> +	struct nlattr *tb_moder[ARRAY_SIZE(coalesce_set_profile_policy)];
> +	struct dim_cq_moder profile[NET_DIM_PARAMS_NUM_PROFILES];
> +	struct netdev_profile_moder *moder = dev->moderation;
> +	struct nlattr *nest;
> +	int ret, rem, i = 0;
> +
> +	if (!nests)
> +		return 0;
> +
> +	if (!dst)
> +		return -EOPNOTSUPP;
> +
> +	nla_for_each_nested_type(nest, ETHTOOL_A_MODERATIONS_MODERATION, nests, rem) {
> +		ret = nla_parse_nested(tb_moder,
> +				       ARRAY_SIZE(coalesce_set_profile_policy) - 1,
> +				       nest, coalesce_set_profile_policy,
> +				       extack);
> +		if (ret)
> +			return ret;
> +
> +		if (NL_REQ_ATTR_CHECK(extack, nest, tb_moder, ETHTOOL_A_MODERATION_USEC) ||
> +		    NL_REQ_ATTR_CHECK(extack, nest, tb_moder, ETHTOOL_A_MODERATION_PKTS) ||
> +		    NL_REQ_ATTR_CHECK(extack, nest, tb_moder, ETHTOOL_A_MODERATION_COMPS))

Only require what the device supports, reject what it doesn't

> +			return -EINVAL;
> +
> +		profile[i].usec = nla_get_u32(tb_moder[ETHTOOL_A_MODERATION_USEC]);
> +		profile[i].pkts = nla_get_u32(tb_moder[ETHTOOL_A_MODERATION_PKTS]);
> +		profile[i].comps = nla_get_u32(tb_moder[ETHTOOL_A_MODERATION_COMPS]);
> +
> +		if ((dst[i].usec != profile[i].usec && !(moder->flags & NETDEV_PROFILE_USEC)) ||
> +		    (dst[i].pkts != profile[i].pkts && !(moder->flags & NETDEV_PROFILE_PKTS)) ||
> +		    (dst[i].comps != profile[i].comps && !(moder->flags & NETDEV_PROFILE_COMPS)))
> +			return -EOPNOTSUPP;
> +
> +		i++;
> +	}
> +
> +	memcpy(dst, profile, sizeof(profile));

Is this safe? I think you need to use some synchronization when
swapping profiles, maybe RCU?..

> +	return 0;
> +}
> +#endif
> +
>  static int
>  __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info,
>  		     bool *dual_change)
> @@ -317,6 +487,35 @@ __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info,
>  	ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs,
>  			 tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod);
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +	ret = ethnl_update_profile(dev, dev->moderation->rx_eqe_profile,
> +				   tb[ETHTOOL_A_COALESCE_RX_EQE_PROFILE],
> +				   info->extack);
> +	if (ret < 0)
> +		return ret;
> +	ret = ethnl_update_profile(dev, dev->moderation->rx_cqe_profile,
> +				   tb[ETHTOOL_A_COALESCE_RX_CQE_PROFILE],
> +				   info->extack);
> +	if (ret < 0)
> +		return ret;
> +	ret = ethnl_update_profile(dev, dev->moderation->tx_eqe_profile,
> +				   tb[ETHTOOL_A_COALESCE_TX_EQE_PROFILE],
> +				   info->extack);
> +	if (ret < 0)
> +		return ret;
> +	ret = ethnl_update_profile(dev, dev->moderation->tx_cqe_profile,
> +				   tb[ETHTOOL_A_COALESCE_TX_CQE_PROFILE],
> +				   info->extack);
> +	if (ret < 0)
> +		return ret;
> +#else
> +	if (tb[ETHTOOL_A_COALESCE_RX_EQE_PROFILE] ||
> +	    tb[ETHTOOL_A_COALESCE_RX_CQE_PROFILE] ||
> +	    tb[ETHTOOL_A_COALESCE_TX_EQE_PROFILE] ||
> +	    tb[ETHTOOL_A_COALESCE_TX_CQE_PROFILE])
> +		return -EOPNOTSUPP;
> +
> +#endif
>  	/* Update operation modes */
>  	ethnl_update_bool32(&coalesce.use_adaptive_rx_coalesce,
>  			    tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX], &mod_mode);





[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux