Re: [net-next v3 1/2] devlink: Support setting max_io_eqs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2024-04-04 11:50, Parav Pandit wrote:
> 
> 
>> From: David Wei <dw@xxxxxxxxxxx>
>> Sent: Friday, April 5, 2024 12:10 AM
>>
>> On 2024-04-04 10:58, Parav Pandit wrote:
>>> Hi David,
>>>
>>>> From: David Wei <dw@xxxxxxxxxxx>
>>>> Sent: Thursday, April 4, 2024 10:29 PM
>>>>
>>>> On 2024-04-03 10:41, Parav Pandit wrote:
>>>>> Many devices send event notifications for the IO queues, such as tx
>>>>> and rx queues, through event queues.
>>>>>
>>>>> Enable a privileged owner, such as a hypervisor PF, to set the
>>>>> number of IO event queues for the VF and SF during the provisioning
>> stage.
>>>>>
>>>>> example:
>>>>> Get maximum IO event queues of the VF device::
>>>>>
>>>>>   $ devlink port show pci/0000:06:00.0/2
>>>>>   pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf
>>>>> pfnum 0
>>>> vfnum 1
>>>>>       function:
>>>>>           hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs
>>>>> 10
>>>>>
>>>>> Set maximum IO event queues of the VF device::
>>>>>
>>>>>   $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32
>>>>>
>>>>>   $ devlink port show pci/0000:06:00.0/2
>>>>>   pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf
>>>>> pfnum 0
>>>> vfnum 1
>>>>>       function:
>>>>>           hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs
>>>>> 32
>>>>>
>>>>> Reviewed-by: Jiri Pirko <jiri@xxxxxxxxxx>
>>>>> Reviewed-by: Shay Drory <shayd@xxxxxxxxxx>
>>>>> Signed-off-by: Parav Pandit <parav@xxxxxxxxxx>
>>>>> ---
>>>>> changelog:
>>>>> v2->v3:
>>>>> - limited 80 chars per line
>>>>> v1->v2:
>>>>> - limited comment to 80 chars per line in header file
>>>>> ---
>>>>>  .../networking/devlink/devlink-port.rst       | 25 +++++++++
>>>>>  include/net/devlink.h                         | 14 +++++
>>>>>  include/uapi/linux/devlink.h                  |  1 +
>>>>>  net/devlink/port.c                            | 53 +++++++++++++++++++
>>>>>  4 files changed, 93 insertions(+)
>>>>>
>>>>> diff --git a/Documentation/networking/devlink/devlink-port.rst
>>>>> b/Documentation/networking/devlink/devlink-port.rst
>>>>> index 562f46b41274..451f57393f11 100644
>>>>> --- a/Documentation/networking/devlink/devlink-port.rst
>>>>> +++ b/Documentation/networking/devlink/devlink-port.rst
>>>>> @@ -134,6 +134,9 @@ Users may also set the IPsec crypto capability
>>>>> of the function using  Users may also set the IPsec packet
>>>>> capability of the function using  `devlink port function set ipsec_packet`
>> command.
>>>>>
>>>>> +Users may also set the maximum IO event queues of the function
>>>>> +using `devlink port function set max_io_eqs` command.
>>>>> +
>>>>>  Function attributes
>>>>>  ===================
>>>>>
>>>>> @@ -295,6 +298,28 @@ policy is processed in software by the kernel.
>>>>>          function:
>>>>>              hw_addr 00:00:00:00:00:00 ipsec_packet enabled
>>>>>
>>>>> +Maximum IO events queues setup
>>>>> +------------------------------
>>>>> +When user sets maximum number of IO event queues for a SF or a VF,
>>>>> +such function driver is limited to consume only enforced number of
>>>>> +IO event queues.
>>>>> +
>>>>> +- Get maximum IO event queues of the VF device::
>>>>> +
>>>>> +    $ devlink port show pci/0000:06:00.0/2
>>>>> +    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf
>>>>> + pfnum
>>>> 0 vfnum 1
>>>>> +        function:
>>>>> +            hw_addr 00:00:00:00:00:00 ipsec_packet disabled
>>>>> + max_io_eqs 10
>>>>> +
>>>>> +- Set maximum IO event queues of the VF device::
>>>>> +
>>>>> +    $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32
>>>>> +
>>>>> +    $ devlink port show pci/0000:06:00.0/2
>>>>> +    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf
>>>>> + pfnum
>>>> 0 vfnum 1
>>>>> +        function:
>>>>> +            hw_addr 00:00:00:00:00:00 ipsec_packet disabled
>>>>> + max_io_eqs 32
>>>>> +
>>>>>  Subfunction
>>>>>  ============
>>>>>
>>>>> diff --git a/include/net/devlink.h b/include/net/devlink.h index
>>>>> 9ac394bdfbe4..bb1af599d101 100644
>>>>> --- a/include/net/devlink.h
>>>>> +++ b/include/net/devlink.h
>>>>> @@ -1602,6 +1602,14 @@ void devlink_free(struct devlink *devlink);
>>>>>   *			      capability. Should be used by device drivers to
>>>>>   *			      enable/disable ipsec_packet capability of a
>>>>>   *			      function managed by the devlink port.
>>>>> + * @port_fn_max_io_eqs_get: Callback used to get port function's
>>>> maximum number
>>>>> + *			    of event queues. Should be used by device drivers
>>>> to
>>>>> + *			    report the maximum event queues of a function
>>>>> + *			    managed by the devlink port.
>>>>> + * @port_fn_max_io_eqs_set: Callback used to set port function's
>>>> maximum number
>>>>> + *			    of event queues. Should be used by device drivers
>>>> to
>>>>> + *			    configure maximum number of event queues
>>>>> + *			    of a function managed by the devlink port.
>>>>>   *
>>>>>   * Note: Driver should return -EOPNOTSUPP if it doesn't support
>>>>>   * port function (@port_fn_*) handling for a particular port.
>>>>> @@ -1651,6 +1659,12 @@ struct devlink_port_ops {
>>>>>  	int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port,
>>>>>  					bool enable,
>>>>>  					struct netlink_ext_ack *extack);
>>>>> +	int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port,
>>>>> +				      u32 *max_eqs,
>>>>> +				      struct netlink_ext_ack *extack);
>>>>> +	int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port,
>>>>> +				      u32 max_eqs,
>>>>> +				      struct netlink_ext_ack *extack);
>>>>>  };
>>>>>
>>>>>  void devlink_port_init(struct devlink *devlink, diff --git
>>>>> a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index
>>>>> 2da0c7eb6710..9401aa343673 100644
>>>>> --- a/include/uapi/linux/devlink.h
>>>>> +++ b/include/uapi/linux/devlink.h
>>>>> @@ -686,6 +686,7 @@ enum devlink_port_function_attr {
>>>>>  	DEVLINK_PORT_FN_ATTR_OPSTATE,	/* u8 */
>>>>>  	DEVLINK_PORT_FN_ATTR_CAPS,	/* bitfield32 */
>>>>>  	DEVLINK_PORT_FN_ATTR_DEVLINK,	/* nested */
>>>>> +	DEVLINK_PORT_FN_ATTR_MAX_IO_EQS,	/* u32 */
>>>>>
>>>>>  	__DEVLINK_PORT_FUNCTION_ATTR_MAX,
>>>>>  	DEVLINK_PORT_FUNCTION_ATTR_MAX =
>>>> __DEVLINK_PORT_FUNCTION_ATTR_MAX -
>>>>> 1 diff --git a/net/devlink/port.c b/net/devlink/port.c index
>>>>> 118d130d2afd..be9158b4453c 100644
>>>>> --- a/net/devlink/port.c
>>>>> +++ b/net/devlink/port.c
>>>>> @@ -16,6 +16,7 @@ static const struct nla_policy
>>>> devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
>>>>>  				 DEVLINK_PORT_FN_STATE_ACTIVE),
>>>>>  	[DEVLINK_PORT_FN_ATTR_CAPS] =
>>>>>
>>>> 	NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
>>>>> +	[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 },
>>>>>  };
>>>>>
>>>>>  #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port)
>>>> 		\
>>>>> @@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct
>>>> devlink_port *devlink_port,
>>>>>  	return 0;
>>>>>  }
>>>>>
>>>>> +static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port,
>>>>> +					   struct sk_buff *msg,
>>>>> +					   struct netlink_ext_ack *extack,
>>>>> +					   bool *msg_updated)
>>>>> +{
>>>>> +	u32 max_io_eqs;
>>>>> +	int err;
>>>>> +
>>>>> +	if (!port->ops->port_fn_max_io_eqs_get)
>>>>> +		return 0;
>>>>> +
>>>>> +	err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs,
>>>> extack);
>>>>> +	if (err) {
>>>>> +		if (err == -EOPNOTSUPP)
>>>>> +			return 0;
>>>>
>>>> Docs above says:
>>>>    * Note: Driver should return -EOPNOTSUPP if it doesn't support
>>>>    * port function (@port_fn_*) handling for a particular port.
>>>>
>>>> But here you're returning 0 in both cases of no
>>>> port_fn_max_io_eqs_get or
>>>> port_fn_max_io_eqs_get() returns EOPNOTSUPP.
>>>>
>>> When the port does not support this op, the function pointer is null and, 0
>> is returned as expected.
>>>
>>> When the port for some reason has the ops function pointer set for a port,
>> but if the port does not support the ops, it will return ENOPNOTSUPP.
>>> This may be possible when the driver has chosen to use same ops callback
>> structure for multiple port flavors.
>>>
>>> This code pattern is likely left over code of relatively recent work that
>> moved ops from devlink instance to per port ops.
>>> I propose to keep the current check as done in this patch, and run a
>>> full audit of all the drivers, if all drivers have moved to per port ops, then
>> simplify the code to drop the check for EOPNOTSUPP in a new series that
>> may touch more drivers.
>>> Otherwise, we may end up failing the port show operation when it returns
>> - ENOPNOTSUPP.
>>
>> Thanks for the explanation. So ideally each port flavour has its own unique
>> set of struct ops, and if something is not supported then don't set the func
>> ptr in the struct ops.
>>
>> Yes, I see that 0 has to be returned for devlink_port_fn_caps_fill() to succeed.
>>
>> Had a brief look and there's only a handful of drivers (mlx, nfp, ice) that use
>> devlink_port_ops.
>>
> And netdevsim too. :)
> Can we please the cleanup in a separate series?
> Post this may be later in the month, I may have cycles to audit and simplify.
> Would it be ok with you?

Yeah of course, thanks. I'm making some changes in netdevsim and can do
a drive-by cleanup there too.

>  
>>>
>>>>> +		return err;
>>>>> +	}
>>>>> +	err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS,
>>>> max_io_eqs);
>>>>> +	if (err)
>>>>> +		return err;
>>>>> +	*msg_updated = true;
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>>  int devlink_nl_port_handle_fill(struct sk_buff *msg, struct
>>>>> devlink_port *devlink_port)  {
>>>>>  	if (devlink_nl_put_handle(msg, devlink_port->devlink)) @@ -409,6
>>>>> +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port
>>>> *devlink_port,
>>>>>  	return 0;
>>>>>  }
>>>>>
>>>>> +static int
>>>>> +devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port,
>>>>> +			       const struct nlattr *attr,
>>>>> +			       struct netlink_ext_ack *extack) {
>>>>> +	u32 max_io_eqs;
>>>>> +
>>>>> +	max_io_eqs = nla_get_u32(attr);
>>>>> +	return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port,
>>>>> +							 max_io_eqs, extack);
>>>>> +}
>>>>> +
>>>>>  static int
>>>>>  devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct
>>>> devlink_port *port,
>>>>>  				   struct netlink_ext_ack *extack) @@ -428,6
>>>> +465,9 @@
>>>>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct
>>>> devlink_port *por
>>>>>  	if (err)
>>>>>  		goto out;
>>>>>  	err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
>>>>> +	if (err)
>>>>> +		goto out;
>>>>> +	err = devlink_port_fn_max_io_eqs_fill(port, msg, extack,
>>>>> +&msg_updated);
>>>>>  	if (err)
>>>>>  		goto out;
>>>>>  	err = devlink_rel_devlink_handle_put(msg, port->devlink, @@ -726,6
>>>>> +766,12 @@ static int devlink_port_function_validate(struct
>>>>> +devlink_port
>>>> *devlink_port,
>>>>>  			}
>>>>>  		}
>>>>>  	}
>>>>> +	if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] &&
>>>>> +	    !ops->port_fn_max_io_eqs_set) {
>>>>> +		NL_SET_ERR_MSG_ATTR(extack,
>>>> tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS],
>>>>> +				    "Function does not support max_io_eqs
>>>> setting");
>>>>> +		return -EOPNOTSUPP;
>>>>> +	}
>>>>>  	return 0;
>>>>>  }
>>>>>
>>>>> @@ -761,6 +807,13 @@ static int devlink_port_function_set(struct
>>>> devlink_port *port,
>>>>>  			return err;
>>>>>  	}
>>>>>
>>>>> +	attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS];
>>>>> +	if (attr) {
>>>>> +		err = devlink_port_fn_max_io_eqs_set(port, attr, extack);
>>>>> +		if (err)
>>>>> +			return err;
>>>>> +	}
>>>>> +
>>>>>  	/* Keep this as the last function attribute set, so that when
>>>>>  	 * multiple port function attributes are set along with state,
>>>>>  	 * Those can be applied first before activating the state.




[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux