RE: [net-next v3 1/2] devlink: Support setting max_io_eqs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> From: David Wei <dw@xxxxxxxxxxx>
> Sent: Friday, April 5, 2024 12:10 AM
> 
> On 2024-04-04 10:58, Parav Pandit wrote:
> > Hi David,
> >
> >> From: David Wei <dw@xxxxxxxxxxx>
> >> Sent: Thursday, April 4, 2024 10:29 PM
> >>
> >> On 2024-04-03 10:41, Parav Pandit wrote:
> >>> Many devices send event notifications for the IO queues, such as tx
> >>> and rx queues, through event queues.
> >>>
> >>> Enable a privileged owner, such as a hypervisor PF, to set the
> >>> number of IO event queues for the VF and SF during the provisioning
> stage.
> >>>
> >>> example:
> >>> Get maximum IO event queues of the VF device::
> >>>
> >>>   $ devlink port show pci/0000:06:00.0/2
> >>>   pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf
> >>> pfnum 0
> >> vfnum 1
> >>>       function:
> >>>           hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs
> >>> 10
> >>>
> >>> Set maximum IO event queues of the VF device::
> >>>
> >>>   $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32
> >>>
> >>>   $ devlink port show pci/0000:06:00.0/2
> >>>   pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf
> >>> pfnum 0
> >> vfnum 1
> >>>       function:
> >>>           hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs
> >>> 32
> >>>
> >>> Reviewed-by: Jiri Pirko <jiri@xxxxxxxxxx>
> >>> Reviewed-by: Shay Drory <shayd@xxxxxxxxxx>
> >>> Signed-off-by: Parav Pandit <parav@xxxxxxxxxx>
> >>> ---
> >>> changelog:
> >>> v2->v3:
> >>> - limited 80 chars per line
> >>> v1->v2:
> >>> - limited comment to 80 chars per line in header file
> >>> ---
> >>>  .../networking/devlink/devlink-port.rst       | 25 +++++++++
> >>>  include/net/devlink.h                         | 14 +++++
> >>>  include/uapi/linux/devlink.h                  |  1 +
> >>>  net/devlink/port.c                            | 53 +++++++++++++++++++
> >>>  4 files changed, 93 insertions(+)
> >>>
> >>> diff --git a/Documentation/networking/devlink/devlink-port.rst
> >>> b/Documentation/networking/devlink/devlink-port.rst
> >>> index 562f46b41274..451f57393f11 100644
> >>> --- a/Documentation/networking/devlink/devlink-port.rst
> >>> +++ b/Documentation/networking/devlink/devlink-port.rst
> >>> @@ -134,6 +134,9 @@ Users may also set the IPsec crypto capability
> >>> of the function using  Users may also set the IPsec packet
> >>> capability of the function using  `devlink port function set ipsec_packet`
> command.
> >>>
> >>> +Users may also set the maximum IO event queues of the function
> >>> +using `devlink port function set max_io_eqs` command.
> >>> +
> >>>  Function attributes
> >>>  ===================
> >>>
> >>> @@ -295,6 +298,28 @@ policy is processed in software by the kernel.
> >>>          function:
> >>>              hw_addr 00:00:00:00:00:00 ipsec_packet enabled
> >>>
> >>> +Maximum IO events queues setup
> >>> +------------------------------
> >>> +When user sets maximum number of IO event queues for a SF or a VF,
> >>> +such function driver is limited to consume only enforced number of
> >>> +IO event queues.
> >>> +
> >>> +- Get maximum IO event queues of the VF device::
> >>> +
> >>> +    $ devlink port show pci/0000:06:00.0/2
> >>> +    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf
> >>> + pfnum
> >> 0 vfnum 1
> >>> +        function:
> >>> +            hw_addr 00:00:00:00:00:00 ipsec_packet disabled
> >>> + max_io_eqs 10
> >>> +
> >>> +- Set maximum IO event queues of the VF device::
> >>> +
> >>> +    $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32
> >>> +
> >>> +    $ devlink port show pci/0000:06:00.0/2
> >>> +    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf
> >>> + pfnum
> >> 0 vfnum 1
> >>> +        function:
> >>> +            hw_addr 00:00:00:00:00:00 ipsec_packet disabled
> >>> + max_io_eqs 32
> >>> +
> >>>  Subfunction
> >>>  ============
> >>>
> >>> diff --git a/include/net/devlink.h b/include/net/devlink.h index
> >>> 9ac394bdfbe4..bb1af599d101 100644
> >>> --- a/include/net/devlink.h
> >>> +++ b/include/net/devlink.h
> >>> @@ -1602,6 +1602,14 @@ void devlink_free(struct devlink *devlink);
> >>>   *			      capability. Should be used by device drivers to
> >>>   *			      enable/disable ipsec_packet capability of a
> >>>   *			      function managed by the devlink port.
> >>> + * @port_fn_max_io_eqs_get: Callback used to get port function's
> >> maximum number
> >>> + *			    of event queues. Should be used by device drivers
> >> to
> >>> + *			    report the maximum event queues of a function
> >>> + *			    managed by the devlink port.
> >>> + * @port_fn_max_io_eqs_set: Callback used to set port function's
> >> maximum number
> >>> + *			    of event queues. Should be used by device drivers
> >> to
> >>> + *			    configure maximum number of event queues
> >>> + *			    of a function managed by the devlink port.
> >>>   *
> >>>   * Note: Driver should return -EOPNOTSUPP if it doesn't support
> >>>   * port function (@port_fn_*) handling for a particular port.
> >>> @@ -1651,6 +1659,12 @@ struct devlink_port_ops {
> >>>  	int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port,
> >>>  					bool enable,
> >>>  					struct netlink_ext_ack *extack);
> >>> +	int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port,
> >>> +				      u32 *max_eqs,
> >>> +				      struct netlink_ext_ack *extack);
> >>> +	int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port,
> >>> +				      u32 max_eqs,
> >>> +				      struct netlink_ext_ack *extack);
> >>>  };
> >>>
> >>>  void devlink_port_init(struct devlink *devlink, diff --git
> >>> a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index
> >>> 2da0c7eb6710..9401aa343673 100644
> >>> --- a/include/uapi/linux/devlink.h
> >>> +++ b/include/uapi/linux/devlink.h
> >>> @@ -686,6 +686,7 @@ enum devlink_port_function_attr {
> >>>  	DEVLINK_PORT_FN_ATTR_OPSTATE,	/* u8 */
> >>>  	DEVLINK_PORT_FN_ATTR_CAPS,	/* bitfield32 */
> >>>  	DEVLINK_PORT_FN_ATTR_DEVLINK,	/* nested */
> >>> +	DEVLINK_PORT_FN_ATTR_MAX_IO_EQS,	/* u32 */
> >>>
> >>>  	__DEVLINK_PORT_FUNCTION_ATTR_MAX,
> >>>  	DEVLINK_PORT_FUNCTION_ATTR_MAX =
> >> __DEVLINK_PORT_FUNCTION_ATTR_MAX -
> >>> 1 diff --git a/net/devlink/port.c b/net/devlink/port.c index
> >>> 118d130d2afd..be9158b4453c 100644
> >>> --- a/net/devlink/port.c
> >>> +++ b/net/devlink/port.c
> >>> @@ -16,6 +16,7 @@ static const struct nla_policy
> >> devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
> >>>  				 DEVLINK_PORT_FN_STATE_ACTIVE),
> >>>  	[DEVLINK_PORT_FN_ATTR_CAPS] =
> >>>
> >> 	NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
> >>> +	[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 },
> >>>  };
> >>>
> >>>  #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port)
> >> 		\
> >>> @@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct
> >> devlink_port *devlink_port,
> >>>  	return 0;
> >>>  }
> >>>
> >>> +static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port,
> >>> +					   struct sk_buff *msg,
> >>> +					   struct netlink_ext_ack *extack,
> >>> +					   bool *msg_updated)
> >>> +{
> >>> +	u32 max_io_eqs;
> >>> +	int err;
> >>> +
> >>> +	if (!port->ops->port_fn_max_io_eqs_get)
> >>> +		return 0;
> >>> +
> >>> +	err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs,
> >> extack);
> >>> +	if (err) {
> >>> +		if (err == -EOPNOTSUPP)
> >>> +			return 0;
> >>
> >> Docs above says:
> >>    * Note: Driver should return -EOPNOTSUPP if it doesn't support
> >>    * port function (@port_fn_*) handling for a particular port.
> >>
> >> But here you're returning 0 in both cases of no
> >> port_fn_max_io_eqs_get or
> >> port_fn_max_io_eqs_get() returns EOPNOTSUPP.
> >>
> > When the port does not support this op, the function pointer is null and, 0
> is returned as expected.
> >
> > When the port for some reason has the ops function pointer set for a port,
> but if the port does not support the ops, it will return ENOPNOTSUPP.
> > This may be possible when the driver has chosen to use same ops callback
> structure for multiple port flavors.
> >
> > This code pattern is likely left over code of relatively recent work that
> moved ops from devlink instance to per port ops.
> > I propose to keep the current check as done in this patch, and run a
> > full audit of all the drivers, if all drivers have moved to per port ops, then
> simplify the code to drop the check for EOPNOTSUPP in a new series that
> may touch more drivers.
> > Otherwise, we may end up failing the port show operation when it returns
> - ENOPNOTSUPP.
> 
> Thanks for the explanation. So ideally each port flavour has its own unique
> set of struct ops, and if something is not supported then don't set the func
> ptr in the struct ops.
> 
> Yes, I see that 0 has to be returned for devlink_port_fn_caps_fill() to succeed.
> 
> Had a brief look and there's only a handful of drivers (mlx, nfp, ice) that use
> devlink_port_ops.
>
And netdevsim too. :)
Can we please the cleanup in a separate series?
Post this may be later in the month, I may have cycles to audit and simplify.
Would it be ok with you?
 
> >
> >>> +		return err;
> >>> +	}
> >>> +	err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS,
> >> max_io_eqs);
> >>> +	if (err)
> >>> +		return err;
> >>> +	*msg_updated = true;
> >>> +	return 0;
> >>> +}
> >>> +
> >>>  int devlink_nl_port_handle_fill(struct sk_buff *msg, struct
> >>> devlink_port *devlink_port)  {
> >>>  	if (devlink_nl_put_handle(msg, devlink_port->devlink)) @@ -409,6
> >>> +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port
> >> *devlink_port,
> >>>  	return 0;
> >>>  }
> >>>
> >>> +static int
> >>> +devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port,
> >>> +			       const struct nlattr *attr,
> >>> +			       struct netlink_ext_ack *extack) {
> >>> +	u32 max_io_eqs;
> >>> +
> >>> +	max_io_eqs = nla_get_u32(attr);
> >>> +	return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port,
> >>> +							 max_io_eqs, extack);
> >>> +}
> >>> +
> >>>  static int
> >>>  devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct
> >> devlink_port *port,
> >>>  				   struct netlink_ext_ack *extack) @@ -428,6
> >> +465,9 @@
> >>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct
> >> devlink_port *por
> >>>  	if (err)
> >>>  		goto out;
> >>>  	err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
> >>> +	if (err)
> >>> +		goto out;
> >>> +	err = devlink_port_fn_max_io_eqs_fill(port, msg, extack,
> >>> +&msg_updated);
> >>>  	if (err)
> >>>  		goto out;
> >>>  	err = devlink_rel_devlink_handle_put(msg, port->devlink, @@ -726,6
> >>> +766,12 @@ static int devlink_port_function_validate(struct
> >>> +devlink_port
> >> *devlink_port,
> >>>  			}
> >>>  		}
> >>>  	}
> >>> +	if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] &&
> >>> +	    !ops->port_fn_max_io_eqs_set) {
> >>> +		NL_SET_ERR_MSG_ATTR(extack,
> >> tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS],
> >>> +				    "Function does not support max_io_eqs
> >> setting");
> >>> +		return -EOPNOTSUPP;
> >>> +	}
> >>>  	return 0;
> >>>  }
> >>>
> >>> @@ -761,6 +807,13 @@ static int devlink_port_function_set(struct
> >> devlink_port *port,
> >>>  			return err;
> >>>  	}
> >>>
> >>> +	attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS];
> >>> +	if (attr) {
> >>> +		err = devlink_port_fn_max_io_eqs_set(port, attr, extack);
> >>> +		if (err)
> >>> +			return err;
> >>> +	}
> >>> +
> >>>  	/* Keep this as the last function attribute set, so that when
> >>>  	 * multiple port function attributes are set along with state,
> >>>  	 * Those can be applied first before activating the state.




[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux