> From: David Wei <dw@xxxxxxxxxxx> > Sent: Friday, April 5, 2024 12:10 AM > > On 2024-04-04 10:58, Parav Pandit wrote: > > Hi David, > > > >> From: David Wei <dw@xxxxxxxxxxx> > >> Sent: Thursday, April 4, 2024 10:29 PM > >> > >> On 2024-04-03 10:41, Parav Pandit wrote: > >>> Many devices send event notifications for the IO queues, such as tx > >>> and rx queues, through event queues. > >>> > >>> Enable a privileged owner, such as a hypervisor PF, to set the > >>> number of IO event queues for the VF and SF during the provisioning > stage. > >>> > >>> example: > >>> Get maximum IO event queues of the VF device:: > >>> > >>> $ devlink port show pci/0000:06:00.0/2 > >>> pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf > >>> pfnum 0 > >> vfnum 1 > >>> function: > >>> hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs > >>> 10 > >>> > >>> Set maximum IO event queues of the VF device:: > >>> > >>> $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 > >>> > >>> $ devlink port show pci/0000:06:00.0/2 > >>> pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf > >>> pfnum 0 > >> vfnum 1 > >>> function: > >>> hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs > >>> 32 > >>> > >>> Reviewed-by: Jiri Pirko <jiri@xxxxxxxxxx> > >>> Reviewed-by: Shay Drory <shayd@xxxxxxxxxx> > >>> Signed-off-by: Parav Pandit <parav@xxxxxxxxxx> > >>> --- > >>> changelog: > >>> v2->v3: > >>> - limited 80 chars per line > >>> v1->v2: > >>> - limited comment to 80 chars per line in header file > >>> --- > >>> .../networking/devlink/devlink-port.rst | 25 +++++++++ > >>> include/net/devlink.h | 14 +++++ > >>> include/uapi/linux/devlink.h | 1 + > >>> net/devlink/port.c | 53 +++++++++++++++++++ > >>> 4 files changed, 93 insertions(+) > >>> > >>> diff --git a/Documentation/networking/devlink/devlink-port.rst > >>> b/Documentation/networking/devlink/devlink-port.rst > >>> index 562f46b41274..451f57393f11 100644 > >>> --- a/Documentation/networking/devlink/devlink-port.rst > >>> +++ b/Documentation/networking/devlink/devlink-port.rst > >>> @@ -134,6 +134,9 @@ Users may also set the IPsec crypto capability > >>> of the function using Users may also set the IPsec packet > >>> capability of the function using `devlink port function set ipsec_packet` > command. > >>> > >>> +Users may also set the maximum IO event queues of the function > >>> +using `devlink port function set max_io_eqs` command. > >>> + > >>> Function attributes > >>> =================== > >>> > >>> @@ -295,6 +298,28 @@ policy is processed in software by the kernel. > >>> function: > >>> hw_addr 00:00:00:00:00:00 ipsec_packet enabled > >>> > >>> +Maximum IO events queues setup > >>> +------------------------------ > >>> +When user sets maximum number of IO event queues for a SF or a VF, > >>> +such function driver is limited to consume only enforced number of > >>> +IO event queues. > >>> + > >>> +- Get maximum IO event queues of the VF device:: > >>> + > >>> + $ devlink port show pci/0000:06:00.0/2 > >>> + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf > >>> + pfnum > >> 0 vfnum 1 > >>> + function: > >>> + hw_addr 00:00:00:00:00:00 ipsec_packet disabled > >>> + max_io_eqs 10 > >>> + > >>> +- Set maximum IO event queues of the VF device:: > >>> + > >>> + $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 > >>> + > >>> + $ devlink port show pci/0000:06:00.0/2 > >>> + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf > >>> + pfnum > >> 0 vfnum 1 > >>> + function: > >>> + hw_addr 00:00:00:00:00:00 ipsec_packet disabled > >>> + max_io_eqs 32 > >>> + > >>> Subfunction > >>> ============ > >>> > >>> diff --git a/include/net/devlink.h b/include/net/devlink.h index > >>> 9ac394bdfbe4..bb1af599d101 100644 > >>> --- a/include/net/devlink.h > >>> +++ b/include/net/devlink.h > >>> @@ -1602,6 +1602,14 @@ void devlink_free(struct devlink *devlink); > >>> * capability. Should be used by device drivers to > >>> * enable/disable ipsec_packet capability of a > >>> * function managed by the devlink port. > >>> + * @port_fn_max_io_eqs_get: Callback used to get port function's > >> maximum number > >>> + * of event queues. Should be used by device drivers > >> to > >>> + * report the maximum event queues of a function > >>> + * managed by the devlink port. > >>> + * @port_fn_max_io_eqs_set: Callback used to set port function's > >> maximum number > >>> + * of event queues. Should be used by device drivers > >> to > >>> + * configure maximum number of event queues > >>> + * of a function managed by the devlink port. > >>> * > >>> * Note: Driver should return -EOPNOTSUPP if it doesn't support > >>> * port function (@port_fn_*) handling for a particular port. > >>> @@ -1651,6 +1659,12 @@ struct devlink_port_ops { > >>> int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port, > >>> bool enable, > >>> struct netlink_ext_ack *extack); > >>> + int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port, > >>> + u32 *max_eqs, > >>> + struct netlink_ext_ack *extack); > >>> + int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port, > >>> + u32 max_eqs, > >>> + struct netlink_ext_ack *extack); > >>> }; > >>> > >>> void devlink_port_init(struct devlink *devlink, diff --git > >>> a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index > >>> 2da0c7eb6710..9401aa343673 100644 > >>> --- a/include/uapi/linux/devlink.h > >>> +++ b/include/uapi/linux/devlink.h > >>> @@ -686,6 +686,7 @@ enum devlink_port_function_attr { > >>> DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ > >>> DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ > >>> DEVLINK_PORT_FN_ATTR_DEVLINK, /* nested */ > >>> + DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, /* u32 */ > >>> > >>> __DEVLINK_PORT_FUNCTION_ATTR_MAX, > >>> DEVLINK_PORT_FUNCTION_ATTR_MAX = > >> __DEVLINK_PORT_FUNCTION_ATTR_MAX - > >>> 1 diff --git a/net/devlink/port.c b/net/devlink/port.c index > >>> 118d130d2afd..be9158b4453c 100644 > >>> --- a/net/devlink/port.c > >>> +++ b/net/devlink/port.c > >>> @@ -16,6 +16,7 @@ static const struct nla_policy > >> devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ > >>> DEVLINK_PORT_FN_STATE_ACTIVE), > >>> [DEVLINK_PORT_FN_ATTR_CAPS] = > >>> > >> NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK), > >>> + [DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 }, > >>> }; > >>> > >>> #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) > >> \ > >>> @@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct > >> devlink_port *devlink_port, > >>> return 0; > >>> } > >>> > >>> +static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port, > >>> + struct sk_buff *msg, > >>> + struct netlink_ext_ack *extack, > >>> + bool *msg_updated) > >>> +{ > >>> + u32 max_io_eqs; > >>> + int err; > >>> + > >>> + if (!port->ops->port_fn_max_io_eqs_get) > >>> + return 0; > >>> + > >>> + err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs, > >> extack); > >>> + if (err) { > >>> + if (err == -EOPNOTSUPP) > >>> + return 0; > >> > >> Docs above says: > >> * Note: Driver should return -EOPNOTSUPP if it doesn't support > >> * port function (@port_fn_*) handling for a particular port. > >> > >> But here you're returning 0 in both cases of no > >> port_fn_max_io_eqs_get or > >> port_fn_max_io_eqs_get() returns EOPNOTSUPP. > >> > > When the port does not support this op, the function pointer is null and, 0 > is returned as expected. > > > > When the port for some reason has the ops function pointer set for a port, > but if the port does not support the ops, it will return ENOPNOTSUPP. > > This may be possible when the driver has chosen to use same ops callback > structure for multiple port flavors. > > > > This code pattern is likely left over code of relatively recent work that > moved ops from devlink instance to per port ops. > > I propose to keep the current check as done in this patch, and run a > > full audit of all the drivers, if all drivers have moved to per port ops, then > simplify the code to drop the check for EOPNOTSUPP in a new series that > may touch more drivers. > > Otherwise, we may end up failing the port show operation when it returns > - ENOPNOTSUPP. > > Thanks for the explanation. So ideally each port flavour has its own unique > set of struct ops, and if something is not supported then don't set the func > ptr in the struct ops. > > Yes, I see that 0 has to be returned for devlink_port_fn_caps_fill() to succeed. > > Had a brief look and there's only a handful of drivers (mlx, nfp, ice) that use > devlink_port_ops. > And netdevsim too. :) Can we please the cleanup in a separate series? Post this may be later in the month, I may have cycles to audit and simplify. Would it be ok with you? > > > >>> + return err; > >>> + } > >>> + err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, > >> max_io_eqs); > >>> + if (err) > >>> + return err; > >>> + *msg_updated = true; > >>> + return 0; > >>> +} > >>> + > >>> int devlink_nl_port_handle_fill(struct sk_buff *msg, struct > >>> devlink_port *devlink_port) { > >>> if (devlink_nl_put_handle(msg, devlink_port->devlink)) @@ -409,6 > >>> +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port > >> *devlink_port, > >>> return 0; > >>> } > >>> > >>> +static int > >>> +devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port, > >>> + const struct nlattr *attr, > >>> + struct netlink_ext_ack *extack) { > >>> + u32 max_io_eqs; > >>> + > >>> + max_io_eqs = nla_get_u32(attr); > >>> + return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port, > >>> + max_io_eqs, extack); > >>> +} > >>> + > >>> static int > >>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct > >> devlink_port *port, > >>> struct netlink_ext_ack *extack) @@ -428,6 > >> +465,9 @@ > >>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct > >> devlink_port *por > >>> if (err) > >>> goto out; > >>> err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); > >>> + if (err) > >>> + goto out; > >>> + err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, > >>> +&msg_updated); > >>> if (err) > >>> goto out; > >>> err = devlink_rel_devlink_handle_put(msg, port->devlink, @@ -726,6 > >>> +766,12 @@ static int devlink_port_function_validate(struct > >>> +devlink_port > >> *devlink_port, > >>> } > >>> } > >>> } > >>> + if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] && > >>> + !ops->port_fn_max_io_eqs_set) { > >>> + NL_SET_ERR_MSG_ATTR(extack, > >> tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS], > >>> + "Function does not support max_io_eqs > >> setting"); > >>> + return -EOPNOTSUPP; > >>> + } > >>> return 0; > >>> } > >>> > >>> @@ -761,6 +807,13 @@ static int devlink_port_function_set(struct > >> devlink_port *port, > >>> return err; > >>> } > >>> > >>> + attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS]; > >>> + if (attr) { > >>> + err = devlink_port_fn_max_io_eqs_set(port, attr, extack); > >>> + if (err) > >>> + return err; > >>> + } > >>> + > >>> /* Keep this as the last function attribute set, so that when > >>> * multiple port function attributes are set along with state, > >>> * Those can be applied first before activating the state.