On 2024-04-04 10:58, Parav Pandit wrote: > Hi David, > >> From: David Wei <dw@xxxxxxxxxxx> >> Sent: Thursday, April 4, 2024 10:29 PM >> >> On 2024-04-03 10:41, Parav Pandit wrote: >>> Many devices send event notifications for the IO queues, such as tx >>> and rx queues, through event queues. >>> >>> Enable a privileged owner, such as a hypervisor PF, to set the number >>> of IO event queues for the VF and SF during the provisioning stage. >>> >>> example: >>> Get maximum IO event queues of the VF device:: >>> >>> $ devlink port show pci/0000:06:00.0/2 >>> pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 >> vfnum 1 >>> function: >>> hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs >>> 10 >>> >>> Set maximum IO event queues of the VF device:: >>> >>> $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 >>> >>> $ devlink port show pci/0000:06:00.0/2 >>> pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 >> vfnum 1 >>> function: >>> hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs >>> 32 >>> >>> Reviewed-by: Jiri Pirko <jiri@xxxxxxxxxx> >>> Reviewed-by: Shay Drory <shayd@xxxxxxxxxx> >>> Signed-off-by: Parav Pandit <parav@xxxxxxxxxx> >>> --- >>> changelog: >>> v2->v3: >>> - limited 80 chars per line >>> v1->v2: >>> - limited comment to 80 chars per line in header file >>> --- >>> .../networking/devlink/devlink-port.rst | 25 +++++++++ >>> include/net/devlink.h | 14 +++++ >>> include/uapi/linux/devlink.h | 1 + >>> net/devlink/port.c | 53 +++++++++++++++++++ >>> 4 files changed, 93 insertions(+) >>> >>> diff --git a/Documentation/networking/devlink/devlink-port.rst >>> b/Documentation/networking/devlink/devlink-port.rst >>> index 562f46b41274..451f57393f11 100644 >>> --- a/Documentation/networking/devlink/devlink-port.rst >>> +++ b/Documentation/networking/devlink/devlink-port.rst >>> @@ -134,6 +134,9 @@ Users may also set the IPsec crypto capability of >>> the function using Users may also set the IPsec packet capability of >>> the function using `devlink port function set ipsec_packet` command. >>> >>> +Users may also set the maximum IO event queues of the function using >>> +`devlink port function set max_io_eqs` command. >>> + >>> Function attributes >>> =================== >>> >>> @@ -295,6 +298,28 @@ policy is processed in software by the kernel. >>> function: >>> hw_addr 00:00:00:00:00:00 ipsec_packet enabled >>> >>> +Maximum IO events queues setup >>> +------------------------------ >>> +When user sets maximum number of IO event queues for a SF or a VF, >>> +such function driver is limited to consume only enforced number of IO >>> +event queues. >>> + >>> +- Get maximum IO event queues of the VF device:: >>> + >>> + $ devlink port show pci/0000:06:00.0/2 >>> + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum >> 0 vfnum 1 >>> + function: >>> + hw_addr 00:00:00:00:00:00 ipsec_packet disabled >>> + max_io_eqs 10 >>> + >>> +- Set maximum IO event queues of the VF device:: >>> + >>> + $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 >>> + >>> + $ devlink port show pci/0000:06:00.0/2 >>> + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum >> 0 vfnum 1 >>> + function: >>> + hw_addr 00:00:00:00:00:00 ipsec_packet disabled >>> + max_io_eqs 32 >>> + >>> Subfunction >>> ============ >>> >>> diff --git a/include/net/devlink.h b/include/net/devlink.h index >>> 9ac394bdfbe4..bb1af599d101 100644 >>> --- a/include/net/devlink.h >>> +++ b/include/net/devlink.h >>> @@ -1602,6 +1602,14 @@ void devlink_free(struct devlink *devlink); >>> * capability. Should be used by device drivers to >>> * enable/disable ipsec_packet capability of a >>> * function managed by the devlink port. >>> + * @port_fn_max_io_eqs_get: Callback used to get port function's >> maximum number >>> + * of event queues. Should be used by device drivers >> to >>> + * report the maximum event queues of a function >>> + * managed by the devlink port. >>> + * @port_fn_max_io_eqs_set: Callback used to set port function's >> maximum number >>> + * of event queues. Should be used by device drivers >> to >>> + * configure maximum number of event queues >>> + * of a function managed by the devlink port. >>> * >>> * Note: Driver should return -EOPNOTSUPP if it doesn't support >>> * port function (@port_fn_*) handling for a particular port. >>> @@ -1651,6 +1659,12 @@ struct devlink_port_ops { >>> int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port, >>> bool enable, >>> struct netlink_ext_ack *extack); >>> + int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port, >>> + u32 *max_eqs, >>> + struct netlink_ext_ack *extack); >>> + int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port, >>> + u32 max_eqs, >>> + struct netlink_ext_ack *extack); >>> }; >>> >>> void devlink_port_init(struct devlink *devlink, diff --git >>> a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index >>> 2da0c7eb6710..9401aa343673 100644 >>> --- a/include/uapi/linux/devlink.h >>> +++ b/include/uapi/linux/devlink.h >>> @@ -686,6 +686,7 @@ enum devlink_port_function_attr { >>> DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ >>> DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ >>> DEVLINK_PORT_FN_ATTR_DEVLINK, /* nested */ >>> + DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, /* u32 */ >>> >>> __DEVLINK_PORT_FUNCTION_ATTR_MAX, >>> DEVLINK_PORT_FUNCTION_ATTR_MAX = >> __DEVLINK_PORT_FUNCTION_ATTR_MAX - >>> 1 diff --git a/net/devlink/port.c b/net/devlink/port.c index >>> 118d130d2afd..be9158b4453c 100644 >>> --- a/net/devlink/port.c >>> +++ b/net/devlink/port.c >>> @@ -16,6 +16,7 @@ static const struct nla_policy >> devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ >>> DEVLINK_PORT_FN_STATE_ACTIVE), >>> [DEVLINK_PORT_FN_ATTR_CAPS] = >>> >> NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK), >>> + [DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 }, >>> }; >>> >>> #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) >> \ >>> @@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct >> devlink_port *devlink_port, >>> return 0; >>> } >>> >>> +static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port, >>> + struct sk_buff *msg, >>> + struct netlink_ext_ack *extack, >>> + bool *msg_updated) >>> +{ >>> + u32 max_io_eqs; >>> + int err; >>> + >>> + if (!port->ops->port_fn_max_io_eqs_get) >>> + return 0; >>> + >>> + err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs, >> extack); >>> + if (err) { >>> + if (err == -EOPNOTSUPP) >>> + return 0; >> >> Docs above says: >> * Note: Driver should return -EOPNOTSUPP if it doesn't support >> * port function (@port_fn_*) handling for a particular port. >> >> But here you're returning 0 in both cases of no port_fn_max_io_eqs_get or >> port_fn_max_io_eqs_get() returns EOPNOTSUPP. >> > When the port does not support this op, the function pointer is null and, 0 is returned as expected. > > When the port for some reason has the ops function pointer set for a port, but if the port does not support the ops, it will return ENOPNOTSUPP. > This may be possible when the driver has chosen to use same ops callback structure for multiple port flavors. > > This code pattern is likely left over code of relatively recent work that moved ops from devlink instance to per port ops. > I propose to keep the current check as done in this patch, > and run a full audit of all the drivers, if all drivers have moved to per port ops, then simplify the code to drop the check for EOPNOTSUPP in a new series that may touch more drivers. > Otherwise, we may end up failing the port show operation when it returns - ENOPNOTSUPP. Thanks for the explanation. So ideally each port flavour has its own unique set of struct ops, and if something is not supported then don't set the func ptr in the struct ops. Yes, I see that 0 has to be returned for devlink_port_fn_caps_fill() to succeed. Had a brief look and there's only a handful of drivers (mlx, nfp, ice) that use devlink_port_ops. > >>> + return err; >>> + } >>> + err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, >> max_io_eqs); >>> + if (err) >>> + return err; >>> + *msg_updated = true; >>> + return 0; >>> +} >>> + >>> int devlink_nl_port_handle_fill(struct sk_buff *msg, struct >>> devlink_port *devlink_port) { >>> if (devlink_nl_put_handle(msg, devlink_port->devlink)) @@ -409,6 >>> +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port >> *devlink_port, >>> return 0; >>> } >>> >>> +static int >>> +devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port, >>> + const struct nlattr *attr, >>> + struct netlink_ext_ack *extack) { >>> + u32 max_io_eqs; >>> + >>> + max_io_eqs = nla_get_u32(attr); >>> + return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port, >>> + max_io_eqs, extack); >>> +} >>> + >>> static int >>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct >> devlink_port *port, >>> struct netlink_ext_ack *extack) @@ -428,6 >> +465,9 @@ >>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct >> devlink_port *por >>> if (err) >>> goto out; >>> err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); >>> + if (err) >>> + goto out; >>> + err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, >>> +&msg_updated); >>> if (err) >>> goto out; >>> err = devlink_rel_devlink_handle_put(msg, port->devlink, @@ -726,6 >>> +766,12 @@ static int devlink_port_function_validate(struct devlink_port >> *devlink_port, >>> } >>> } >>> } >>> + if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] && >>> + !ops->port_fn_max_io_eqs_set) { >>> + NL_SET_ERR_MSG_ATTR(extack, >> tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS], >>> + "Function does not support max_io_eqs >> setting"); >>> + return -EOPNOTSUPP; >>> + } >>> return 0; >>> } >>> >>> @@ -761,6 +807,13 @@ static int devlink_port_function_set(struct >> devlink_port *port, >>> return err; >>> } >>> >>> + attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS]; >>> + if (attr) { >>> + err = devlink_port_fn_max_io_eqs_set(port, attr, extack); >>> + if (err) >>> + return err; >>> + } >>> + >>> /* Keep this as the last function attribute set, so that when >>> * multiple port function attributes are set along with state, >>> * Those can be applied first before activating the state.