> From: David Wei <dw@xxxxxxxxxxx> > Sent: Friday, April 5, 2024 12:23 AM > > On 2024-04-04 11:50, Parav Pandit wrote: > > > > > >> From: David Wei <dw@xxxxxxxxxxx> > >> Sent: Friday, April 5, 2024 12:10 AM > >> > >> On 2024-04-04 10:58, Parav Pandit wrote: > >>> Hi David, > >>> > >>>> From: David Wei <dw@xxxxxxxxxxx> > >>>> Sent: Thursday, April 4, 2024 10:29 PM > >>>> > >>>> On 2024-04-03 10:41, Parav Pandit wrote: > >>>>> Many devices send event notifications for the IO queues, such as > >>>>> tx and rx queues, through event queues. > >>>>> > >>>>> Enable a privileged owner, such as a hypervisor PF, to set the > >>>>> number of IO event queues for the VF and SF during the > >>>>> provisioning > >> stage. > >>>>> > >>>>> example: > >>>>> Get maximum IO event queues of the VF device:: > >>>>> > >>>>> $ devlink port show pci/0000:06:00.0/2 > >>>>> pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf > >>>>> pfnum 0 > >>>> vfnum 1 > >>>>> function: > >>>>> hw_addr 00:00:00:00:00:00 ipsec_packet disabled > >>>>> max_io_eqs > >>>>> 10 > >>>>> > >>>>> Set maximum IO event queues of the VF device:: > >>>>> > >>>>> $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 > >>>>> > >>>>> $ devlink port show pci/0000:06:00.0/2 > >>>>> pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf > >>>>> pfnum 0 > >>>> vfnum 1 > >>>>> function: > >>>>> hw_addr 00:00:00:00:00:00 ipsec_packet disabled > >>>>> max_io_eqs > >>>>> 32 > >>>>> > >>>>> Reviewed-by: Jiri Pirko <jiri@xxxxxxxxxx> > >>>>> Reviewed-by: Shay Drory <shayd@xxxxxxxxxx> > >>>>> Signed-off-by: Parav Pandit <parav@xxxxxxxxxx> > >>>>> --- > >>>>> changelog: > >>>>> v2->v3: > >>>>> - limited 80 chars per line > >>>>> v1->v2: > >>>>> - limited comment to 80 chars per line in header file > >>>>> --- > >>>>> .../networking/devlink/devlink-port.rst | 25 +++++++++ > >>>>> include/net/devlink.h | 14 +++++ > >>>>> include/uapi/linux/devlink.h | 1 + > >>>>> net/devlink/port.c | 53 +++++++++++++++++++ > >>>>> 4 files changed, 93 insertions(+) > >>>>> > >>>>> diff --git a/Documentation/networking/devlink/devlink-port.rst > >>>>> b/Documentation/networking/devlink/devlink-port.rst > >>>>> index 562f46b41274..451f57393f11 100644 > >>>>> --- a/Documentation/networking/devlink/devlink-port.rst > >>>>> +++ b/Documentation/networking/devlink/devlink-port.rst > >>>>> @@ -134,6 +134,9 @@ Users may also set the IPsec crypto capability > >>>>> of the function using Users may also set the IPsec packet > >>>>> capability of the function using `devlink port function set > >>>>> ipsec_packet` > >> command. > >>>>> > >>>>> +Users may also set the maximum IO event queues of the function > >>>>> +using `devlink port function set max_io_eqs` command. > >>>>> + > >>>>> Function attributes > >>>>> =================== > >>>>> > >>>>> @@ -295,6 +298,28 @@ policy is processed in software by the kernel. > >>>>> function: > >>>>> hw_addr 00:00:00:00:00:00 ipsec_packet enabled > >>>>> > >>>>> +Maximum IO events queues setup > >>>>> +------------------------------ > >>>>> +When user sets maximum number of IO event queues for a SF or a > >>>>> +VF, such function driver is limited to consume only enforced > >>>>> +number of IO event queues. > >>>>> + > >>>>> +- Get maximum IO event queues of the VF device:: > >>>>> + > >>>>> + $ devlink port show pci/0000:06:00.0/2 > >>>>> + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour > >>>>> + pcivf pfnum > >>>> 0 vfnum 1 > >>>>> + function: > >>>>> + hw_addr 00:00:00:00:00:00 ipsec_packet disabled > >>>>> + max_io_eqs 10 > >>>>> + > >>>>> +- Set maximum IO event queues of the VF device:: > >>>>> + > >>>>> + $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 > >>>>> + > >>>>> + $ devlink port show pci/0000:06:00.0/2 > >>>>> + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour > >>>>> + pcivf pfnum > >>>> 0 vfnum 1 > >>>>> + function: > >>>>> + hw_addr 00:00:00:00:00:00 ipsec_packet disabled > >>>>> + max_io_eqs 32 > >>>>> + > >>>>> Subfunction > >>>>> ============ > >>>>> > >>>>> diff --git a/include/net/devlink.h b/include/net/devlink.h index > >>>>> 9ac394bdfbe4..bb1af599d101 100644 > >>>>> --- a/include/net/devlink.h > >>>>> +++ b/include/net/devlink.h > >>>>> @@ -1602,6 +1602,14 @@ void devlink_free(struct devlink *devlink); > >>>>> * capability. Should be used by device > drivers to > >>>>> * enable/disable ipsec_packet capability of > a > >>>>> * function managed by the devlink port. > >>>>> + * @port_fn_max_io_eqs_get: Callback used to get port function's > >>>> maximum number > >>>>> + * of event queues. Should be used by device > drivers > >>>> to > >>>>> + * report the maximum event queues of a > function > >>>>> + * managed by the devlink port. > >>>>> + * @port_fn_max_io_eqs_set: Callback used to set port function's > >>>> maximum number > >>>>> + * of event queues. Should be used by device > drivers > >>>> to > >>>>> + * configure maximum number of event > queues > >>>>> + * of a function managed by the devlink port. > >>>>> * > >>>>> * Note: Driver should return -EOPNOTSUPP if it doesn't support > >>>>> * port function (@port_fn_*) handling for a particular port. > >>>>> @@ -1651,6 +1659,12 @@ struct devlink_port_ops { > >>>>> int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port, > >>>>> bool enable, > >>>>> struct netlink_ext_ack *extack); > >>>>> + int (*port_fn_max_io_eqs_get)(struct devlink_port > *devlink_port, > >>>>> + u32 *max_eqs, > >>>>> + struct netlink_ext_ack *extack); > >>>>> + int (*port_fn_max_io_eqs_set)(struct devlink_port > *devlink_port, > >>>>> + u32 max_eqs, > >>>>> + struct netlink_ext_ack *extack); > >>>>> }; > >>>>> > >>>>> void devlink_port_init(struct devlink *devlink, diff --git > >>>>> a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h > >>>>> index > >>>>> 2da0c7eb6710..9401aa343673 100644 > >>>>> --- a/include/uapi/linux/devlink.h > >>>>> +++ b/include/uapi/linux/devlink.h > >>>>> @@ -686,6 +686,7 @@ enum devlink_port_function_attr { > >>>>> DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ > >>>>> DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ > >>>>> DEVLINK_PORT_FN_ATTR_DEVLINK, /* nested */ > >>>>> + DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, /* u32 */ > >>>>> > >>>>> __DEVLINK_PORT_FUNCTION_ATTR_MAX, > >>>>> DEVLINK_PORT_FUNCTION_ATTR_MAX = > >>>> __DEVLINK_PORT_FUNCTION_ATTR_MAX - > >>>>> 1 diff --git a/net/devlink/port.c b/net/devlink/port.c index > >>>>> 118d130d2afd..be9158b4453c 100644 > >>>>> --- a/net/devlink/port.c > >>>>> +++ b/net/devlink/port.c > >>>>> @@ -16,6 +16,7 @@ static const struct nla_policy > >>>> devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ > >>>>> DEVLINK_PORT_FN_STATE_ACTIVE), > >>>>> [DEVLINK_PORT_FN_ATTR_CAPS] = > >>>>> > >>>> NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK), > >>>>> + [DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = > NLA_U32 }, > >>>>> }; > >>>>> > >>>>> #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) > >>>> \ > >>>>> @@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct > >>>> devlink_port *devlink_port, > >>>>> return 0; > >>>>> } > >>>>> > >>>>> +static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port, > >>>>> + struct sk_buff *msg, > >>>>> + struct netlink_ext_ack > *extack, > >>>>> + bool *msg_updated) > >>>>> +{ > >>>>> + u32 max_io_eqs; > >>>>> + int err; > >>>>> + > >>>>> + if (!port->ops->port_fn_max_io_eqs_get) > >>>>> + return 0; > >>>>> + > >>>>> + err = port->ops->port_fn_max_io_eqs_get(port, > &max_io_eqs, > >>>> extack); > >>>>> + if (err) { > >>>>> + if (err == -EOPNOTSUPP) > >>>>> + return 0; > >>>> > >>>> Docs above says: > >>>> * Note: Driver should return -EOPNOTSUPP if it doesn't support > >>>> * port function (@port_fn_*) handling for a particular port. > >>>> > >>>> But here you're returning 0 in both cases of no > >>>> port_fn_max_io_eqs_get or > >>>> port_fn_max_io_eqs_get() returns EOPNOTSUPP. > >>>> > >>> When the port does not support this op, the function pointer is null > >>> and, 0 > >> is returned as expected. > >>> > >>> When the port for some reason has the ops function pointer set for a > >>> port, > >> but if the port does not support the ops, it will return ENOPNOTSUPP. > >>> This may be possible when the driver has chosen to use same ops > >>> callback > >> structure for multiple port flavors. > >>> > >>> This code pattern is likely left over code of relatively recent work > >>> that > >> moved ops from devlink instance to per port ops. > >>> I propose to keep the current check as done in this patch, and run a > >>> full audit of all the drivers, if all drivers have moved to per port > >>> ops, then > >> simplify the code to drop the check for EOPNOTSUPP in a new series > >> that may touch more drivers. > >>> Otherwise, we may end up failing the port show operation when it > >>> returns > >> - ENOPNOTSUPP. > >> > >> Thanks for the explanation. So ideally each port flavour has its own > >> unique set of struct ops, and if something is not supported then > >> don't set the func ptr in the struct ops. > >> > >> Yes, I see that 0 has to be returned for devlink_port_fn_caps_fill() to > succeed. > >> > >> Had a brief look and there's only a handful of drivers (mlx, nfp, > >> ice) that use devlink_port_ops. > >> > > And netdevsim too. :) > > Can we please the cleanup in a separate series? > > Post this may be later in the month, I may have cycles to audit and simplify. > > Would it be ok with you? > > Yeah of course, thanks. I'm making some changes in netdevsim and can do a > drive-by cleanup there too. > Ok. yes, that will be helpful. Thanks. > > > >>> > >>>>> + return err; > >>>>> + } > >>>>> + err = nla_put_u32(msg, > DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, > >>>> max_io_eqs); > >>>>> + if (err) > >>>>> + return err; > >>>>> + *msg_updated = true; > >>>>> + return 0; > >>>>> +} > >>>>> + > >>>>> int devlink_nl_port_handle_fill(struct sk_buff *msg, struct > >>>>> devlink_port *devlink_port) { > >>>>> if (devlink_nl_put_handle(msg, devlink_port->devlink)) @@ -409,6 > >>>>> +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port > >>>> *devlink_port, > >>>>> return 0; > >>>>> } > >>>>> > >>>>> +static int > >>>>> +devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port, > >>>>> + const struct nlattr *attr, > >>>>> + struct netlink_ext_ack *extack) { > >>>>> + u32 max_io_eqs; > >>>>> + > >>>>> + max_io_eqs = nla_get_u32(attr); > >>>>> + return devlink_port->ops- > >port_fn_max_io_eqs_set(devlink_port, > >>>>> + max_io_eqs, > extack); > >>>>> +} > >>>>> + > >>>>> static int > >>>>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct > >>>> devlink_port *port, > >>>>> struct netlink_ext_ack *extack) @@ -428,6 > >>>> +465,9 @@ > >>>>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct > >>>> devlink_port *por > >>>>> if (err) > >>>>> goto out; > >>>>> err = devlink_port_fn_state_fill(port, msg, extack, > >>>>> &msg_updated); > >>>>> + if (err) > >>>>> + goto out; > >>>>> + err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, > >>>>> +&msg_updated); > >>>>> if (err) > >>>>> goto out; > >>>>> err = devlink_rel_devlink_handle_put(msg, port->devlink, @@ > >>>>> -726,6 > >>>>> +766,12 @@ static int devlink_port_function_validate(struct > >>>>> +devlink_port > >>>> *devlink_port, > >>>>> } > >>>>> } > >>>>> } > >>>>> + if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] && > >>>>> + !ops->port_fn_max_io_eqs_set) { > >>>>> + NL_SET_ERR_MSG_ATTR(extack, > >>>> tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS], > >>>>> + "Function does not support > max_io_eqs > >>>> setting"); > >>>>> + return -EOPNOTSUPP; > >>>>> + } > >>>>> return 0; > >>>>> } > >>>>> > >>>>> @@ -761,6 +807,13 @@ static int devlink_port_function_set(struct > >>>> devlink_port *port, > >>>>> return err; > >>>>> } > >>>>> > >>>>> + attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS]; > >>>>> + if (attr) { > >>>>> + err = devlink_port_fn_max_io_eqs_set(port, attr, > extack); > >>>>> + if (err) > >>>>> + return err; > >>>>> + } > >>>>> + > >>>>> /* Keep this as the last function attribute set, so that when > >>>>> * multiple port function attributes are set along with state, > >>>>> * Those can be applied first before activating the state.