On 2024-04-04 11:50, Parav Pandit wrote: > > >> From: David Wei <dw@xxxxxxxxxxx> >> Sent: Friday, April 5, 2024 12:10 AM >> >> On 2024-04-04 10:58, Parav Pandit wrote: >>> Hi David, >>> >>>> From: David Wei <dw@xxxxxxxxxxx> >>>> Sent: Thursday, April 4, 2024 10:29 PM >>>> >>>> On 2024-04-03 10:41, Parav Pandit wrote: >>>>> Many devices send event notifications for the IO queues, such as tx >>>>> and rx queues, through event queues. >>>>> >>>>> Enable a privileged owner, such as a hypervisor PF, to set the >>>>> number of IO event queues for the VF and SF during the provisioning >> stage. >>>>> >>>>> example: >>>>> Get maximum IO event queues of the VF device:: >>>>> >>>>> $ devlink port show pci/0000:06:00.0/2 >>>>> pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf >>>>> pfnum 0 >>>> vfnum 1 >>>>> function: >>>>> hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs >>>>> 10 >>>>> >>>>> Set maximum IO event queues of the VF device:: >>>>> >>>>> $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 >>>>> >>>>> $ devlink port show pci/0000:06:00.0/2 >>>>> pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf >>>>> pfnum 0 >>>> vfnum 1 >>>>> function: >>>>> hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs >>>>> 32 >>>>> >>>>> Reviewed-by: Jiri Pirko <jiri@xxxxxxxxxx> >>>>> Reviewed-by: Shay Drory <shayd@xxxxxxxxxx> >>>>> Signed-off-by: Parav Pandit <parav@xxxxxxxxxx> >>>>> --- >>>>> changelog: >>>>> v2->v3: >>>>> - limited 80 chars per line >>>>> v1->v2: >>>>> - limited comment to 80 chars per line in header file >>>>> --- >>>>> .../networking/devlink/devlink-port.rst | 25 +++++++++ >>>>> include/net/devlink.h | 14 +++++ >>>>> include/uapi/linux/devlink.h | 1 + >>>>> net/devlink/port.c | 53 +++++++++++++++++++ >>>>> 4 files changed, 93 insertions(+) >>>>> >>>>> diff --git a/Documentation/networking/devlink/devlink-port.rst >>>>> b/Documentation/networking/devlink/devlink-port.rst >>>>> index 562f46b41274..451f57393f11 100644 >>>>> --- a/Documentation/networking/devlink/devlink-port.rst >>>>> +++ b/Documentation/networking/devlink/devlink-port.rst >>>>> @@ -134,6 +134,9 @@ Users may also set the IPsec crypto capability >>>>> of the function using Users may also set the IPsec packet >>>>> capability of the function using `devlink port function set ipsec_packet` >> command. >>>>> >>>>> +Users may also set the maximum IO event queues of the function >>>>> +using `devlink port function set max_io_eqs` command. >>>>> + >>>>> Function attributes >>>>> =================== >>>>> >>>>> @@ -295,6 +298,28 @@ policy is processed in software by the kernel. >>>>> function: >>>>> hw_addr 00:00:00:00:00:00 ipsec_packet enabled >>>>> >>>>> +Maximum IO events queues setup >>>>> +------------------------------ >>>>> +When user sets maximum number of IO event queues for a SF or a VF, >>>>> +such function driver is limited to consume only enforced number of >>>>> +IO event queues. >>>>> + >>>>> +- Get maximum IO event queues of the VF device:: >>>>> + >>>>> + $ devlink port show pci/0000:06:00.0/2 >>>>> + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf >>>>> + pfnum >>>> 0 vfnum 1 >>>>> + function: >>>>> + hw_addr 00:00:00:00:00:00 ipsec_packet disabled >>>>> + max_io_eqs 10 >>>>> + >>>>> +- Set maximum IO event queues of the VF device:: >>>>> + >>>>> + $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 >>>>> + >>>>> + $ devlink port show pci/0000:06:00.0/2 >>>>> + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf >>>>> + pfnum >>>> 0 vfnum 1 >>>>> + function: >>>>> + hw_addr 00:00:00:00:00:00 ipsec_packet disabled >>>>> + max_io_eqs 32 >>>>> + >>>>> Subfunction >>>>> ============ >>>>> >>>>> diff --git a/include/net/devlink.h b/include/net/devlink.h index >>>>> 9ac394bdfbe4..bb1af599d101 100644 >>>>> --- a/include/net/devlink.h >>>>> +++ b/include/net/devlink.h >>>>> @@ -1602,6 +1602,14 @@ void devlink_free(struct devlink *devlink); >>>>> * capability. Should be used by device drivers to >>>>> * enable/disable ipsec_packet capability of a >>>>> * function managed by the devlink port. >>>>> + * @port_fn_max_io_eqs_get: Callback used to get port function's >>>> maximum number >>>>> + * of event queues. Should be used by device drivers >>>> to >>>>> + * report the maximum event queues of a function >>>>> + * managed by the devlink port. >>>>> + * @port_fn_max_io_eqs_set: Callback used to set port function's >>>> maximum number >>>>> + * of event queues. Should be used by device drivers >>>> to >>>>> + * configure maximum number of event queues >>>>> + * of a function managed by the devlink port. >>>>> * >>>>> * Note: Driver should return -EOPNOTSUPP if it doesn't support >>>>> * port function (@port_fn_*) handling for a particular port. >>>>> @@ -1651,6 +1659,12 @@ struct devlink_port_ops { >>>>> int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port, >>>>> bool enable, >>>>> struct netlink_ext_ack *extack); >>>>> + int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port, >>>>> + u32 *max_eqs, >>>>> + struct netlink_ext_ack *extack); >>>>> + int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port, >>>>> + u32 max_eqs, >>>>> + struct netlink_ext_ack *extack); >>>>> }; >>>>> >>>>> void devlink_port_init(struct devlink *devlink, diff --git >>>>> a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index >>>>> 2da0c7eb6710..9401aa343673 100644 >>>>> --- a/include/uapi/linux/devlink.h >>>>> +++ b/include/uapi/linux/devlink.h >>>>> @@ -686,6 +686,7 @@ enum devlink_port_function_attr { >>>>> DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ >>>>> DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ >>>>> DEVLINK_PORT_FN_ATTR_DEVLINK, /* nested */ >>>>> + DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, /* u32 */ >>>>> >>>>> __DEVLINK_PORT_FUNCTION_ATTR_MAX, >>>>> DEVLINK_PORT_FUNCTION_ATTR_MAX = >>>> __DEVLINK_PORT_FUNCTION_ATTR_MAX - >>>>> 1 diff --git a/net/devlink/port.c b/net/devlink/port.c index >>>>> 118d130d2afd..be9158b4453c 100644 >>>>> --- a/net/devlink/port.c >>>>> +++ b/net/devlink/port.c >>>>> @@ -16,6 +16,7 @@ static const struct nla_policy >>>> devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ >>>>> DEVLINK_PORT_FN_STATE_ACTIVE), >>>>> [DEVLINK_PORT_FN_ATTR_CAPS] = >>>>> >>>> NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK), >>>>> + [DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 }, >>>>> }; >>>>> >>>>> #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) >>>> \ >>>>> @@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct >>>> devlink_port *devlink_port, >>>>> return 0; >>>>> } >>>>> >>>>> +static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port, >>>>> + struct sk_buff *msg, >>>>> + struct netlink_ext_ack *extack, >>>>> + bool *msg_updated) >>>>> +{ >>>>> + u32 max_io_eqs; >>>>> + int err; >>>>> + >>>>> + if (!port->ops->port_fn_max_io_eqs_get) >>>>> + return 0; >>>>> + >>>>> + err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs, >>>> extack); >>>>> + if (err) { >>>>> + if (err == -EOPNOTSUPP) >>>>> + return 0; >>>> >>>> Docs above says: >>>> * Note: Driver should return -EOPNOTSUPP if it doesn't support >>>> * port function (@port_fn_*) handling for a particular port. >>>> >>>> But here you're returning 0 in both cases of no >>>> port_fn_max_io_eqs_get or >>>> port_fn_max_io_eqs_get() returns EOPNOTSUPP. >>>> >>> When the port does not support this op, the function pointer is null and, 0 >> is returned as expected. >>> >>> When the port for some reason has the ops function pointer set for a port, >> but if the port does not support the ops, it will return ENOPNOTSUPP. >>> This may be possible when the driver has chosen to use same ops callback >> structure for multiple port flavors. >>> >>> This code pattern is likely left over code of relatively recent work that >> moved ops from devlink instance to per port ops. >>> I propose to keep the current check as done in this patch, and run a >>> full audit of all the drivers, if all drivers have moved to per port ops, then >> simplify the code to drop the check for EOPNOTSUPP in a new series that >> may touch more drivers. >>> Otherwise, we may end up failing the port show operation when it returns >> - ENOPNOTSUPP. >> >> Thanks for the explanation. So ideally each port flavour has its own unique >> set of struct ops, and if something is not supported then don't set the func >> ptr in the struct ops. >> >> Yes, I see that 0 has to be returned for devlink_port_fn_caps_fill() to succeed. >> >> Had a brief look and there's only a handful of drivers (mlx, nfp, ice) that use >> devlink_port_ops. >> > And netdevsim too. :) > Can we please the cleanup in a separate series? > Post this may be later in the month, I may have cycles to audit and simplify. > Would it be ok with you? Yeah of course, thanks. I'm making some changes in netdevsim and can do a drive-by cleanup there too. > >>> >>>>> + return err; >>>>> + } >>>>> + err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, >>>> max_io_eqs); >>>>> + if (err) >>>>> + return err; >>>>> + *msg_updated = true; >>>>> + return 0; >>>>> +} >>>>> + >>>>> int devlink_nl_port_handle_fill(struct sk_buff *msg, struct >>>>> devlink_port *devlink_port) { >>>>> if (devlink_nl_put_handle(msg, devlink_port->devlink)) @@ -409,6 >>>>> +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port >>>> *devlink_port, >>>>> return 0; >>>>> } >>>>> >>>>> +static int >>>>> +devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port, >>>>> + const struct nlattr *attr, >>>>> + struct netlink_ext_ack *extack) { >>>>> + u32 max_io_eqs; >>>>> + >>>>> + max_io_eqs = nla_get_u32(attr); >>>>> + return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port, >>>>> + max_io_eqs, extack); >>>>> +} >>>>> + >>>>> static int >>>>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct >>>> devlink_port *port, >>>>> struct netlink_ext_ack *extack) @@ -428,6 >>>> +465,9 @@ >>>>> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct >>>> devlink_port *por >>>>> if (err) >>>>> goto out; >>>>> err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); >>>>> + if (err) >>>>> + goto out; >>>>> + err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, >>>>> +&msg_updated); >>>>> if (err) >>>>> goto out; >>>>> err = devlink_rel_devlink_handle_put(msg, port->devlink, @@ -726,6 >>>>> +766,12 @@ static int devlink_port_function_validate(struct >>>>> +devlink_port >>>> *devlink_port, >>>>> } >>>>> } >>>>> } >>>>> + if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] && >>>>> + !ops->port_fn_max_io_eqs_set) { >>>>> + NL_SET_ERR_MSG_ATTR(extack, >>>> tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS], >>>>> + "Function does not support max_io_eqs >>>> setting"); >>>>> + return -EOPNOTSUPP; >>>>> + } >>>>> return 0; >>>>> } >>>>> >>>>> @@ -761,6 +807,13 @@ static int devlink_port_function_set(struct >>>> devlink_port *port, >>>>> return err; >>>>> } >>>>> >>>>> + attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS]; >>>>> + if (attr) { >>>>> + err = devlink_port_fn_max_io_eqs_set(port, attr, extack); >>>>> + if (err) >>>>> + return err; >>>>> + } >>>>> + >>>>> /* Keep this as the last function attribute set, so that when >>>>> * multiple port function attributes are set along with state, >>>>> * Those can be applied first before activating the state.