On 2024-04-04 11:05, Parav Pandit wrote: > > >> From: David Wei <dw@xxxxxxxxxxx> >> Sent: Thursday, April 4, 2024 10:27 PM >> >> On 2024-04-03 10:41, Parav Pandit wrote: >>> Implement get and set for the maximum IO event queues for SF and VF. >>> This enables administrator on the hypervisor to control the maximum IO >>> event queues which are typically used to derive the maximum and >>> default number of net device channels or rdma device completion vectors. >>> >>> Reviewed-by: Shay Drory <shayd@xxxxxxxxxx> >>> Signed-off-by: Parav Pandit <parav@xxxxxxxxxx> >>> --- >>> changelog: >>> v2->v3: >>> - limited to 80 chars per line in devlink >>> - fixed comments from Jakub in mlx5 driver to fix missing mutex unlock >>> on error path >>> v1->v2: >>> - fixed comments from Kalesh >>> - fixed missing kfree in get call >>> - returning error code for get cmd failure >>> - fixed error msg copy paste error in set on cmd failure >>> - limited code to 80 chars limit >>> - fixed set function variables for reverse christmas tree >>> --- >>> .../mellanox/mlx5/core/esw/devlink_port.c | 4 + >>> .../net/ethernet/mellanox/mlx5/core/eswitch.h | 7 ++ >>> .../mellanox/mlx5/core/eswitch_offloads.c | 97 +++++++++++++++++++ >>> 3 files changed, 108 insertions(+) >>> >>> diff --git >>> a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c >>> b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c >>> index d8e739cbcbce..f8869c9b6802 100644 >>> --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c >>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c >>> @@ -98,6 +98,8 @@ static const struct devlink_port_ops >> mlx5_esw_pf_vf_dl_port_ops = { >>> .port_fn_ipsec_packet_get = >> mlx5_devlink_port_fn_ipsec_packet_get, >>> .port_fn_ipsec_packet_set = >> mlx5_devlink_port_fn_ipsec_packet_set, >>> #endif /* CONFIG_XFRM_OFFLOAD */ >>> + .port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get, >>> + .port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set, >>> }; >>> >>> static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct >>> mlx5_eswitch *esw, @@ -143,6 +145,8 @@ static const struct >> devlink_port_ops mlx5_esw_dl_sf_port_ops = { >>> .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, >>> .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, >>> #endif >>> + .port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get, >>> + .port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set, >>> }; >>> >>> int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, >>> struct mlx5_vport *vport) diff --git >>> a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h >>> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h >>> index 349e28a6dd8d..50ce1ea20dd4 100644 >>> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h >>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h >>> @@ -573,6 +573,13 @@ int mlx5_devlink_port_fn_ipsec_packet_get(struct >>> devlink_port *port, bool *is_en int >> mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool >> enable, >>> struct netlink_ext_ack *extack); >> #endif /* >>> CONFIG_XFRM_OFFLOAD */ >>> +int mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, >>> + u32 *max_io_eqs, >>> + struct netlink_ext_ack *extack); int >>> +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, >>> + u32 max_io_eqs, >>> + struct netlink_ext_ack *extack); >>> + >>> void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 >>> rep_type); >>> >>> int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, diff >>> --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c >>> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c >>> index baaae628b0a0..2ad50634b401 100644 >>> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c >>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c >>> @@ -66,6 +66,8 @@ >>> >>> #define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) >>> >>> +#define MLX5_ESW_MAX_CTRL_EQS 4 >>> + >>> static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { >>> .max_fte = MLX5_ESW_VPORT_TBL_SIZE, >>> .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, @@ - >> 4557,3 +4559,98 >>> @@ int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, >>> return err; >>> } >>> #endif /* CONFIG_XFRM_OFFLOAD */ >>> + >>> +int >>> +mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 >> *max_io_eqs, >>> + struct netlink_ext_ack *extack) { >>> + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); >>> + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); >>> + u16 vport_num = vport->vport; >>> + struct mlx5_eswitch *esw; >>> + void *query_ctx; >>> + void *hca_caps; >>> + u32 max_eqs; >>> + int err; >>> + >>> + esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); >>> + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { >>> + NL_SET_ERR_MSG_MOD(extack, >>> + "Device doesn't support VHCA >> management"); >>> + return -EOPNOTSUPP; >>> + } >>> + >>> + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); >>> + if (!query_ctx) >>> + return -ENOMEM; >>> + >>> + mutex_lock(&esw->state_lock); >>> + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, >> query_ctx, >>> + MLX5_CAP_GENERAL); >>> + if (err) { >>> + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); >>> + goto out; >>> + } >>> + >>> + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, >> capability); >>> + max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs); >>> + if (max_eqs < MLX5_ESW_MAX_CTRL_EQS) >>> + *max_io_eqs = 0; >>> + else >>> + *max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS; >>> +out: >>> + mutex_unlock(&esw->state_lock); >>> + kfree(query_ctx); >>> + return err; >>> +} >>> + >>> +int >>> +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 >> max_io_eqs, >>> + struct netlink_ext_ack *extack) { >>> + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); >>> + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); >>> + u16 max_eqs = max_io_eqs + MLX5_ESW_MAX_CTRL_EQS; >>> + u16 vport_num = vport->vport; >>> + struct mlx5_eswitch *esw; >>> + void *query_ctx; >>> + void *hca_caps; >>> + int err; >>> + >>> + esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); >>> + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { >>> + NL_SET_ERR_MSG_MOD(extack, >>> + "Device doesn't support VHCA >> management"); >>> + return -EOPNOTSUPP; >>> + } >>> + >>> + if (max_io_eqs + MLX5_ESW_MAX_CTRL_EQS > USHRT_MAX) { >> >> nit: this is the same as max_eqs >> > Yes. > max_eqs may overflow and we may miss the overflow check by reusing max_eqs. > Above if condition avoids that bug. Thanks, I missed that max_io_eqs is u32. > That reminds me that I can replace above max_eqs line and this with check_add_overflow() and store the result in max_io_eqs. > And save one line. :) > > Shall I resend with this change? Yes that sounds good, thank you. > >>> + NL_SET_ERR_MSG_MOD(extack, "Supplied value out of >> range"); >>> + return -EINVAL; >>> + } >>> + >>> + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); >>> + if (!query_ctx) >>> + return -ENOMEM; >>> + >>> + mutex_lock(&esw->state_lock); >>> + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, >> query_ctx, >>> + MLX5_CAP_GENERAL); >>> + if (err) { >>> + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); >>> + goto out; >>> + } >>> + >>> + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, >> capability); >>> + MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs); >>> + >>> + err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, >> vport_num, >>> + >> MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); >>> + if (err) >>> + NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps"); >>> + >>> +out: >>> + mutex_unlock(&esw->state_lock); >>> + kfree(query_ctx); >>> + return err; >>> +}