Set basic caps that are required to initialize the device properly. Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> --- providers/mlx5/mlx5_ifc.h | 87 ++++++++++++++++++++- providers/mlx5/mlx5_vfio.c | 185 ++++++++++++++++++++++++++++++++++++++++++++- providers/mlx5/mlx5_vfio.h | 21 +++++ 3 files changed, 290 insertions(+), 3 deletions(-) diff --git a/providers/mlx5/mlx5_ifc.h b/providers/mlx5/mlx5_ifc.h index ac741cd..082ac1f 100644 --- a/providers/mlx5/mlx5_ifc.h +++ b/providers/mlx5/mlx5_ifc.h @@ -36,6 +36,7 @@ #define u8 uint8_t enum mlx5_cap_mode { + HCA_CAP_OPMOD_GET_MAX = 0, HCA_CAP_OPMOD_GET_CUR = 1, }; @@ -46,6 +47,7 @@ enum { MLX5_CMD_OP_ENABLE_HCA = 0x104, MLX5_CMD_OP_QUERY_PAGES = 0x107, MLX5_CMD_OP_MANAGE_PAGES = 0x108, + MLX5_CMD_OP_SET_HCA_CAP = 0x109, MLX5_CMD_OP_QUERY_ISSI = 0x10a, MLX5_CMD_OP_SET_ISSI = 0x10b, MLX5_CMD_OP_CREATE_MKEY = 0x200, @@ -61,6 +63,7 @@ enum { MLX5_CMD_OP_QUERY_DCT = 0x713, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755, MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760, MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_QUERY_LAG = 0x842, @@ -110,6 +113,11 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, }; +enum { + MLX5_CAP_PORT_TYPE_IB = 0x0, + MLX5_CAP_PORT_TYPE_ETH = 0x1, +}; + struct mlx5_ifc_atomic_caps_bits { u8 reserved_at_0[0x40]; @@ -140,7 +148,8 @@ struct mlx5_ifc_atomic_caps_bits { }; struct mlx5_ifc_roce_cap_bits { - u8 reserved_0[0x5]; + u8 reserved_0[0x4]; + u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; u8 reserved_at_7[0x17]; @@ -912,7 +921,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uar_4k[0x1]; u8 reserved_at_241[0x9]; u8 uar_sz[0x6]; - u8 reserved_at_250[0x3]; + u8 reserved_at_250[0x2]; + u8 umem_uid_0[0x1]; u8 log_max_dc_cnak_qps[0x5]; u8 log_pg_sz[0x8]; @@ -1339,8 +1349,11 @@ struct mlx5_ifc_query_hca_cap_in_bits { }; enum mlx5_cap_type { + MLX5_CAP_GENERAL = 0, MLX5_CAP_ODP = 2, MLX5_CAP_ATOMIC = 3, + MLX5_CAP_ROCE, + MLX5_CAP_NUM, }; enum { @@ -4346,4 +4359,74 @@ struct mlx5_ifc_access_register_in_bits { u8 register_data[][0x20]; }; +struct mlx5_ifc_modify_nic_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_modify_nic_vport_field_select_bits { + u8 reserved_at_0[0x12]; + u8 affiliation[0x1]; + u8 reserved_at_13[0x1]; + u8 disable_uc_local_lb[0x1]; + u8 disable_mc_local_lb[0x1]; + u8 node_guid[0x1]; + u8 port_guid[0x1]; + u8 min_inline[0x1]; + u8 mtu[0x1]; + u8 change_event[0x1]; + u8 promisc[0x1]; + u8 permanent_address[0x1]; + u8 addresses_list[0x1]; + u8 roce_en[0x1]; + u8 reserved_at_1f[0x1]; +}; + +struct mlx5_ifc_modify_nic_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + struct mlx5_ifc_modify_nic_vport_field_select_bits field_select; + + u8 reserved_at_80[0x780]; + + struct mlx5_ifc_nic_vport_context_bits nic_vport_context; +}; + +struct mlx5_ifc_set_hca_cap_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_set_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 other_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; + + union mlx5_ifc_hca_cap_union_bits capability; +}; + #endif /* MLX5_IFC_H */ diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c index 4d12807..bd128c2 100644 --- a/providers/mlx5/mlx5_vfio.c +++ b/providers/mlx5/mlx5_vfio.c @@ -1141,6 +1141,177 @@ out: return err; } +static int mlx5_vfio_get_caps_mode(struct mlx5_vfio_context *ctx, + enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) +{ + uint8_t in[DEVX_ST_SZ_BYTES(query_hca_cap_in)] = {}; + int out_sz = DEVX_ST_SZ_BYTES(query_hca_cap_out); + void *out, *hca_caps; + uint16_t opmod = (cap_type << 1) | (cap_mode & 0x01); + int err; + + out = calloc(1, out_sz); + if (!out) { + errno = ENOMEM; + return errno; + } + + DEVX_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + DEVX_SET(query_hca_cap_in, in, op_mod, opmod); + err = mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, out_sz, 0); + if (err) + goto query_ex; + + hca_caps = DEVX_ADDR_OF(query_hca_cap_out, out, capability); + + switch (cap_mode) { + case HCA_CAP_OPMOD_GET_MAX: + memcpy(ctx->caps.hca_max[cap_type], hca_caps, + DEVX_UN_SZ_BYTES(hca_cap_union)); + break; + case HCA_CAP_OPMOD_GET_CUR: + memcpy(ctx->caps.hca_cur[cap_type], hca_caps, + DEVX_UN_SZ_BYTES(hca_cap_union)); + break; + default: + err = EINVAL; + assert(false); + break; + } + +query_ex: + free(out); + return err; +} + +enum mlx5_vport_roce_state { + MLX5_VPORT_ROCE_DISABLED = 0, + MLX5_VPORT_ROCE_ENABLED = 1, +}; + +static int mlx5_vfio_nic_vport_update_roce_state(struct mlx5_vfio_context *ctx, + enum mlx5_vport_roce_state state) +{ + uint32_t out[DEVX_ST_SZ_DW(modify_nic_vport_context_out)] = {}; + int inlen = DEVX_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = calloc(1, inlen); + if (!in) { + errno = ENOMEM; + return errno; + } + + DEVX_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1); + DEVX_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en, + state); + DEVX_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_vfio_cmd_exec(ctx, in, inlen, out, sizeof(out), 0); + + free(in); + + return err; +} + +static int mlx5_vfio_get_caps(struct mlx5_vfio_context *ctx, enum mlx5_cap_type cap_type) +{ + int ret; + + ret = mlx5_vfio_get_caps_mode(ctx, cap_type, HCA_CAP_OPMOD_GET_CUR); + if (ret) + return ret; + + return mlx5_vfio_get_caps_mode(ctx, cap_type, HCA_CAP_OPMOD_GET_MAX); +} + +static int handle_hca_cap_roce(struct mlx5_vfio_context *ctx, void *set_ctx, + int ctx_size) +{ + int err; + uint32_t out[DEVX_ST_SZ_DW(set_hca_cap_out)] = {}; + void *set_hca_cap; + + if (!MLX5_VFIO_CAP_GEN(ctx, roce)) + return 0; + + err = mlx5_vfio_get_caps(ctx, MLX5_CAP_ROCE); + if (err) + return err; + + if (MLX5_VFIO_CAP_ROCE(ctx, sw_r_roce_src_udp_port) || + !MLX5_VFIO_CAP_ROCE_MAX(ctx, sw_r_roce_src_udp_port)) + return 0; + + set_hca_cap = DEVX_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, ctx->caps.hca_cur[MLX5_CAP_ROCE], + DEVX_ST_SZ_BYTES(roce_cap)); + DEVX_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1); + DEVX_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP); + DEVX_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_ROCE); + return mlx5_vfio_cmd_exec(ctx, set_ctx, ctx_size, out, sizeof(out), 0); +} + +static int handle_hca_cap(struct mlx5_vfio_context *ctx, void *set_ctx, int set_sz) +{ + struct mlx5_vfio_device *dev = to_mvfio_dev(ctx->vctx.context.device); + int sys_page_shift = ilog32(dev->page_size - 1); + uint32_t out[DEVX_ST_SZ_DW(set_hca_cap_out)] = {}; + void *set_hca_cap; + int err; + + err = mlx5_vfio_get_caps(ctx, MLX5_CAP_GENERAL); + if (err) + return err; + + set_hca_cap = DEVX_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, ctx->caps.hca_cur[MLX5_CAP_GENERAL], + DEVX_ST_SZ_BYTES(cmd_hca_cap)); + + /* disable cmdif checksum */ + DEVX_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + + if (dev->flags & MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN) + DEVX_SET(cmd_hca_cap, set_hca_cap, disable_link_up_by_init_hca, 1); + + DEVX_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, sys_page_shift - 12); + + if (MLX5_VFIO_CAP_GEN_MAX(ctx, mkey_by_name)) + DEVX_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); + + DEVX_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP); + DEVX_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); + + return mlx5_vfio_cmd_exec(ctx, set_ctx, set_sz, out, sizeof(out), 0); +} + +static int set_hca_cap(struct mlx5_vfio_context *ctx) +{ + int set_sz = DEVX_ST_SZ_BYTES(set_hca_cap_in); + void *set_ctx; + int err; + + set_ctx = calloc(1, set_sz); + if (!set_ctx) { + errno = ENOMEM; + return errno; + } + + err = handle_hca_cap(ctx, set_ctx, set_sz); + if (err) + goto out; + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_roce(ctx, set_ctx, set_sz); +out: + free(set_ctx); + return err; +} + static int mlx5_vfio_set_hca_ctrl(struct mlx5_vfio_context *ctx) { struct mlx5_reg_host_endianness he_in = {}; @@ -1217,6 +1388,15 @@ static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx) if (err) return err; + err = set_hca_cap(ctx); + if (err) + return err; + + if (!MLX5_VFIO_CAP_GEN(ctx, umem_uid_0)) { + errno = EOPNOTSUPP; + return errno; + } + err = mlx5_vfio_satisfy_startup_pages(ctx, 0); if (err) return err; @@ -1225,7 +1405,10 @@ static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx) if (err) return err; - return 0; + if (MLX5_VFIO_CAP_GEN(ctx, port_type) == MLX5_CAP_PORT_TYPE_ETH) + err = mlx5_vfio_nic_vport_update_roce_state(ctx, MLX5_VPORT_ROCE_ENABLED); + + return err; } static void mlx5_vfio_uninit_context(struct mlx5_vfio_context *ctx) diff --git a/providers/mlx5/mlx5_vfio.h b/providers/mlx5/mlx5_vfio.h index 36b1f40..225c1b9 100644 --- a/providers/mlx5/mlx5_vfio.h +++ b/providers/mlx5/mlx5_vfio.h @@ -12,6 +12,7 @@ #include <infiniband/driver.h> #include <util/interval_set.h> +#include "mlx5_ifc.h" #define FW_INIT_WAIT_MS 2 #define FW_PRE_INIT_TIMEOUT_MILI 120000 @@ -43,6 +44,22 @@ struct mlx5_vfio_device { #error Host endianness not defined #endif +/* GET Dev Caps macros */ +#define MLX5_VFIO_CAP_GEN(ctx, cap) \ + DEVX_GET(cmd_hca_cap, ctx->caps.hca_cur[MLX5_CAP_GENERAL], cap) + +#define MLX5_VFIO_CAP_GEN_64(mdev, cap) \ + DEVX_GET64(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap) + +#define MLX5_VFIO_CAP_GEN_MAX(ctx, cap) \ + DEVX_GET(cmd_hca_cap, ctx->caps.hca_max[MLX5_CAP_GENERAL], cap) + +#define MLX5_VFIO_CAP_ROCE(ctx, cap) \ + DEVX_GET(roce_cap, ctx->caps.hca_cur[MLX5_CAP_ROCE], cap) + +#define MLX5_VFIO_CAP_ROCE_MAX(ctx, cap) \ + DEVX_GET(roce_cap, ctx->caps.hca_max[MLX5_CAP_ROCE], cap) + struct mlx5_reg_host_endianness { uint8_t he; uint8_t rsvd[15]; @@ -162,6 +179,10 @@ struct mlx5_vfio_context { size_t bar_map_size; struct mlx5_vfio_cmd cmd; bool have_eq; + struct { + uint32_t hca_cur[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)]; + uint32_t hca_max[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)]; + } caps; }; static inline struct mlx5_vfio_device *to_mvfio_dev(struct ibv_device *ibdev) -- 1.8.3.1