Setup device function support by following the required command sequence and steps based on the device specification. Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> --- providers/mlx5/mlx5_ifc.h | 215 +++++++++++++++++++++++++++++++++++++++ providers/mlx5/mlx5_vfio.c | 246 +++++++++++++++++++++++++++++++++++++++++++++ providers/mlx5/mlx5_vfio.h | 16 +++ providers/mlx5/mlx5dv.h | 4 + 4 files changed, 481 insertions(+) diff --git a/providers/mlx5/mlx5_ifc.h b/providers/mlx5/mlx5_ifc.h index aef6196..ac741cd 100644 --- a/providers/mlx5/mlx5_ifc.h +++ b/providers/mlx5/mlx5_ifc.h @@ -41,6 +41,13 @@ enum mlx5_cap_mode { enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, + MLX5_CMD_OP_INIT_HCA = 0x102, + MLX5_CMD_OP_TEARDOWN_HCA = 0x103, + MLX5_CMD_OP_ENABLE_HCA = 0x104, + MLX5_CMD_OP_QUERY_PAGES = 0x107, + MLX5_CMD_OP_MANAGE_PAGES = 0x108, + MLX5_CMD_OP_QUERY_ISSI = 0x10a, + MLX5_CMD_OP_SET_ISSI = 0x10b, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_CREATE_QP = 0x500, MLX5_CMD_OP_RST2INIT_QP = 0x502, @@ -55,6 +62,7 @@ enum { MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760, + MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_QUERY_LAG = 0x842, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_SQ = 0x905, @@ -92,6 +100,16 @@ enum { MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, }; +enum { + MLX5_PAGES_CANT_GIVE = 0, + MLX5_PAGES_GIVE = 1, + MLX5_PAGES_TAKE = 2, +}; + +enum { + MLX5_REG_HOST_ENDIANNESS = 0x7004, +}; + struct mlx5_ifc_atomic_caps_bits { u8 reserved_at_0[0x40]; @@ -4131,4 +4149,201 @@ struct mlx5_ifc_mbox_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_enable_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_enable_hca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; +}; + +struct mlx5_ifc_query_issi_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x10]; + u8 current_issi[0x10]; + + u8 reserved_at_60[0xa0]; + + u8 reserved_at_100[76][0x8]; + u8 supported_issi_dw0[0x20]; +}; + +struct mlx5_ifc_query_issi_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_set_issi_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_set_issi_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 current_issi[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_query_pages_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 embedded_cpu_function[0x01]; + u8 reserved_bits[0x0f]; + u8 function_id[0x10]; + + u8 num_pages[0x20]; +}; + +struct mlx5_ifc_query_pages_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_manage_pages_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 output_num_entries[0x20]; + + u8 reserved_at_60[0x20]; + + u8 pas[][0x40]; +}; + +struct mlx5_ifc_manage_pages_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + u8 input_num_entries[0x20]; + + u8 pas[][0x40]; +}; + +struct mlx5_ifc_teardown_hca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x3f]; + + u8 state[0x1]; +}; + +enum { + MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0, +}; + +struct mlx5_ifc_teardown_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 profile[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_init_hca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_init_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_access_register_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 register_data[][0x20]; +}; + +struct mlx5_ifc_access_register_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 register_id[0x10]; + + u8 argument[0x20]; + + u8 register_data[][0x20]; +}; + #endif /* MLX5_IFC_H */ diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c index 37f06a9..4d12807 100644 --- a/providers/mlx5/mlx5_vfio.c +++ b/providers/mlx5/mlx5_vfio.c @@ -988,6 +988,246 @@ close_cont: return -1; } +static int mlx5_vfio_enable_hca(struct mlx5_vfio_context *ctx) +{ + uint32_t in[DEVX_ST_SZ_DW(enable_hca_in)] = {}; + uint32_t out[DEVX_ST_SZ_DW(enable_hca_out)] = {}; + + DEVX_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + return mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0); +} + +static int mlx5_vfio_set_issi(struct mlx5_vfio_context *ctx) +{ + uint32_t query_in[DEVX_ST_SZ_DW(query_issi_in)] = {}; + uint32_t query_out[DEVX_ST_SZ_DW(query_issi_out)] = {}; + uint32_t set_in[DEVX_ST_SZ_DW(set_issi_in)] = {}; + uint32_t set_out[DEVX_ST_SZ_DW(set_issi_out)] = {}; + uint32_t sup_issi; + int err; + + DEVX_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); + err = mlx5_vfio_cmd_exec(ctx, query_in, sizeof(query_in), query_out, + sizeof(query_out), 0); + if (err) + return err; + + sup_issi = DEVX_GET(query_issi_out, query_out, supported_issi_dw0); + + if (!(sup_issi & (1 << 1))) { + errno = EOPNOTSUPP; + return errno; + } + + DEVX_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); + DEVX_SET(set_issi_in, set_in, current_issi, 1); + return mlx5_vfio_cmd_exec(ctx, set_in, sizeof(set_in), set_out, + sizeof(set_out), 0); +} + +static int mlx5_vfio_give_pages(struct mlx5_vfio_context *ctx, + uint16_t func_id, + int32_t npages) +{ + int32_t out[DEVX_ST_SZ_DW(manage_pages_out)] = {}; + int inlen = DEVX_ST_SZ_BYTES(manage_pages_in); + int i, err; + int32_t *in; + uint64_t iova; + + inlen += npages * DEVX_FLD_SZ_BYTES(manage_pages_in, pas[0]); + in = calloc(1, inlen); + if (!in) { + errno = ENOMEM; + return errno; + } + + for (i = 0; i < npages; i++) { + err = mlx5_vfio_alloc_page(ctx, &iova); + if (err) + goto err; + + DEVX_ARRAY_SET64(manage_pages_in, in, pas, i, iova); + } + + DEVX_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + DEVX_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); + DEVX_SET(manage_pages_in, in, function_id, func_id); + DEVX_SET(manage_pages_in, in, input_num_entries, npages); + + err = mlx5_vfio_cmd_exec(ctx, in, inlen, out, sizeof(out), + MLX5_MAX_COMMANDS - 1); + if (!err) + goto end; +err: + for (i--; i >= 0; i--) + mlx5_vfio_free_page(ctx, DEVX_GET64(manage_pages_in, in, pas[i])); +end: + free(in); + return err; +} + +static int mlx5_vfio_query_pages(struct mlx5_vfio_context *ctx, int boot, + uint16_t *func_id, int32_t *npages) +{ + uint32_t query_pages_in[DEVX_ST_SZ_DW(query_pages_in)] = {}; + uint32_t query_pages_out[DEVX_ST_SZ_DW(query_pages_out)] = {}; + int ret; + + DEVX_SET(query_pages_in, query_pages_in, opcode, MLX5_CMD_OP_QUERY_PAGES); + DEVX_SET(query_pages_in, query_pages_in, op_mod, boot ? 0x01 : 0x02); + + ret = mlx5_vfio_cmd_exec(ctx, query_pages_in, sizeof(query_pages_in), + query_pages_out, sizeof(query_pages_out), 0); + if (ret) + return ret; + + *npages = DEVX_GET(query_pages_out, query_pages_out, num_pages); + *func_id = DEVX_GET(query_pages_out, query_pages_out, function_id); + + return 0; +} + +static int mlx5_vfio_satisfy_startup_pages(struct mlx5_vfio_context *ctx, + int boot) +{ + uint16_t function_id; + int32_t npages = 0; + int ret; + + ret = mlx5_vfio_query_pages(ctx, boot, &function_id, &npages); + if (ret) + return ret; + + return mlx5_vfio_give_pages(ctx, function_id, npages); +} + +static int mlx5_vfio_access_reg(struct mlx5_vfio_context *ctx, void *data_in, + int size_in, void *data_out, int size_out, + uint16_t reg_id, int arg, int write) +{ + int outlen = DEVX_ST_SZ_BYTES(access_register_out) + size_out; + int inlen = DEVX_ST_SZ_BYTES(access_register_in) + size_in; + int err = ENOMEM; + uint32_t *out = NULL; + uint32_t *in = NULL; + void *data; + + in = calloc(1, inlen); + out = calloc(1, outlen); + if (!in || !out) { + errno = ENOMEM; + goto out; + } + + data = DEVX_ADDR_OF(access_register_in, in, register_data); + memcpy(data, data_in, size_in); + + DEVX_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG); + DEVX_SET(access_register_in, in, op_mod, !write); + DEVX_SET(access_register_in, in, argument, arg); + DEVX_SET(access_register_in, in, register_id, reg_id); + + err = mlx5_vfio_cmd_exec(ctx, in, inlen, out, outlen, 0); + if (err) + goto out; + + data = DEVX_ADDR_OF(access_register_out, out, register_data); + memcpy(data_out, data, size_out); + +out: + free(out); + free(in); + return err; +} + +static int mlx5_vfio_set_hca_ctrl(struct mlx5_vfio_context *ctx) +{ + struct mlx5_reg_host_endianness he_in = {}; + struct mlx5_reg_host_endianness he_out = {}; + + he_in.he = MLX5_SET_HOST_ENDIANNESS; + return mlx5_vfio_access_reg(ctx, &he_in, sizeof(he_in), + &he_out, sizeof(he_out), + MLX5_REG_HOST_ENDIANNESS, 0, 1); +} + +static int mlx5_vfio_init_hca(struct mlx5_vfio_context *ctx) +{ + uint32_t in[DEVX_ST_SZ_DW(init_hca_in)] = {}; + uint32_t out[DEVX_ST_SZ_DW(init_hca_out)] = {}; + + DEVX_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); + return mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0); +} + +static int fw_initializing(struct mlx5_init_seg *init_seg) +{ + return be32toh(init_seg->initializing) >> 31; +} + +static int wait_fw_init(struct mlx5_init_seg *init_seg, uint32_t max_wait_mili) +{ + int num_loops = max_wait_mili / FW_INIT_WAIT_MS; + int loop = 0; + + while (fw_initializing(init_seg)) { + usleep(FW_INIT_WAIT_MS * 1000); + loop++; + if (loop == num_loops) { + errno = EBUSY; + return errno; + } + } + + return 0; +} + +static int mlx5_vfio_teardown_hca(struct mlx5_vfio_context *ctx) +{ + uint32_t in[DEVX_ST_SZ_DW(teardown_hca_in)] = {}; + uint32_t out[DEVX_ST_SZ_DW(teardown_hca_out)] = {}; + + DEVX_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + DEVX_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE); + return mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0); +} + +static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx) +{ + int err; + + err = wait_fw_init(ctx->bar_map, FW_PRE_INIT_TIMEOUT_MILI); + if (err) + return err; + + err = mlx5_vfio_enable_hca(ctx); + if (err) + return err; + + err = mlx5_vfio_set_issi(ctx); + if (err) + return err; + + err = mlx5_vfio_satisfy_startup_pages(ctx, 1); + if (err) + return err; + + err = mlx5_vfio_set_hca_ctrl(ctx); + if (err) + return err; + + err = mlx5_vfio_satisfy_startup_pages(ctx, 0); + if (err) + return err; + + err = mlx5_vfio_init_hca(ctx); + if (err) + return err; + + return 0; +} + static void mlx5_vfio_uninit_context(struct mlx5_vfio_context *ctx) { mlx5_close_debug_file(ctx->dbg_fp); @@ -1000,6 +1240,7 @@ static void mlx5_vfio_free_context(struct ibv_context *ibctx) { struct mlx5_vfio_context *ctx = to_mvfio_ctx(ibctx); + mlx5_vfio_teardown_hca(ctx); mlx5_vfio_clean_cmd_interface(ctx); mlx5_vfio_clean_device_dma(ctx); mlx5_vfio_uninit_bar0(ctx); @@ -1040,9 +1281,14 @@ mlx5_vfio_alloc_context(struct ibv_device *ibdev, if (mlx5_vfio_init_cmd_interface(mctx)) goto err_dma; + if (mlx5_vfio_setup_function(mctx)) + goto clean_cmd; + verbs_set_ops(&mctx->vctx, &mlx5_vfio_common_ops); return &mctx->vctx; +clean_cmd: + mlx5_vfio_clean_cmd_interface(mctx); err_dma: mlx5_vfio_clean_device_dma(mctx); err_bar: diff --git a/providers/mlx5/mlx5_vfio.h b/providers/mlx5/mlx5_vfio.h index 392ddcb..36b1f40 100644 --- a/providers/mlx5/mlx5_vfio.h +++ b/providers/mlx5/mlx5_vfio.h @@ -13,6 +13,9 @@ #include <infiniband/driver.h> #include <util/interval_set.h> +#define FW_INIT_WAIT_MS 2 +#define FW_PRE_INIT_TIMEOUT_MILI 120000 + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, @@ -32,6 +35,19 @@ struct mlx5_vfio_device { uint32_t flags; }; +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define MLX5_SET_HOST_ENDIANNESS 0 +#elif __BYTE_ORDER == __BIG_ENDIAN +#define MLX5_SET_HOST_ENDIANNESS 0x80 +#else +#error Host endianness not defined +#endif + +struct mlx5_reg_host_endianness { + uint8_t he; + uint8_t rsvd[15]; +}; + struct health_buffer { __be32 assert_var[5]; __be32 rsvd0[3]; diff --git a/providers/mlx5/mlx5dv.h b/providers/mlx5/mlx5dv.h index e657527..6aaea37 100644 --- a/providers/mlx5/mlx5dv.h +++ b/providers/mlx5/mlx5dv.h @@ -1687,6 +1687,10 @@ static inline uint64_t _devx_get64(const void *p, size_t bit_off) #define DEVX_GET64(typ, p, fld) _devx_get64(p, __devx_bit_off(typ, fld)) +#define DEVX_ARRAY_SET64(typ, p, fld, idx, v) do { \ + DEVX_SET64(typ, p, fld[idx], v); \ +} while (0) + struct mlx5dv_dr_domain; struct mlx5dv_dr_table; struct mlx5dv_dr_matcher; -- 1.8.3.1