On Wed, Mar 21, 2018 at 7:39 AM, Leon Romanovsky <leon@xxxxxxxxxx> wrote: > From: Ariel Levkovich <lariel@xxxxxxxxxxxx> > > Adding MEMIC (device memory) allocation/deallocation for supporting > Mellanox devices. > > The device capability to allocate device memory is reported by the > device firmware in the general capabilities structure and the > specific device memory capabilities are queried via a new capabilities > type named device memory capabilities. > > The allocation/deallocation is performed by 2 new firmware commands > which are posted to the device's firwmare. > > mlx5_core exports the allocation/deallocation functions for use by > its clients. > > Signed-off-by: Ariel Levkovich <lariel@xxxxxxxxxxxx> > Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> > --- > drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 110 ++++++++++++++++++++++++ > drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 ++ > drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + > include/linux/mlx5/device.h | 17 ++++ > include/linux/mlx5/driver.h | 8 ++ > include/linux/mlx5/mlx5_ifc.h | 75 +++++++++++++++- > 6 files changed, 216 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c > index 323ffe8bf7e4..ab1eac6b6ae3 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c > @@ -309,3 +309,113 @@ void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) > pas[i] = cpu_to_be64(buf->frags[i].map); > } > EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array); > + > +int mlx5_core_alloc_memic(struct mlx5_core_dev *dev, phys_addr_t *addr, > + u64 length, u32 alignment) > +{ > + u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size) > + >> PAGE_SHIFT; > + u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); > + u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment); > + u32 num_pages = 
DIV_ROUND_UP(length, PAGE_SIZE); > + u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {}; > + u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {}; > + struct mlx5_priv *priv = &dev->priv; > + u32 mlx5_alignment; > + u64 page_idx = 0; > + int ret = 0; > + > + mlx5_core_dbg(dev, "alloc_memic req: length=0x%llx log_alignment=%d\n", > + length, alignment); > + > + if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK)) > + return -EINVAL; > + > + /* mlx5 device sets alignment as 64*2^driver_value > + * so normalizing is needed. > + */ > + mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 : > + alignment - MLX5_MEMIC_BASE_ALIGN; > + if (mlx5_alignment > max_alignment) > + return -EINVAL; > + > + MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC); > + MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE); > + MLX5_SET(alloc_memic_in, in, memic_size, length); > + MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment, > + mlx5_alignment); > + > + do { > + spin_lock(&dev->priv.memic_lock); > + page_idx = bitmap_find_next_zero_area(priv->memic_alloc_pages, > + num_memic_hw_pages, > + page_idx, > + num_pages, 0); > + > + if (page_idx + num_pages <= num_memic_hw_pages) > + bitmap_set(dev->priv.memic_alloc_pages, > + page_idx, num_pages); > + else > + ret = -ENOMEM; > + > + spin_unlock(&dev->priv.memic_lock); > + > + if (ret) > + return ret; > + > + MLX5_SET64(alloc_memic_in, in, range_start_addr, > + hw_start_addr + (page_idx * PAGE_SIZE)); > + > + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); > + if (ret) { > + spin_lock(&dev->priv.memic_lock); > + bitmap_clear(dev->priv.memic_alloc_pages, > + page_idx, num_pages); > + spin_unlock(&dev->priv.memic_lock); > + > + if (ret == -EAGAIN) { > + page_idx++; > + continue; > + } > + > + return ret; > + } > + > + *addr = pci_resource_start(dev->pdev, 0) + > + MLX5_GET64(alloc_memic_out, out, memic_start_addr); > + > + return ret; > + } while (page_idx < num_memic_hw_pages); > + > + return ret; > +} > 
+EXPORT_SYMBOL(mlx5_core_alloc_memic); > + > +int mlx5_core_dealloc_memic(struct mlx5_core_dev *dev, u64 addr, u64 length) > +{ > + u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); > + u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); > + u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; > + u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0}; > + u64 start_page_idx; > + int err; > + > + addr -= pci_resource_start(dev->pdev, 0); > + start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT; > + > + MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC); > + MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr); > + MLX5_SET(dealloc_memic_in, in, memic_size, length); > + > + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); > + > + if (!err) { > + spin_lock(&dev->priv.memic_lock); > + bitmap_clear(dev->priv.memic_alloc_pages, > + start_page_idx, num_pages); > + spin_unlock(&dev->priv.memic_lock); > + } > + > + return err; > +} > +EXPORT_SYMBOL(mlx5_core_dealloc_memic); No! This doesn't belong in drivers/net/ethernet/mellanox/mlx5/core/alloc.c. If you look carefully at the file, you won't find any FW commands in there, and this is intentional: this file is only for io/dma buffer allocation/management, no more. Please re-spin, and let me review any change to drivers/net/ethernet/mellanox/mlx5/core/* before upstreaming. 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c > index 9d11e92fb541..17ec55874714 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c > @@ -192,6 +192,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) > if (MLX5_CAP_GEN(dev, qcam_reg)) > mlx5_get_qcam_reg(dev); > > + if (MLX5_CAP_GEN(dev, device_memory)) { > + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM); > + if (err) > + return err; > + } > + > return 0; > } > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c > index 08c33657677c..b005f2166813 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c > @@ -1370,6 +1370,7 @@ static int init_one(struct pci_dev *pdev, > > INIT_LIST_HEAD(&priv->ctx_list); > spin_lock_init(&priv->ctx_lock); > + spin_lock_init(&priv->memic_lock); > mutex_init(&dev->pci_status_mutex); > mutex_init(&dev->intf_state_mutex); > > diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h > index 413df3c11a46..6a58a1f54e46 100644 > --- a/include/linux/mlx5/device.h > +++ b/include/linux/mlx5/device.h > @@ -161,6 +161,11 @@ __mlx5_mask16(typ, fld)) > tmp; \ > }) > > +enum { > + MLX5_MAX_MEMIC_PAGES = 0x100, > + MLX5_MEMIC_ALLOC_SIZE_MASK = 0x3f, > +}; > + > enum mlx5_inline_modes { > MLX5_INLINE_MODE_NONE, > MLX5_INLINE_MODE_L2, > @@ -1013,6 +1018,7 @@ enum mlx5_cap_type { > MLX5_CAP_RESERVED, > MLX5_CAP_VECTOR_CALC, > MLX5_CAP_QOS, > + MLX5_CAP_DEV_MEM, > /* NUM OF CAP Types */ > MLX5_CAP_NUM > }; > @@ -1161,6 +1167,12 @@ enum mlx5_qcam_feature_groups { > #define MLX5_CAP64_FPGA(mdev, cap) \ > MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) > > +#define MLX5_CAP_DEV_MEM(mdev, cap)\ > + MLX5_GET(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap) > + > +#define MLX5_CAP64_DEV_MEM(mdev, cap)\ > + MLX5_GET64(device_mem_cap, 
mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap) > + > enum { > MLX5_CMD_STAT_OK = 0x0, > MLX5_CMD_STAT_INT_ERR = 0x1, > @@ -1211,4 +1223,9 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) > MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ > MLX5_BY_PASS_NUM_MULTICAST_PRIOS) > > +enum { > + MLX5_MEMIC_BASE_ALIGN = 6, > + MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, > +}; > + > #endif /* MLX5_DEVICE_H */ > diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h > index 767d193c269a..07293cf309ec 100644 > --- a/include/linux/mlx5/driver.h > +++ b/include/linux/mlx5/driver.h > @@ -46,6 +46,7 @@ > #include <linux/mempool.h> > #include <linux/interrupt.h> > #include <linux/idr.h> > +#include <linux/bitmap.h> > > #include <linux/mlx5/device.h> > #include <linux/mlx5/doorbell.h> > @@ -680,6 +681,10 @@ struct mlx5_priv { > spinlock_t mkey_lock; > u8 mkey_key; > > + /* memic page allocation bitmap */ > + spinlock_t memic_lock; > + DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); > + > struct list_head dev_list; > struct list_head ctx_list; > spinlock_t ctx_lock; > @@ -1059,6 +1064,9 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, > u32 *out, int outlen); > int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, > u32 *mkey); > +int mlx5_core_alloc_memic(struct mlx5_core_dev *dev, phys_addr_t *address, > + u64 length, u32 alignment); > +int mlx5_core_dealloc_memic(struct mlx5_core_dev *dev, u64 addr, u64 length); > int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); > int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); > int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, > diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h > index c63bbdc35503..34a931f72d7c 100644 > --- a/include/linux/mlx5/mlx5_ifc.h > +++ b/include/linux/mlx5/mlx5_ifc.h > @@ -92,6 +92,8 @@ enum { > MLX5_CMD_OP_DESTROY_MKEY = 0x202, > 
MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203, > MLX5_CMD_OP_PAGE_FAULT_RESUME = 0x204, > + MLX5_CMD_OP_ALLOC_MEMIC = 0x205, > + MLX5_CMD_OP_DEALLOC_MEMIC = 0x206, > MLX5_CMD_OP_CREATE_EQ = 0x301, > MLX5_CMD_OP_DESTROY_EQ = 0x302, > MLX5_CMD_OP_QUERY_EQ = 0x303, > @@ -658,6 +660,24 @@ struct mlx5_ifc_roce_cap_bits { > u8 reserved_at_100[0x700]; > }; > > +struct mlx5_ifc_device_mem_cap_bits { > + u8 memic[0x1]; > + u8 reserved_at_1[0x1f]; > + > + u8 reserved_at_20[0xb]; > + u8 log_min_memic_alloc_size[0x5]; > + u8 reserved_at_30[0x8]; > + u8 log_max_memic_addr_alignment[0x8]; > + > + u8 memic_bar_start_addr[0x40]; > + > + u8 memic_bar_size[0x20]; > + > + u8 max_memic_size[0x20]; > + > + u8 reserved_at_c0[0x740]; > +}; > + > enum { > MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, > MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, > @@ -872,7 +892,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { > u8 ets[0x1]; > u8 nic_flow_table[0x1]; > u8 eswitch_flow_table[0x1]; > - u8 early_vf_enable[0x1]; > + u8 device_memory[0x1]; > u8 mcam_reg[0x1]; > u8 pcam_reg[0x1]; > u8 local_ca_ack_delay[0x5]; > @@ -8864,4 +8884,57 @@ struct mlx5_ifc_destroy_vport_lag_in_bits { > u8 reserved_at_40[0x40]; > }; > > +struct mlx5_ifc_alloc_memic_in_bits { > + u8 opcode[0x10]; > + u8 reserved_at_10[0x10]; > + > + u8 reserved_at_20[0x10]; > + u8 op_mod[0x10]; > + > + u8 reserved_at_30[0x20]; > + > + u8 reserved_at_40[0x18]; > + u8 log_memic_addr_alignment[0x8]; > + > + u8 range_start_addr[0x40]; > + > + u8 range_size[0x20]; > + > + u8 memic_size[0x20]; > +}; > + > +struct mlx5_ifc_alloc_memic_out_bits { > + u8 status[0x8]; > + u8 reserved_at_8[0x18]; > + > + u8 syndrome[0x20]; > + > + u8 memic_start_addr[0x40]; > +}; > + > +struct mlx5_ifc_dealloc_memic_in_bits { > + u8 opcode[0x10]; > + u8 reserved_at_10[0x10]; > + > + u8 reserved_at_20[0x10]; > + u8 op_mod[0x10]; > + > + u8 reserved_at_40[0x40]; > + > + u8 memic_start_addr[0x40]; > + > + u8 memic_size[0x20]; > + > + u8 reserved_at_e0[0x20]; > +}; > + > 
+struct mlx5_ifc_dealloc_memic_out_bits { > + u8 status[0x8]; > + u8 reserved_at_8[0x18]; > + > + u8 syndrome[0x20]; > + > + u8 reserved_at_40[0x40]; > +}; > + > #endif /* MLX5_IFC_H */ > -- > 2.14.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html