Re: [PATCH for-next v3] RDMA/efa: CQ notifications

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 01/10/2021 2:31, Leon Romanovsky wrote:
> On Thu, Sep 30, 2021 at 03:16:00PM +0300, Gal Pressman wrote:
>> This patch adds support for CQ notifications through the standard verbs
>> api.
>>
>> In order to achieve that, a new event queue (EQ) object is introduced,
>> which is in charge of reporting completion events to the driver.
>> On driver load, EQs are allocated and their affinity is set to a single
>> cpu. When a user app creates a CQ with a completion channel, the
>> completion vector number is converted to an EQ number, which is in charge
>> of reporting the CQ events.
>>
>> In addition, the CQ creation admin command now returns an offset for the
>> CQ doorbell, which is mapped to the userspace provider and is used to
>> arm the CQ when requested by the user.
>>
>> The EQs use a single doorbell (located on the registers BAR), which
>> encodes the EQ number and arm as part of the doorbell value.
>> The driver polls an EQ on each new EQE, and arms it when the
>> poll is completed.
>>
>> Reviewed-by: Firas JahJah <firasj@xxxxxxxxxx>
>> Reviewed-by: Yossi Leybovich <sleybo@xxxxxxxxxx>
>> Signed-off-by: Gal Pressman <galpress@xxxxxxxxxx>
>> ---
>> PR was sent:
>> https://github.com/linux-rdma/rdma-core/pull/1044
>>
>> Changelog -
>> v2->v3: https://lore.kernel.org/linux-rdma/20210913120406.61745-1-galpress@xxxxxxxxxx/
>> * Only store CQs with interrupts enabled in the CQs xarray
>> * Add a comment before the xa_load to explain why it is safe
>>
>> v1->v2: https://lore.kernel.org/linux-rdma/20210811151131.39138-1-galpress@xxxxxxxxxx/
>> * Replace xa_init_flags() with xa_init()
>> * Add a synchronize_irq() in destroy_cq flow to prevent a race with
>>   interrupt flow.
>> ---
>>  drivers/infiniband/hw/efa/efa.h               |  19 +-
>>  .../infiniband/hw/efa/efa_admin_cmds_defs.h   | 100 +++++++++-
>>  drivers/infiniband/hw/efa/efa_admin_defs.h    |  41 ++++
>>  drivers/infiniband/hw/efa/efa_com.c           | 171 ++++++++++++++++
>>  drivers/infiniband/hw/efa/efa_com.h           |  38 +++-
>>  drivers/infiniband/hw/efa/efa_com_cmd.c       |  35 +++-
>>  drivers/infiniband/hw/efa/efa_com_cmd.h       |  10 +-
>>  drivers/infiniband/hw/efa/efa_main.c          | 185 +++++++++++++++---
>>  drivers/infiniband/hw/efa/efa_regs_defs.h     |   7 +-
>>  drivers/infiniband/hw/efa/efa_verbs.c         |  67 ++++++-
>>  include/uapi/rdma/efa-abi.h                   |  18 +-
>>  11 files changed, 636 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
>> index 87b1dadeb7fe..587d4bfbb3d1 100644
>> --- a/drivers/infiniband/hw/efa/efa.h
>> +++ b/drivers/infiniband/hw/efa/efa.h
>> @@ -20,14 +20,14 @@
>>  
>>  #define EFA_IRQNAME_SIZE        40
>>  
>> -/* 1 for AENQ + ADMIN */
>> -#define EFA_NUM_MSIX_VEC                  1
>>  #define EFA_MGMNT_MSIX_VEC_IDX            0
>> +#define EFA_COMP_EQS_VEC_BASE             1
>>  
>>  struct efa_irq {
>>  	irq_handler_t handler;
>>  	void *data;
>>  	u32 irqn;
>> +	u32 vector;
>>  	cpumask_t affinity_hint_mask;
>>  	char name[EFA_IRQNAME_SIZE];
>>  };
>> @@ -61,6 +61,13 @@ struct efa_dev {
>>  	struct efa_irq admin_irq;
>>  
>>  	struct efa_stats stats;
>> +
>> +	/* Array of completion EQs */
>> +	struct efa_eq *eqs;
>> +	unsigned int neqs;
>> +
>> +	/* Only stores CQs with interrupts enabled */
>> +	struct xarray cqs_xa;
>>  };
>>  
>>  struct efa_ucontext {
>> @@ -84,8 +91,11 @@ struct efa_cq {
>>  	dma_addr_t dma_addr;
>>  	void *cpu_addr;
>>  	struct rdma_user_mmap_entry *mmap_entry;
>> +	struct rdma_user_mmap_entry *db_mmap_entry;
>>  	size_t size;
>>  	u16 cq_idx;
>> +	/* NULL  when no interrupts requested */
>> +	struct efa_eq *eq;
>>  };
>>  
>>  struct efa_qp {
>> @@ -116,6 +126,11 @@ struct efa_ah {
>>  	u8 id[EFA_GID_SIZE];
>>  };
>>  
>> +struct efa_eq {
>> +	struct efa_com_eq eeq;
>> +	struct efa_irq irq;
>> +};
>> +
>>  int efa_query_device(struct ib_device *ibdev,
>>  		     struct ib_device_attr *props,
>>  		     struct ib_udata *udata);
>> diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
>> index fa38b34eddb8..0b0b93b529f3 100644
>> --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
>> +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
>> @@ -28,7 +28,9 @@ enum efa_admin_aq_opcode {
>>  	EFA_ADMIN_DEALLOC_PD                        = 15,
>>  	EFA_ADMIN_ALLOC_UAR                         = 16,
>>  	EFA_ADMIN_DEALLOC_UAR                       = 17,
>> -	EFA_ADMIN_MAX_OPCODE                        = 17,
>> +	EFA_ADMIN_CREATE_EQ                         = 18,
>> +	EFA_ADMIN_DESTROY_EQ                        = 19,
>> +	EFA_ADMIN_MAX_OPCODE                        = 19,
>>  };
>>  
>>  enum efa_admin_aq_feature_id {
>> @@ -38,6 +40,7 @@ enum efa_admin_aq_feature_id {
>>  	EFA_ADMIN_QUEUE_ATTR                        = 4,
>>  	EFA_ADMIN_HW_HINTS                          = 5,
>>  	EFA_ADMIN_HOST_INFO                         = 6,
>> +	EFA_ADMIN_EVENT_QUEUE_ATTR                  = 7,
>>  };
>>  
>>  /* QP transport type */
>> @@ -430,8 +433,8 @@ struct efa_admin_create_cq_cmd {
>>  	/*
>>  	 * 4:0 : reserved5 - MBZ
>>  	 * 5 : interrupt_mode_enabled - if set, cq operates
>> -	 *    in interrupt mode (i.e. CQ events and MSI-X are
>> -	 *    generated), otherwise - polling
>> +	 *    in interrupt mode (i.e. CQ events and EQ elements
>> +	 *    are generated), otherwise - polling
>>  	 * 6 : virt - If set, ring base address is virtual
>>  	 *    (IOVA returned by MR registration)
>>  	 * 7 : reserved6 - MBZ
>> @@ -448,8 +451,11 @@ struct efa_admin_create_cq_cmd {
>>  	/* completion queue depth in # of entries. must be power of 2 */
>>  	u16 cq_depth;
>>  
>> -	/* msix vector assigned to this cq */
>> -	u32 msix_vector_idx;
>> +	/* EQ number assigned to this cq */
>> +	u16 eqn;
>> +
>> +	/* MBZ */
>> +	u16 reserved;
>>  
>>  	/*
>>  	 * CQ ring base address, virtual or physical depending on 'virt'
>> @@ -480,6 +486,15 @@ struct efa_admin_create_cq_resp {
>>  
>>  	/* actual cq depth in number of entries */
>>  	u16 cq_actual_depth;
>> +
>> +	/* CQ doorbell address, as offset to PCIe DB BAR */
>> +	u32 db_offset;
>> +
>> +	/*
>> +	 * 0 : db_valid - If set, doorbell offset is valid.
>> +	 *    Always set when interrupts are requested.
>> +	 */
>> +	u32 flags;
>>  };
>>  
>>  struct efa_admin_destroy_cq_cmd {
>> @@ -669,6 +684,17 @@ struct efa_admin_feature_queue_attr_desc {
>>  	u16 max_tx_batch;
>>  };
>>  
>> +struct efa_admin_event_queue_attr_desc {
>> +	/* The maximum number of event queues supported */
>> +	u32 max_eq;
>> +
>> +	/* Maximum number of EQEs per Event Queue */
>> +	u32 max_eq_depth;
>> +
>> +	/* Supported events bitmask */
>> +	u32 event_bitmask;
>> +};
>> +
>>  struct efa_admin_feature_aenq_desc {
>>  	/* bitmask for AENQ groups the device can report */
>>  	u32 supported_groups;
>> @@ -727,6 +753,8 @@ struct efa_admin_get_feature_resp {
>>  
>>  		struct efa_admin_feature_queue_attr_desc queue_attr;
>>  
>> +		struct efa_admin_event_queue_attr_desc event_queue_attr;
>> +
>>  		struct efa_admin_hw_hints hw_hints;
>>  	} u;
>>  };
>> @@ -810,6 +838,60 @@ struct efa_admin_dealloc_uar_resp {
>>  	struct efa_admin_acq_common_desc acq_common_desc;
>>  };
>>  
>> +struct efa_admin_create_eq_cmd {
>> +	struct efa_admin_aq_common_desc aq_common_descriptor;
>> +
>> +	/* Size of the EQ in entries, must be power of 2 */
>> +	u16 depth;
>> +
>> +	/* MSI-X table entry index */
>> +	u8 msix_vec;
>> +
>> +	/*
>> +	 * 4:0 : entry_size_words - size of EQ entry in
>> +	 *    32-bit words
>> +	 * 7:5 : reserved - MBZ
>> +	 */
>> +	u8 caps;
>> +
>> +	/* EQ ring base address */
>> +	struct efa_common_mem_addr ba;
>> +
>> +	/*
>> +	 * Enabled events on this EQ
>> +	 * 0 : completion_events - Enable completion events
>> +	 * 31:1 : reserved - MBZ
>> +	 */
>> +	u32 event_bitmask;
>> +
>> +	/* MBZ */
>> +	u32 reserved;
>> +};
>> +
>> +struct efa_admin_create_eq_resp {
>> +	struct efa_admin_acq_common_desc acq_common_desc;
>> +
>> +	/* EQ number */
>> +	u16 eqn;
>> +
>> +	/* MBZ */
>> +	u16 reserved;
>> +};
>> +
>> +struct efa_admin_destroy_eq_cmd {
>> +	struct efa_admin_aq_common_desc aq_common_descriptor;
>> +
>> +	/* EQ number */
>> +	u16 eqn;
>> +
>> +	/* MBZ */
>> +	u16 reserved;
>> +};
>> +
>> +struct efa_admin_destroy_eq_resp {
>> +	struct efa_admin_acq_common_desc acq_common_desc;
>> +};
>> +
>>  /* asynchronous event notification groups */
>>  enum efa_admin_aenq_group {
>>  	EFA_ADMIN_FATAL_ERROR                       = 1,
>> @@ -899,10 +981,18 @@ struct efa_admin_host_info {
>>  #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK                   BIT(6)
>>  #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK    GENMASK(4, 0)
>>  
>> +/* create_cq_resp */
>> +#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK              BIT(0)
>> +
>>  /* feature_device_attr_desc */
>>  #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK   BIT(0)
>>  #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK   BIT(1)
>>  
>> +/* create_eq_cmd */
>> +#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK       GENMASK(4, 0)
>> +#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK                   BIT(6)
>> +#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK      BIT(0)
>> +
>>  /* host_info */
>>  #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK         GENMASK(7, 0)
>>  #define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK           GENMASK(15, 8)
>> diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h
>> index 78ff9389ae25..83f20c38a840 100644
>> --- a/drivers/infiniband/hw/efa/efa_admin_defs.h
>> +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h
>> @@ -118,6 +118,43 @@ struct efa_admin_aenq_entry {
>>  	u32 inline_data_w4[12];
>>  };
>>  
>> +enum efa_admin_eqe_event_type {
>> +	EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION         = 0,
>> +};
>> +
>> +/* Completion event */
>> +struct efa_admin_comp_event {
>> +	/* CQ number */
>> +	u16 cqn;
>> +
>> +	/* MBZ */
>> +	u16 reserved;
>> +
>> +	/* MBZ */
>> +	u32 reserved2;
>> +};
>> +
>> +/* Event Queue Element */
>> +struct efa_admin_eqe {
>> +	/*
>> +	 * 0 : phase
>> +	 * 8:1 : event_type - Event type
>> +	 * 31:9 : reserved - MBZ
>> +	 */
>> +	u32 common;
>> +
>> +	/* MBZ */
>> +	u32 reserved;
>> +
>> +	union {
>> +		/* Event data */
>> +		u32 event_data[2];
>> +
>> +		/* Completion Event */
>> +		struct efa_admin_comp_event comp_event;
>> +	} u;
>> +};
>> +
>>  /* aq_common_desc */
>>  #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK            GENMASK(11, 0)
>>  #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK                 BIT(0)
>> @@ -131,4 +168,8 @@ struct efa_admin_aenq_entry {
>>  /* aenq_common_desc */
>>  #define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK               BIT(0)
>>  
>> +/* eqe */
>> +#define EFA_ADMIN_EQE_PHASE_MASK                            BIT(0)
>> +#define EFA_ADMIN_EQE_EVENT_TYPE_MASK                       GENMASK(8, 1)
>> +
>>  #endif /* _EFA_ADMIN_H_ */
>> diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
>> index 0d523ad736c7..c00c7f526067 100644
>> --- a/drivers/infiniband/hw/efa/efa_com.c
>> +++ b/drivers/infiniband/hw/efa/efa_com.c
>> @@ -56,11 +56,19 @@ static const char *efa_com_cmd_str(u8 cmd)
>>  	EFA_CMD_STR_CASE(DEALLOC_PD);
>>  	EFA_CMD_STR_CASE(ALLOC_UAR);
>>  	EFA_CMD_STR_CASE(DEALLOC_UAR);
>> +	EFA_CMD_STR_CASE(CREATE_EQ);
>> +	EFA_CMD_STR_CASE(DESTROY_EQ);
>>  	default: return "unknown command opcode";
>>  	}
>>  #undef EFA_CMD_STR_CASE
>>  }
>>  
>> +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low)
>> +{
>> +	*addr_low = lower_32_bits(addr);
>> +	*addr_high = upper_32_bits(addr);
>> +}
>> +
>>  static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
>>  {
>>  	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
>> @@ -1081,3 +1089,166 @@ int efa_com_dev_reset(struct efa_com_dev *edev,
>>  
>>  	return 0;
>>  }
>> +
>> +static int efa_com_create_eq(struct efa_com_dev *edev,
>> +			     struct efa_com_create_eq_params *params,
>> +			     struct efa_com_create_eq_result *result)
>> +{
>> +	struct efa_com_admin_queue *aq = &edev->aq;
>> +	struct efa_admin_create_eq_resp resp = {};
>> +	struct efa_admin_create_eq_cmd cmd = {};
>> +	int err;
>> +
>> +	cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ;
>> +	EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS,
>> +		params->entry_size_in_bytes / 4);
>> +	cmd.depth = params->depth;
>> +	cmd.event_bitmask = params->event_bitmask;
>> +	cmd.msix_vec = params->msix_vec;
>> +
>> +	efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high,
>> +			     &cmd.ba.mem_addr_low);
>> +
>> +	err = efa_com_cmd_exec(aq,
>> +			       (struct efa_admin_aq_entry *)&cmd,
>> +			       sizeof(cmd),
>> +			       (struct efa_admin_acq_entry *)&resp,
>> +			       sizeof(resp));
>> +	if (err) {
>> +		ibdev_err_ratelimited(edev->efa_dev,
>> +				      "Failed to create eq[%d]\n", err);
>> +		return err;
>> +	}
>> +
>> +	result->eqn = resp.eqn;
>> +
>> +	return 0;
>> +}
>> +
>> +static int efa_com_destroy_eq(struct efa_com_dev *edev,
>> +			      struct efa_com_destroy_eq_params *params)
>> +{
> 
> The single caller of this function is not interested in its return
> value. It is worth making it void from the beginning.

Thanks, will change.



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux