On 05-Dec-18 23:52, Hefty, Sean wrote:
>> +int efa_query_device(struct ib_device *ibdev,
>> +                     struct ib_device_attr *props,
>> +                     struct ib_udata *udata)
>> +{
>> +        struct efa_ibv_ex_query_device_resp resp = {};
>> +        struct efa_com_get_device_attr_result result;
>> +        struct efa_dev *dev = to_edev(ibdev);
>> +        int err;
>> +
>> +        pr_debug("--->\n");
>> +        memset(props, 0, sizeof(*props));
>> +
>> +        if (udata && udata->inlen &&
>> +            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
>> +                pr_err_ratelimited("Incompatible ABI params, udata not cleared\n");
>> +                return -EINVAL;
>> +        }
>> +
>> +        err = efa_get_device_attributes(dev, &result);
>> +        if (err) {
>> +                pr_err("failed to get device_attr err[%d]!\n", err);
>> +                return err;
>> +        }
>> +
>> +        props->max_mr_size = result.max_mr_pages * PAGE_SIZE;
>> +        props->page_size_cap = result.page_size_cap;
>> +        props->vendor_id = result.vendor_id;
>> +        props->vendor_part_id = result.vendor_part_id;
>> +        props->hw_ver = dev->pdev->subsystem_device;
>> +        props->max_qp = result.max_sq;
>> +        props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
>> +                                  IB_DEVICE_VIRTUAL_FUNCTION |
>> +                                  IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
>
> Does this mean that SRD supports multicast?  Or is this only for UD?

No, will remove.

>> +        props->max_cq = result.max_cq;
>> +        props->max_pd = result.max_pd;
>> +        props->max_mr = result.max_mr;
>> +        props->max_ah = result.max_ah;
>> +        props->max_cqe = result.max_cq_depth;
>> +        props->max_qp_wr = min_t(u16, result.max_sq_depth,
>> +                                 result.max_rq_depth);
>> +        props->max_send_sge = result.max_sq_sge;
>> +        props->max_recv_sge = result.max_rq_sge;
>> +
>> +        if (udata && udata->outlen) {
>> +                resp.sub_cqs_per_cq = result.sub_cqs_per_cq;
>> +                resp.max_sq_sge = result.max_sq_sge;
>> +                resp.max_rq_sge = result.max_rq_sge;
>> +                resp.max_sq_wr = result.max_sq_depth;
>> +                resp.max_rq_wr = result.max_rq_depth;
>> +                resp.max_inline_data = result.inline_buf_size;
>> +
>> +                err = ib_copy_to_udata(udata, &resp,
>> +                                       min(sizeof(resp), udata->outlen));
>> +                if (err) {
>> +                        pr_err_ratelimited("failed to copy udata for query_device.\n");
>> +                        return err;
>> +                }
>> +        }
>> +
>> +        return err;
>> +}
>> +
>> +int efa_query_port(struct ib_device *ibdev, u8 port,
>> +                   struct ib_port_attr *props)
>> +{
>> +        struct efa_dev *dev = to_edev(ibdev);
>> +
>> +        pr_debug("--->\n");
>> +
>> +        mutex_lock(&dev->efa_dev_lock);
>> +        memset(props, 0, sizeof(*props));
>> +
>> +        props->lid = 0;
>> +        props->lmc = 1;
>> +        props->sm_lid = 0;
>> +        props->sm_sl = 0;
>> +
>> +        props->state = IB_PORT_ACTIVE;
>
> Is there no way to determine the current port state?

Not currently; it will be reflected through an event from the device in
the future.
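
Something along these lines (a rough sketch only; efa_handle_port_event(),
dev->port_state and the device-side plumbing are hypothetical, nothing
here is part of this series):

static void efa_handle_port_event(struct efa_dev *dev, bool active)
{
        struct ib_event event = {};

        /* Cache the state so efa_query_port() can report it instead
         * of hardcoding IB_PORT_ACTIVE.
         */
        dev->port_state = active ? IB_PORT_ACTIVE : IB_PORT_DOWN;

        event.device = &dev->ibdev;
        event.element.port_num = 1;
        event.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
        ib_dispatch_event(&event);
}

The handler would run from the async event path once the device starts
reporting link state changes.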
>> +        props->phys_state = 5;
>> +        props->port_cap_flags = 0;
>> +        props->gid_tbl_len = 1;
>> +        props->pkey_tbl_len = 1;
>> +        props->bad_pkey_cntr = 0;
>> +        props->qkey_viol_cntr = 0;
>> +        props->active_speed = IB_SPEED_EDR;
>> +        props->active_width = IB_WIDTH_4X;
>> +        props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
>> +        props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
>> +        props->max_msg_sz = dev->mtu;
>> +        props->max_vl_num = 1;
>> +        mutex_unlock(&dev->efa_dev_lock);
>> +        return 0;
>> +}
>> +
>> +int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
>> +                 int qp_attr_mask,
>> +                 struct ib_qp_init_attr *qp_init_attr)
>> +{
>> +        struct efa_qp *qp = to_eqp(ibqp);
>> +
>> +        pr_debug("--->\n");
>> +
>> +        memset(qp_attr, 0, sizeof(*qp_attr));
>> +        memset(qp_init_attr, 0, sizeof(*qp_init_attr));
>> +
>> +        qp_attr->qp_state = qp->state;
>> +        qp_attr->cur_qp_state = qp->state;
>> +        qp_attr->port_num = 1;
>> +
>> +        qp_init_attr->qp_type = ibqp->qp_type;
>> +        qp_init_attr->recv_cq = ibqp->recv_cq;
>> +        qp_init_attr->send_cq = ibqp->send_cq;
>> +
>> +        return 0;
>> +}
>> +
>> +int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
>> +                  union ib_gid *gid)
>> +{
>> +        struct efa_dev *dev = to_edev(ibdev);
>> +
>> +        pr_debug("port %d gid index %d\n", port, index);
>> +
>> +        if (index > 1)
>> +                return -EINVAL;
>> +
>> +        mutex_lock(&dev->efa_dev_lock);
>> +        memcpy(gid->raw, dev->addr, sizeof(dev->addr));
>> +        mutex_unlock(&dev->efa_dev_lock);
>> +
>> +        return 0;
>> +}
>> +
>> +int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
>> +                   u16 *pkey)
>> +{
>> +        pr_debug("--->\n");
>> +        if (index > 1)
>> +                return -EINVAL;
>> +
>> +        *pkey = 0xffff;
>> +        return 0;
>> +}
>> +
>> +struct ib_pd *efa_alloc_pd(struct ib_device *ibdev,
>> +                           struct ib_ucontext *ibucontext,
>> +                           struct ib_udata *udata)
>> +{
>> +        struct efa_ibv_alloc_pd_resp resp = {};
>> +        struct efa_dev *dev = to_edev(ibdev);
>> +        struct efa_pd *pd;
>> +        int err;
>> +
>> +        pr_debug("--->\n");
>> +
>> +        if (!ibucontext) {
>> +                pr_err("ibucontext is not valid\n");
>> +                return ERR_PTR(-EOPNOTSUPP);
>> +        }
>> +
>> +        if (udata && udata->inlen &&
>> +            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
>> +                pr_err_ratelimited("Incompatible ABI params, udata not cleared\n");
>> +                return ERR_PTR(-EINVAL);
>> +        }
>> +
>> +        pd = kzalloc(sizeof(*pd), GFP_KERNEL);
>> +        if (!pd) {
>> +                dev->stats.sw_stats.alloc_pd_alloc_err++;
>> +                return ERR_PTR(-ENOMEM);
>> +        }
>> +
>> +        pd->pdn = efa_bitmap_alloc(&dev->pd_bitmap);
>> +        if (pd->pdn == EFA_BITMAP_INVAL) {
>> +                pr_err("Failed to alloc PD (max_pd %u)\n", dev->caps.max_pd);
>> +                dev->stats.sw_stats.alloc_pd_bitmap_full_err++;
>> +                kfree(pd);
>> +                return ERR_PTR(-ENOMEM);
>> +        }
>> +
>> +        resp.pdn = pd->pdn;
>> +
>> +        if (udata && udata->outlen) {
>> +                err = ib_copy_to_udata(udata, &resp,
>> +                                       min(sizeof(resp), udata->outlen));
>> +                if (err) {
>> +                        pr_err_ratelimited("failed to copy udata for alloc_pd\n");
>> +                        efa_bitmap_free(&dev->pd_bitmap, pd->pdn);
>> +                        kfree(pd);
>> +                        return ERR_PTR(err);
>> +                }
>> +        }
>> +
>> +        pr_debug("Allocated pd[%d]\n", pd->pdn);
>> +
>> +        return &pd->ibpd;
>> +}
>
> Is the PD purely a software construct?

The PD number is allocated by the software; it is passed to the device
on resource allocation and enforced by the device.
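
To make that concrete (a sketch only; efa_com_create_qp() and the
params layout are illustrative assumptions, not the actual admin ABI):

/* The pdn chosen from dev->pd_bitmap is carried in the admin command
 * of every resource created on that PD; the device validates it on
 * each access, so enforcement is in hardware even though allocation
 * is in software.
 */
static int efa_create_qp_cmd(struct efa_dev *dev, struct efa_pd *pd,
                             struct efa_com_create_qp_params *params,
                             struct efa_com_create_qp_result *result)
{
        params->pd = pd->pdn;
        return efa_com_create_qp(&dev->edev, params, result);
}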
>> +static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
>> +                                  int vector, struct ib_ucontext *ibucontext,
>> +                                  struct ib_udata *udata)
>> +{
>> +        struct efa_ibv_create_cq_resp resp = {};
>> +        struct efa_com_create_cq_params params;
>> +        struct efa_com_create_cq_result result;
>> +        struct efa_dev *dev = to_edev(ibdev);
>> +        struct efa_ibv_create_cq cmd = {};
>> +        struct efa_cq *cq;
>> +        int err;
>> +
>> +        pr_debug("entries %d udata %p\n", entries, udata);
>> +
>> +        if (entries < 1 || entries > dev->caps.max_cq_depth) {
>> +                pr_err("cq: requested entries[%u] non-positive or greater than max[%u]\n",
>> +                       entries, dev->caps.max_cq_depth);
>> +                return ERR_PTR(-EINVAL);
>> +        }
>> +
>> +        if (!ibucontext) {
>> +                pr_err("context is not valid ");
>> +                return ERR_PTR(-EOPNOTSUPP);
>> +        }
>> +
>> +        if (!udata || !field_avail(cmd, num_sub_cqs, udata->inlen)) {
>> +                pr_err_ratelimited("Incompatible ABI params, no input udata\n");
>> +                return ERR_PTR(-EINVAL);
>> +        }
>> +
>> +        if (udata->inlen > sizeof(cmd) &&
>> +            !ib_is_udata_cleared(udata, sizeof(cmd),
>> +                                 udata->inlen - sizeof(cmd))) {
>> +                pr_err_ratelimited("Incompatible ABI params, unknown fields in udata\n");
>> +                return ERR_PTR(-EINVAL);
>> +        }
>> +
>> +        err = ib_copy_from_udata(&cmd, udata,
>> +                                 min(sizeof(cmd), udata->inlen));
>> +        if (err) {
>> +                pr_err_ratelimited("%s: cannot copy udata for create_cq\n",
>> +                                   dev_name(&dev->ibdev.dev));
>> +                return ERR_PTR(err);
>> +        }
>> +
>> +        if (cmd.comp_mask || !EFA_IS_RESERVED_CLEARED(cmd.reserved_50)) {
>> +                pr_err_ratelimited("Incompatible ABI params, unknown fields in udata\n");
>> +                return ERR_PTR(-EINVAL);
>> +        }
>> +
>> +        if (!cmd.cq_entry_size) {
>> +                pr_err("invalid entry size [%u]\n", cmd.cq_entry_size);
>> +                return ERR_PTR(-EINVAL);
>> +        }
>> +
>> +        if (cmd.num_sub_cqs != dev->caps.sub_cqs_per_cq) {
>> +                pr_err("invalid number of sub cqs[%u] expected[%u]\n",
>> +                       cmd.num_sub_cqs, dev->caps.sub_cqs_per_cq);
>> +                return ERR_PTR(-EINVAL);
>> +        }
>> +
>> +        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
>> +        if (!cq) {
>> +                dev->stats.sw_stats.create_cq_alloc_err++;
>> +                return ERR_PTR(-ENOMEM);
>> +        }
>> +
>> +        memset(&resp, 0, sizeof(resp));
>> +        cq->ucontext = to_eucontext(ibucontext);
>> +        cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
>> +        cq->cpu_addr = dma_zalloc_coherent(&dev->pdev->dev,
>> +                                           cq->size, &cq->dma_addr,
>> +                                           GFP_KERNEL);
>> +        if (!cq->cpu_addr) {
>> +                dev->stats.sw_stats.create_cq_alloc_err++;
>
> Is there a reason why this error counter only tracks alloc failures?

No particular reason, will cover more failures.

> - Sean
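
For example, something like this in the next revision
(create_cq_udata_err and the err_destroy_cq label are hypothetical
names, just to show the direction):

        if (udata->outlen) {
                err = ib_copy_to_udata(udata, &resp,
                                       min(sizeof(resp), udata->outlen));
                if (err) {
                        /* count copy failures separately from
                         * allocation failures
                         */
                        dev->stats.sw_stats.create_cq_udata_err++;
                        goto err_destroy_cq;
                }
        }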