RE: [PATCH rdma-next 11/13] RDMA/efa: Add EFA verbs implementation

"Hefty, Sean" <sean.hefty@xxxxxxxxx> · Wed, 5 Dec 2018 21:52:28 +0000

> +int efa_query_device(struct ib_device *ibdev,
> +		     struct ib_device_attr *props,
> +		     struct ib_udata *udata)
> +{
> +	struct efa_ibv_ex_query_device_resp resp = {};
> +	struct efa_com_get_device_attr_result result;
> +	struct efa_dev *dev = to_edev(ibdev);
> +	int err;
> +
> +	pr_debug("--->\n");
> +	memset(props, 0, sizeof(*props));
> +
> +	if (udata && udata->inlen &&
> +	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
> +		pr_err_ratelimited("Incompatible ABI params, udata not cleared\n");
> +		return -EINVAL;
> +	}
> +
> +	err = efa_get_device_attributes(dev, &result);
> +	if (err) {
> +		pr_err("failed to get device_attr err[%d]!\n", err);
> +		return err;
> +	}
> +
> +	props->max_mr_size              = result.max_mr_pages * PAGE_SIZE;
> +	props->page_size_cap            = result.page_size_cap;
> +	props->vendor_id                = result.vendor_id;
> +	props->vendor_part_id           = result.vendor_part_id;
> +	props->hw_ver                   = dev->pdev->subsystem_device;
> +	props->max_qp                   = result.max_sq;
> +	props->device_cap_flags         = IB_DEVICE_PORT_ACTIVE_EVENT |
> +					  IB_DEVICE_VIRTUAL_FUNCTION |
> +					  IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

Does this mean that SRD supports multicast?  Or is this only for UD?

> +	props->max_cq                   = result.max_cq;
> +	props->max_pd                   = result.max_pd;
> +	props->max_mr                   = result.max_mr;
> +	props->max_ah                   = result.max_ah;
> +	props->max_cqe                  = result.max_cq_depth;
> +	props->max_qp_wr                = min_t(u16, result.max_sq_depth,
> +						result.max_rq_depth);
> +	props->max_send_sge             = result.max_sq_sge;
> +	props->max_recv_sge             = result.max_rq_sge;
> +
> +	if (udata && udata->outlen) {
> +		resp.sub_cqs_per_cq = result.sub_cqs_per_cq;
> +		resp.max_sq_sge = result.max_sq_sge;
> +		resp.max_rq_sge = result.max_rq_sge;
> +		resp.max_sq_wr  = result.max_sq_depth;
> +		resp.max_rq_wr  = result.max_rq_depth;
> +		resp.max_inline_data = result.inline_buf_size;
> +
> +		err = ib_copy_to_udata(udata, &resp,
> +				       min(sizeof(resp), udata->outlen));
> +		if (err) {
> +			pr_err_ratelimited("failed to copy udata for query_device.\n");
> +			return err;
> +		}
> +	}
> +
> +	return err;
> +}
> +
> +int efa_query_port(struct ib_device *ibdev, u8 port,
> +		   struct ib_port_attr *props)
> +{
> +	struct efa_dev *dev = to_edev(ibdev);
> +
> +	pr_debug("--->\n");
> +
> +	mutex_lock(&dev->efa_dev_lock);
> +	memset(props, 0, sizeof(*props));
> +
> +	props->lid = 0;
> +	props->lmc = 1;
> +	props->sm_lid = 0;
> +	props->sm_sl = 0;
> +
> +	props->state = IB_PORT_ACTIVE;

Is there no way to determine the current port state?

> +	props->phys_state = 5;
> +	props->port_cap_flags = 0;
> +	props->gid_tbl_len = 1;
> +	props->pkey_tbl_len = 1;
> +	props->bad_pkey_cntr = 0;
> +	props->qkey_viol_cntr = 0;
> +	props->active_speed = IB_SPEED_EDR;
> +	props->active_width = IB_WIDTH_4X;
> +	props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
> +	props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
> +	props->max_msg_sz = dev->mtu;
> +	props->max_vl_num = 1;
> +	mutex_unlock(&dev->efa_dev_lock);
> +	return 0;
> +}
> +
> +int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
> +		 int qp_attr_mask,
> +		 struct ib_qp_init_attr *qp_init_attr) {
> +	struct efa_qp *qp = to_eqp(ibqp);
> +
> +	pr_debug("--->\n");
> +
> +	memset(qp_attr, 0, sizeof(*qp_attr));
> +	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
> +
> +	qp_attr->qp_state = qp->state;
> +	qp_attr->cur_qp_state = qp->state;
> +	qp_attr->port_num = 1;
> +
> +	qp_init_attr->qp_type = ibqp->qp_type;
> +	qp_init_attr->recv_cq = ibqp->recv_cq;
> +	qp_init_attr->send_cq = ibqp->send_cq;
> +
> +	return 0;
> +}
> +
> +int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
> +		  union ib_gid *gid)
> +{
> +	struct efa_dev *dev = to_edev(ibdev);
> +
> +	pr_debug("port %d gid index %d\n", port, index);
> +
> +	if (index > 1)
> +		return -EINVAL;
> +
> +	mutex_lock(&dev->efa_dev_lock);
> +	memcpy(gid->raw, dev->addr, sizeof(dev->addr));
> +	mutex_unlock(&dev->efa_dev_lock);
> +
> +	return 0;
> +}
> +
> +int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
> +		   u16 *pkey)
> +{
> +	pr_debug("--->\n");
> +	if (index > 1)
> +		return -EINVAL;
> +
> +	*pkey = 0xffff;
> +	return 0;
> +}
> +
> +struct ib_pd *efa_alloc_pd(struct ib_device *ibdev,
> +			   struct ib_ucontext *ibucontext,
> +			   struct ib_udata *udata)
> +{
> +	struct efa_ibv_alloc_pd_resp resp = {};
> +	struct efa_dev *dev = to_edev(ibdev);
> +	struct efa_pd *pd;
> +	int err;
> +
> +	pr_debug("--->\n");
> +
> +	if (!ibucontext) {
> +		pr_err("ibucontext is not valid\n");
> +		return ERR_PTR(-EOPNOTSUPP);
> +	}
> +
> +	if (udata && udata->inlen &&
> +	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
> +		pr_err_ratelimited("Incompatible ABI params, udata not cleared\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
> +	if (!pd) {
> +		dev->stats.sw_stats.alloc_pd_alloc_err++;
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	pd->pdn = efa_bitmap_alloc(&dev->pd_bitmap);
> +	if (pd->pdn == EFA_BITMAP_INVAL) {
> +		pr_err("Failed to alloc PD (max_pd %u)\n", dev->caps.max_pd);
> +		dev->stats.sw_stats.alloc_pd_bitmap_full_err++;
> +		kfree(pd);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	resp.pdn = pd->pdn;
> +
> +	if (udata && udata->outlen) {
> +		err = ib_copy_to_udata(udata, &resp,
> +				       min(sizeof(resp), udata->outlen));
> +		if (err) {
> +			pr_err_ratelimited("failed to copy udata for alloc_pd\n");
> +			efa_bitmap_free(&dev->pd_bitmap, pd->pdn);
> +			kfree(pd);
> +			return ERR_PTR(err);
> +		}
> +	}
> +
> +	pr_debug("Allocated pd[%d]\n", pd->pdn);
> +
> +	return &pd->ibpd;
> +}

Is the PD purely a software construct?

> +static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
> +				  int vector, struct ib_ucontext *ibucontext,
> +				  struct ib_udata *udata)
> +{
> +	struct efa_ibv_create_cq_resp resp = {};
> +	struct efa_com_create_cq_params params;
> +	struct efa_com_create_cq_result result;
> +	struct efa_dev *dev = to_edev(ibdev);
> +	struct efa_ibv_create_cq cmd = {};
> +	struct efa_cq *cq;
> +	int err;
> +
> +	pr_debug("entries %d udata %p\n", entries, udata);
> +
> +	if (entries < 1 || entries > dev->caps.max_cq_depth) {
> +		pr_err("cq: requested entries[%u] non-positive or greater than max[%u]\n",
> +		       entries, dev->caps.max_cq_depth);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (!ibucontext) {
> +		pr_err("context is not valid ");
> +		return ERR_PTR(-EOPNOTSUPP);
> +	}
> +
> +	if (!udata || !field_avail(cmd, num_sub_cqs, udata->inlen)) {
> +		pr_err_ratelimited("Incompatible ABI params, no input udata\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (udata->inlen > sizeof(cmd) &&
> +	    !ib_is_udata_cleared(udata, sizeof(cmd),
> +				 udata->inlen - sizeof(cmd))) {
> +		pr_err_ratelimited("Incompatible ABI params, unknown fields in udata\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	err = ib_copy_from_udata(&cmd, udata,
> +				 min(sizeof(cmd), udata->inlen));
> +	if (err) {
> +		pr_err_ratelimited("%s: cannot copy udata for create_cq\n",
> +				   dev_name(&dev->ibdev.dev));
> +		return ERR_PTR(err);
> +	}
> +
> +	if (cmd.comp_mask || !EFA_IS_RESERVED_CLEARED(cmd.reserved_50)) {
> +		pr_err_ratelimited("Incompatible ABI params, unknown fields in udata\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (!cmd.cq_entry_size) {
> +		pr_err("invalid entry size [%u]\n", cmd.cq_entry_size);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (cmd.num_sub_cqs != dev->caps.sub_cqs_per_cq) {
> +		pr_err("invalid number of sub cqs[%u] expected[%u]\n",
> +		       cmd.num_sub_cqs, dev->caps.sub_cqs_per_cq);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
> +	if (!cq) {
> +		dev->stats.sw_stats.create_cq_alloc_err++;
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	memset(&resp, 0, sizeof(resp));
> +	cq->ucontext = to_eucontext(ibucontext);
> +	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
> +	cq->cpu_addr = dma_zalloc_coherent(&dev->pdev->dev,
> +					   cq->size, &cq->dma_addr,
> +					   GFP_KERNEL);
> +	if (!cq->cpu_addr) {
> +		dev->stats.sw_stats.create_cq_alloc_err++;

Is there a reason why this error counter only tracks alloc failures?

- Sean