Re: [PATCH 8/8] IB/srp: Add multichannel support

Ming Lei <ming.lei@xxxxxxxxxxxxx> · Fri, 19 Sep 2014 22:28:43 +0800

On Fri, Sep 19, 2014 at 9:00 PM, Bart Van Assche <bvanassche@xxxxxxx> wrote:
> Improve performance by using multiple RDMA/RC channels per SCSI host
> for communicating with an SRP target.
>
> Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
> ---
>  Documentation/ABI/stable/sysfs-driver-ib_srp |  25 +-
>  drivers/infiniband/ulp/srp/ib_srp.c          | 337 ++++++++++++++++++++-------
>  drivers/infiniband/ulp/srp/ib_srp.h          |  20 +-
>  3 files changed, 287 insertions(+), 95 deletions(-)
>
> diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp
> index b9688de..d5a459e 100644
> --- a/Documentation/ABI/stable/sysfs-driver-ib_srp
> +++ b/Documentation/ABI/stable/sysfs-driver-ib_srp
> @@ -55,12 +55,12 @@ Description:        Interface for making ib_srp connect to a new target.
>                   only safe with partial memory descriptor list support enabled
>                   (allow_ext_sg=1).
>                 * comp_vector, a number in the range 0..n-1 specifying the
> -                 MSI-X completion vector. Some HCA's allocate multiple (n)
> -                 MSI-X vectors per HCA port. If the IRQ affinity masks of
> -                 these interrupts have been configured such that each MSI-X
> -                 interrupt is handled by a different CPU then the comp_vector
> -                 parameter can be used to spread the SRP completion workload
> -                 over multiple CPU's.
> +                 MSI-X completion vector of the first RDMA channel. Some
> +                 HCA's allocate multiple (n) MSI-X vectors per HCA port. If
> +                 the IRQ affinity masks of these interrupts have been
> +                 configured such that each MSI-X interrupt is handled by a
> +                 different CPU then the comp_vector parameter can be used to
> +                 spread the SRP completion workload over multiple CPU's.
>                 * tl_retry_count, a number in the range 2..7 specifying the
>                   IB RC retry count.
>                 * queue_size, the maximum number of commands that the
> @@ -88,6 +88,13 @@ Description: Whether ib_srp is allowed to include a partial memory
>                 descriptor list in an SRP_CMD when communicating with an SRP
>                 target.
>
> +What:          /sys/class/scsi_host/host<n>/ch_count
> +Date:          November 1, 2014
> +KernelVersion: 3.18
> +Contact:       linux-rdma@xxxxxxxxxxxxxxx
> +Description:   Number of RDMA channels used for communication with the SRP
> +               target.
> +
>  What:          /sys/class/scsi_host/host<n>/cmd_sg_entries
>  Date:          May 19, 2011
>  KernelVersion: 2.6.39
> @@ -95,6 +102,12 @@ Contact:    linux-rdma@xxxxxxxxxxxxxxx
>  Description:   Maximum number of data buffer descriptors that may be sent to
>                 the target in a single SRP_CMD request.
>
> +What:          /sys/class/scsi_host/host<n>/comp_vector
> +Date:          September 2, 2013
> +KernelVersion: 3.11
> +Contact:       linux-rdma@xxxxxxxxxxxxxxx
> +Description:   Completion vector used for the first RDMA channel.
> +
>  What:          /sys/class/scsi_host/host<n>/dgid
>  Date:          June 17, 2006
>  KernelVersion: 2.6.17
> diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
> index 9feeea1..58ca618 100644
> --- a/drivers/infiniband/ulp/srp/ib_srp.c
> +++ b/drivers/infiniband/ulp/srp/ib_srp.c
> @@ -123,6 +123,16 @@ MODULE_PARM_DESC(dev_loss_tmo,
>                  " if fast_io_fail_tmo has not been set. \"off\" means that"
>                  " this functionality is disabled.");
>
> +static unsigned ch_count;
> +module_param(ch_count, uint, 0444);
> +MODULE_PARM_DESC(ch_count,
> +                "Number of RDMA channels to use for communication with an SRP"
> +                " target. Using more than one channel improves performance"
> +                " if the HCA supports multiple completion vectors. The"
> +                " default value is the minimum of four times the number of"
> +                " online CPU sockets and the number of completion vectors"
> +                " supported by the HCA.");
> +
>  static void srp_add_one(struct ib_device *device);
>  static void srp_remove_one(struct ib_device *device);
>  static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
> @@ -556,17 +566,32 @@ err:
>   * Note: this function may be called without srp_alloc_iu_bufs() having been
>   * invoked. Hence the ch->[rt]x_ring checks.
>   */
> -static void srp_free_ch_ib(struct srp_rdma_ch *ch)
> +static void srp_free_ch_ib(struct srp_target_port *target,
> +                          struct srp_rdma_ch *ch)
>  {
> -       struct srp_target_port *target = ch->target;
>         struct srp_device *dev = target->srp_host->srp_dev;
>         int i;
>
> +       if (!ch->target)
> +               return;
> +
> +       /*
> +        * Avoid that the SCSI error handler tries to use this channel after
> +        * it has been freed. The SCSI error handler can namely continue
> +        * trying to perform recovery actions after scsi_remove_host()
> +        * returned.
> +        */
> +       ch->target = NULL;
> +
>         if (ch->cm_id) {
>                 ib_destroy_cm_id(ch->cm_id);
>                 ch->cm_id = NULL;
>         }
>
> +       /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
> +       if (!ch->qp)
> +               return;
> +
>         if (dev->use_fast_reg) {
>                 if (ch->fr_pool)
>                         srp_destroy_fr_pool(ch->fr_pool);
> @@ -647,7 +672,7 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
>         return ch->status;
>  }
>
> -static int srp_send_req(struct srp_rdma_ch *ch)
> +static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
>  {
>         struct srp_target_port *target = ch->target;
>         struct {
> @@ -688,6 +713,8 @@ static int srp_send_req(struct srp_rdma_ch *ch)
>         req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
>         req->priv.req_buf_fmt   = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
>                                               SRP_BUF_FORMAT_INDIRECT);
> +       req->priv.req_flags     = (multich ? SRP_MULTICHAN_MULTI :
> +                                  SRP_MULTICHAN_SINGLE);
>         /*
>          * In the published SRP specification (draft rev. 16a), the
>          * port identifier format is 8 bytes of ID extension followed
> @@ -769,27 +796,31 @@ static bool srp_change_conn_state(struct srp_target_port *target,
>
>  static void srp_disconnect_target(struct srp_target_port *target)
>  {
> -       struct srp_rdma_ch *ch = &target->ch;
> +       struct srp_rdma_ch *ch;
> +       int i;
>
>         if (srp_change_conn_state(target, false)) {
>                 /* XXX should send SRP_I_LOGOUT request */
>
> -               if (ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
> -                       shost_printk(KERN_DEBUG, target->scsi_host,
> -                                    PFX "Sending CM DREQ failed\n");
> +               for (i = 0; i < target->ch_count; i++) {
> +                       ch = &target->ch[i];
> +                       if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
> +                               shost_printk(KERN_DEBUG, target->scsi_host,
> +                                            PFX "Sending CM DREQ failed\n");
> +                       }
>                 }
>         }
>  }
>
> -static void srp_free_req_data(struct srp_rdma_ch *ch)
> +static void srp_free_req_data(struct srp_target_port *target,
> +                             struct srp_rdma_ch *ch)
>  {
> -       struct srp_target_port *target = ch->target;
>         struct srp_device *dev = target->srp_host->srp_dev;
>         struct ib_device *ibdev = dev->dev;
>         struct srp_request *req;
>         int i;
>
> -       if (!ch->req_ring)
> +       if (!ch->target || !ch->req_ring)
>                 return;
>
>         for (i = 0; i < target->req_ring_size; ++i) {
> @@ -853,7 +884,7 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
>                         goto out;
>
>                 req->indirect_dma_addr = dma_addr;
> -               req->index = i;
> +               req->tag = build_srp_tag(ch - target->ch, i);
>                 list_add_tail(&req->list, &ch->free_reqs);
>         }
>         ret = 0;
> @@ -879,7 +910,8 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
>
>  static void srp_remove_target(struct srp_target_port *target)
>  {
> -       struct srp_rdma_ch *ch = &target->ch;
> +       struct srp_rdma_ch *ch;
> +       int i;
>
>         WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
>
> @@ -889,10 +921,18 @@ static void srp_remove_target(struct srp_target_port *target)
>         scsi_remove_host(target->scsi_host);
>         srp_stop_rport_timers(target->rport);
>         srp_disconnect_target(target);
> -       srp_free_ch_ib(ch);
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               srp_free_ch_ib(target, ch);
> +       }
>         cancel_work_sync(&target->tl_err_work);
>         srp_rport_put(target->rport);
> -       srp_free_req_data(ch);
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               srp_free_req_data(target, ch);
> +       }
> +       kfree(target->ch);
> +       target->ch = NULL;
>
>         spin_lock(&target->srp_host->target_lock);
>         list_del(&target->list);
> @@ -918,12 +958,12 @@ static void srp_rport_delete(struct srp_rport *rport)
>         srp_queue_remove_work(target);
>  }
>
> -static int srp_connect_ch(struct srp_rdma_ch *ch)
> +static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
>  {
>         struct srp_target_port *target = ch->target;
>         int ret;
>
> -       WARN_ON_ONCE(target->connected);
> +       WARN_ON_ONCE(!multich && target->connected);
>
>         target->qp_in_error = false;
>
> @@ -933,7 +973,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch)
>
>         while (1) {
>                 init_completion(&ch->done);
> -               ret = srp_send_req(ch);
> +               ret = srp_send_req(ch, multich);
>                 if (ret)
>                         return ret;
>                 ret = wait_for_completion_interruptible(&ch->done);
> @@ -1095,10 +1135,10 @@ static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
>  static void srp_terminate_io(struct srp_rport *rport)
>  {
>         struct srp_target_port *target = rport->lld_data;
> -       struct srp_rdma_ch *ch = &target->ch;
> +       struct srp_rdma_ch *ch;
>         struct Scsi_Host *shost = target->scsi_host;
>         struct scsi_device *sdev;
> -       int i;
> +       int i, j;
>
>         /*
>          * Invoking srp_terminate_io() while srp_queuecommand() is running
> @@ -1107,10 +1147,15 @@ static void srp_terminate_io(struct srp_rport *rport)
>         shost_for_each_device(sdev, shost)
>                 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
>
> -       for (i = 0; i < target->req_ring_size; ++i) {
> -               struct srp_request *req = &ch->req_ring[i];
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +
> +               for (j = 0; j < target->req_ring_size; ++j) {
> +                       struct srp_request *req = &ch->req_ring[j];
>
> -               srp_finish_req(ch, req, NULL, DID_TRANSPORT_FAILFAST << 16);
> +                       srp_finish_req(ch, req, NULL,
> +                                      DID_TRANSPORT_FAILFAST << 16);
> +               }
>         }
>  }
>
> @@ -1126,8 +1171,9 @@ static void srp_terminate_io(struct srp_rport *rport)
>  static int srp_rport_reconnect(struct srp_rport *rport)
>  {
>         struct srp_target_port *target = rport->lld_data;
> -       struct srp_rdma_ch *ch = &target->ch;
> -       int i, ret;
> +       struct srp_rdma_ch *ch;
> +       int i, j, ret = 0;
> +       bool multich = false;
>
>         srp_disconnect_target(target);
>
> @@ -1139,27 +1185,43 @@ static int srp_rport_reconnect(struct srp_rport *rport)
>          * case things are really fouled up. Doing so also ensures that all CM
>          * callbacks will have finished before a new QP is allocated.
>          */
> -       ret = srp_new_cm_id(ch);
> -
> -       for (i = 0; i < target->req_ring_size; ++i) {
> -               struct srp_request *req = &ch->req_ring[i];
> -
> -               srp_finish_req(ch, req, NULL, DID_RESET << 16);
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               if (!ch->target)
> +                       return -ENODEV;
> +               ret += srp_new_cm_id(ch);
> +       }
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               for (j = 0; j < target->req_ring_size; ++j) {
> +                       struct srp_request *req = &ch->req_ring[j];
> +
> +                       srp_finish_req(ch, req, NULL, DID_RESET << 16);
> +               }
>         }
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               /*
> +                * Whether or not creating a new CM ID succeeded, create a new
> +                * QP. This guarantees that all completion callback function
> +                * invocations have finished before request resetting starts.
> +                */
> +               ret += srp_create_ch_ib(ch);
>
> -       /*
> -        * Whether or not creating a new CM ID succeeded, create a new
> -        * QP. This guarantees that all callback functions for the old QP have
> -        * finished before any send requests are posted on the new QP.
> -        */
> -       ret += srp_create_ch_ib(ch);
> -
> -       INIT_LIST_HEAD(&ch->free_tx);
> -       for (i = 0; i < target->queue_size; ++i)
> -               list_add(&ch->tx_ring[i]->list, &ch->free_tx);
> -
> -       if (ret == 0)
> -               ret = srp_connect_ch(ch);
> +               INIT_LIST_HEAD(&ch->free_tx);
> +               for (j = 0; j < target->queue_size; ++j)
> +                       list_add(&ch->tx_ring[j]->list, &ch->free_tx);
> +       }
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               if (ret) {
> +                       if (i > 1)
> +                               ret = 0;
> +                       break;
> +               }
> +               ret = srp_connect_ch(ch, multich);
> +               multich = true;
> +       }
>
>         if (ret == 0)
>                 shost_printk(KERN_INFO, target->scsi_host,
> @@ -1573,7 +1635,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
>         s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
>         struct srp_iu *iu;
>
> -       srp_send_completion(ch->send_cq, target);
> +       srp_send_completion(ch->send_cq, ch);
>
>         if (list_empty(&ch->free_tx))
>                 return NULL;
> @@ -1637,6 +1699,7 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
>         struct srp_request *req;
>         struct scsi_cmnd *scmnd;
>         unsigned long flags;
> +       unsigned i;
>
>         if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
>                 spin_lock_irqsave(&ch->lock, flags);
> @@ -1648,12 +1711,20 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
>                         ch->tsk_mgmt_status = rsp->data[3];
>                 complete(&ch->tsk_mgmt_done);
>         } else {
> -               req = &ch->req_ring[rsp->tag];
> -               scmnd = srp_claim_req(ch, req, NULL, NULL);
> +               if (srp_tag_ch(rsp->tag) != ch - target->ch)
> +                       pr_err("Channel idx mismatch: tag %#llx <> ch %#lx\n",
> +                              rsp->tag, ch - target->ch);
> +               i = srp_tag_idx(rsp->tag);
> +               if (i < target->req_ring_size) {
> +                       req = &ch->req_ring[i];
> +                       scmnd = srp_claim_req(ch, req, NULL, NULL);
> +               } else {
> +                       scmnd = NULL;
> +               }
>                 if (!scmnd) {
>                         shost_printk(KERN_ERR, target->scsi_host,
> -                                    "Null scmnd for RSP w/tag %016llx\n",
> -                                    (unsigned long long) rsp->tag);
> +                                    "Null scmnd for RSP w/tag %#016llx received on ch %ld / QP %#x\n",
> +                                    rsp->tag, ch - target->ch, ch->qp->qp_num);
>
>                         spin_lock_irqsave(&ch->lock, flags);
>                         ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
> @@ -1879,7 +1950,8 @@ static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
>         }
>  }
>
> -static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
> +static int srp_queuecommand(unsigned hwq, struct Scsi_Host *shost,
> +                           struct scsi_cmnd *scmnd)
>  {
>         struct srp_target_port *target = host_to_target(shost);
>         struct srp_rport *rport = target->rport;
> @@ -1905,7 +1977,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
>         if (unlikely(scmnd->result))
>                 goto err;
>
> -       ch = &target->ch;
> +       ch = &target->ch[hwq];
>
>         spin_lock_irqsave(&ch->lock, flags);
>         iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
> @@ -1927,7 +1999,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
>
>         cmd->opcode = SRP_CMD;
>         cmd->lun    = cpu_to_be64((u64) scmnd->device->lun << 48);
> -       cmd->tag    = req->index;
> +       cmd->tag    = req->tag;
>         memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
>
>         req->scmnd    = scmnd;
> @@ -1993,6 +2065,17 @@ err:
>         goto unlock_rport;
>  }
>
> +static int srp_sq_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
> +{
> +       return srp_queuecommand(0, shost, scmnd);
> +}
> +
> +static int srp_mq_queuecommand(struct blk_mq_hw_ctx *hctx,
> +                              struct scsi_cmnd *scmnd)
> +{
> +       return srp_queuecommand(hctx->queue_num, scmnd->device->host, scmnd);
> +}
> +
>  /*
>   * Note: the resources allocated in this function are freed in
>   * srp_free_ch_ib().
> @@ -2409,15 +2492,23 @@ static int srp_abort(struct scsi_cmnd *scmnd)
>  {
>         struct srp_target_port *target = host_to_target(scmnd->device->host);
>         struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
> +       u16 ch_idx;
>         struct srp_rdma_ch *ch;
>         int ret;
>
>         shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
>
> -       ch = &target->ch;
> -       if (!req || !srp_claim_req(ch, req, NULL, scmnd))
> +       if (!req)
> +               return SUCCESS;
> +       ch_idx = srp_tag_ch(req->tag);
> +       if (WARN_ON_ONCE(ch_idx >= target->ch_count))
>                 return SUCCESS;
> -       if (srp_send_tsk_mgmt(ch, req->index, scmnd->device->lun,
> +       ch = &target->ch[ch_idx];
> +       if (!srp_claim_req(ch, req, NULL, scmnd))
> +               return SUCCESS;
> +       shost_printk(KERN_ERR, target->scsi_host,
> +                    "Sending SRP abort for tag %#x\n", req->tag);
> +       if (srp_send_tsk_mgmt(ch, req->tag, scmnd->device->lun,
>                               SRP_TSK_ABORT_TASK) == 0)
>                 ret = SUCCESS;
>         else if (target->rport->state == SRP_RPORT_LOST)
> @@ -2434,21 +2525,25 @@ static int srp_abort(struct scsi_cmnd *scmnd)
>  static int srp_reset_device(struct scsi_cmnd *scmnd)
>  {
>         struct srp_target_port *target = host_to_target(scmnd->device->host);
> -       struct srp_rdma_ch *ch = &target->ch;
> +       struct srp_rdma_ch *ch;
>         int i;
>
>         shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
>
> +       ch = &target->ch[0];
>         if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
>                               SRP_TSK_LUN_RESET))
>                 return FAILED;
>         if (ch->tsk_mgmt_status)
>                 return FAILED;
>
> -       for (i = 0; i < target->req_ring_size; ++i) {
> -               struct srp_request *req = &ch->req_ring[i];
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               for (i = 0; i < target->req_ring_size; ++i) {
> +                       struct srp_request *req = &ch->req_ring[i];
>
> -               srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
> +                       srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
> +               }
>         }
>
>         return SUCCESS;
> @@ -2525,7 +2620,7 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
>                          char *buf)
>  {
>         struct srp_target_port *target = host_to_target(class_to_shost(dev));
> -       struct srp_rdma_ch *ch = &target->ch;
> +       struct srp_rdma_ch *ch = &target->ch[0];
>
>         return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
>  }
> @@ -2542,8 +2637,14 @@ static ssize_t show_req_lim(struct device *dev,
>                             struct device_attribute *attr, char *buf)
>  {
>         struct srp_target_port *target = host_to_target(class_to_shost(dev));
> +       struct srp_rdma_ch *ch;
> +       int i, req_lim = INT_MAX;
>
> -       return sprintf(buf, "%d\n", target->ch.req_lim);
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               req_lim = min(req_lim, ch->req_lim);
> +       }
> +       return sprintf(buf, "%d\n", req_lim);
>  }
>
>  static ssize_t show_zero_req_lim(struct device *dev,
> @@ -2570,6 +2671,14 @@ static ssize_t show_local_ib_device(struct device *dev,
>         return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
>  }
>
> +static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
> +                            char *buf)
> +{
> +       struct srp_target_port *target = host_to_target(class_to_shost(dev));
> +
> +       return sprintf(buf, "%d\n", target->ch_count);
> +}
> +
>  static ssize_t show_comp_vector(struct device *dev,
>                                 struct device_attribute *attr, char *buf)
>  {
> @@ -2613,6 +2722,7 @@ static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
>  static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,           NULL);
>  static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
>  static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
> +static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
>  static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
>  static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
>  static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
> @@ -2630,6 +2740,7 @@ static struct device_attribute *srp_host_attrs[] = {
>         &dev_attr_zero_req_lim,
>         &dev_attr_local_ib_port,
>         &dev_attr_local_ib_device,
> +       &dev_attr_ch_count,
>         &dev_attr_comp_vector,
>         &dev_attr_tl_retry_count,
>         &dev_attr_cmd_sg_entries,
> @@ -2643,7 +2754,8 @@ static struct scsi_host_template srp_template = {
>         .proc_name                      = DRV_NAME,
>         .slave_configure                = srp_slave_configure,
>         .info                           = srp_target_info,
> -       .queuecommand                   = srp_queuecommand,
> +       .queuecommand                   = srp_sq_queuecommand,
> +       .mq_queuecommand                = srp_mq_queuecommand,

Another option is to obtain the hctx directly from the request; then the
mq path could reuse the existing .queuecommand interface as well.

>         .change_queue_depth             = srp_change_queue_depth,
>         .change_queue_type              = srp_change_queue_type,
>         .eh_abort_handler               = srp_abort,
> @@ -3038,7 +3150,8 @@ static ssize_t srp_create_target(struct device *dev,
>         struct srp_rdma_ch *ch;
>         struct srp_device *srp_dev = host->srp_dev;
>         struct ib_device *ibdev = srp_dev->dev;
> -       int ret;
> +       int ret, node_idx, node, cpu, i;
> +       bool multich = false;
>
>         target_host = scsi_host_alloc(&srp_template,
>                                       sizeof (struct srp_target_port));
> @@ -3098,34 +3211,82 @@ static ssize_t srp_create_target(struct device *dev,
>         INIT_WORK(&target->tl_err_work, srp_tl_err_work);
>         INIT_WORK(&target->remove_work, srp_remove_work);
>         spin_lock_init(&target->lock);
> -       ch = &target->ch;
> -       ch->target = target;
> -       ch->comp_vector = target->comp_vector;
> -       spin_lock_init(&ch->lock);
> -       INIT_LIST_HEAD(&ch->free_tx);
> -       ret = srp_alloc_req_data(ch);
> -       if (ret)
> -               goto err_free_mem;
> -
>         ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
>         if (ret)
> -               goto err_free_mem;
> +               goto err;
>
> -       ret = srp_create_ch_ib(ch);
> -       if (ret)
> -               goto err_free_mem;
> +       ret = -ENOMEM;
> +       target->ch_count = max_t(unsigned, num_online_nodes(),
> +                                min(ch_count ? :
> +                                    min(4 * num_online_nodes(),
> +                                        ibdev->num_comp_vectors),
> +                                    num_online_cpus()));
> +       target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
> +                            GFP_KERNEL);
> +       if (!target->ch)
> +               goto err;
>
> -       ret = srp_new_cm_id(ch);
> -       if (ret)
> -               goto err_free_ib;
> +       node_idx = 0;
> +       for_each_online_node(node) {
> +               const int ch_start = (node_idx * target->ch_count /
> +                                     num_online_nodes());
> +               const int ch_end = ((node_idx + 1) * target->ch_count /
> +                                   num_online_nodes());
> +               const int cv_start = (node_idx * ibdev->num_comp_vectors /
> +                                     num_online_nodes() + target->comp_vector)
> +                                    % ibdev->num_comp_vectors;
> +               const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
> +                                   num_online_nodes() + target->comp_vector)
> +                                  % ibdev->num_comp_vectors;
> +               int cpu_idx = 0;
> +
> +               for_each_online_cpu(cpu) {
> +                       if (cpu_to_node(cpu) != node)
> +                               continue;
> +                       if (ch_start + cpu_idx >= ch_end)
> +                               continue;
> +                       ch = &target->ch[ch_start + cpu_idx];
> +                       ch->target = target;
> +                       ch->comp_vector = cv_start == cv_end ? cv_start :
> +                               cv_start + cpu_idx % (cv_end - cv_start);
> +                       spin_lock_init(&ch->lock);
> +                       INIT_LIST_HEAD(&ch->free_tx);
> +                       ret = srp_new_cm_id(ch);
> +                       if (ret)
> +                               goto err_disconnect;
>
> -       ret = srp_connect_ch(ch);
> -       if (ret) {
> -               shost_printk(KERN_ERR, target->scsi_host,
> -                            PFX "Connection failed\n");
> -               goto err_free_ib;
> +                       ret = srp_create_ch_ib(ch);
> +                       if (ret)
> +                               goto err_disconnect;
> +
> +                       ret = srp_alloc_req_data(ch);
> +                       if (ret)
> +                               goto err_disconnect;
> +
> +                       ret = srp_connect_ch(ch, multich);
> +                       if (ret) {
> +                               shost_printk(KERN_ERR, target->scsi_host,
> +                                            PFX "Connection %d/%d failed\n",
> +                                            ch_start + cpu_idx,
> +                                            target->ch_count);
> +                               if (node_idx == 0 && cpu_idx == 0) {
> +                                       goto err_disconnect;
> +                               } else {
> +                                       srp_free_ch_ib(target, ch);
> +                                       srp_free_req_data(target, ch);
> +                                       target->ch_count = ch - target->ch;
> +                                       break;
> +                               }
> +                       }
> +
> +                       multich = true;
> +                       cpu_idx++;
> +               }
> +               node_idx++;
>         }
>
> +       target->scsi_host->nr_hw_queues = target->ch_count;
> +
>         ret = srp_add_target(host, target);
>         if (ret)
>                 goto err_disconnect;
> @@ -3154,11 +3315,13 @@ out:
>  err_disconnect:
>         srp_disconnect_target(target);
>
> -err_free_ib:
> -       srp_free_ch_ib(ch);
> +       for (i = 0; i < target->ch_count; i++) {
> +               ch = &target->ch[i];
> +               srp_free_ch_ib(target, ch);
> +               srp_free_req_data(target, ch);
> +       }
>
> -err_free_mem:
> -       srp_free_req_data(ch);
> +       kfree(target->ch);
>
>  err:
>         scsi_host_put(target_host);
> diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
> index 0609124..d9660e1 100644
> --- a/drivers/infiniband/ulp/srp/ib_srp.h
> +++ b/drivers/infiniband/ulp/srp/ib_srp.h
> @@ -84,6 +84,21 @@ enum srp_iu_type {
>         SRP_IU_RSP,
>  };
>
> +static inline u32 build_srp_tag(u16 ch, u16 req_idx)
> +{
> +       return ch << 16 | req_idx;
> +}
> +
> +static inline u16 srp_tag_ch(u32 tag)
> +{
> +       return tag >> 16;
> +}
> +
> +static inline u16 srp_tag_idx(u32 tag)
> +{
> +       return tag & ((1 << 16) - 1);
> +}
> +
>  /*
>   * @mr_page_mask: HCA memory registration page mask.
>   * @mr_page_size: HCA memory registration page size.
> @@ -127,7 +142,7 @@ struct srp_request {
>         struct srp_direct_buf  *indirect_desc;
>         dma_addr_t              indirect_dma_addr;
>         short                   nmdesc;
> -       short                   index;
> +       uint32_t                tag;
>  };
>
>  struct srp_rdma_ch {
> @@ -173,8 +188,9 @@ struct srp_target_port {
>         /* read and written in the hot path */
>         spinlock_t              lock;
>
> -       struct srp_rdma_ch      ch;
>         /* read only in the hot path */
> +       struct srp_rdma_ch      *ch;
> +       u32                     ch_count;
>         u32                     lkey;
>         u32                     rkey;
>         enum srp_target_state   state;
> --
> 1.8.4.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html