Re: [PATCH v4 10/25] ibtrs: server: main functionality

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Sep 24, 2019 at 1:49 AM Bart Van Assche <bvanassche@xxxxxxx> wrote:
>
> On 6/20/19 8:03 AM, Jack Wang wrote:
> > +module_param_named(max_chunk_size, max_chunk_size, int, 0444);
> > +MODULE_PARM_DESC(max_chunk_size,
> > +              "Max size for each IO request, when change the unit is in byte"
> > +              " (default: " __stringify(DEFAULT_MAX_CHUNK_SIZE_KB) "KB)");
>
> Where can I find the definition of DEFAULT_MAX_CHUNK_SIZE_KB?
oh, it's a typo, should be DEFAULT_MAX_CHUNK_SIZE.
>
> > +static char cq_affinity_list[256] = "";
>
> No empty initializers for file-scope variables please.
Is it guaranteed by the compiler, the file-scope variables will be
empty initialized?
>
> > +     pr_info("cq_affinity_list changed to %*pbl\n",
> > +             cpumask_pr_args(&cq_affinity_mask));
>
> Should this pr_info() call perhaps be changed into pr_debug()?
Because the setting could lead to performance drop, pr_info seems more
appropriate.

>
> > +static bool __ibtrs_srv_change_state(struct ibtrs_srv_sess *sess,
> > +                                  enum ibtrs_srv_state new_state)
> > +{
> > +     enum ibtrs_srv_state old_state;
> > +     bool changed = false;
> > +
> > +     old_state = sess->state;
> > +     switch (new_state) {
>
> Please add a lockdep_assert_held() statement that checks whether calls
> of this function are serialized properly.
will look into it.
>
> > +/**
> > + * rdma_write_sg() - response on successful READ request
> > + */
> > +static int rdma_write_sg(struct ibtrs_srv_op *id)
> > +{
> > +     struct ibtrs_srv_sess *sess = to_srv_sess(id->con->c.sess);
> > +     dma_addr_t dma_addr = sess->dma_addr[id->msg_id];
> > +     struct ibtrs_srv *srv = sess->srv;
> > +     struct ib_send_wr inv_wr, imm_wr;
> > +     struct ib_rdma_wr *wr = NULL;
> > +     const struct ib_send_wr *bad_wr;
> > +     enum ib_send_flags flags;
> > +     size_t sg_cnt;
> > +     int err, i, offset;
> > +     bool need_inval;
> > +     u32 rkey = 0;
> > +
> > +     sg_cnt = le16_to_cpu(id->rd_msg->sg_cnt);
> > +     need_inval = le16_to_cpu(id->rd_msg->flags) & IBTRS_MSG_NEED_INVAL_F;
> > +     if (unlikely(!sg_cnt))
> > +             return -EINVAL;
> > +
> > +     offset = 0;
> > +     for (i = 0; i < sg_cnt; i++) {
> > +             struct ib_sge *list;
> > +
> > +             wr              = &id->tx_wr[i];
> > +             list            = &id->tx_sg[i];
> > +             list->addr      = dma_addr + offset;
> > +             list->length    = le32_to_cpu(id->rd_msg->desc[i].len);
> > +
> > +             /* WR will fail with length error
> > +              * if this is 0
> > +              */
> > +             if (unlikely(list->length == 0)) {
> > +                     ibtrs_err(sess, "Invalid RDMA-Write sg list length 0\n");
> > +                     return -EINVAL;
> > +             }
> > +
> > +             list->lkey = sess->s.dev->ib_pd->local_dma_lkey;
> > +             offset += list->length;
> > +
> > +             wr->wr.wr_cqe   = &io_comp_cqe;
> > +             wr->wr.sg_list  = list;
> > +             wr->wr.num_sge  = 1;
> > +             wr->remote_addr = le64_to_cpu(id->rd_msg->desc[i].addr);
> > +             wr->rkey        = le32_to_cpu(id->rd_msg->desc[i].key);
> > +             if (rkey == 0)
> > +                     rkey = wr->rkey;
> > +             else
> > +                     /* Only one key is actually used */
> > +                     WARN_ON_ONCE(rkey != wr->rkey);
> > +
> > +             if (i < (sg_cnt - 1))
> > +                     wr->wr.next = &id->tx_wr[i + 1].wr;
> > +             else if (need_inval)
> > +                     wr->wr.next = &inv_wr;
> > +             else
> > +                     wr->wr.next = &imm_wr;
> > +
> > +             wr->wr.opcode = IB_WR_RDMA_WRITE;
> > +             wr->wr.ex.imm_data = 0;
> > +             wr->wr.send_flags  = 0;
> > +     }
> > +     /*
> > +      * From time to time we have to post signalled sends,
> > +      * or send queue will fill up and only QP reset can help.
> > +      */
> > +     flags = atomic_inc_return(&id->con->wr_cnt) % srv->queue_depth ?
> > +                     0 : IB_SEND_SIGNALED;
> > +
> > +     if (need_inval) {
> > +             inv_wr.next = &imm_wr;
> > +             inv_wr.wr_cqe = &io_comp_cqe;
> > +             inv_wr.sg_list = NULL;
> > +             inv_wr.num_sge = 0;
> > +             inv_wr.opcode = IB_WR_SEND_WITH_INV;
> > +             inv_wr.send_flags = 0;
> > +             inv_wr.ex.invalidate_rkey = rkey;
> > +     }
> > +     imm_wr.next = NULL;
> > +     imm_wr.wr_cqe = &io_comp_cqe;
> > +     imm_wr.sg_list = NULL;
> > +     imm_wr.num_sge = 0;
> > +     imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM;
> > +     imm_wr.send_flags = flags;
> > +     imm_wr.ex.imm_data = cpu_to_be32(ibtrs_to_io_rsp_imm(id->msg_id,
> > +                                                          0, need_inval));
> > +
> > +     ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr,
> > +                                   offset, DMA_BIDIRECTIONAL);
> > +
> > +     err = ib_post_send(id->con->c.qp, &id->tx_wr[0].wr, &bad_wr);
> > +     if (unlikely(err))
> > +             ibtrs_err(sess,
> > +                       "Posting RDMA-Write-Request to QP failed, err: %d\n",
> > +                       err);
> > +
> > +     return err;
> > +}
>
> All other RDMA server implementations use rdma_rw_ctx_init() and
> rdma_rw_ctx_wrs(). Please use these functions in IBTRS too.
rdma_rw_ctx_* api doesn't support RDMA_WRITE_WITH_IMM, and
ibtrs mainly use RDMA_WRITE_WITH_IMM.

>
> > +static void ibtrs_srv_hb_err_handler(struct ibtrs_con *c, int err)
> > +{
> > +     (void)err;
> > +     close_sess(to_srv_sess(c->sess));
> > +}
>
> Is the (void)err statement really necessary?
No, will be removed.
>
> > +static int ibtrs_srv_rdma_init(struct ibtrs_srv_ctx *ctx, unsigned int port)
> > +{
> > +     struct sockaddr_in6 sin = {
> > +             .sin6_family    = AF_INET6,
> > +             .sin6_addr      = IN6ADDR_ANY_INIT,
> > +             .sin6_port      = htons(port),
> > +     };
> > +     struct sockaddr_ib sib = {
> > +             .sib_family                     = AF_IB,
> > +             .sib_addr.sib_subnet_prefix     = 0ULL,
> > +             .sib_addr.sib_interface_id      = 0ULL,
> > +             .sib_sid        = cpu_to_be64(RDMA_IB_IP_PS_IB | port),
> > +             .sib_sid_mask   = cpu_to_be64(0xffffffffffffffffULL),
> > +             .sib_pkey       = cpu_to_be16(0xffff),
> > +     };
> > +     struct rdma_cm_id *cm_ip, *cm_ib;
> > +     int ret;
> > +
> > +     /*
> > +      * We accept both IPoIB and IB connections, so we need to keep
> > +      * two cm id's, one for each socket type and port space.
> > +      * If the cm initialization of one of the id's fails, we abort
> > +      * everything.
> > +      */
> > +     cm_ip = ibtrs_srv_cm_init(ctx, (struct sockaddr *)&sin, RDMA_PS_TCP);
> > +     if (unlikely(IS_ERR(cm_ip)))
> > +             return PTR_ERR(cm_ip);
> > +
> > +     cm_ib = ibtrs_srv_cm_init(ctx, (struct sockaddr *)&sib, RDMA_PS_IB);
> > +     if (unlikely(IS_ERR(cm_ib))) {
> > +             ret = PTR_ERR(cm_ib);
> > +             goto free_cm_ip;
> > +     }
> > +
> > +     ctx->cm_id_ip = cm_ip;
> > +     ctx->cm_id_ib = cm_ib;
> > +
> > +     return 0;
> > +
> > +free_cm_ip:
> > +     rdma_destroy_id(cm_ip);
> > +
> > +     return ret;
> > +}
>
> Will the above work if CONFIG_IPV6=n?
I tested with CONFIG_IPV6=n, it compiles.
>
> > +static int __init ibtrs_server_init(void)
> > +{
> > +     int err;
> > +
> > +     if (!strlen(cq_affinity_list))
> > +             init_cq_affinity();
>
> Is the above if-test useful? Can that if-test be left out?
You're right, will remove.
>
> Thanks,
>
> Bart.
Thanks!



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux