On 3/4/22 2:21 AM, Bernard Metzler wrote:
<...>
+static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
+ u8 pd_len)
+{
+ struct socket *s = cep->sock;
+ struct mpa_rr *rr = &cep->mpa.hdr;
+ struct kvec iov[3];
+ struct msghdr msg;
+ int iovec_num = 0;
+ int ret;
+ int mpa_len;
+
+ memset(&msg, 0, sizeof(msg));
+
+ rr->params.pd_len = cpu_to_be16(pd_len);
+
+ iov[iovec_num].iov_base = rr;
+ iov[iovec_num].iov_len = sizeof(*rr);
+ iovec_num++;
+ mpa_len = sizeof(*rr);
+
+ iov[iovec_num].iov_base = &cep->mpa.ext_data;
+ iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
+ iovec_num++;
+ mpa_len += sizeof(cep->mpa.ext_data);
+
+ if (pd_len) {
+ iov[iovec_num].iov_base = (char *)pdata;
+ iov[iovec_num].iov_len = pd_len;
+ mpa_len += pd_len;
+ iovec_num++;
+ }
+
+ ret = kernel_sendmsg(s, &msg, iov, iovec_num + 1, mpa_len);
Before kernel_sendmsg(), iovec_num already reflects the
right number of iov elements, so don't add 1 here.
Did you ever test with private data?? iovec_num + 1 would be
at 4, which points behind iov[]. Maybe the correct mpa_len
parameter saved you.
Yeah, this is a bug. I tested it before, and re-run our testcase for
this again, it's passed just now. I check cp_sendmsg_locked function,
It seems that the 'mpa_len' limits the sendmsg range as you said.
Thanks
+
+ return ret < 0 ? ret : 0;
+}
+
<...>
+ if (!to_rcv) {
+ /*
+ * We have received the whole MPA Request/Reply message.
+ * CHeck against peer protocol violation.
CHeck -> Check
Will fix.
+ */
+ u32 word;
+
+ ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
+ &rcvd);
+ if (ret == -EAGAIN && rcvd == 0)
+ return 0;
+
+ if (ret)
+ return ret;
+
+ return -EPROTO;
+ }
without a few comments, this function is hard to understand.
Maybe add a statement here that at this point private data are
signaled and are now being received, or continued to be received?
Will add it, and it's better with a comment here.
+
+ if (!cep->mpa.pdata) {
+ cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
+ if (!cep->mpa.pdata)
+ return -ENOMEM;
+ }
+
+ rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
+ MSG_DONTWAIT);
+ if (rcvd < 0)
+ return rcvd;
+
+ if (rcvd > to_rcv)
+ return -EPROTO;
+
+ cep->mpa.bytes_rcvd += rcvd;
+
+ if (to_rcv == rcvd)
+ return 0;
+
+ return -EAGAIN;
+}
+
+/*
+ * erdma_proc_mpareq()
+ *
+ * Read MPA Request from socket and signal new connection to IWCM
+ * if success. Caller must hold lock on corresponding listening CEP.
+ */
+static int erdma_proc_mpareq(struct erdma_cep *cep)
+{
+ struct mpa_rr *req;
+ int ret;
+
+ ret = erdma_recv_mpa_rr(cep);
+ if (ret)
+ return ret;
+
+ req = &cep->mpa.hdr;
+
+ if (__mpa_rr_revision(req->params.bits) != MPA_REVISION_EXT_1)
This has already been checked in erdma_recv_mpa_rr()
Yes, will remove it.
+ return -EPROTO;
+
+ if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
+ return -EPROTO;
+
+ memcpy(req->key, MPA_KEY_REP, 16);
+
+ /* Currently does not support marker and crc. */
+ if (req->params.bits & MPA_RR_FLAG_MARKERS ||
+ req->params.bits & MPA_RR_FLAG_CRC)
+ goto reject_conn;
+
+ cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;
+
+ /* Keep reference until IWCM accepts/rejects */
+ erdma_cep_get(cep);
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
+ if (ret)
+ erdma_cep_put(cep);
+
+ return ret;
+
+reject_conn:
+ req->params.bits &= ~MPA_RR_FLAG_MARKERS;
+ req->params.bits |= MPA_RR_FLAG_REJECT;
+ req->params.bits &= ~MPA_RR_FLAG_CRC;
+
+ kfree(cep->mpa.pdata);
+ cep->mpa.pdata = NULL;
+ erdma_send_mpareqrep(cep, NULL, 0);
+
+ return -EOPNOTSUPP;
+}
+
+static int erdma_proc_mpareply(struct erdma_cep *cep)
+{
+ struct erdma_qp_attrs qp_attrs;
+ struct erdma_qp *qp = cep->qp;
+ struct mpa_rr *rep;
+ int ret;
+
+ ret = erdma_recv_mpa_rr(cep);
+ if (ret != -EAGAIN)
+ erdma_cancel_mpatimer(cep);
+ if (ret)
+ goto out_err;
+
+ rep = &cep->mpa.hdr;
+
+ if (__mpa_rr_revision(rep->params.bits) != MPA_REVISION_EXT_1) {
This has already been checked in erdma_recv_mpa_rr()
Also will remove it.
+ ret = -EPROTO;
+ goto out_err;
+ }
+ if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
+ ret = -EPROTO;
+ goto out_err;
<...>
+
+ if (ret && ret != -EAGAIN)
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_CLOSE_LLP:
+ if (cep->cm_id)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_PEER_CLOSE:
+ if (cep->cm_id) {
+ if (cep->state == ERDMA_EPSTATE_CONNECTING ||
+ cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA reply not received, but connection drop
+ */
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ } else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
+ /*
+ * NOTE: IW_CM_EVENT_DISCONNECT is given just
+ * to transition IWCM into CLOSING.
+ */
+ erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ }
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ /* Socket close before MPA request received. */
+ erdma_disassoc_listen_cep(cep);
+ erdma_cep_put(cep);
+ }
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_MPATIMEOUT:
+ cep->mpa_timer = NULL;
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA request timed out:
+ * Hide any partially received private data and signal
+ * timeout
+ */
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (cep->cm_id)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ETIMEDOUT);
+ release_cep = 1;
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ /* No MPA request received after peer TCP stream setup.
exceeds 80 chars per line
Will fix. clang-format seems not format comment lines. I will check this
in all the .c/.h files.
Thanks,
Cheng Xu