From: Long Li <longli@xxxxxxxxxxxxx>

Implement cifs_rdma_write for sending upper layer data. The upper layer
uses this function to do an RDMA send. It is also used to pass the SMB
packets that set up an RDMA read/write via memory registration.

Signed-off-by: Long Li <longli@xxxxxxxxxxxxx>
---
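A minimal sketch of how the upper layer is expected to call this
function, for review context (smb_send_rdma and the server->rdma_ses
field are illustrative here, not part of this patch):

	static int smb_send_rdma(struct TCP_Server_Info *server,
				 struct smb_rqst *rqst)
	{
		/*
		 * rdma_ses is an assumed field holding the cifs_rdma_info
		 * returned by cifs_create_rdma_session(). cifs_rdma_write
		 * strips the 4-byte length prefix (MS-SMB2 2.1) itself, so
		 * the rqst is built exactly as it would be for TCP.
		 */
		return cifs_rdma_write(server->rdma_ses, rqst);
	}

Note that cifs_rdma_write returns only after every send it posted has
completed: the done: path waits for send_pending to reach zero, and
send_done() decrements the counter and wakes wait_send_pending.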
 fs/cifs/cifsrdma.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/cifsrdma.h |   5 ++
 2 files changed, 182 insertions(+)

diff --git a/fs/cifs/cifsrdma.c b/fs/cifs/cifsrdma.c
index ef21f1c..eb48651 100644
--- a/fs/cifs/cifsrdma.c
+++ b/fs/cifs/cifsrdma.c
@@ -229,6 +229,10 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
 			request->sge[i].length,
 			DMA_TO_DEVICE);
 
+	if (atomic_dec_and_test(&request->info->send_pending)) {
+		wake_up(&request->info->wait_send_pending);
+	}
+
 	kfree(request->sge);
 	mempool_free(request, request->info->request_mempool);
 }
@@ -551,12 +555,14 @@ static int cifs_rdma_post_send_negotiate_req(struct cifs_rdma_info *info)
 		request->sge[0].addr,
 		request->sge[0].length, request->sge[0].lkey);
 
+	atomic_inc(&info->send_pending);
 	rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
 	if (!rc)
 		return 0;
 
 	// if we reach here, post send failed
 	log_rdma_send("ib_post_send failed rc=%d\n", rc);
+	atomic_dec(&info->send_pending);
 	ib_dma_unmap_single(info->id->device, request->sge[0].addr,
 		request->sge[0].length, DMA_TO_DEVICE);
@@ -662,12 +668,14 @@ static int cifs_rdma_post_send_page(struct cifs_rdma_info *info, struct page *pa
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
+	atomic_inc(&info->send_pending);
 	rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
 	if (!rc)
 		return 0;
 
 	// post send failed
 	log_rdma_send("ib_post_send failed rc=%d\n", rc);
+	atomic_dec(&info->send_pending);
 
 dma_mapping_failed:
 	for (i=0; i<2; i++)
@@ -768,11 +776,13 @@ static int cifs_rdma_post_send_empty(struct cifs_rdma_info *info)
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
+	atomic_inc(&info->send_pending);
 	rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
 	if (!rc)
 		return 0;
 
 	log_rdma_send("ib_post_send failed rc=%d\n", rc);
+	atomic_dec(&info->send_pending);
 	ib_dma_unmap_single(info->id->device, request->sge[0].addr,
 		request->sge[0].length, DMA_TO_DEVICE);
@@ -885,12 +895,14 @@ static int cifs_rdma_post_send_data(
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
+	atomic_inc(&info->send_pending);
 	rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
 	if (!rc)
 		return 0;
 
 	// post send failed
 	log_rdma_send("ib_post_send failed rc=%d\n", rc);
+	atomic_dec(&info->send_pending);
 
 dma_mapping_failure:
 	for (i=0; i<n_vec+1; i++)
@@ -1185,6 +1197,9 @@ struct cifs_rdma_info* cifs_create_rdma_session(
 	allocate_receive_buffers(info, info->receive_credit_max);
 	init_waitqueue_head(&info->wait_send_queue);
 
+	init_waitqueue_head(&info->wait_send_pending);
+	atomic_set(&info->send_pending, 0);
+
 	init_waitqueue_head(&info->wait_recv_pending);
 	atomic_set(&info->recv_pending, 0);
@@ -1202,3 +1217,165 @@ struct cifs_rdma_info* cifs_create_rdma_session(
 	kfree(info);
 	return NULL;
 }
+
+/*
+ * Write data to transport
+ * Each rqst is transported as a SMBDirect payload
+ * rqst: the data to write
+ * return value: 0 on success, otherwise error code
+ */
+int cifs_rdma_write(struct cifs_rdma_info *info, struct smb_rqst *rqst)
+{
+	struct kvec vec;
+	int nvecs;
+	int size;
+	int buflen = 0, remaining_data_length;
+	int start, i, j;
+	int max_iov_size = info->max_send_size - sizeof(struct smbd_data_transfer);
+	struct kvec *iov;
+	int rc;
+
+	if (info->transport_status != CIFS_RDMA_CONNECTED) {
+		log_cifs_write("disconnected returning -EIO\n");
+		return -EIO;
+	}
+
+	iov = kzalloc(sizeof(struct kvec)*rqst->rq_nvec, GFP_KERNEL);
+	if (!iov) {
+		log_cifs_write("failed to allocate iov returning -ENOMEM\n");
+		return -ENOMEM;
+	}
+
+	/* Strip the first 4 bytes (MS-SMB2 section 2.1);
+	 * they are used only for TCP transport */
+	iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
+	iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
+	buflen += iov[0].iov_len;
+
+	/* total up iov array first */
+	for (i = 1; i < rqst->rq_nvec; i++) {
+		iov[i].iov_base = rqst->rq_iov[i].iov_base;
+		iov[i].iov_len = rqst->rq_iov[i].iov_len;
+		buflen += iov[i].iov_len;
+	}
+
+	/* add in the page array if there is one */
+	if (rqst->rq_npages) {
+		buflen += rqst->rq_pagesz * (rqst->rq_npages - 1);
+		buflen += rqst->rq_tailsz;
+	}
+
+	if (buflen + sizeof(struct smbd_data_transfer) >
+		info->max_fragmented_send_size) {
+		log_cifs_write("payload size %d > max size %d\n",
+			buflen, info->max_fragmented_send_size);
+		rc = -EINVAL;
+		goto done;
+	}
+
+	remaining_data_length = buflen;
+
+	log_cifs_write("rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
+		"rq_tailsz=%d buflen=%d\n",
+		rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
+		rqst->rq_tailsz, buflen);
+
+	start = i = iov[0].iov_len ? 0 : 1;
+	buflen = 0;
+	while (true) {
+		buflen += iov[i].iov_len;
+		if (buflen > max_iov_size) {
+			if (i > start) {
+				remaining_data_length -=
+					(buflen-iov[i].iov_len);
+				log_cifs_write("sending iov[] from start=%d "
+					"i=%d nvecs=%d "
+					"remaining_data_length=%d\n",
+					start, i, i-start,
+					remaining_data_length);
+				rc = cifs_rdma_post_send_data(
+					info, &iov[start], i-start,
+					remaining_data_length);
+				if (rc)
+					goto done;
+			} else {
+				// iov[start] is too big, break it into nvecs pieces
+				nvecs = (buflen+max_iov_size-1)/max_iov_size;
+				log_cifs_write("iov[%d] iov_base=%p buflen=%d"
+					" break to %d vectors\n",
+					start, iov[start].iov_base,
+					buflen, nvecs);
+				for (j = 0; j < nvecs; j++) {
+					vec.iov_base =
+						(char *)iov[start].iov_base +
+						j*max_iov_size;
+					vec.iov_len = max_iov_size;
+					if (j == nvecs-1)
+						vec.iov_len =
+							buflen -
+							max_iov_size*(nvecs-1);
+					remaining_data_length -= vec.iov_len;
+					log_cifs_write(
+						"sending vec j=%d iov_base=%p"
+						" iov_len=%lu "
+						"remaining_data_length=%d\n",
+						j, vec.iov_base, vec.iov_len,
+						remaining_data_length);
+					rc = cifs_rdma_post_send_data(
+						info, &vec, 1,
+						remaining_data_length);
+					if (rc)
+						goto done;
+				}
+				i++;
+			}
+			start = i;
+			buflen = 0;
+		} else {
+			i++;
+			if (i == rqst->rq_nvec) {
+				// send out all remaining vecs and we are done
+				remaining_data_length -= buflen;
+				log_cifs_write(
+					"sending iov[] from start=%d i=%d "
+					"nvecs=%d remaining_data_length=%d\n",
+					start, i, i-start,
+					remaining_data_length);
+				rc = cifs_rdma_post_send_data(info, &iov[start],
+					i-start, remaining_data_length);
+				if (rc)
+					goto done;
+				break;
+			}
+		}
+		log_cifs_write("looping i=%d buflen=%d\n", i, buflen);
+	}
+
+	// now sending pages
+	for (i = 0; i < rqst->rq_npages; i++) {
+		buflen = (i == rqst->rq_npages-1) ?
+			rqst->rq_tailsz : rqst->rq_pagesz;
+		nvecs = (buflen+max_iov_size-1)/max_iov_size;
+		log_cifs_write("sending pages buflen=%d nvecs=%d\n",
+			buflen, nvecs);
+		for (j = 0; j < nvecs; j++) {
+			size = max_iov_size;
+			if (j == nvecs-1)
+				size = buflen - j*max_iov_size;
+			remaining_data_length -= size;
+			log_cifs_write("sending pages i=%d offset=%d size=%d"
+				" remaining_data_length=%d\n",
+				i, j*max_iov_size, size, remaining_data_length);
+			rc = cifs_rdma_post_send_page(
+				info, rqst->rq_pages[i], j*max_iov_size,
+				size, remaining_data_length);
+			if (rc)
+				goto done;
+		}
+	}
+
+done:
+	kfree(iov);
+	wait_event(info->wait_send_pending,
+		atomic_read(&info->send_pending) == 0);
+	return rc;
+}
diff --git a/fs/cifs/cifsrdma.h b/fs/cifs/cifsrdma.h
index 9618e0b..90746a4 100644
--- a/fs/cifs/cifsrdma.h
+++ b/fs/cifs/cifsrdma.h
@@ -73,6 +73,9 @@ struct cifs_rdma_info {
 	atomic_t receive_credits;
 	atomic_t receive_credit_target;
 
+	atomic_t send_pending;
+	wait_queue_head_t wait_send_pending;
+
 	atomic_t recv_pending;
 	wait_queue_head_t wait_recv_pending;
 
@@ -195,4 +198,6 @@ struct cifs_rdma_response {
 // Create a SMBDirect session
 struct cifs_rdma_info* cifs_create_rdma_session(
 	struct TCP_Server_Info *server, struct sockaddr *dstaddr);
+
+int cifs_rdma_write(struct cifs_rdma_info *rdma, struct smb_rqst *rqst);
 #endif
-- 
2.7.4
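For reviewers checking the splitting logic above: both the oversized-iov
path and the page path cut a buffer into
(buflen + max_iov_size - 1) / max_iov_size pieces, with the remainder
going to the last piece. A standalone user-space sketch of the same
arithmetic, using made-up sizes rather than real SMBDirect limits:

	/* demo of the round-up/remainder split used in cifs_rdma_write */
	#include <stdio.h>

	int main(void)
	{
		int max_iov_size = 8192;   /* assumed per-send payload limit */
		int buflen = 20000;        /* one oversized iov or page */
		int nvecs = (buflen + max_iov_size - 1) / max_iov_size;

		for (int j = 0; j < nvecs; j++) {
			int size = max_iov_size;

			if (j == nvecs - 1)   /* last piece takes the remainder */
				size = buflen - j * max_iov_size;
			printf("piece %d: offset=%d size=%d\n",
			       j, j * max_iov_size, size);
		}
		return 0;
	}

With these numbers the buffer goes out as three sends of 8192, 8192 and
3616 bytes; remaining_data_length in the patch decreases by the same
sizes so the receiver can tell when the fragmented payload is complete.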