Use FRMR when registering client memory if the memory registration
strategy is FRMR.
Signed-off-by: Tom Tucker <tom@xxxxxxxxxxxxxxxxxxxxx>
---
net/sunrpc/xprtrdma/verbs.c | 296 ++++++++++++++++++++++++++++++++
++++++-----
1 files changed, 263 insertions(+), 33 deletions(-)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/
verbs.c
index 8ea283e..ed401f9 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -423,6 +423,7 @@ rpcrdma_clean_cq(struct ib_cq *cq)
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr,
int memreg)
{
+ struct ib_device_attr devattr;
int rc;
struct rpcrdma_ia *ia = &xprt->rx_ia;
@@ -443,6 +444,49 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt,
struct sockaddr *addr, int memreg)
}
/*
+ * Query the device to determine if the requested memory
+ * registration strategy is supported. If it isnt't set the
+ * strategy to a globally supported model.
+ */
+ rc = ib_query_device(ia->ri_id->device, &devattr);
+ if (rc) {
+ dprintk("RPC: %s: ib_query_device failed %d\n",
+ __func__, rc);
+ goto out2;
+ }
+ switch (memreg) {
+ case RPCRDMA_MEMWINDOWS:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
+ dprintk("RPC: %s: MEMWINDOWS specified but not "
+ "supported, using RPCRDMA_ALLPHYSICAL",
+ __func__);
+ memreg = RPCRDMA_ALLPHYSICAL;
+ }
+ break;
+ case RPCRDMA_MTHCAFMR:
+ if (!ia->ri_id->device->alloc_fmr) {
+ dprintk("RPC: %s: MTHCAFMR specified but not "
+ "supported, using RPCRDMA_ALLPHYSICAL",
+ __func__);
+ memreg = RPCRDMA_ALLPHYSICAL;
+ }
+ break;
+ case RPCRDMA_FASTREG:
+ /* Requires both fast reg and global dma lkey */
+ if ((0 ==
+ (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) ||
+ (0 == (devattr.device_cap_flags &
IB_DEVICE_LOCAL_DMA_LKEY))) {
+ dprintk("RPC: %s: FASTREG specified but not "
+ "supported, using RPCRDMA_ALLPHYSICAL",
+ __func__);
+ memreg = RPCRDMA_ALLPHYSICAL;
+ }
+ break;
+ }
+ dprintk("RPC: memory registration strategy is %d\n", memreg);
+
+ /*
* Optionally obtain an underlying physical identity mapping in
* order to do a memory window-based bind. This base registration
* is protected from remote access - that is enabled only by
binding
@@ -450,7 +494,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt,
struct sockaddr *addr, int memreg)
* revoked after the corresponding completion similar to a storage
* adapter.
*/
- if (memreg > RPCRDMA_REGISTER) {
+ if ((memreg > RPCRDMA_REGISTER) && (memreg != RPCRDMA_FASTREG)) {
int mem_priv = IB_ACCESS_LOCAL_WRITE;
switch (memreg) {
#if RPCRDMA_PERSISTENT_REGISTRATION
@@ -475,7 +519,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt,
struct sockaddr *addr, int memreg)
memreg = RPCRDMA_REGISTER;
ia->ri_bind_mem = NULL;
}
+ ia->ri_dma_lkey = ia->ri_bind_mem->lkey;
}
+ if (memreg == RPCRDMA_FASTREG)
+ ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
/* Else will do memory reg/dereg for each chunk */
ia->ri_memreg_strategy = memreg;
@@ -541,6 +588,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FASTREG:
+ /* Add room for fast reg and invalidate */
+ ep->rep_attr.cap.max_send_wr *= 3;
+ if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
+ return -EINVAL;
+ break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
/* Add room for mw_binds+unbinds - overkill! */
@@ -623,6 +676,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct
rpcrdma_ia *ia,
break;
case RPCRDMA_MTHCAFMR:
case RPCRDMA_REGISTER:
+ case RPCRDMA_FASTREG:
ep->rep_remote_cma.responder_resources = cdata->max_requests *
(RPCRDMA_MAX_DATA_SEGS / 8);
break;
@@ -866,6 +920,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer
*buf, struct rpcrdma_ep *ep,
buf->rb_max_requests = cdata->max_requests;
spin_lock_init(&buf->rb_lock);
+ spin_lock_init(&buf->rb_frs_lock);
atomic_set(&buf->rb_credits, 1);
/* Need to allocate:
@@ -874,6 +929,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer
*buf, struct rpcrdma_ep *ep,
* 3. array of struct rpcrdma_rep for replies
* 4. padding, if any
* 5. mw's, if any
+ * 6. frmr's, if any
* Send/recv buffers in req/rep need to be registered
*/
@@ -881,6 +937,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer
*buf, struct rpcrdma_ep *ep,
(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
len += cdata->padding;
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FASTREG:
+ len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+ sizeof(struct rpcrdma_frmr);
+ break;
case RPCRDMA_MTHCAFMR:
/* TBD we are perhaps overallocating here */
len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
@@ -895,7 +955,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer
*buf, struct rpcrdma_ep *ep,
break;
}
- /* allocate 1, 4 and 5 in one shot */
+ /* allocate 1, 4, 5 and 6 in one shot */
p = kzalloc(len, GFP_KERNEL);
if (p == NULL) {
dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
@@ -927,7 +987,38 @@ rpcrdma_buffer_create(struct rpcrdma_buffer
*buf, struct rpcrdma_ep *ep,
* and also reduce unbind-to-bind collision.
*/
INIT_LIST_HEAD(&buf->rb_mws);
+ INIT_LIST_HEAD(&buf->rb_frs);
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FASTREG:
+ {