From: Martin Wilck <martin.wilck@xxxxxxxxxxxxxx> Date: Fri, 30 Oct 2009 16:35:19 +1100 If {udp,tcp}_slot_table_entries exceeds 111 (on x86-64), the allocated slot table exceeds 32K and so requires an order-4 allocation. As 4 exceeds PAGE_ALLOC_COSTLY_ORDER (==3), these are more likely to fail, so the chance of a mount failing due to low or fragmented memory goes up significantly. This is particularly a problem for autofs which can try a mount at any time and does not retry in the face of failure. There is no really need for the slots to be allocated in a single slab of memory. Using a kmemcache, particularly when fronted by a mempool to allow allocation to usually succeed in atomic context, avoid the need for a large allocation, and also reduces memory waste in cases where not all of the slots are required. This patch replaces the single kmalloc per client with a mempool shared among all clients. Signed-off-by: NeilBrown <neilb@xxxxxxx> --- The only thing that I'm not completely confident about in this patch is #define RPC_RQST_POOLSIZE (128) simply because it is an arbitrary number. This allocations will only come from this pool when a GFP_ATOMIC alloc fails, so memory has to be tight. Allowing a further 128 requests which might serve to free up memory is probably enough. If/when the swap-over-nfs gets upstream it will need to handle this memory pool as well ofcourse. NeilBrown include/linux/sunrpc/sched.h | 2 ++ include/linux/sunrpc/xprt.h | 4 +--- net/sunrpc/sched.c | 36 ++++++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 31 ++++++++++++------------------- net/sunrpc/xprtsock.c | 11 ----------- 5 files changed, 51 insertions(+), 33 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 4010977..4442b6a 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -208,6 +208,8 @@ struct rpc_wait_queue { /* * Function prototypes */ +struct rpc_rqst *rpc_alloc_rqst(struct rpc_task *task); +void rpc_free_rqst(struct rpc_rqst *req); struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 6f9457a..521a60b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -163,9 +163,8 @@ struct rpc_xprt { struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ - struct list_head free; /* free slots */ - struct rpc_rqst * slot; /* slot table storage */ unsigned int max_reqs; /* total slots */ + atomic_t busy_reqs; /* busy slots */ unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ resvport : 1; /* use a reserved port */ @@ -193,7 +192,6 @@ struct rpc_xprt { * Send stuff */ spinlock_t transport_lock; /* lock transport info */ - spinlock_t reserve_lock; /* lock slot table */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cef74ba..89d6fe6 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -34,10 +34,13 @@ #define RPC_BUFFER_MAXSIZE (2048) #define RPC_BUFFER_POOLSIZE (8) #define RPC_TASK_POOLSIZE (8) +#define RPC_RQST_POOLSIZE (128) static struct kmem_cache *rpc_task_slabp __read_mostly; static struct kmem_cache *rpc_buffer_slabp __read_mostly; +static struct kmem_cache *rpc_rqst_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; +static mempool_t *rpc_rqst_mempool __read_mostly; static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); @@ -831,6 +834,18 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } +struct rpc_rqst * +rpc_alloc_rqst(struct rpc_task *task) +{ + gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; + return (struct rpc_rqst *)mempool_alloc(rpc_rqst_mempool, gfp); +} + +void rpc_free_rqst(struct rpc_rqst *req) +{ + mempool_free(req, rpc_rqst_mempool); +} + /* * Create a new task for the specified client. */ @@ -993,11 +1008,22 @@ rpc_destroy_mempool(void) mempool_destroy(rpc_buffer_mempool); if (rpc_task_mempool) mempool_destroy(rpc_task_mempool); + if (rpc_rqst_mempool) + mempool_destroy(rpc_rqst_mempool); if (rpc_task_slabp) kmem_cache_destroy(rpc_task_slabp); if (rpc_buffer_slabp) kmem_cache_destroy(rpc_buffer_slabp); rpc_destroy_wait_queue(&delay_queue); + if (rpc_rqst_slabp) + kmem_cache_destroy(rpc_rqst_slabp); +} + +static void +init_rqst(void * foo) +{ + struct rpc_rqst *req = foo; + memset(req, 0, sizeof(*req)); } int @@ -1023,6 +1049,12 @@ rpc_init_mempool(void) NULL); if (!rpc_buffer_slabp) goto err_nomem; + rpc_rqst_slabp = kmem_cache_create("rpc_rqsts", + sizeof(struct rpc_rqst), + 0, SLAB_HWCACHE_ALIGN, + &init_rqst); + if (!rpc_rqst_slabp) + goto err_nomem; rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE, rpc_task_slabp); if (!rpc_task_mempool) @@ -1031,6 +1063,10 @@ rpc_init_mempool(void) rpc_buffer_slabp); if (!rpc_buffer_mempool) goto err_nomem; + rpc_rqst_mempool = mempool_create_slab_pool(RPC_RQST_POOLSIZE, + rpc_rqst_slabp); + if (!rpc_rqst_mempool) + goto err_nomem; return 0; err_nomem: rpc_destroy_mempool(); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index fd46d42..f9bfec3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -937,12 +937,14 @@ static inline void do_xprt_reserve(struct rpc_task *task) task->tk_status = 0; if (task->tk_rqstp) return; - if (!list_empty(&xprt->free)) { - struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); - list_del_init(&req->rq_list); - task->tk_rqstp = req; - xprt_request_init(task, xprt); - return; + if (atomic_read(&xprt->busy_reqs) < xprt->max_reqs) { + struct rpc_rqst *req = rpc_alloc_rqst(task); + if (req != NULL) { + atomic_inc(&xprt->busy_reqs); + task->tk_rqstp = req; + xprt_request_init(task, xprt); + return; + } } dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; @@ -959,12 +961,8 @@ static inline void do_xprt_reserve(struct rpc_task *task) */ void xprt_reserve(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; - task->tk_status = -EIO; - spin_lock(&xprt->reserve_lock); do_xprt_reserve(task); - spin_unlock(&xprt->reserve_lock); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) @@ -981,6 +979,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) { struct rpc_rqst *req = task->tk_rqstp; + INIT_LIST_HEAD(&req->rq_list); req->rq_timeout = task->tk_client->cl_timeout->to_initval; req->rq_task = task; req->rq_xprt = xprt; @@ -1039,10 +1038,9 @@ void xprt_release(struct rpc_task *task) dprintk("RPC: %5u release request %p\n", task->tk_pid, req); - spin_lock(&xprt->reserve_lock); - list_add(&req->rq_list, &xprt->free); + rpc_free_rqst(req); + atomic_dec(&xprt->busy_reqs); rpc_wake_up_next(&xprt->backlog); - spin_unlock(&xprt->reserve_lock); } /** @@ -1077,9 +1075,7 @@ found: kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); - spin_lock_init(&xprt->reserve_lock); - INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); #if defined(CONFIG_NFS_V4_1) spin_lock_init(&xprt->bc_pa_lock); @@ -1102,10 +1098,7 @@ found: rpc_init_wait_queue(&xprt->resend, "xprt_resend"); rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); - /* initialize free list */ - for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) - list_add(&req->rq_list, &xprt->free); - + atomic_set(&xprt->busy_reqs, 0); xprt_init_xid(xprt); dprintk("RPC: created transport %p with %u slots\n", xprt, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37c5475..a5d23cd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -806,7 +806,6 @@ static void xs_destroy(struct rpc_xprt *xprt) xs_close(xprt); xs_free_peer_addresses(xprt); - kfree(xprt->slot); kfree(xprt); module_put(THIS_MODULE); } @@ -2309,13 +2308,6 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, xprt = &new->xprt; xprt->max_reqs = slot_table_size; - xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) { - kfree(xprt); - dprintk("RPC: xs_setup_xprt: couldn't allocate slot " - "table\n"); - return ERR_PTR(-ENOMEM); - } memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; @@ -2397,7 +2389,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; - kfree(xprt->slot); kfree(xprt); return ERR_PTR(-EINVAL); } @@ -2472,7 +2463,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; - kfree(xprt->slot); kfree(xprt); return ERR_PTR(-EINVAL); } @@ -2554,7 +2544,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; - kfree(xprt->slot); kfree(xprt); return ERR_PTR(-EINVAL); } -- 1.6.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html