Re: [PATCH] sunrpc: replace large table of slots with mempool

On Oct 30, 2009, at 1:53 AM, Neil Brown wrote:
From: Martin Wilck <martin.wilck@xxxxxxxxxxxxxx>
Date: Fri, 30 Oct 2009 16:35:19 +1100

If {udp,tcp}_slot_table_entries exceeds 111 (on x86-64),
the allocated slot table exceeds 32K and so requires an
order-4 allocation.
As 4 exceeds PAGE_ALLOC_COSTLY_ORDER (==3), these are more
likely to fail, so the chance of a mount failing due to low or
fragmented memory goes up significantly.
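
(For illustration, a rough sketch of the arithmetic behind that
threshold; the ~294-byte sizeof(struct rpc_rqst) used here is an
assumption inferred from the 111-entry figure above, not measured,
and depends on kernel version and config.)

#include <stdio.h>

#define PAGE_SIZE		4096
#define PAGE_ALLOC_COSTLY_ORDER	3
#define RQST_SIZE		294	/* assumed approx. sizeof(struct rpc_rqst), x86-64 */

/* smallest order such that (PAGE_SIZE << order) >= bytes */
static int alloc_order(size_t bytes)
{
	int order = 0;

	while (((size_t)PAGE_SIZE << order) < bytes)
		order++;
	return order;
}

int main(void)
{
	unsigned int entries[] = { 16, 111, 112, 128 };
	unsigned int i;

	for (i = 0; i < sizeof(entries) / sizeof(entries[0]); i++) {
		size_t bytes = (size_t)entries[i] * RQST_SIZE;
		int order = alloc_order(bytes);

		printf("%3u slots -> %6zu bytes -> order-%d%s\n",
		       entries[i], bytes, order,
		       order > PAGE_ALLOC_COSTLY_ORDER ? " (costly)" : "");
	}
	return 0;
}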

This is particularly a problem for autofs which can try a mount
at any time and does not retry in the face of failure.

(aye, and that could be addressed too, separately)

There is no real need for the slots to be allocated in a single
slab of memory.  Using a kmem_cache, particularly when fronted by
a mempool so that allocation usually succeeds even in atomic context,
avoids the need for a large allocation, and also reduces memory waste
in cases where not all of the slots are required.

This patch replaces the single kmalloc per client with a mempool
shared among all clients.
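
(For context, a minimal sketch of the kmem_cache-fronted-by-mempool
pattern this relies on.  The example_* names are hypothetical
placeholders; the real ones in the patch below are rpc_rqst_slabp and
rpc_rqst_mempool.)

#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/sunrpc/xprt.h>

static struct kmem_cache *example_slab;
static mempool_t *example_pool;

static int example_pool_init(void)
{
	/* Per-object slab cache: no large contiguous table is needed. */
	example_slab = kmem_cache_create("example_rqsts",
					 sizeof(struct rpc_rqst),
					 0, SLAB_HWCACHE_ALIGN, NULL);
	if (!example_slab)
		return -ENOMEM;

	/* The mempool keeps a small reserve of preallocated objects so
	 * an allocation can still succeed when the slab allocator fails
	 * under memory pressure. */
	example_pool = mempool_create_slab_pool(16, example_slab);
	if (!example_pool) {
		kmem_cache_destroy(example_slab);
		return -ENOMEM;
	}
	return 0;
}

static struct rpc_rqst *example_alloc(void)
{
	/* GFP_NOWAIT: do not block; mempool_alloc() falls back to the
	 * reserve if kmem_cache_alloc() cannot satisfy the request. */
	return mempool_alloc(example_pool, GFP_NOWAIT);
}

static void example_free(struct rpc_rqst *req)
{
	/* Returns the object to the reserve if it is depleted,
	 * otherwise back to the slab cache. */
	mempool_free(req, example_pool);
}

The patch below applies this pattern in net/sunrpc/sched.c, with
RPC_RQST_POOLSIZE as the size of the reserve.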

I've thought for many years that getting rid of the slot tables would be a good idea.

One concern I have, though, is that this shared mempool would be a contention point for all RPC transports; especially bothersome on SMP/NUMA?

Signed-off-by: NeilBrown <neilb@xxxxxxx>
---

The only thing that I'm not completely confident about in this patch
is
  #define RPC_RQST_POOLSIZE	(128)
simply because it is an arbitrary number.  Allocations will only
come from this pool when a GFP_ATOMIC alloc fails, so memory has to
be tight.  Allowing a further 128 requests, which might serve to
free up memory, is probably enough.

If/when swap-over-NFS gets upstream, it will need to handle this
memory pool as well, of course.

NeilBrown


include/linux/sunrpc/sched.h |    2 ++
include/linux/sunrpc/xprt.h  |    4 +---
net/sunrpc/sched.c           |   36 ++++++++++++++++++++++++++++++++++++
net/sunrpc/xprt.c            |   31 ++++++++++++-------------------
net/sunrpc/xprtsock.c        |   11 -----------
5 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 4010977..4442b6a 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -208,6 +208,8 @@ struct rpc_wait_queue {
/*
 * Function prototypes
 */
+struct rpc_rqst *rpc_alloc_rqst(struct rpc_task *task);
+void rpc_free_rqst(struct rpc_rqst *req);
struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 6f9457a..521a60b 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -163,9 +163,8 @@ struct rpc_xprt {
	struct rpc_wait_queue	resend;		/* requests waiting to resend */
	struct rpc_wait_queue	pending;	/* requests in flight */
	struct rpc_wait_queue	backlog;	/* waiting for slot */
-	struct list_head	free;		/* free slots */
-	struct rpc_rqst *	slot;		/* slot table storage */
	unsigned int		max_reqs;	/* total slots */
+	atomic_t		busy_reqs;      /* busy slots */
	unsigned long		state;		/* transport state */
	unsigned char		shutdown   : 1,	/* being shut down */
				resvport   : 1; /* use a reserved port */
@@ -193,7 +192,6 @@ struct rpc_xprt {
	 * Send stuff
	 */
	spinlock_t		transport_lock;	/* lock transport info */
-	spinlock_t		reserve_lock;	/* lock slot table */
	u32			xid;		/* Next XID value to use */
	struct rpc_task *	snd_task;	/* Task blocked in send */
	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cef74ba..89d6fe6 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -34,10 +34,13 @@
#define RPC_BUFFER_MAXSIZE	(2048)
#define RPC_BUFFER_POOLSIZE	(8)
#define RPC_TASK_POOLSIZE	(8)
+#define RPC_RQST_POOLSIZE	(128)
static struct kmem_cache	*rpc_task_slabp __read_mostly;
static struct kmem_cache	*rpc_buffer_slabp __read_mostly;
+static struct kmem_cache	*rpc_rqst_slabp __read_mostly;
static mempool_t	*rpc_task_mempool __read_mostly;
static mempool_t	*rpc_buffer_mempool __read_mostly;
+static mempool_t	*rpc_rqst_mempool __read_mostly;

static void			rpc_async_schedule(struct work_struct *);
static void			 rpc_release_task(struct rpc_task *task);
@@ -831,6 +834,18 @@ rpc_alloc_task(void)
	return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
}

+struct rpc_rqst *
+rpc_alloc_rqst(struct rpc_task *task)
+{
+	gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
+	return (struct rpc_rqst *)mempool_alloc(rpc_rqst_mempool, gfp);
+}
+
+void rpc_free_rqst(struct rpc_rqst *req)
+{
+	mempool_free(req, rpc_rqst_mempool);
+}
+
/*
 * Create a new task for the specified client.
 */
@@ -993,11 +1008,22 @@ rpc_destroy_mempool(void)
		mempool_destroy(rpc_buffer_mempool);
	if (rpc_task_mempool)
		mempool_destroy(rpc_task_mempool);
+	if (rpc_rqst_mempool)
+		mempool_destroy(rpc_rqst_mempool);
	if (rpc_task_slabp)
		kmem_cache_destroy(rpc_task_slabp);
	if (rpc_buffer_slabp)
		kmem_cache_destroy(rpc_buffer_slabp);
	rpc_destroy_wait_queue(&delay_queue);
+	if (rpc_rqst_slabp)
+		kmem_cache_destroy(rpc_rqst_slabp);
+}
+
+static void
+init_rqst(void * foo)
+{
+	struct rpc_rqst *req = foo;
+	memset(req, 0, sizeof(*req));
}

int
@@ -1023,6 +1049,12 @@ rpc_init_mempool(void)
					     NULL);
	if (!rpc_buffer_slabp)
		goto err_nomem;
+	rpc_rqst_slabp = kmem_cache_create("rpc_rqsts",
+					     sizeof(struct rpc_rqst),
+					     0, SLAB_HWCACHE_ALIGN,
+					     &init_rqst);
+	if (!rpc_rqst_slabp)
+		goto err_nomem;
	rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE,
						    rpc_task_slabp);
	if (!rpc_task_mempool)
@@ -1031,6 +1063,10 @@ rpc_init_mempool(void)
						      rpc_buffer_slabp);
	if (!rpc_buffer_mempool)
		goto err_nomem;
+	rpc_rqst_mempool = mempool_create_slab_pool(RPC_RQST_POOLSIZE,
+						    rpc_rqst_slabp);
+	if (!rpc_rqst_mempool)
+		goto err_nomem;
	return 0;
err_nomem:
	rpc_destroy_mempool();
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index fd46d42..f9bfec3 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -937,12 +937,14 @@ static inline void do_xprt_reserve(struct rpc_task *task)
	task->tk_status = 0;
	if (task->tk_rqstp)
		return;
-	if (!list_empty(&xprt->free)) {
-		struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
-		list_del_init(&req->rq_list);
-		task->tk_rqstp = req;
-		xprt_request_init(task, xprt);
-		return;
+	if (atomic_read(&xprt->busy_reqs) < xprt->max_reqs) {
+		struct rpc_rqst	*req = rpc_alloc_rqst(task);
+		if (req != NULL) {
+			atomic_inc(&xprt->busy_reqs);
+			task->tk_rqstp = req;
+			xprt_request_init(task, xprt);
+			return;
+		}
	}
	dprintk("RPC:       waiting for request slot\n");
	task->tk_status = -EAGAIN;
@@ -959,12 +961,8 @@ static inline void do_xprt_reserve(struct rpc_task *task)
 */
void xprt_reserve(struct rpc_task *task)
{
-	struct rpc_xprt	*xprt = task->tk_xprt;
-
	task->tk_status = -EIO;
-	spin_lock(&xprt->reserve_lock);
	do_xprt_reserve(task);
-	spin_unlock(&xprt->reserve_lock);
}

static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -981,6 +979,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
{
	struct rpc_rqst	*req = task->tk_rqstp;

+	INIT_LIST_HEAD(&req->rq_list);
	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
	req->rq_task	= task;
	req->rq_xprt    = xprt;
@@ -1039,10 +1038,9 @@ void xprt_release(struct rpc_task *task)

	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);

-	spin_lock(&xprt->reserve_lock);
-	list_add(&req->rq_list, &xprt->free);
+	rpc_free_rqst(req);
+	atomic_dec(&xprt->busy_reqs);
	rpc_wake_up_next(&xprt->backlog);
-	spin_unlock(&xprt->reserve_lock);
}

/**
@@ -1077,9 +1075,7 @@ found:

	kref_init(&xprt->kref);
	spin_lock_init(&xprt->transport_lock);
-	spin_lock_init(&xprt->reserve_lock);

-	INIT_LIST_HEAD(&xprt->free);
	INIT_LIST_HEAD(&xprt->recv);
#if defined(CONFIG_NFS_V4_1)
	spin_lock_init(&xprt->bc_pa_lock);
@@ -1102,10 +1098,7 @@ found:
	rpc_init_wait_queue(&xprt->resend, "xprt_resend");
	rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");

-	/* initialize free list */
-	for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
-		list_add(&req->rq_list, &xprt->free);
-
+	atomic_set(&xprt->busy_reqs, 0);
	xprt_init_xid(xprt);

	dprintk("RPC:       created transport %p with %u slots\n", xprt,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 37c5475..a5d23cd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -806,7 +806,6 @@ static void xs_destroy(struct rpc_xprt *xprt)

	xs_close(xprt);
	xs_free_peer_addresses(xprt);
-	kfree(xprt->slot);
	kfree(xprt);
	module_put(THIS_MODULE);
}
@@ -2309,13 +2308,6 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
	xprt = &new->xprt;

	xprt->max_reqs = slot_table_size;
-	xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
-	if (xprt->slot == NULL) {
-		kfree(xprt);
-		dprintk("RPC:       xs_setup_xprt: couldn't allocate slot "
-				"table\n");
-		return ERR_PTR(-ENOMEM);
-	}

	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
	xprt->addrlen = args->addrlen;
@@ -2397,7 +2389,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
	if (try_module_get(THIS_MODULE))
		return xprt;

-	kfree(xprt->slot);
	kfree(xprt);
	return ERR_PTR(-EINVAL);
}
@@ -2472,7 +2463,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
	if (try_module_get(THIS_MODULE))
		return xprt;

-	kfree(xprt->slot);
	kfree(xprt);
	return ERR_PTR(-EINVAL);
}
@@ -2554,7 +2544,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)

	if (try_module_get(THIS_MODULE))
		return xprt;
-	kfree(xprt->slot);
	kfree(xprt);
	return ERR_PTR(-EINVAL);
}
--
1.6.4.3


--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



