The current practice of waiting for cache updates by queueing the whole request to be retried has (at least) two problems. 1/ With NFSv4, requests can be quite complex and re-trying a whole request when a later part fails should only be a last resort, not a normal practice. 2/ Large requests, and in particular any 'write' request, will not be queued by the current code and doing so would be undesirable. In many cases only a very short wait is needed before the cache gets valid data. So, providing the underlying transport permits it by setting ->thread_wait, arrange to wait briefly for an upcall to be completed (as reflected in the clearing of CACHE_PENDING). If the short wait was not long enough and CACHE_PENDING is still set, fall back on the old approach. The 'thread_wait' value is set to 5 seconds when there are spare threads, and 1 second when there are no spare threads. These values are probably much higher than needed, but will ensure some forward progress. Note that as we only request an update for a non-valid item, and as non-valid items are updated in place it is extremely unlikely that cache_check will return -ETIMEDOUT. Normally cache_defer_req will sleep for a short while and then find that the item is_valid. Signed-off-by: NeilBrown <neilb@xxxxxxx> --- include/linux/sunrpc/cache.h | 3 +++ net/sunrpc/cache.c | 47 +++++++++++++++++++++++++++++++++++++++++- net/sunrpc/svc_xprt.c | 11 ++++++++++ 3 files changed, 60 insertions(+), 1 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 7bf3e84..61f521f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -125,6 +125,9 @@ struct cache_detail { */ struct cache_req { struct cache_deferred_req *(*defer)(struct cache_req *req); + int thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. 
+ */ }; /* this must be embedded in a deferred_request that is being * delayed awaiting cache-fill diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2b06410..2fdd66b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -509,10 +509,22 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; +struct thread_deferred_req { + struct cache_deferred_req handle; + struct completion completion; +}; +static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +{ + struct thread_deferred_req *dr = + container_of(dreq, struct thread_deferred_req, handle); + complete(&dr->completion); +} + static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq, *discard; int hash = DFR_HASH(item); + struct thread_deferred_req sleeper; if (cache_defer_cnt >= DFR_MAX) { /* too much in the cache, randomly drop this one, @@ -521,7 +533,15 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) if (net_random()&1) return -ENOMEM; } - dreq = req->defer(req); + if (req->thread_wait) { + dreq = &sleeper.handle; + sleeper.completion = + COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); + dreq->revisit = cache_restart_thread; + } else + dreq = req->defer(req); + + retry: if (dreq == NULL) return -ENOMEM; @@ -555,6 +575,31 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); return -EAGAIN; } + + if (dreq == &sleeper.handle) { + wait_for_completion_interruptible_timeout( + &sleeper.completion, req->thread_wait); + spin_lock(&cache_defer_lock); + if (!list_empty(&sleeper.handle.hash)) { + list_del_init(&sleeper.handle.recent); + list_del_init(&sleeper.handle.hash); + cache_defer_cnt--; + } + spin_unlock(&cache_defer_lock); + if (test_bit(CACHE_PENDING, &item->flags)) { + /* item is still pending, try request + * deferral + */ + dreq = req->defer(req); + goto retry; + } + /* 
only return success if we actually deferred the + * request. In this case we waited until it was + * answered so no deferral has happened - rather + * an answer already exists. + */ + return -EEXIST; + } return 0; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index cbc0849..8ff6840 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -651,6 +651,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return -EINTR; + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + rqstp->rq_chandle.thread_wait = 5*HZ; + spin_lock_bh(&pool->sp_lock); xprt = svc_xprt_dequeue(pool); if (xprt) { @@ -658,6 +663,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + + /* As there is a shortage of threads and this request + * had to be queued, don't allow the thread to wait so + * long for cache updates. + */ + rqstp->rq_chandle.thread_wait = 1*HZ; } else { /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html