On Tue, Aug 17, 2010 at 03:15:09PM +1000, NeilBrown wrote: > If the 'wait_for_completion_interruptible_timeout' completes due > to interrupt or timeout, just before cache_revisit_request gets around > to calling ->revisit and thus the completion, we have a race with bad > consequences. > > cache_defer_req will see that sleeper.handle.hash is empty (because > cache_revisit_request has already done the list_del_init), and that > CACHE_PENDING is clear, and so will exit that function leaving any > local variables such as 'sleeper' undefined. > > Meanwhile cache_revisit_request could delete sleeper.handle.recent > from the 'pending' list, and call sleeper.handle.revisit, any of which > could have new garbage values and so could trigger a BUG. Thanks, this looks correct to me! I wish it were simpler, but don't see how right now. Some superficial cleanup might help me at least--see below (untested), which attempts to make obvious the first-try-sleeping-then-try-deferral logic, by turning cache_defer_req into basically: if (req->thread_wait) { ret = cache_wait_req(req, item); if (ret != -ETIMEDOUT) return ret; } dreq = req->defer(req); if (dreq == NULL) return -ENOMEM; ... I also wonder whether there would be a way to make most of this just another ->defer() method, so that we wouldn't need to switch on req->thread_wait here at all. But maybe there's not a nice way to do that. (Certainly I prefer your approach to what the idmap code was doing.) --b. 
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2c5297f..da872f9 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -509,60 +509,39 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; -struct thread_deferred_req { - struct cache_deferred_req handle; - struct completion completion; -}; -static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +static void __unhash_deferred_req(struct cache_deferred_req *dreq) { - struct thread_deferred_req *dr = - container_of(dreq, struct thread_deferred_req, handle); - complete(&dr->completion); + list_del_init(&dreq->recent); + list_del_init(&dreq->hash); + cache_defer_cnt--; } -static int cache_defer_req(struct cache_req *req, struct cache_head *item) +static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item) { - struct cache_deferred_req *dreq, *discard; int hash = DFR_HASH(item); - struct thread_deferred_req sleeper; - if (cache_defer_cnt >= DFR_MAX) { - /* too much in the cache, randomly drop this one, - * or continue and drop the oldest below - */ - if (net_random()&1) - return -ENOMEM; - } - if (req->thread_wait) { - dreq = &sleeper.handle; - sleeper.completion = - COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); - dreq->revisit = cache_restart_thread; - } else - dreq = req->defer(req); + list_add(&dreq->recent, &cache_defer_list); + if (cache_defer_hash[hash].next == NULL) + INIT_LIST_HEAD(&cache_defer_hash[hash]); + list_add(&dreq->hash, &cache_defer_hash[hash]); +} - retry: - if (dreq == NULL) - return -ENOMEM; +static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item) +{ + struct cache_deferred_req *discard; dreq->item = item; spin_lock(&cache_defer_lock); - list_add(&dreq->recent, &cache_defer_list); - - if (cache_defer_hash[hash].next == NULL) - INIT_LIST_HEAD(&cache_defer_hash[hash]); - list_add(&dreq->hash, &cache_defer_hash[hash]); + 
__hash_deferred_req(dreq, item); /* it is in, now maybe clean up */ discard = NULL; if (++cache_defer_cnt > DFR_MAX) { discard = list_entry(cache_defer_list.prev, struct cache_deferred_req, recent); - list_del_init(&discard->recent); - list_del_init(&discard->hash); - cache_defer_cnt--; + __unhash_deferred_req(discard); } spin_unlock(&cache_defer_lock); @@ -575,44 +554,88 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); return -EAGAIN; } + return 0; +} - if (dreq == &sleeper.handle) { - if (wait_for_completion_interruptible_timeout( - &sleeper.completion, req->thread_wait) <= 0) { - /* The completion wasn't completed, so we need - * to clean up - */ - spin_lock(&cache_defer_lock); - if (!list_empty(&sleeper.handle.hash)) { - list_del_init(&sleeper.handle.recent); - list_del_init(&sleeper.handle.hash); - cache_defer_cnt--; - spin_unlock(&cache_defer_lock); - } else { - /* cache_revisit_request already removed - * this from the hash table, but hasn't - * called ->revisit yet. It will very soon - * and we need to wait for it. 
- */ - spin_unlock(&cache_defer_lock); - wait_for_completion(&sleeper.completion); - } - } - if (test_bit(CACHE_PENDING, &item->flags)) { - /* item is still pending, try request - * deferral +struct thread_deferred_req { + struct cache_deferred_req handle; + struct completion completion; +}; + +static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +{ + struct thread_deferred_req *dr = + container_of(dreq, struct thread_deferred_req, handle); + complete(&dr->completion); +} + +static int cache_wait_req(struct cache_req *req, struct cache_head *item) +{ + struct thread_deferred_req sleeper; + struct cache_deferred_req *dreq = &sleeper.handle; + int ret; + + sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); + dreq->revisit = cache_restart_thread; + + ret = setup_deferral(dreq, item); + if (ret) + return ret; + + if (wait_for_completion_interruptible_timeout( + &sleeper.completion, req->thread_wait) <= 0) { + /* The completion wasn't completed, so we need + * to clean up + */ + spin_lock(&cache_defer_lock); + if (!list_empty(&sleeper.handle.hash)) { + __unhash_deferred_req(&sleeper.handle); + spin_unlock(&cache_defer_lock); + } else { + /* cache_revisit_request already removed + * this from the hash table, but hasn't + * called ->revisit yet. It will very soon + * and we need to wait for it. */ - dreq = req->defer(req); - goto retry; + spin_unlock(&cache_defer_lock); + wait_for_completion(&sleeper.completion); } - /* only return success if we actually deferred the - * request. In this case we waited until it was - * answered so no deferral has happened - rather - * an answer already exists. + } + if (test_bit(CACHE_PENDING, &item->flags)) { + /* item is still pending, try request + * deferral */ - return -EEXIST; + return -ETIMEDOUT; } - return 0; + /* only return success if we actually deferred the + * request. 
In this case we waited until it was + * answered so no deferral has happened - rather + * an answer already exists. + */ + return -EEXIST; +} + +static int cache_defer_req(struct cache_req *req, struct cache_head *item) +{ + struct cache_deferred_req *dreq; + int ret; + + if (cache_defer_cnt >= DFR_MAX) { + /* too much in the cache, randomly drop this one, + * or continue and drop the oldest + */ + if (net_random()&1) + return -ENOMEM; + } + if (req->thread_wait) { + ret = cache_wait_req(req, item); + if (ret != -ETIMEDOUT) + return ret; + } + dreq = req->defer(req); + if (dreq == NULL) + return -ENOMEM; + return setup_deferral(dreq, item); } static void cache_revisit_request(struct cache_head *item) @@ -632,9 +655,8 @@ static void cache_revisit_request(struct cache_head *item) dreq = list_entry(lp, struct cache_deferred_req, hash); lp = lp->next; if (dreq->item == item) { - list_del_init(&dreq->hash); - list_move(&dreq->recent, &pending); - cache_defer_cnt--; + __unhash_deferred_req(dreq); + list_add(&dreq->recent, &pending); } } } @@ -657,11 +679,10 @@ void cache_clean_deferred(void *owner) spin_lock(&cache_defer_lock); list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { - if (dreq->owner == owner) { - list_del_init(&dreq->hash); - list_move(&dreq->recent, &pending); - cache_defer_cnt--; - } + if (dreq->owner == owner) { + __unhash_deferred_req(dreq); + list_add(&dreq->recent, &pending); + } } spin_unlock(&cache_defer_lock); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8ff6840..95fc3e8 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -665,7 +665,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); /* As there is a shortage of threads and this request - * had to be queue, don't allow the thread to wait so + * had to be queued, don't allow the thread to wait so * long for cache updates. 
*/ rqstp->rq_chandle.thread_wait = 1*HZ; -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html