On Thu, Sep 23, 2010 at 01:00:02PM +1000, Neil Brown wrote: > On Wed, 22 Sep 2010 13:50:27 -0400 > "J. Bruce Fields" <bfields@xxxxxxxxxxxx> wrote: > > > On Wed, Sep 22, 2010 at 12:55:06PM +1000, NeilBrown wrote: > > > If we set up to wait for a cache item to be filled in, and then find > > > that it is no longer pending, it could be that some other thread is > > > in 'cache_revisit_request' and has moved our request to its 'pending' list. > > > So when our setup_deferral calls cache_revisit_request it will find nothing to > > > put on the pending list, and do nothing. > > > > > > We then return from cache_wait_req, thus leaving the 'sleeper' > > > on-stack structure open to being corrupted by subsequent stack usage. > > > > > > However that 'sleeper' could still be on the 'pending' list that the > > > other thread is looking at and so any corruption could cause it to behave badly. > > > > > > To avoid this race we simply take the same path as if the > > > 'wait_for_completion_interruptible_timeout' was interrupted and if the > > > sleeper is no longer on the list (which it won't be) we wait on the > > > completion - which will ensure that any other cache_revisit_request > > > will have let go of the sleeper. > > > > OK, but I don't think we need that first CACHE_PENDING check in > > setup_deferral at all. Best just to ignore it?: > > > > diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c > > index d789dfc..82804b4 100644 > > --- a/net/sunrpc/cache.c > > +++ b/net/sunrpc/cache.c > > @@ -567,10 +567,9 @@ static int cache_wait_req(struct cache_req *req, struct cache_head *item, int ti > > sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); > > dreq->revisit = cache_restart_thread; > > > > - ret = setup_deferral(dreq, item); > > + setup_deferral(dreq, item); > > > > - if (ret || > > - wait_for_completion_interruptible_timeout( > > + if (wait_for_completion_interruptible_timeout( > > &sleeper.completion, timeout) <= 0) { > > /* The completion wasn't completed, so we need > > * to clean up > > > > Then it's obvious that we always wait for completion, and that we fill > > the basic requirement to avoid corruption here. > > > > (Which is, in more detail: cache_wait_req must not return while dreq is > > still reachable from anywhere else. Since dreq is reachable from > > elsewhere only as long as it is hashed, and since anyone else that might > > unhash it will call our revisit (which unconditionally calls complete), > > then forget about it, it suffices for cache_wait_req to either wait for > > completion, or unhash dreq *itself* (before someone else does).) > > > > Also we don't need the PENDING check to ensure we wait only when > > necessary--we only wait while dreq is hashed, and as long as we're > > hashed anyone clearing PENDING will also end up doing the complete(). > > > > In the deferral case it's maybe a useful optimization if it avoids > > an unnecessary drop sometimes. Here it doesn't help. > > > > --b. > > I like your thinking and I agree with most of it. > After adding dreq to the hash table, we need to check CACHE_PENDING. It is > possible that it was cleared moments before we added dreq to the hash-table so > the cache_revisit_request associated with clearing it might have missed our > dreq, so we need to do something about that.... Arrgh, right--I wasn't thinking straight. > How about this. > It gets rid of the return values (which were confusing anyway) and adds > explicit checks on CAHCE_PENDING where needed. > ?? Thanks, I'll take a look in the morning when my head's (hopefully) clearer. > > Also, I noticed there is a race with the call to cache_limit_defers. The > 'dreq' could be freed before that is called. > > It looks like I need to resubmit a lot of this - do you want to just discard > it all from your -next and I'll make something new? I'm trying very hard not to rewind -next; so I'd prefer incremental patches for anything already there, replacements for the rest. --b. > > Thank for the review! > NeilBrown > > > cache.c | 53 +++++++++++++++++------------------------------------ > 1 file changed, 17 insertions(+), 36 deletions(-) > > diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c > index c60c7f6..db0fa44 100644 > --- a/net/sunrpc/cache.c > +++ b/net/sunrpc/cache.c > @@ -37,7 +37,7 @@ > > #define RPCDBG_FACILITY RPCDBG_CACHE > > -static int cache_defer_req(struct cache_req *req, struct cache_head *item); > +static void cache_defer_req(struct cache_req *req, struct cache_head *item); > static void cache_revisit_request(struct cache_head *item); > > static void cache_init(struct cache_head *h) > @@ -268,7 +268,8 @@ int cache_check(struct cache_detail *detail, > } > > if (rv == -EAGAIN) { > - if (cache_defer_req(rqstp, h) < 0) { > + cache_defer_req(rqstp, h); > + if (!test_bit(CACHE_PENDING, &h->flags)) { > /* Request is not deferred */ > rv = cache_is_valid(detail, h); > if (rv == -EAGAIN) > @@ -527,7 +528,7 @@ static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_he > hlist_add_head(&dreq->hash, &cache_defer_hash[hash]); > } > > -static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item) > +static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item) > { > > dreq->item = item; > @@ -537,13 +538,6 @@ static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *it > __hash_deferred_req(dreq, item); > > spin_unlock(&cache_defer_lock); > - > - if (!test_bit(CACHE_PENDING, &item->flags)) { > - /* must have just been validated... */ > - cache_revisit_request(item); > - return -EAGAIN; > - } > - return 0; > } > > struct thread_deferred_req { > @@ -558,18 +552,17 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) > complete(&dr->completion); > } > > -static int cache_wait_req(struct cache_req *req, struct cache_head *item, int timeout) > +static void cache_wait_req(struct cache_req *req, struct cache_head *item, int timeout) > { > struct thread_deferred_req sleeper; > struct cache_deferred_req *dreq = &sleeper.handle; > - int ret; > > sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); > dreq->revisit = cache_restart_thread; > > - ret = setup_deferral(dreq, item); > + setup_deferral(dreq, item); > > - if (ret || > + if (!test_bit(CACHE_PENDING, &item->flags) || > wait_for_completion_interruptible_timeout( > &sleeper.completion, timeout) <= 0) { > /* The completion wasn't completed, so we need > @@ -589,18 +582,6 @@ static int cache_wait_req(struct cache_req *req, struct cache_head *item, int ti > wait_for_completion(&sleeper.completion); > } > } > - if (test_bit(CACHE_PENDING, &item->flags)) { > - /* item is still pending, try request > - * deferral > - */ > - return -ETIMEDOUT; > - } > - /* only return success if we actually deferred the > - * request. In this case we waited until it was > - * answered so no deferral has happened - rather > - * an answer already exists. > - */ > - return -EEXIST; > } > > static void cache_limit_defers(struct cache_deferred_req *dreq) > @@ -639,7 +620,7 @@ static void cache_limit_defers(struct cache_deferred_req *dreq) > discard->revisit(discard, 1); > } > > -static int cache_defer_req(struct cache_req *req, struct cache_head *item) > +static void cache_defer_req(struct cache_req *req, struct cache_head *item) > { > struct cache_deferred_req *dreq; > int ret; > @@ -647,17 +628,19 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) > > timeout = req->set_waiter(req, 1); > if (timeout) { > - ret = cache_wait_req(req, item, timeout); > + cache_wait_req(req, item, timeout); > req->set_waiter(req, 0); > - return ret; > } else { > dreq = req->defer(req); > if (dreq == NULL) > - return -ENOMEM; > - if (setup_deferral(dreq, item) < 0) > - return -EAGAIN; > + return; > + setup_deferral(dreq, item); > cache_limit_defers(dreq); > - return 0; > + if (!test_bit(CACHE_PENDING, &item->flags)) > + /* Bit could have been cleared before we managed to > + * set up the deferral, so need to revisit just in case > + */ > + cache_revisit_request(item); > } > } > > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html