[RFC][PATCH] lockd: refine support for deferred blocking locks

"Mike Snitzer" <snitzer@xxxxxxxxx> · Fri, 21 Dec 2007 17:50:58 -0500

On Dec 19, 2007 12:45 AM, J. Bruce Fields <bfields@xxxxxxxxxxxx> wrote:
>
> On Tue, Dec 18, 2007 at 11:03:01PM -0500, Mike Snitzer wrote:
> > I could be missing something blatantly obvious so please be kind...
> >
> > I would like to understand how one _should_ implement ->lock() for a
> > filesystem that can't immediately determine that the request would
> > result in -EDEADLK or -ENOLCK.  Particularly for deferred SETLKW
> > requests.
> >
> > That is, the filesystem's ->lock() initially responds with
> > -EINPROGRESS but later goes on to fail the request with -EDEADLK or
> > -ENOLCK.  The filesystem's call to ->fl_grant() will be made with an
> > unsupported 'result'.
> >
> > Having reviewed the code in fs/lockd/svclock.c it would seem that it
> > was never designed to allow EDEADLK or ENOLCK _after_ nlmsvc_lock()'s
> > initial call to vfs_lock_file().
> >
> > nlmsvc_grant_deferred() properly handles deferred SETLKW processing
> > IFF the result is 0 (aka granted).
> > nlmsvc_grant_deferred()'s SETLK processing will at least return
> > failures (but they are assumed to be B_TIMED_OUT).
> > In the end the real 'result' gets dropped on the floor.
> >
> > Why must ->lock() immediately return error (e.g. EDEADLK), otherwise
> > it is assumed the request will complete successfully (or timeout in
> > the B_QUEUED/SETLK case)?  Seems overly constrained (painfully so for
> > SETLKW) when you consider that the whole point of deferred processing
> > (via EINPROGRESS and ->fl_grant) is to accomodate filesystems that
> > can't respond to a lock request immediately.
>
> Yeah, the assuumption was that we only needed to deal with non-blocking
> requests first, under the theory that the blocking case could always be
> handled with a deny followed by an eventual grant.  Which, as you point
> out, may not ever be coming in the EDEALK or ENOLCK cases.  I'm inclined
> to agree it's ugly.
>
> That said, I find it hard to care about the EDEADLK case--deadlock
> detection just isn't going to be reliable in the presence of nfs clients
> anyway.  And are there really a lot of cases where ENOLCK has to be
> returned and where the local filesystem doesn't know that immediately?
>
> But it should probably be rethought anyway.  I'm grateful for any
> suggestions (but probably won't be working on this seriously until the
> new year).

Hi Bruce,

The attached patch is an example of one possible way to refine how lockd
and ->lock process blocking lock.  Here is a general overview for the patch:

Filesystems that cannot respond to blocking lock requests immediately are
allowed to leverage asynchronous processing semantics via ->fl_grant().
The current lockd code unfortunately requires the filesystem's ->lock() to
respond immediately with an error if the lock will be denied (EDEADLOCK,
ENOLCK, etc).  Otherwise, lockd assumes that the blocking lock will
eventually be granted.

To refine deferred blocking lock processing semantics it would be ideal to
flag such requests as B_QUEUED_WAIT iff ->lock() initially responds with
EINPROGRESS.  B_QUEUED_WAIT conveys that ->lock() is actively working on
the blocking request and will respond with the result via ->fl_grant().
As such, subsequent lockd attempts to vfs_lock_file() are _not_ needed,
and are avoided entirely, because the eventual ->lock() result will be
returned via the ->fl_grant() callback.  The callback's result is stored
in a new 'b_result' data member of struct nlm_block.  b_result is known to
be valid iff B_GOT_CALLBACK is set.

The proposed implementation does not yet prevent/warn against some
filesystem's ->lock() from returning an ->fl_grant() result of EINPROGRESS
or EAGAIN.  Such a result does not make sense as the final result of the
deferred blocking lock request.

Please advise, thanks.
Mike

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d120ec3..35b97e1 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -409,13 +409,20 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 		}
 		ret = nlm_drop_reply;
 		goto out;
-	}
-
-	if (!wait)
+	} else if (block->b_flags & B_QUEUED_WAIT) {
+		/* Don't retry a deferred blocking request */
+		dprintk("lockd: nlmsvc_lock deferred blocking block %p flags %d\n",
+							block, block->b_flags);
+		error = -EINPROGRESS;
+		if (block->b_flags & B_GOT_CALLBACK)
+			error = block->b_result;
+	} else {
+		if (!wait)
+			lock->fl.fl_flags &= ~FL_SLEEP;
+		error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
 		lock->fl.fl_flags &= ~FL_SLEEP;
-	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
-	lock->fl.fl_flags &= ~FL_SLEEP;
-
+	}
+	
 	dprintk("lockd: vfs_lock_file returned %d\n", error);
 	switch(error) {
 		case 0:
@@ -425,8 +432,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			ret = nlm_lck_denied;
 			break;
 		case -EINPROGRESS:
-			if (wait)
+			if (wait) {
+				block->b_flags |= B_QUEUED_WAIT;
 				break;
+			}
 			/* Filesystem lock operation is in progress
 			   Add it to the queue waiting for callback */
 			ret = nlmsvc_defer_lock_rqst(rqstp, block);
@@ -622,9 +631,11 @@ nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf,
 			     int result)
 {
 	block->b_flags |= B_GOT_CALLBACK;
+	/* NOTE: a callback result of EAGAIN or EINPROGRESS is invalid */
+	block->b_result = result;
 	if (result == 0)
 		block->b_granted = 1;
-	else
+	else if (block->b_flags & B_QUEUED)
 		block->b_flags |= B_TIMED_OUT;
 	if (conf) {
 		if (block->b_fl)
@@ -648,10 +659,9 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
 					rc = -ENOLCK;
 					break;
 				}
-				nlmsvc_update_deferred_block(block, conf, result);
-			} else if (result == 0)
-				block->b_granted = 1;
-
+			}
+			nlmsvc_update_deferred_block(block, conf, result);
+			
 			nlmsvc_insert_block(block, 0);
 			svc_wake_up(block->b_daemon);
 			rc = 0;
@@ -732,11 +742,18 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 		goto callback;
 	}
 
-	/* Try the lock operation again */
-	lock->fl.fl_flags |= FL_SLEEP;
-	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
-	lock->fl.fl_flags &= ~FL_SLEEP;
-
+	if (block->b_flags & B_QUEUED_WAIT) {
+		/* Don't retry a deferred blocking request */
+		error = -EINPROGRESS;
+		if (block->b_flags & B_GOT_CALLBACK)
+			error = block->b_result;
+	} else {
+		/* Try the lock operation again */
+		lock->fl.fl_flags |= FL_SLEEP;
+		error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+		lock->fl.fl_flags &= ~FL_SLEEP;
+	}
+		
 	switch (error) {
 	case 0:
 		break;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index e2d1ce3..187afcf 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -137,10 +137,12 @@ struct nlm_block {
 	struct cache_req *	b_cache_req;	/* deferred request handling */
 	struct file_lock *	b_fl;		/* set for GETLK */
 	struct cache_deferred_req * b_deferred_req;
+	unsigned int		b_result;	/* callback result */
 	unsigned int		b_flags;	/* block flags */
 #define B_QUEUED		1	/* lock queued */
 #define B_GOT_CALLBACK		2	/* got lock or conflicting lock */
 #define B_TIMED_OUT		4	/* filesystem too slow to respond */
+#define B_QUEUED_WAIT		8	/* blocking lock queued */
 };
 
 /*