Re: [PATCH v5 5/5] nfsd: add the infrastructure to handle the cld upcall

"J. Bruce Fields" <bfields@xxxxxxxxxxxx> · Fri, 3 Feb 2012 17:57:36 -0500

On Wed, Feb 01, 2012 at 10:44:12AM -0500, Jeff Layton wrote:
> ...and add a mechanism for switching between the "legacy" tracker and
> the new one. The decision is made by looking to see whether the
> v4recoverydir exists. If it does, then the legacy client tracker is
> used.
> 
> If it does not, then the kernel will create a "cld" pipe in rpc_pipefs.
> That pipe is used to talk to a daemon for handling the upcall.
> 
> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> ---
>  fs/nfsd/nfs4recover.c |  364 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 363 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> index 3fbf1f4..592fe3d 100644
> --- a/fs/nfsd/nfs4recover.c
> +++ b/fs/nfsd/nfs4recover.c
> @@ -1,5 +1,6 @@
>  /*
>  *  Copyright (c) 2004 The Regents of the University of Michigan.
> +*  Copyright (c) 2011 Jeff Layton <jlayton@xxxxxxxxxx>
>  *  All rights reserved.
>  *
>  *  Andy Adamson <andros@xxxxxxxxxxxxxx>
> @@ -36,6 +37,10 @@
>  #include <linux/namei.h>
>  #include <linux/crypto.h>
>  #include <linux/sched.h>
> +#include <linux/fs.h>
> +#include <linux/sunrpc/rpc_pipe_fs.h>
> +#include <linux/sunrpc/clnt.h>
> +#include <linux/nfsd/cld.h>
>  
>  #include "nfsd.h"
>  #include "state.h"
> @@ -472,12 +477,369 @@ static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
>  	.grace_done	= nfsd4_recdir_purge_old,
>  };
>  
> +/* Globals */
> +#define NFSD_PIPE_DIR		"/nfsd"
> +
> +static struct dentry *cld_pipe;
> +
> +/* list of cld_msg's that are currently in use */
> +static DEFINE_SPINLOCK(cld_lock);
> +static LIST_HEAD(cld_list);
> +static unsigned int cld_xid;
> +
> +struct cld_upcall {
> +	struct list_head	cu_list;
> +	struct task_struct	*cu_task;
> +	struct cld_msg	cu_msg;
> +};
> +
> +static int
> +__cld_pipe_upcall(struct cld_msg *cmsg)
> +{
> +	int ret;
> +	struct rpc_pipe_msg msg;
> +	struct inode *inode = cld_pipe->d_inode;
> +
> +	memset(&msg, 0, sizeof(msg));
> +	msg.data = cmsg;
> +	msg.len = sizeof(*cmsg);
> +
> +	/*
> +	 * Set task state before we queue the upcall. That prevents
> +	 * wake_up_process in the downcall from racing with schedule.
> +	 */
> +	set_current_state(TASK_UNINTERRUPTIBLE);

Is there a risk of nfsd being left unkillable if the daemon dies or
becomes unresponsive?

> +	ret = rpc_queue_upcall(inode, &msg);
> +	if (ret < 0) {
> +		set_current_state(TASK_RUNNING);
> +		goto out;
> +	}
> +
> +	schedule();
> +	set_current_state(TASK_RUNNING);
> +
> +	if (msg.errno < 0)
> +		ret = msg.errno;
> +out:
> +	return ret;
> +}
> +
> +static int
> +cld_pipe_upcall(struct cld_msg *cmsg)
> +{
> +	int ret;
> +
> +	/*
> +	 * -EAGAIN occurs when pipe is closed and reopened while there are
> +	 *  upcalls queued.
> +	 */
> +	do {
> +		ret = __cld_pipe_upcall(cmsg);
> +	} while (ret == -EAGAIN);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
> +{
> +	struct cld_upcall *tmp, *cup;
> +	struct cld_msg *cmsg = (struct cld_msg *)src;
> +	uint32_t xid;
> +
> +	if (mlen != sizeof(*cmsg)) {
> +		dprintk("%s: got %lu bytes, expected %lu\n", __func__, mlen,
> +			sizeof(*cmsg));
> +		return -EINVAL;
> +	}
> +
> +	/* copy just the xid so we can try to find that */
> +	if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
> +		dprintk("%s: error when copying xid from userspace", __func__);
> +		return -EFAULT;
> +	}
> +
> +	/* walk the list and find corresponding xid */
> +	cup = NULL;
> +	spin_lock(&cld_lock);
> +	list_for_each_entry(tmp, &cld_list, cu_list) {
> +		if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
> +			cup = tmp;
> +			list_del_init(&cup->cu_list);
> +			break;
> +		}
> +	}
> +	spin_unlock(&cld_lock);
> +
> +	/* couldn't find upcall? */
> +	if (!cup) {
> +		dprintk("%s: couldn't find upcall -- xid=%u\n", __func__,
> +			cup->cu_msg.cm_xid);
> +		return -EINVAL;
> +	}
> +
> +	if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
> +		return -EFAULT;
> +
> +	wake_up_process(cup->cu_task);
> +	return mlen;
> +}
> +
> +static void
> +cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
> +{
> +	struct cld_msg *cmsg = msg->data;
> +	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
> +						 cu_msg);
> +
> +	/* errno >= 0 means we got a downcall */
> +	if (msg->errno >= 0)
> +		return;
> +
> +	wake_up_process(cup->cu_task);
> +}
> +
> +static const struct rpc_pipe_ops cld_upcall_ops = {
> +	.upcall		= rpc_pipe_generic_upcall,
> +	.downcall	= cld_pipe_downcall,
> +	.destroy_msg	= cld_pipe_destroy_msg,
> +};
> +
> +/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
> +static int
> +nfsd4_init_cld_pipe(void)
> +{
> +	int ret;
> +	struct path path;
> +	struct vfsmount *mnt;
> +
> +	if (cld_pipe)
> +		return 0;
> +
> +	mnt = rpc_get_mount();
> +	if (IS_ERR(mnt))
> +		return PTR_ERR(mnt);
> +
> +	ret = vfs_path_lookup(mnt->mnt_root, mnt, NFSD_PIPE_DIR, 0, &path);
> +	if (ret)
> +		goto err;
> +
> +	cld_pipe = rpc_mkpipe(path.dentry, "cld", NULL,
> +				&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
> +	path_put(&path);
> +	if (!IS_ERR(cld_pipe))
> +		return 0;
> +
> +	ret = PTR_ERR(cld_pipe);
> +err:
> +	rpc_put_mount();
> +	printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n",
> +			ret);
> +	return ret;
> +}
> +
> +static void
> +nfsd4_remove_cld_pipe(void)
> +{
> +	int ret;
> +
> +	ret = rpc_unlink(cld_pipe);
> +	if (ret)
> +		printk(KERN_ERR "NFSD: error removing cld pipe: %d\n", ret);
> +	cld_pipe = NULL;
> +	rpc_put_mount();
> +}
> +
> +static struct cld_upcall *
> +alloc_cld_upcall(void)
> +{
> +	struct cld_upcall *new, *tmp;
> +
> +	new = kzalloc(sizeof(*new), GFP_KERNEL);
> +	if (!new)
> +		return new;
> +
> +	/* FIXME: hard cap on number in flight? */
> +restart_search:
> +	spin_lock(&cld_lock);
> +	list_for_each_entry(tmp, &cld_list, cu_list) {
> +		if (tmp->cu_msg.cm_xid == cld_xid) {
> +			cld_xid++;
> +			spin_unlock(&cld_lock);
> +			goto restart_search;
> +		}
> +	}
> +	new->cu_task = current;
> +	new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
> +	put_unaligned(cld_xid++, &new->cu_msg.cm_xid);
> +	list_add(&new->cu_list, &cld_list);
> +	spin_unlock(&cld_lock);
> +
> +	dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
> +
> +	return new;
> +}
> +
> +static void
> +free_cld_upcall(struct cld_upcall *victim)
> +{
> +	spin_lock(&cld_lock);
> +	list_del(&victim->cu_list);
> +	spin_unlock(&cld_lock);
> +	kfree(victim);
> +}
> +
> +/* Ask daemon to create a new record */
> +static void
> +nfsd4_cld_create(struct nfs4_client *clp)
> +{
> +	int ret;
> +	struct cld_upcall *cup;
> +
> +	/* Don't upcall if it's already stored */
> +	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
> +		return;
> +
> +	cup = alloc_cld_upcall();
> +	if (!cup) {
> +		ret = -ENOMEM;
> +		goto out_err;
> +	}
> +
> +	cup->cu_msg.cm_cmd = Cld_Create;
> +	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
> +	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> +			clp->cl_name.len);
> +
> +	ret = cld_pipe_upcall(&cup->cu_msg);
> +	if (!ret) {
> +		ret = cup->cu_msg.cm_status;
> +		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
> +	}
> +
> +	free_cld_upcall(cup);
> +out_err:
> +	if (ret)
> +		printk(KERN_ERR "NFSD: Unable to create client "
> +				"record on stable storage: %d\n", ret);
> +}
> +
> +/* Ask daemon to remove a record */
> +static void
> +nfsd4_cld_remove(struct nfs4_client *clp)
> +{
> +	int ret;
> +	struct cld_upcall *cup;
> +
> +	/* Don't upcall if it's already removed */
> +	if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
> +		return;
> +
> +	cup = alloc_cld_upcall();
> +	if (!cup) {
> +		ret = -ENOMEM;
> +		goto out_err;
> +	}
> +
> +	cup->cu_msg.cm_cmd = Cld_Remove;
> +	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
> +	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> +			clp->cl_name.len);
> +
> +	ret = cld_pipe_upcall(&cup->cu_msg);
> +	if (!ret) {
> +		ret = cup->cu_msg.cm_status;
> +		clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
> +	}
> +
> +	free_cld_upcall(cup);
> +out_err:
> +	if (ret)
> +		printk(KERN_ERR "NFSD: Unable to remove client "
> +				"record from stable storage: %d\n", ret);
> +}
> +/* Check for presence of a record, and update its timestamp */
> +static int
> +nfsd4_cld_check(struct nfs4_client *clp)
> +{
> +	int ret;
> +	struct cld_upcall *cup;
> +
> +	/* Don't upcall if one was already stored during this grace period */
> +	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
> +		return 0;
> +
> +	cup = alloc_cld_upcall();
> +	if (!cup) {
> +		printk(KERN_ERR "NFSD: Unable to check client record on "
> +				"stable storage: %d\n", -ENOMEM);
> +		return -ENOMEM;
> +	}
> +
> +	cup->cu_msg.cm_cmd = Cld_Check;
> +	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
> +	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> +			clp->cl_name.len);
> +
> +	ret = cld_pipe_upcall(&cup->cu_msg);
> +	if (!ret) {
> +		ret = cup->cu_msg.cm_status;
> +		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
> +	}

Most of this nfsd4_cld_*() code is *really* similar from one function to
the next--would it make sense to share some more code?

Anyway, this basically all looks reasonable to me....

--b.

> +
> +	free_cld_upcall(cup);
> +	return ret;
> +}
> +
> +static void
> +nfsd4_cld_grace_done(time_t boot_time)
> +{
> +	int ret;
> +	struct cld_upcall *cup;
> +
> +	cup = alloc_cld_upcall();
> +	if (!cup) {
> +		ret = -ENOMEM;
> +		goto out_err;
> +	}
> +
> +	cup->cu_msg.cm_cmd = Cld_GraceDone;
> +	cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time;
> +	ret = cld_pipe_upcall(&cup->cu_msg);
> +	if (!ret)
> +		ret = cup->cu_msg.cm_status;
> +
> +	free_cld_upcall(cup);
> +out_err:
> +	if (ret)
> +		printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
> +}
> +
> +static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
> +	.init		= nfsd4_init_cld_pipe,
> +	.exit		= nfsd4_remove_cld_pipe,
> +	.create		= nfsd4_cld_create,
> +	.remove		= nfsd4_cld_remove,
> +	.check		= nfsd4_cld_check,
> +	.grace_done	= nfsd4_cld_grace_done,
> +};
> +
>  int
>  nfsd4_client_tracking_init(void)
>  {
>  	int status;
> +	struct path path;
>  
> -	client_tracking_ops = &nfsd4_legacy_tracking_ops;
> +	if (!client_tracking_ops) {
> +		client_tracking_ops = &nfsd4_cld_tracking_ops;
> +		status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
> +		if (!status) {
> +			if (S_ISDIR(path.dentry->d_inode->i_mode))
> +				client_tracking_ops =
> +						&nfsd4_legacy_tracking_ops;
> +			path_put(&path);
> +		}
> +	}
>  
>  	if (!client_tracking_ops->init)
>  		return 0;
> -- 
> 1.7.7.6
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html