Re: [PATCH RFC v6 2/2] nfsd: Initial implementation of NFSv4 Courteous Server

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Dai-

Some comments and questions below:


> On Dec 6, 2021, at 12:59 PM, Dai Ngo <dai.ngo@xxxxxxxxxx> wrote:
> 
> Currently an NFSv4 client must maintain its lease by using the at least
> one of the state tokens or if nothing else, by issuing a RENEW (4.0), or
> a singleton SEQUENCE (4.1) at least once during each lease period. If the
> client fails to renew the lease, for any reason, the Linux server expunges
> the state tokens immediately upon detection of the "failure to renew the
> lease" condition and begins returning NFS4ERR_EXPIRED if the client should
> reconnect and attempt to use the (now) expired state.
> 
> The default lease period for the Linux server is 90 seconds.  The typical
> client cuts that in half and will issue a lease renewing operation every
> 45 seconds. The 90 second lease period is very short considering the
> potential for moderately long term network partitions.  A network partition
> refers to any loss of network connectivity between the NFS client and the
> NFS server, regardless of its root cause.  This includes NIC failures, NIC
> driver bugs, network misconfigurations & administrative errors, routers &
> switches crashing and/or having software updates applied, even down to
> cables being physically pulled.  In most cases, these network failures are
> transient, although the duration is unknown.
> 
> A server which does not immediately expunge the state on lease expiration
> is known as a Courteous Server.  A Courteous Server continues to recognize
> previously generated state tokens as valid until conflict arises between
> the expired state and the requests from another client, or the server
> reboots.
> 
> The initial implementation of the Courteous Server will do the following:
> 
> . when the laundromat thread detects an expired client and if that client
> still has established states on the Linux server and there is no waiters
> for the client's locks then mark the client as a COURTESY_CLIENT and skip
> destroying the client and all its states, otherwise destroy the client as
> usual.
> 
> . detects conflict of OPEN request with COURTESY_CLIENT, destroys the
> expired client and all its states, skips the delegation recall then allows
> the conflicting request to succeed.
> 
> . detects conflict of LOCK/LOCKT, NLM LOCK and TEST, and local locks
> requests with COURTESY_CLIENT, destroys the expired client and all its
> states then allows the conflicting request to succeed.
> 
> . detects conflict of LOCK/LOCKT, NLM LOCK and TEST, and local locks
> requests with COURTESY_CLIENT, destroys the expired client and all its
> states then allows the conflicting request to succeed.
> 
> Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
> ---
> fs/nfsd/nfs4state.c | 293 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h     |   3 +
> 2 files changed, 293 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 3f4027a5de88..759f61dc6685 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -125,6 +125,11 @@ static void free_session(struct nfsd4_session *);
> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> 
> +static struct workqueue_struct *laundry_wq;
> +static void laundromat_main(struct work_struct *);
> +
> +static int courtesy_client_expiry = (24 * 60 * 60);	/* in secs */
> +
> static bool is_session_dead(struct nfsd4_session *ses)
> {
> 	return ses->se_flags & NFS4_SESSION_DEAD;
> @@ -172,6 +177,7 @@ renew_client_locked(struct nfs4_client *clp)
> 
> 	list_move_tail(&clp->cl_lru, &nn->client_lru);
> 	clp->cl_time = ktime_get_boottime_seconds();
> +	clear_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags);
> }
> 
> static void put_client_renew_locked(struct nfs4_client *clp)
> @@ -2389,6 +2395,10 @@ static int client_info_show(struct seq_file *m, void *v)
> 		seq_puts(m, "status: confirmed\n");
> 	else
> 		seq_puts(m, "status: unconfirmed\n");
> +	seq_printf(m, "courtesy client: %s\n",
> +		test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags) ? "yes" : "no");
> +	seq_printf(m, "seconds from last renew: %lld\n",
> +		ktime_get_boottime_seconds() - clp->cl_time);
> 	seq_printf(m, "name: ");
> 	seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
> 	seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
> @@ -4662,6 +4672,33 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
> 	nfsd4_run_cb(&dp->dl_recall);
> }
> 
> +/*
> + * This function is called when a file is opened and there is a
> + * delegation conflict with another client. If the other client
> + * is a courtesy client then kick start the laundromat to destroy
> + * it.
> + */
> +static bool
> +nfsd_check_courtesy_client(struct nfs4_delegation *dp)
> +{
> +	struct svc_rqst *rqst;
> +	struct nfs4_client *clp = dp->dl_recall.cb_clp;
> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> +	if (!i_am_nfsd())
> +		goto out;
> +	rqst = kthread_data(current);
> +	if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
> +		return false;
> +out:
> +	if (test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags)) {
> +		set_bit(NFSD4_DESTROY_COURTESY_CLIENT, &clp->cl_flags);
> +		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
> +		return true;
> +	}
> +	return false;
> +}
> +
> /* Called from break_lease() with i_lock held. */
> static bool
> nfsd_break_deleg_cb(struct file_lock *fl)
> @@ -4670,6 +4707,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
> 	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
> 	struct nfs4_file *fp = dp->dl_stid.sc_file;
> 
> +	if (nfsd_check_courtesy_client(dp))
> +		return false;
> 	trace_nfsd_cb_recall(&dp->dl_stid);
> 
> 	/*
> @@ -4912,6 +4951,136 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
> 	return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0);
> }
> 
> +static bool
> +__nfs4_check_deny_bmap(struct nfs4_ol_stateid *stp, u32 access,
> +			bool share_access)
> +{
> +	if (share_access) {
> +		if (!stp->st_deny_bmap)
> +			return false;
> +
> +		if ((stp->st_deny_bmap & (1 << NFS4_SHARE_DENY_BOTH)) ||

Aren't the NFS4_SHARE_DENY macros already bit masks?

NFS4_SHARE_DENY_BOTH is (NFS4_SHARE_DENY_READ | NFS4_SHARE_DENY_WRITE).


> +			(access & NFS4_SHARE_ACCESS_READ &&
> +				stp->st_deny_bmap & (1 << NFS4_SHARE_DENY_READ)) ||
> +			(access & NFS4_SHARE_ACCESS_WRITE &&
> +				stp->st_deny_bmap & (1 << NFS4_SHARE_DENY_WRITE))) {
> +			return true;
> +		}
> +		return false;
> +	}
> +	if ((access & NFS4_SHARE_DENY_BOTH) ||
> +		(access & NFS4_SHARE_DENY_READ &&
> +			stp->st_access_bmap & (1 << NFS4_SHARE_ACCESS_READ)) ||
> +		(access & NFS4_SHARE_DENY_WRITE &&
> +			stp->st_access_bmap & (1 << NFS4_SHARE_ACCESS_WRITE))) {
> +		return true;
> +	}
> +	return false;
> +}
> +
> +/*
> + * access: if share_access is true then check access mode else check deny mode
> + */
> +static bool
> +nfs4_check_deny_bmap(struct nfs4_client *clp, struct nfs4_file *fp,
> +		struct nfs4_ol_stateid *st, u32 access, bool share_access)
> +{
> +	int i;
> +	struct nfs4_openowner *oo;
> +	struct nfs4_stateowner *so, *tmp;
> +	struct nfs4_ol_stateid *stp, *stmp;
> +
> +	spin_lock(&clp->cl_lock);
> +	for (i = 0; i < OWNER_HASH_SIZE; i++) {
> +		list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
> +					so_strhash) {
> +			if (!so->so_is_open_owner)
> +				continue;
> +			oo = openowner(so);
> +			list_for_each_entry_safe(stp, stmp,
> +				&oo->oo_owner.so_stateids, st_perstateowner) {
> +				if (stp == st || stp->st_stid.sc_file != fp)
> +					continue;
> +				if (__nfs4_check_deny_bmap(stp, access,
> +							share_access)) {
> +					spin_unlock(&clp->cl_lock);
> +					return true;
> +				}
> +			}
> +		}
> +	}
> +	spin_unlock(&clp->cl_lock);
> +	return false;
> +}
> +
> +/*
> + * Function to check if the nfserr_share_denied error for 'fp' resulted
> + * from conflict with courtesy clients then release their state to resolve
> + * the conflict.
> + *
> + * Function returns:
> + *	 0 -  no conflict with courtesy clients
> + *	>0 -  conflict with courtesy clients resolved, try access/deny check again
> + *	-1 -  conflict with courtesy clients being resolved in background
> + *            return nfserr_jukebox to NFS client
> + */
> +static int
> +nfs4_destroy_clnts_with_sresv_conflict(struct svc_rqst *rqstp,
> +			struct nfs4_file *fp, struct nfs4_ol_stateid *stp,
> +			u32 access, bool share_access)
> +{
> +	int cnt = 0;
> +	int async_cnt = 0;
> +	bool no_retry = false;
> +	struct nfs4_client *cl;
> +	struct list_head *pos, *next, reaplist;
> +	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +
> +	INIT_LIST_HEAD(&reaplist);
> +	spin_lock(&nn->client_lock);
> +	list_for_each_safe(pos, next, &nn->client_lru) {
> +		cl = list_entry(pos, struct nfs4_client, cl_lru);
> +		/*
> +		 * check all nfs4_ol_stateid of this client
> +		 * for conflicts with 'access'mode.
> +		 */
> +		if (nfs4_check_deny_bmap(cl, fp, stp, access, share_access)) {
> +			if (!test_bit(NFSD4_COURTESY_CLIENT, &cl->cl_flags)) {
> +				/* conflict with non-courtesy client */
> +				no_retry = true;
> +				cnt = 0;
> +				goto out;
> +			}
> +			/*
> +			 * if too many to resolve synchronously
> +			 * then do the rest in background
> +			 */
> +			if (cnt > 100) {
> +				set_bit(NFSD4_DESTROY_COURTESY_CLIENT, &cl->cl_flags);
> +				async_cnt++;
> +				continue;
> +			}
> +			if (mark_client_expired_locked(cl))
> +				continue;
> +			cnt++;
> +			list_add(&cl->cl_lru, &reaplist);
> +		}
> +	}

Bruce suggested simply returning NFS4ERR_DELAY for all cases.
That would simplify this quite a bit for what is a rare edge
case.


> +out:
> +	spin_unlock(&nn->client_lock);
> +	list_for_each_safe(pos, next, &reaplist) {
> +		cl = list_entry(pos, struct nfs4_client, cl_lru);
> +		list_del_init(&cl->cl_lru);
> +		expire_client(cl);
> +	}

A slightly nicer construct here would be something like this:

	while ((pos = list_del_first(&reaplist)))
		expire_client(list_entry(pos, struct nfs4_client, cl_lru));


> +	if (async_cnt) {
> +		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
> +		if (!no_retry)
> +			cnt = -1;
> +	}
> +	return cnt;
> +}
> +
> static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
> 		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
> 		struct nfsd4_open *open)
> @@ -4921,6 +5090,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
> 	int oflag = nfs4_access_to_omode(open->op_share_access);
> 	int access = nfs4_access_to_access(open->op_share_access);
> 	unsigned char old_access_bmap, old_deny_bmap;
> +	int cnt = 0;
> 
> 	spin_lock(&fp->fi_lock);
> 
> @@ -4928,16 +5098,38 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
> 	 * Are we trying to set a deny mode that would conflict with
> 	 * current access?
> 	 */
> +chk_deny:
> 	status = nfs4_file_check_deny(fp, open->op_share_deny);
> 	if (status != nfs_ok) {
> 		spin_unlock(&fp->fi_lock);
> +		if (status != nfserr_share_denied)
> +			goto out;
> +		cnt = nfs4_destroy_clnts_with_sresv_conflict(rqstp, fp,
> +				stp, open->op_share_deny, false);
> +		if (cnt > 0) {
> +			spin_lock(&fp->fi_lock);
> +			goto chk_deny;

I'm pondering whether a distributed set of clients can
cause this loop to never terminate.


> +		}
> +		if (cnt == -1)
> +			status = nfserr_jukebox;
> 		goto out;
> 	}
> 
> 	/* set access to the file */
> +get_access:
> 	status = nfs4_file_get_access(fp, open->op_share_access);
> 	if (status != nfs_ok) {
> 		spin_unlock(&fp->fi_lock);
> +		if (status != nfserr_share_denied)
> +			goto out;
> +		cnt = nfs4_destroy_clnts_with_sresv_conflict(rqstp, fp,
> +				stp, open->op_share_access, true);
> +		if (cnt > 0) {
> +			spin_lock(&fp->fi_lock);
> +			goto get_access;

Ditto.


> +		}
> +		if (cnt == -1)
> +			status = nfserr_jukebox;
> 		goto out;
> 	}
> 
> @@ -5289,6 +5481,22 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
> 	 */
> }
> 
> +static bool
> +nfs4_destroy_courtesy_client(struct nfs4_client *clp)
> +{
> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> +	spin_lock(&nn->client_lock);
> +	if (!test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags) ||
> +			mark_client_expired_locked(clp)) {
> +		spin_unlock(&nn->client_lock);
> +		return false;
> +	}
> +	spin_unlock(&nn->client_lock);
> +	expire_client(clp);
> +	return true;
> +}
> +

Perhaps nfs4_destroy_courtesy_client() could be merged into
nfsd4_fl_expire_lock(), it's only caller.


> __be32
> nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
> {
> @@ -5572,6 +5780,47 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
> }
> #endif
> 
> +static
> +bool nfs4_anylock_conflict(struct nfs4_client *clp)
> +{
> +	int i;
> +	struct nfs4_stateowner *so, *tmp;
> +	struct nfs4_lockowner *lo;
> +	struct nfs4_ol_stateid *stp;
> +	struct nfs4_file *nf;
> +	struct inode *ino;
> +	struct file_lock_context *ctx;
> +	struct file_lock *fl;
> +
> +	for (i = 0; i < OWNER_HASH_SIZE; i++) {
> +		/* scan each lock owner */
> +		list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
> +				so_strhash) {
> +			if (so->so_is_open_owner)
> +				continue;

Isn't cl_lock needed to protect the cl_ownerstr_hashtbl lists?


> +
> +			/* scan lock states of this lock owner */
> +			lo = lockowner(so);
> +			list_for_each_entry(stp, &lo->lo_owner.so_stateids,
> +					st_perstateowner) {
> +				nf = stp->st_stid.sc_file;
> +				ino = nf->fi_inode;
> +				ctx = ino->i_flctx;
> +				if (!ctx)
> +					continue;
> +				/* check each lock belongs to this lock state */
> +				list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
> +					if (fl->fl_owner != lo)
> +						continue;
> +					if (!list_empty(&fl->fl_blocked_requests))
> +						return true;
> +				}
> +			}
> +		}
> +	}
> +	return false;
> +}
> +
> static time64_t
> nfs4_laundromat(struct nfsd_net *nn)
> {
> @@ -5587,7 +5836,9 @@ nfs4_laundromat(struct nfsd_net *nn)
> 	};
> 	struct nfs4_cpntf_state *cps;
> 	copy_stateid_t *cps_t;
> +	struct nfs4_stid *stid;
> 	int i;
> +	int id = 0;
> 
> 	if (clients_still_reclaiming(nn)) {
> 		lt.new_timeo = 0;
> @@ -5608,8 +5859,33 @@ nfs4_laundromat(struct nfsd_net *nn)
> 	spin_lock(&nn->client_lock);
> 	list_for_each_safe(pos, next, &nn->client_lru) {
> 		clp = list_entry(pos, struct nfs4_client, cl_lru);
> +		if (test_bit(NFSD4_DESTROY_COURTESY_CLIENT, &clp->cl_flags)) {
> +			clear_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags);
> +			goto exp_client;
> +		}
> +		if (test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags)) {
> +			if (ktime_get_boottime_seconds() >= clp->courtesy_client_expiry)
> +				goto exp_client;
> +			/*
> +			 * after umount, v4.0 client is still
> +			 * around waiting to be expired
> +			 */
> +			if (clp->cl_minorversion)
> +				continue;
> +		}
> 		if (!state_expired(&lt, clp->cl_time))
> 			break;
> +		spin_lock(&clp->cl_lock);
> +		stid = idr_get_next(&clp->cl_stateids, &id);
> +		spin_unlock(&clp->cl_lock);
> +		if (stid && !nfs4_anylock_conflict(clp)) {
> +			/* client still has states */
> +			clp->courtesy_client_expiry =
> +				ktime_get_boottime_seconds() + courtesy_client_expiry;
> +			set_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags);
> +			continue;
> +		}
> +exp_client:
> 		if (mark_client_expired_locked(clp))
> 			continue;
> 		list_add(&clp->cl_lru, &reaplist);
> @@ -5689,9 +5965,6 @@ nfs4_laundromat(struct nfsd_net *nn)
> 	return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
> }
> 
> -static struct workqueue_struct *laundry_wq;
> -static void laundromat_main(struct work_struct *);
> -
> static void
> laundromat_main(struct work_struct *laundry)
> {
> @@ -6496,6 +6769,19 @@ nfs4_transform_lock_offset(struct file_lock *lock)
> 		lock->fl_end = OFFSET_MAX;
> }
> 
> +/* return true if lock was expired else return false */
> +static bool
> +nfsd4_fl_expire_lock(struct file_lock *fl, bool testonly)
> +{
> +	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
> +	struct nfs4_client *clp = lo->lo_owner.so_client;
> +
> +	if (testonly)
> +		return test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags) ?
> +			true : false;

Hm. I know test_bit() returns an integer rather than a boolean, but
the ternary here is a bit unwieldy. How about just:

		return !!test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags);


> +	return nfs4_destroy_courtesy_client(clp);
> +}
> +
> static fl_owner_t
> nfsd4_fl_get_owner(fl_owner_t owner)
> {
> @@ -6543,6 +6829,7 @@ static const struct lock_manager_operations nfsd_posix_mng_ops  = {
> 	.lm_notify = nfsd4_lm_notify,
> 	.lm_get_owner = nfsd4_fl_get_owner,
> 	.lm_put_owner = nfsd4_fl_put_owner,
> +	.lm_expire_lock = nfsd4_fl_expire_lock,
> };
> 
> static inline void
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e73bdbb1634a..93e30b101578 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -345,6 +345,8 @@ struct nfs4_client {
> #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
> #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
> 					 1 << NFSD4_CLIENT_CB_KILL)
> +#define NFSD4_COURTESY_CLIENT		(6)	/* be nice to expired client */
> +#define NFSD4_DESTROY_COURTESY_CLIENT	(7)
> 	unsigned long		cl_flags;
> 	const struct cred	*cl_cb_cred;
> 	struct rpc_clnt		*cl_cb_client;
> @@ -385,6 +387,7 @@ struct nfs4_client {
> 	struct list_head	async_copies;	/* list of async copies */
> 	spinlock_t		async_lock;	/* lock for async copies */
> 	atomic_t		cl_cb_inflight;	/* Outstanding callbacks */
> +	int			courtesy_client_expiry;
> };
> 
> /* struct nfs4_client_reset
> -- 
> 2.9.5
> 

--
Chuck Lever







[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux