Re: [RFC 4/5] NFSD: Defer copying

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 07/22/2013 02:50 PM, J. Bruce Fields wrote:
> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@xxxxxxxxxx wrote:
>> From: Bryan Schumaker <bjschuma@xxxxxxxxxx>
>>
>> Rather than performing the copy right away, schedule it to run later and
>> reply to the client.  Later, send a callback to notify the client that
>> the copy has finished.
> 
> I believe you need to implement the referring triple support described
> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race
> described in
> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3
> .

I'll re-read and re-write.

> 
> I see cb_delay initialized below, but not otherwise used.  Am I missing
> anything?

Whoops!  I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously.  I must have forgotten to take it out :(

> 
> What about OFFLOAD_STATUS and OFFLOAD_ABORT?

I haven't thought out those too much... I haven't thought about a use for them on the client yet.

> 
> In some common cases the reply will be very quick, and we might be
> better off handling it synchronously.  Could we implement a heuristic
> like "copy synchronously if the filesystem has special support or the
> range is less than the maximum iosize, otherwise copy asynchronously"?

I'm sure that can be done, I'm just not sure how to do it yet...

> 
> --b.
> 
> 
>> Signed-off-by: Bryan Schumaker <bjschuma@xxxxxxxxxx>
>> ---
>>  fs/nfsd/nfs4callback.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/nfsd/nfs4proc.c     |  59 +++++++++++++++------
>>  fs/nfsd/nfs4state.c    |  11 ++++
>>  fs/nfsd/state.h        |  21 ++++++++
>>  fs/nfsd/xdr4cb.h       |   9 ++++
>>  5 files changed, 221 insertions(+), 15 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>> index 7f05cd1..8f797e1 100644
>> --- a/fs/nfsd/nfs4callback.c
>> +++ b/fs/nfsd/nfs4callback.c
>> @@ -52,6 +52,9 @@ enum {
>>  	NFSPROC4_CLNT_CB_NULL = 0,
>>  	NFSPROC4_CLNT_CB_RECALL,
>>  	NFSPROC4_CLNT_CB_SEQUENCE,
>> +
>> +	/* NFS v4.2 callback */
>> +	NFSPROC4_CLNT_CB_OFFLOAD,
>>  };
>>  
>>  struct nfs4_cb_compound_hdr {
>> @@ -110,6 +113,7 @@ enum nfs_cb_opnum4 {
>>  	OP_CB_WANTS_CANCELLED		= 12,
>>  	OP_CB_NOTIFY_LOCK		= 13,
>>  	OP_CB_NOTIFY_DEVICEID		= 14,
>> +	OP_CB_OFFLOAD			= 15,
>>  	OP_CB_ILLEGAL			= 10044
>>  };
>>  
>> @@ -469,6 +473,31 @@ out_default:
>>  	return nfs_cb_stat_to_errno(nfserr);
>>  }
>>  
>> +static void encode_cb_offload4args(struct xdr_stream *xdr,
>> +				   const struct nfs4_cb_offload *offload,
>> +				   struct nfs4_cb_compound_hdr *hdr)
>> +{
>> +	__be32 *p;
>> +
>> +	if (hdr->minorversion < 2)
>> +		return;
>> +
>> +	encode_nfs_cb_opnum4(xdr, OP_CB_OFFLOAD);
>> +	encode_nfs_fh4(xdr, &offload->co_dst_fh);
>> +	encode_stateid4(xdr, &offload->co_stid->sc_stateid);
>> +
>> +	p = xdr_reserve_space(xdr, 4);
>> +	*p = cpu_to_be32(1);
>> +	encode_stateid4(xdr, &offload->co_stid->sc_stateid);
>> +
>> +	p = xdr_reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
>> +	p = xdr_encode_hyper(p, offload->co_count);
>> +	*p++ = cpu_to_be32(offload->co_stable_how);
>> +	xdr_encode_opaque_fixed(p, offload->co_verifier.data, NFS4_VERIFIER_SIZE);
>> +
>> +	hdr->nops++;
>> +}
>> +
>>  /*
>>   * NFSv4.0 and NFSv4.1 XDR encode functions
>>   *
>> @@ -505,6 +534,23 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>>  	encode_cb_nops(&hdr);
>>  }
>>  
>> +/*
>> + * CB_OFFLOAD
>> + */
>> +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, struct xdr_stream *xdr,
>> +				    const struct nfsd4_callback *cb)
>> +{
>> +	const struct nfs4_cb_offload *args = cb->cb_op;
>> +	struct nfs4_cb_compound_hdr hdr = {
>> +		.ident = cb->cb_clp->cl_cb_ident,
>> +		.minorversion = cb->cb_minorversion,
>> +	};
>> +
>> +	encode_cb_compound4args(xdr, &hdr);
>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>> +	encode_cb_offload4args(xdr, args, &hdr);
>> +	encode_cb_nops(&hdr);
>> +}
>>  
>>  /*
>>   * NFSv4.0 and NFSv4.1 XDR decode functions
>> @@ -552,6 +598,36 @@ out:
>>  }
>>  
>>  /*
>> + * CB_OFFLOAD
>> + */
>> +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
>> +				   struct nfsd4_callback *cb)
>> +{
>> +	struct nfs4_cb_compound_hdr hdr;
>> +	enum nfsstat4 nfserr;
>> +	int status;
>> +
>> +	status = decode_cb_compound4res(xdr, &hdr);
>> +	if (unlikely(status))
>> +		goto out;
>> +
>> +	if (cb != NULL) {
>> +		status = decode_cb_sequence4res(xdr, cb);
>> +		if (unlikely(status))
>> +			goto out;
>> +	}
>> +
>> +	status = decode_cb_op_status(xdr, OP_CB_OFFLOAD, &nfserr);
>> +	if (unlikely(status))
>> +		goto out;
>> +	if (unlikely(nfserr != NFS4_OK))
>> +		status = nfs_cb_stat_to_errno(nfserr);
>> +
>> +out:
>> +	return status;
>> +}
>> +
>> +/*
>>   * RPC procedure tables
>>   */
>>  #define PROC(proc, call, argtype, restype)				\
>> @@ -568,6 +644,7 @@ out:
>>  static struct rpc_procinfo nfs4_cb_procedures[] = {
>>  	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
>>  	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
>> +	PROC(CB_OFFLOAD, COMPOUND,	cb_offload,	cb_offload),
>>  };
>>  
>>  static struct rpc_version nfs_cb_version4 = {
>> @@ -1017,6 +1094,11 @@ void nfsd4_init_callback(struct nfsd4_callback *cb)
>>  	INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc);
>>  }
>>  
>> +void nfsd4_init_delayed_callback(struct nfsd4_callback *cb)
>> +{
>> +	INIT_DELAYED_WORK(&cb->cb_delay, nfsd4_do_callback_rpc);
>> +}
>> +
>>  void nfsd4_cb_recall(struct nfs4_delegation *dp)
>>  {
>>  	struct nfsd4_callback *cb = &dp->dl_recall;
>> @@ -1036,3 +1118,57 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp)
>>  
>>  	run_nfsd4_cb(&dp->dl_recall);
>>  }
>> +
>> +static void nfsd4_cb_offload_done(struct rpc_task *task, void *calldata)
>> +{
>> +	struct nfsd4_callback *cb = calldata;
>> +	struct nfs4_client *clp = cb->cb_clp;
>> +	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
>> +
>> +	nfsd4_cb_done(task, calldata);
>> +
>> +	if (current_rpc_client != task->tk_client)
>> +		return;
>> +
>> +	if (cb->cb_done)
>> +		return;
>> +
>> +	if (task->tk_status != 0)
>> +		nfsd4_mark_cb_down(clp, task->tk_status);
>> +	cb->cb_done = true;
>> +}
>> +
>> +static void nfsd4_cb_offload_release(void *calldata)
>> +{
>> +	struct nfsd4_callback *cb = calldata;
>> +	struct nfs4_cb_offload *offload = container_of(cb, struct nfs4_cb_offload, co_callback);
>> +
>> +	if (cb->cb_done) {
>> +		nfs4_free_offload_stateid(offload->co_stid);
>> +		kfree(offload);
>> +	}
>> +}
>> +
>> +static const struct rpc_call_ops nfsd4_cb_offload_ops = {
>> +	.rpc_call_prepare = nfsd4_cb_prepare,
>> +	.rpc_call_done    = nfsd4_cb_offload_done,
>> +	.rpc_release      = nfsd4_cb_offload_release,
>> +};
>> +
>> +void nfsd4_cb_offload(struct nfs4_cb_offload *offload)
>> +{
>> +	struct nfsd4_callback *cb = &offload->co_callback;
>> +
>> +	cb->cb_op = offload;
>> +	cb->cb_clp = offload->co_stid->sc_client;
>> +	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_OFFLOAD];
>> +	cb->cb_msg.rpc_argp = cb;
>> +	cb->cb_msg.rpc_resp = cb;
>> +
>> +	cb->cb_ops = &nfsd4_cb_offload_ops;
>> +
>> +	INIT_LIST_HEAD(&cb->cb_per_client);
>> +	cb->cb_done = true;
>> +
>> +	run_nfsd4_cb(cb);
>> +}
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index d4584ea..66a787f 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -35,6 +35,7 @@
>>  #include <linux/file.h>
>>  #include <linux/slab.h>
>>  
>> +#include "state.h"
>>  #include "idmap.h"
>>  #include "cache.h"
>>  #include "xdr4.h"
>> @@ -1062,29 +1063,57 @@ out:
>>  	return status;
>>  }
>>  
>> +static void
>> +nfsd4_copy_async(struct work_struct *w)
>> +{
>> +	__be32 status;
>> +	struct nfs4_cb_offload *offload;
>> +
>> +	offload = container_of(w, struct nfs4_cb_offload, co_work);
>> +	status  = nfsd_copy_range(offload->co_src_file, offload->co_src_pos,
>> +				  offload->co_dst_file, offload->co_dst_pos,
>> +				  offload->co_count);
>> +
>> +	if (status == nfs_ok) {
>> +		offload->co_stable_how = NFS_FILE_SYNC;
>> +		gen_boot_verifier(&offload->co_verifier, offload->co_net);
>> +		fput(offload->co_src_file);
>> +		fput(offload->co_dst_file);
>> +	}
>> +	nfsd4_cb_offload(offload);
>> +}
>> +
>>  static __be32
>>  nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>  		struct nfsd4_copy *copy)
>>  {
>> -	__be32 status;
>>  	struct file *src = NULL, *dst = NULL;
>> +	struct nfs4_cb_offload *offload;
>>  
>> -	status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst);
>> -	if (status)
>> -		return status;
>> -
>> -	status = nfsd_copy_range(src, copy->cp_src_pos,
>> -				 dst, copy->cp_dst_pos,
>> -				 copy->cp_count);
>> +	if (nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst))
>> +		return nfserr_jukebox;
>>  
>> -	if (status == nfs_ok) {
>> -		copy->cp_res.wr_stateid = NULL;
>> -		copy->cp_res.wr_bytes_written = copy->cp_count;
>> -		copy->cp_res.wr_stable_how = NFS_FILE_SYNC;
>> -		gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
>> -	}
>> +	offload = kmalloc(sizeof(struct nfs4_cb_offload), GFP_KERNEL);
>> +	if (!offload)
>> +		return nfserr_jukebox;
>>  
>> -	return status;
>> +	offload->co_src_file = get_file(src);
>> +	offload->co_dst_file = get_file(dst);
>> +	offload->co_src_pos  = copy->cp_src_pos;
>> +	offload->co_dst_pos  = copy->cp_dst_pos;
>> +	offload->co_count    = copy->cp_count;
>> +	offload->co_stid     = nfs4_alloc_offload_stateid(cstate->session->se_client);
>> +	offload->co_net      = SVC_NET(rqstp);
>> +	INIT_WORK(&offload->co_work, nfsd4_copy_async);
>> +	nfsd4_init_callback(&offload->co_callback);
>> +	memcpy(&offload->co_dst_fh, &cstate->current_fh, sizeof(struct knfsd_fh));
>> +
>> +	copy->cp_res.wr_stateid = &offload->co_stid->sc_stateid;
>> +	copy->cp_res.wr_bytes_written = 0;
>> +	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
>> +
>> +	schedule_work(&offload->co_work);
>> +	return nfs_ok;
>>  }
>>  
>>  /* This routine never returns NFS_OK!  If there are no other errors, it
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index c4e270e..582edb5 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -364,6 +364,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
>>  	return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
>>  }
>>  
>> +struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *clp)
>> +{
>> +	return nfs4_alloc_stid(clp, stateid_slab);
>> +}
>> +
>>  static struct nfs4_delegation *
>>  alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
>>  {
>> @@ -617,6 +622,12 @@ static void free_generic_stateid(struct nfs4_ol_stateid *stp)
>>  	kmem_cache_free(stateid_slab, stp);
>>  }
>>  
>> +void nfs4_free_offload_stateid(struct nfs4_stid *stid)
>> +{
>> +	remove_stid(stid);
>> +	kmem_cache_free(stateid_slab, stid);
>> +}
>> +
>>  static void release_lock_stateid(struct nfs4_ol_stateid *stp)
>>  {
>>  	struct file *file;
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 2478805..56682fb 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -70,6 +70,7 @@ struct nfsd4_callback {
>>  	struct rpc_message cb_msg;
>>  	const struct rpc_call_ops *cb_ops;
>>  	struct work_struct cb_work;
>> +	struct delayed_work cb_delay;
>>  	bool cb_done;
>>  };
>>  
>> @@ -101,6 +102,22 @@ struct nfs4_delegation {
>>  	struct nfsd4_callback	dl_recall;
>>  };
>>  
>> +struct nfs4_cb_offload {
>> +	struct file	*co_src_file;
>> +	struct file	*co_dst_file;
>> +	u64		co_src_pos;
>> +	u64		co_dst_pos;
>> +	u64		co_count;
>> +	u32		co_stable_how;
>> +	struct knfsd_fh	co_dst_fh;
>> +	nfs4_verifier	co_verifier;
>> +	struct net	*co_net;
>> +
>> +	struct nfs4_stid		*co_stid;
>> +	struct work_struct		co_work;
>> +	struct nfsd4_callback		co_callback;
>> +};
>> +
>>  /* client delegation callback info */
>>  struct nfs4_cb_conn {
>>  	/* SETCLIENTID info */
>> @@ -468,10 +485,12 @@ extern void nfs4_free_openowner(struct nfs4_openowner *);
>>  extern void nfs4_free_lockowner(struct nfs4_lockowner *);
>>  extern int set_callback_cred(void);
>>  extern void nfsd4_init_callback(struct nfsd4_callback *);
>> +extern void nfsd4_init_delayed_callback(struct nfsd4_callback *);
>>  extern void nfsd4_probe_callback(struct nfs4_client *clp);
>>  extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
>>  extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
>>  extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
>> +extern void nfsd4_cb_offload(struct nfs4_cb_offload *);
>>  extern int nfsd4_create_callback_queue(void);
>>  extern void nfsd4_destroy_callback_queue(void);
>>  extern void nfsd4_shutdown_callback(struct nfs4_client *);
>> @@ -480,6 +499,8 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
>>  							struct nfsd_net *nn);
>>  extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
>>  extern void put_client_renew(struct nfs4_client *clp);
>> +extern struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *);
>> +extern void nfs4_free_offload_stateid(struct nfs4_stid *);
>>  
>>  /* nfs4recover operations */
>>  extern int nfsd4_client_tracking_init(struct net *net);
>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>> index c5c55df..75b0ef7 100644
>> --- a/fs/nfsd/xdr4cb.h
>> +++ b/fs/nfsd/xdr4cb.h
>> @@ -21,3 +21,12 @@
>>  #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
>>  					cb_sequence_dec_sz +            \
>>  					op_dec_sz)
>> +
>> +#define NFS4_enc_cb_offload_sz		(cb_compound_enc_hdr_sz +      \
>> +					 cb_sequence_enc_sz +          \
>> +					 1 + enc_stateid_sz + 2 + 1 +  \
>> +					 XDR_QUADLEN(NFS4_VERIFIER_SIZE))
>> +
>> +#define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz +  \
>> +					 cb_sequence_dec_sz +      \
>> +					 op_dec_sz)
>> -- 
>> 1.8.3.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux