Re: [PATCH Version 2 2/2] NFSv4.1 Use the MDS nfs_server authflavor for pNFS data server connections

There's another case - what happens when a DS op uses an authflavor that's already in use by an nfs_server's rpc_clnt other than the nfs_client::cl_rpcclient?

Do we want to make this cache general enough that it would share the rpc_clnt between the DS op and the nfs_server? This would also share the rpc_clnt between nfs_servers associated with the same nfs_client that have the same auth flavor.

Maybe this is overkill - allocating a new nfs_server is infrequent, so incurring the cost of creating a new rpc_clnt there isn't so bad, while looking up the right rpc_clnt for DS communication is very frequent and we obviously don't want to allocate a new rpc_clnt each time.
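
Roughly what I'm picturing (completely untested, and the helper name is made up - it just reuses the pieces already in this patch, plus a check against cl_rpcclient first; locking around the table is ignored):

/* Untested sketch: find or create an rpc_clnt on @clp for @flavor.
 * Reuse cl_rpcclient when it already uses this flavor, otherwise fall
 * back to the per-flavor table so DS ops (and potentially nfs_servers)
 * can share the clone.
 */
static struct rpc_clnt *
nfs_find_or_create_ds_client(struct nfs_client *clp, rpc_authflavor_t flavor)
{
	int idx = filelayout_rpc_clnt_index(flavor);

	if (idx < 0)
		return ERR_PTR(idx);

	/* The state-management client may already use this flavor */
	if (!IS_ERR(clp->cl_rpcclient) &&
	    clp->cl_rpcclient->cl_auth->au_flavor == flavor)
		return clp->cl_rpcclient;

	if (IS_ERR(clp->cl_ds_clnt[idx]))
		clp->cl_ds_clnt[idx] =
			rpc_clone_client_set_auth(clp->cl_rpcclient, flavor);

	return clp->cl_ds_clnt[idx];
}

That would at least avoid ending up with two clients for the same flavor; whether it's worth going further and letting nfs_server creation use the same table is the part I'm less sure about.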

Thoughts?

-dros

On Jul 24, 2013, at 1:53 PM, "Adamson, Andy" <William.Adamson@xxxxxxxxxx> wrote:

> 
> On Jul 24, 2013, at 1:23 PM, "Adamson, Dros" <Weston.Adamson@xxxxxxxxxx>
> wrote:
> 
>> Is there a reason that this is ds specific?
> 
> Yes - the normal mount/sub-mount path creates nfs_server structs with appropriate rpc_clnts, so they do not need this feature.
> 
>> Why can't we just do this for cl_rpcclient and get rid of cl_ds_rpcclient?
>> 
>> That way, if there is already an rpcclient with that authflavor associated with an nfs_client we'd reuse it.
> 
> This patch can indeed leave us with two rpc_clnts to the same server using the same auth flavor: e.g. krb5i is used for clientid management, so cl_rpcclient uses krb5i, and a DS that exports with krb5i would also get a clone in cl_ds_clnt[2]. Good suggestion - I can fix that.
> 
>> 
>> -dros
>> 
>> On Jul 24, 2013, at 11:59 AM, andros@xxxxxxxxxx wrote:
>> 
>>> From: Andy Adamson <andros@xxxxxxxxxx>
>>> 
>>> pNFS data servers are not mounted in the normal sense as there is no associated
>>> nfs_server structure.
>>> 
>>> Commit 4edaa308 "NFS: Use "krb5i" to establish NFSv4 state whenever possible"
>>> uses the nfs_client cl_rpcclient for all state management operations, and
>>> will use krb5i or auth_sys with no regard to the mount command authflavor
>>> choice.  For normally mounted servers, the nfs_server client authflavor is used
>>> for all non-state management operations.
>>> 
>>> Data servers use the same authflavor as the MDS mount for non-state management
>>> operations. Note that the MDS has performed any sub-mounts and created an
>>> nfs_server rpc client. Add an array of struct rpc_clnt to struct
>>> nfs_client, one for each possible auth flavor for data server RPC connections.
>>> 
>>> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx>
>>> ---
>>> fs/nfs/client.c            | 14 +++++++++-
>>> fs/nfs/nfs4filelayout.c    | 65 ++++++++++++++++++++++++++++++++++++++--------
>>> fs/nfs/nfs4filelayout.h    | 17 ++++++++++++
>>> fs/nfs/nfs4filelayoutdev.c |  3 ++-
>>> include/linux/nfs_fs_sb.h  |  5 ++++
>>> 5 files changed, 91 insertions(+), 13 deletions(-)
>>> 
>>> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>>> index 2dceee4..fc63967 100644
>>> --- a/fs/nfs/client.c
>>> +++ b/fs/nfs/client.c
>>> @@ -152,7 +152,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
>>> {
>>> 	struct nfs_client *clp;
>>> 	struct rpc_cred *cred;
>>> -	int err = -ENOMEM;
>>> +	int err = -ENOMEM, i;
>>> 
>>> 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
>>> 		goto error_0;
>>> @@ -177,6 +177,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
>>> 
>>> 	INIT_LIST_HEAD(&clp->cl_superblocks);
>>> 	clp->cl_rpcclient = ERR_PTR(-EINVAL);
>>> +	for (i = 0; i < NFS_NUM_DS_RPC_CLNT; i++)
>>> +		clp->cl_ds_clnt[i] = ERR_PTR(-EINVAL);
>>> 
>>> 	clp->cl_proto = cl_init->proto;
>>> 	clp->cl_net = get_net(cl_init->net);
>>> @@ -238,6 +240,8 @@ static void pnfs_init_server(struct nfs_server *server)
>>> */
>>> void nfs_free_client(struct nfs_client *clp)
>>> {
>>> +	int i;
>>> +
>>> 	dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
>>> 
>>> 	nfs_fscache_release_client_cookie(clp);
>>> @@ -246,6 +250,14 @@ void nfs_free_client(struct nfs_client *clp)
>>> 	if (!IS_ERR(clp->cl_rpcclient))
>>> 		rpc_shutdown_client(clp->cl_rpcclient);
>>> 
>>> +	for (i = 0; i < NFS_NUM_DS_RPC_CLNT; i++) {
>>> +		if (!IS_ERR(clp->cl_ds_clnt[i])) {
>>> +			dprintk("%s shutdown data server client %p index %d\n",
>>> +				__func__, clp->cl_ds_clnt[i], i);
>>> +			rpc_shutdown_client(clp->cl_ds_clnt[i]);
>>> +		}
>>> +	}
>>> +
>>> 	if (clp->cl_machine_cred != NULL)
>>> 		put_rpccred(clp->cl_machine_cred);
>>> 
>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>> index 17ed87e..eb33592 100644
>>> --- a/fs/nfs/nfs4filelayout.c
>>> +++ b/fs/nfs/nfs4filelayout.c
>>> @@ -83,6 +83,31 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
>>> 	BUG();
>>> }
>>> 
>>> +/* Use the au_flavor of the MDS nfs_server RPC client to find or clone the
>>> + * correct data server RPC client.
>>> + *
>>> + * Note that the MDS has already performed any sub-mounts and negotiated
>>> + * a security flavor.
>>> + */
>>> +static struct rpc_clnt *
>>> +filelayout_rpc_clnt(struct inode *inode, struct nfs_client *clp)
>>> +{
>>> +	rpc_authflavor_t flavor = NFS_SERVER(inode)->client->cl_auth->au_flavor;
>>> +	int index = filelayout_rpc_clnt_index(flavor);
>>> +
>>> +	if (index < 0)
>>> +		return ERR_PTR(index);
>>> +
>>> +	if (IS_ERR(clp->cl_ds_clnt[index])) {
>>> +		clp->cl_ds_clnt[index] =
>>> +			rpc_clone_client_set_auth(clp->cl_rpcclient, flavor);
>>> +
>>> +		dprintk("%s clone data server client %p flavor %d index %d \n",
>>> +			__func__, clp->cl_ds_clnt[index], flavor, index);
>>> +	}
>>> +	return clp->cl_ds_clnt[index];
>>> +}
>>> +
>>> static void filelayout_reset_write(struct nfs_write_data *data)
>>> {
>>> 	struct nfs_pgio_header *hdr = data->header;
>>> @@ -524,6 +549,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
>>> 	struct nfs_pgio_header *hdr = data->header;
>>> 	struct pnfs_layout_segment *lseg = hdr->lseg;
>>> 	struct nfs4_pnfs_ds *ds;
>>> +	struct rpc_clnt *ds_clnt;
>>> 	loff_t offset = data->args.offset;
>>> 	u32 j, idx;
>>> 	struct nfs_fh *fh;
>>> @@ -538,6 +564,11 @@ filelayout_read_pagelist(struct nfs_read_data *data)
>>> 	ds = nfs4_fl_prepare_ds(lseg, idx);
>>> 	if (!ds)
>>> 		return PNFS_NOT_ATTEMPTED;
>>> +
>>> +	ds_clnt = filelayout_rpc_clnt(hdr->inode, ds->ds_clp);
>>> +	if (IS_ERR(ds_clnt))
>>> +		return PNFS_NOT_ATTEMPTED;
>>> +
>>> 	dprintk("%s USE DS: %s cl_count %d\n", __func__,
>>> 		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
>>> 
>>> @@ -552,8 +583,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
>>> 	data->mds_offset = offset;
>>> 
>>> 	/* Perform an asynchronous read to ds */
>>> -	nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
>>> -				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
>>> +	nfs_initiate_read(ds_clnt, data, &filelayout_read_call_ops,
>>> +			RPC_TASK_SOFTCONN);
>>> 	return PNFS_ATTEMPTED;
>>> }
>>> 
>>> @@ -564,6 +595,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
>>> 	struct nfs_pgio_header *hdr = data->header;
>>> 	struct pnfs_layout_segment *lseg = hdr->lseg;
>>> 	struct nfs4_pnfs_ds *ds;
>>> +	struct rpc_clnt *ds_clnt;
>>> 	loff_t offset = data->args.offset;
>>> 	u32 j, idx;
>>> 	struct nfs_fh *fh;
>>> @@ -574,6 +606,11 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
>>> 	ds = nfs4_fl_prepare_ds(lseg, idx);
>>> 	if (!ds)
>>> 		return PNFS_NOT_ATTEMPTED;
>>> +
>>> +	ds_clnt = filelayout_rpc_clnt(hdr->inode, ds->ds_clp);
>>> +	if (IS_ERR(ds_clnt))
>>> +		return PNFS_NOT_ATTEMPTED;
>>> +
>>> 	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
>>> 		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
>>> 		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
>>> @@ -591,9 +628,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
>>> 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
>>> 
>>> 	/* Perform an asynchronous write */
>>> -	nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
>>> -				    &filelayout_write_call_ops, sync,
>>> -				    RPC_TASK_SOFTCONN);
>>> +	nfs_initiate_write(ds_clnt, data, &filelayout_write_call_ops, sync,
>>> +				RPC_TASK_SOFTCONN);
>>> 	return PNFS_ATTEMPTED;
>>> }
>>> 
>>> @@ -1101,16 +1137,19 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
>>> {
>>> 	struct pnfs_layout_segment *lseg = data->lseg;
>>> 	struct nfs4_pnfs_ds *ds;
>>> +	struct rpc_clnt *ds_clnt;
>>> 	u32 idx;
>>> 	struct nfs_fh *fh;
>>> 
>>> 	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
>>> 	ds = nfs4_fl_prepare_ds(lseg, idx);
>>> -	if (!ds) {
>>> -		prepare_to_resend_writes(data);
>>> -		filelayout_commit_release(data);
>>> -		return -EAGAIN;
>>> -	}
>>> +	if (!ds)
>>> +		goto out_err;
>>> +
>>> +	ds_clnt = filelayout_rpc_clnt(data->inode, ds->ds_clp);
>>> +	if (IS_ERR(ds_clnt))
>>> +		goto out_err;
>>> +
>>> 	dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
>>> 		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
>>> 	data->commit_done_cb = filelayout_commit_done_cb;
>>> @@ -1119,9 +1158,13 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
>>> 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
>>> 	if (fh)
>>> 		data->args.fh = fh;
>>> -	return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
>>> +	return nfs_initiate_commit(ds_clnt, data,
>>> 				   &filelayout_commit_call_ops, how,
>>> 				   RPC_TASK_SOFTCONN);
>>> +out_err:
>>> +	prepare_to_resend_writes(data);
>>> +	filelayout_commit_release(data);
>>> +	return -EAGAIN;
>>> }
>>> 
>>> static int
>>> diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
>>> index cebd20e..9bb39ec 100644
>>> --- a/fs/nfs/nfs4filelayout.h
>>> +++ b/fs/nfs/nfs4filelayout.h
>>> @@ -136,6 +136,23 @@ filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
>>> 	return test_bit(NFS_DEVICEID_INVALID, &node->flags);
>>> }
>>> 
>>> +static inline int
>>> +filelayout_rpc_clnt_index(rpc_authflavor_t flavor)
>>> +{
>>> +	switch (flavor) {
>>> +		case RPC_AUTH_UNIX:
>>> +			return 0;
>>> +		case RPC_AUTH_GSS_KRB5:
>>> +			return 1;
>>> +		case RPC_AUTH_GSS_KRB5I:
>>> +			return 2;
>>> +		case RPC_AUTH_GSS_KRB5P:
>>> +			return 3;
>>> +		default:
>>> +			return -EINVAL;
>>> +	}
>>> +}
>>> +
>>> extern bool
>>> filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node);
>>> 
>>> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
>>> index 95604f6..fea056d 100644
>>> --- a/fs/nfs/nfs4filelayoutdev.c
>>> +++ b/fs/nfs/nfs4filelayoutdev.c
>>> @@ -162,7 +162,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
>>> 	int status = 0;
>>> 
>>> 	dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
>>> -		mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
>>> +		mds_srv->client->cl_auth->au_flavor);
>>> 
>>> 	list_for_each_entry(da, &ds->ds_addrs, da_node) {
>>> 		dprintk("%s: DS %s: trying address %s\n",
>>> @@ -186,6 +186,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
>>> 		goto out_put;
>>> 
>>> 	ds->ds_clp = clp;
>>> +
>>> 	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
>>> out:
>>> 	return status;
>>> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
>>> index d221243..bd86a1b 100644
>>> --- a/include/linux/nfs_fs_sb.h
>>> +++ b/include/linux/nfs_fs_sb.h
>>> @@ -20,6 +20,10 @@ struct nfs4_minor_version_ops;
>>> struct nfs41_server_scope;
>>> struct nfs41_impl_id;
>>> 
>>> +
>>> +/* One rpc clnt for each authentication flavor */
>>> +#define NFS_NUM_DS_RPC_CLNT 4
>>> +
>>> /*
>>> * The nfs_client identifies our client state to the server.
>>> */
>>> @@ -56,6 +60,7 @@ struct nfs_client {
>>> 	struct rpc_cred		*cl_machine_cred;
>>> 
>>> #if IS_ENABLED(CONFIG_NFS_V4)
>>> +	struct rpc_clnt *	cl_ds_clnt[NFS_NUM_DS_RPC_CLNT];
>>> 	u64			cl_clientid;	/* constant */
>>> 	nfs4_verifier		cl_confirm;	/* Clientid verifier */
>>> 	unsigned long		cl_state;
>>> -- 
>>> 1.8.3.1
>>> 
>> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



