Re: [PATCH Version 2 1/1] NFSv4.1 Use MDS auth flavor for data server connection

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sep 5, 2013, at 1:48 PM, "Myklebust, Trond" <Trond.Myklebust@xxxxxxxxxx>
 wrote:

> On Thu, 2013-09-05 at 13:30 -0400, andros@xxxxxxxxxx wrote:
>> From: Andy Adamson <andros@xxxxxxxxxx>
>> 
>> Commit 4edaa308 "NFS: Use "krb5i" to establish NFSv4 state whenever possible"
>> uses the nfs_client cl_rpcclient for all state management operations, and
>> will use krb5i or auth_sys with no regard to the mount command authflavor
>> choice.
>> 
>> The MDS, as any NFSv4.1 mount point, uses the nfs_server rpc client for all
>> non-state management operations with a different nfs_server for each fsid
>> encountered traversing the mount point, each with a potentially different
>> auth flavor.
>> 
>> pNFS data servers are not mounted in the normal sense as there is no associated
>> nfs_server structure. Data servers can also export multiple fsids, each with
>> a potentially different auth flavor.
>> 
>> Data servers need to use the same authflavor as the MDS server rpc client for
>> non-state management operations. Populate a list of rpc clients with the MDS
>> server rpc client auth flavor for the DS to use.
>> 
>> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx>
>> ---
>> fs/nfs/internal.h         |   2 +
>> fs/nfs/nfs4client.c       | 110 ++++++++++++++++++++++++++++++++++++++++++++++
>> fs/nfs/nfs4filelayout.c   |  35 +++++++++++----
>> include/linux/nfs_fs_sb.h |   1 +
>> 4 files changed, 140 insertions(+), 8 deletions(-)
> 
> That looks a lot better. See comments inline below.
> 
>> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
>> index 2415198..23ec6e8 100644
>> --- a/fs/nfs/internal.h
>> +++ b/fs/nfs/internal.h
>> @@ -186,6 +186,8 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
>> 					     int ds_addrlen, int ds_proto,
>> 					     unsigned int ds_timeo,
>> 					     unsigned int ds_retrans);
>> +extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
>> +						struct inode *);
>> #ifdef CONFIG_PROC_FS
>> extern int __init nfs_fs_proc_init(void);
>> extern void nfs_fs_proc_exit(void);
>> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
>> index 767a5e3..a8dd396 100644
>> --- a/fs/nfs/nfs4client.c
>> +++ b/fs/nfs/nfs4client.c
>> @@ -49,10 +49,118 @@ static void nfs4_shutdown_session(struct nfs_client *clp)
>> 	}
>> 
>> }
>> +
>> +/**
>> + * Per auth flavor data server rpc clients
>> + */
>> +struct nfs4_ds_server {
>> +	struct list_head	list;	/* ds_clp->cl_ds_clients */
>> +	struct rpc_clnt		*rpc_clnt;
>> +};
>> +
>> +static struct nfs4_ds_server *
>> +nfs4_find_or_add_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor,
>> +			   struct nfs4_ds_server *new)
>> +{
>> +	struct nfs4_ds_server *dss, *tmp;
>> +
>> +	spin_lock(&ds_clp->cl_lock);
>> +	list_for_each_entry_safe(dss, tmp, &ds_clp->cl_ds_clients, list) {
> 
> Nit: why do we need list_for_each_entry_safe() here? You're not removing
> anything from that list.
> 
>> +		if (dss->rpc_clnt->cl_auth->au_flavor != flavor)
>> +			continue;
>> +		goto out;
>> +	}
>> +	if (new)
>> +		list_add(&new->list, &ds_clp->cl_ds_clients);
>> +	dss = new;
>> +out:
>> +	spin_unlock(&ds_clp->cl_lock); /* need some lock to protect list */
>> +	return dss;
>> +}
>> +
>> +static struct nfs4_ds_server *
>> +nfs4_alloc_ds_server(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
>> +{
>> +	struct nfs4_ds_server *dss;
>> +
>> +	dss = kmalloc(sizeof(*dss), GFP_NOFS);
>> +	if (dss == NULL)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	dss->rpc_clnt = rpc_clone_client_set_auth(ds_clp->cl_rpcclient, flavor);
>> +	if (IS_ERR(dss->rpc_clnt)) {
>> +		int err = PTR_ERR(dss->rpc_clnt);
>> +		kfree (dss);
>> +		return ERR_PTR(err);
>> +	}
>> +	INIT_LIST_HEAD(&dss->list);
>> +
>> +	return dss;
>> +}
>> +
>> +static void
>> +nfs4_free_ds_server(struct nfs4_ds_server *dss)
>> +{
>> +	rpc_release_client(dss->rpc_clnt);
>> +	kfree(dss);
>> +}
>> +
>> +/**
>> + * Find or create a DS rpc client with th MDS server rpc client auth flavor
>> + * in the nfs_client cl_ds_clients list.
>> + */
>> +struct rpc_clnt *
>> +nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode)
>> +{
>> +	struct nfs4_ds_server *dss, *new;
>> +	rpc_authflavor_t flavor = NFS_SERVER(inode)->client->cl_auth->au_flavor;
>> +
>> +	dss = nfs4_find_or_add_ds_client(ds_clp, flavor, NULL);
> 
> Does it make sense to perhaps use RCU in this case (but not the one
> below) in order to avoid the contention on clp->cl_lock?

OK - since this is in the I/O path, I can do that.
> 
>> +	if (dss != NULL)
>> +		goto out;
>> +	new = nfs4_alloc_ds_server(ds_clp, flavor);
>> +	if (IS_ERR(new))
>> +		return (struct rpc_clnt *)new;
> 
> Nit: you should use ERR_CAST() rather than an explicit cast above
> 
>> +	dss = nfs4_find_or_add_ds_client(ds_clp, flavor, new);
>> +	if (dss != new)
>> +		nfs4_free_ds_server(new);
>> +out:
>> +	return dss->rpc_clnt;
>> +}
>> +EXPORT_SYMBOL_GPL(nfs4_find_or_create_ds_client);
>> +
>> +static void
>> +nfs4_shutdown_ds_clients(struct nfs_client *clp)
>> +{
>> +	struct nfs4_ds_server *dss;
>> +	LIST_HEAD(shutdown_list);
>> +
>> +	spin_lock(&clp->cl_lock);
>> +	while (!list_empty(&clp->cl_ds_clients)) {
>> +		dss = list_entry(clp->cl_ds_clients.next,
>> +				  struct nfs4_ds_server, list);
>> +		list_move(&dss->list, &shutdown_list);
> 
> Is this step necessary? We know that nobody other than us is referencing
> the nfs_client at this point.

Because of the kfree under the spin lock - but perhaps that is no longer an issue?

-->Andy
> 
>> +	}
>> +	spin_unlock(&clp->cl_lock);
>> +
>> +	while (!list_empty(&shutdown_list)) {
>> +		dss = list_entry(shutdown_list.next,
>> +				  struct nfs4_ds_server, list);
>> +		list_del(&dss->list);
>> +		rpc_shutdown_client(dss->rpc_clnt);
>> +		kfree (dss);
>> +	}
>> +}
>> +
>> #else /* CONFIG_NFS_V4_1 */
>> static void nfs4_shutdown_session(struct nfs_client *clp)
>> {
>> }
>> +
>> +static void
>> +nfs4_shutdown_ds_clients(struct nfs_client *clp)
>> +{
>> +}
>> #endif /* CONFIG_NFS_V4_1 */
>> 
>> struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
>> @@ -73,6 +181,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
>> 
>> 	spin_lock_init(&clp->cl_lock);
>> 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
>> +	INIT_LIST_HEAD(&clp->cl_ds_clients);
>> 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
>> 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
>> 	clp->cl_minorversion = cl_init->minorversion;
>> @@ -97,6 +206,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
>> {
>> 	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
>> 		nfs4_kill_renewd(clp);
>> +	nfs4_shutdown_ds_clients(clp);
>> 	nfs4_shutdown_session(clp);
>> 	nfs4_destroy_callback(clp);
>> 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>> index a70cb3a..b86464b 100644
>> --- a/fs/nfs/nfs4filelayout.c
>> +++ b/fs/nfs/nfs4filelayout.c
>> @@ -528,6 +528,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
>> 	struct nfs_pgio_header *hdr = data->header;
>> 	struct pnfs_layout_segment *lseg = hdr->lseg;
>> 	struct nfs4_pnfs_ds *ds;
>> +	struct rpc_clnt *ds_clnt;
>> 	loff_t offset = data->args.offset;
>> 	u32 j, idx;
>> 	struct nfs_fh *fh;
>> @@ -542,6 +543,11 @@ filelayout_read_pagelist(struct nfs_read_data *data)
>> 	ds = nfs4_fl_prepare_ds(lseg, idx);
>> 	if (!ds)
>> 		return PNFS_NOT_ATTEMPTED;
>> +
>> +	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
>> +	if (IS_ERR(ds_clnt))
>> +		return PNFS_NOT_ATTEMPTED;
>> +
>> 	dprintk("%s USE DS: %s cl_count %d\n", __func__,
>> 		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
>> 
>> @@ -556,7 +562,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
>> 	data->mds_offset = offset;
>> 
>> 	/* Perform an asynchronous read to ds */
>> -	nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
>> +	nfs_initiate_read(ds_clnt, data,
>> 				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
>> 	return PNFS_ATTEMPTED;
>> }
>> @@ -568,6 +574,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
>> 	struct nfs_pgio_header *hdr = data->header;
>> 	struct pnfs_layout_segment *lseg = hdr->lseg;
>> 	struct nfs4_pnfs_ds *ds;
>> +	struct rpc_clnt *ds_clnt;
>> 	loff_t offset = data->args.offset;
>> 	u32 j, idx;
>> 	struct nfs_fh *fh;
>> @@ -578,6 +585,11 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
>> 	ds = nfs4_fl_prepare_ds(lseg, idx);
>> 	if (!ds)
>> 		return PNFS_NOT_ATTEMPTED;
>> +
>> +	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
>> +	if (IS_ERR(ds_clnt))
>> +		return PNFS_NOT_ATTEMPTED;
>> +
>> 	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
>> 		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
>> 		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
>> @@ -595,7 +607,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
>> 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
>> 
>> 	/* Perform an asynchronous write */
>> -	nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
>> +	nfs_initiate_write(ds_clnt, data,
>> 				    &filelayout_write_call_ops, sync,
>> 				    RPC_TASK_SOFTCONN);
>> 	return PNFS_ATTEMPTED;
>> @@ -1105,16 +1117,19 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
>> {
>> 	struct pnfs_layout_segment *lseg = data->lseg;
>> 	struct nfs4_pnfs_ds *ds;
>> +	struct rpc_clnt *ds_clnt;
>> 	u32 idx;
>> 	struct nfs_fh *fh;
>> 
>> 	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
>> 	ds = nfs4_fl_prepare_ds(lseg, idx);
>> -	if (!ds) {
>> -		prepare_to_resend_writes(data);
>> -		filelayout_commit_release(data);
>> -		return -EAGAIN;
>> -	}
>> +	if (!ds)
>> +		goto out_err;
>> +
>> +	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode);
>> +	if (IS_ERR(ds_clnt))
>> +		goto out_err;
>> +
>> 	dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
>> 		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
>> 	data->commit_done_cb = filelayout_commit_done_cb;
>> @@ -1123,9 +1138,13 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
>> 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
>> 	if (fh)
>> 		data->args.fh = fh;
>> -	return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
>> +	return nfs_initiate_commit(ds_clnt, data,
>> 				   &filelayout_commit_call_ops, how,
>> 				   RPC_TASK_SOFTCONN);
>> +out_err:
>> +	prepare_to_resend_writes(data);
>> +	filelayout_commit_release(data);
>> +	return -EAGAIN;
>> }
>> 
>> static int
>> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
>> index d221243..4d476e7 100644
>> --- a/include/linux/nfs_fs_sb.h
>> +++ b/include/linux/nfs_fs_sb.h
>> @@ -56,6 +56,7 @@ struct nfs_client {
>> 	struct rpc_cred		*cl_machine_cred;
>> 
>> #if IS_ENABLED(CONFIG_NFS_V4)
>> +	struct list_head	cl_ds_clients; /* auth flavor data servers */
>> 	u64			cl_clientid;	/* constant */
>> 	nfs4_verifier		cl_confirm;	/* Clientid verifier */
>> 	unsigned long		cl_state;
> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer
> 
> NetApp
> Trond.Myklebust@xxxxxxxxxx
> www.netapp.com

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux