On Sep 5, 2013, at 1:48 PM, "Myklebust, Trond" <Trond.Myklebust@xxxxxxxxxx> wrote: > On Thu, 2013-09-05 at 13:30 -0400, andros@xxxxxxxxxx wrote: >> From: Andy Adamson <andros@xxxxxxxxxx> >> >> Commit 4edaa308 "NFS: Use "krb5i" to establish NFSv4 state whenever possible" >> uses the nfs_client cl_rpcclient for all state management operations, and >> will use krb5i or auth_sys with no regard to the mount command authflavor >> choice. >> >> The MDS, as any NFSv4.1 mount point, uses the nfs_server rpc client for all >> non-state management operations with a different nfs_server for each fsid >> encountered traversing the mount point, each with a potentially different >> auth flavor. >> >> pNFS data servers are not mounted in the normal sense as there is no associated >> nfs_server structure. Data servers can also export multiple fsids, each with >> a potentially different auth flavor. >> >> Data servers need to use the same authflavor as the MDS server rpc client for >> non-state management operations. Populate a list of rpc clients with the MDS >> server rpc client auth flavor for the DS to use. >> >> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx> >> --- >> fs/nfs/internal.h | 2 + >> fs/nfs/nfs4client.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfs/nfs4filelayout.c | 35 +++++++++++---- >> include/linux/nfs_fs_sb.h | 1 + >> 4 files changed, 140 insertions(+), 8 deletions(-) > > That looks a lot better. See comments inline below. > >> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h >> index 2415198..23ec6e8 100644 >> --- a/fs/nfs/internal.h >> +++ b/fs/nfs/internal.h >> @@ -186,6 +186,8 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, >> int ds_addrlen, int ds_proto, >> unsigned int ds_timeo, >> unsigned int ds_retrans); >> +extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, >> + struct inode *); >> #ifdef CONFIG_PROC_FS >> extern int __init nfs_fs_proc_init(void); >> extern void nfs_fs_proc_exit(void); >> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c >> index 767a5e3..a8dd396 100644 >> --- a/fs/nfs/nfs4client.c >> +++ b/fs/nfs/nfs4client.c >> @@ -49,10 +49,118 @@ static void nfs4_shutdown_session(struct nfs_client *clp) >> } >> >> } >> + >> +/** >> + * Per auth flavor data server rpc clients >> + */ >> +struct nfs4_ds_server { >> + struct list_head list; /* ds_clp->cl_ds_clients */ >> + struct rpc_clnt *rpc_clnt; >> +}; >> + >> +static struct nfs4_ds_server * >> +nfs4_find_or_add_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor, >> + struct nfs4_ds_server *new) >> +{ >> + struct nfs4_ds_server *dss, *tmp; >> + >> + spin_lock(&ds_clp->cl_lock); >> + list_for_each_entry_safe(dss, tmp, &ds_clp->cl_ds_clients, list) { > > Nit: why do we need list_for_each_entry_safe() here? You're not removing > anything from that list. > >> + if (dss->rpc_clnt->cl_auth->au_flavor != flavor) >> + continue; >> + goto out; >> + } >> + if (new) >> + list_add(&new->list, &ds_clp->cl_ds_clients); >> + dss = new; >> +out: >> + spin_unlock(&ds_clp->cl_lock); /* need some lock to protect list */ >> + return dss; >> +} >> + >> +static struct nfs4_ds_server * >> +nfs4_alloc_ds_server(struct nfs_client *ds_clp, rpc_authflavor_t flavor) >> +{ >> + struct nfs4_ds_server *dss; >> + >> + dss = kmalloc(sizeof(*dss), GFP_NOFS); >> + if (dss == NULL) >> + return ERR_PTR(-ENOMEM); >> + >> + dss->rpc_clnt = rpc_clone_client_set_auth(ds_clp->cl_rpcclient, flavor); >> + if (IS_ERR(dss->rpc_clnt)) { >> + int err = PTR_ERR(dss->rpc_clnt); >> + kfree (dss); >> + return ERR_PTR(err); >> + } >> + INIT_LIST_HEAD(&dss->list); >> + >> + return dss; >> +} >> + >> +static void >> +nfs4_free_ds_server(struct nfs4_ds_server *dss) >> +{ >> + rpc_release_client(dss->rpc_clnt); >> + kfree(dss); >> +} >> + >> +/** >> + * Find or create a DS rpc client with th MDS server rpc client auth flavor >> + * in the nfs_client cl_ds_clients list. >> + */ >> +struct rpc_clnt * >> +nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode) >> +{ >> + struct nfs4_ds_server *dss, *new; >> + rpc_authflavor_t flavor = NFS_SERVER(inode)->client->cl_auth->au_flavor; >> + >> + dss = nfs4_find_or_add_ds_client(ds_clp, flavor, NULL); > > Does it make sense to perhaps use RCU in this case (but not the one > below) in order to avoid the contention on clp->cl_lock? OK - since this is in the I/O path, I can do that. > >> + if (dss != NULL) >> + goto out; >> + new = nfs4_alloc_ds_server(ds_clp, flavor); >> + if (IS_ERR(new)) >> + return (struct rpc_clnt *)new; > > Nit: you should use ERR_CAST() rather than an explicit cast above > >> + dss = nfs4_find_or_add_ds_client(ds_clp, flavor, new); >> + if (dss != new) >> + nfs4_free_ds_server(new); >> +out: >> + return dss->rpc_clnt; >> +} >> +EXPORT_SYMBOL_GPL(nfs4_find_or_create_ds_client); >> + >> +static void >> +nfs4_shutdown_ds_clients(struct nfs_client *clp) >> +{ >> + struct nfs4_ds_server *dss; >> + LIST_HEAD(shutdown_list); >> + >> + spin_lock(&clp->cl_lock); >> + while (!list_empty(&clp->cl_ds_clients)) { >> + dss = list_entry(clp->cl_ds_clients.next, >> + struct nfs4_ds_server, list); >> + list_move(&dss->list, &shutdown_list); > > Is this step necessary? We know that nobody other than us is referencing > the nfs_client at this point. Because of the kfree under the spin lock - but perhaps that is no longer an issue? -->Andy > >> + } >> + spin_unlock(&clp->cl_lock); >> + >> + while (!list_empty(&shutdown_list)) { >> + dss = list_entry(shutdown_list.next, >> + struct nfs4_ds_server, list); >> + list_del(&dss->list); >> + rpc_shutdown_client(dss->rpc_clnt); >> + kfree (dss); >> + } >> +} >> + >> #else /* CONFIG_NFS_V4_1 */ >> static void nfs4_shutdown_session(struct nfs_client *clp) >> { >> } >> + >> +static void >> +nfs4_shutdown_ds_clients(struct nfs_client *clp) >> +{ >> +} >> #endif /* CONFIG_NFS_V4_1 */ >> >> struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) >> @@ -73,6 +181,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) >> >> spin_lock_init(&clp->cl_lock); >> INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); >> + INIT_LIST_HEAD(&clp->cl_ds_clients); >> rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); >> clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; >> clp->cl_minorversion = cl_init->minorversion; >> @@ -97,6 +206,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) >> { >> if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) >> nfs4_kill_renewd(clp); >> + nfs4_shutdown_ds_clients(clp); >> nfs4_shutdown_session(clp); >> nfs4_destroy_callback(clp); >> if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) >> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c >> index a70cb3a..b86464b 100644 >> --- a/fs/nfs/nfs4filelayout.c >> +++ b/fs/nfs/nfs4filelayout.c >> @@ -528,6 +528,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) >> struct nfs_pgio_header *hdr = data->header; >> struct pnfs_layout_segment *lseg = hdr->lseg; >> struct nfs4_pnfs_ds *ds; >> + struct rpc_clnt *ds_clnt; >> loff_t offset = data->args.offset; >> u32 j, idx; >> struct nfs_fh *fh; >> @@ -542,6 +543,11 @@ filelayout_read_pagelist(struct nfs_read_data *data) >> ds = nfs4_fl_prepare_ds(lseg, idx); >> if (!ds) >> return PNFS_NOT_ATTEMPTED; >> + >> + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); >> + if (IS_ERR(ds_clnt)) >> + return PNFS_NOT_ATTEMPTED; >> + >> dprintk("%s USE DS: %s cl_count %d\n", __func__, >> ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); >> >> @@ -556,7 +562,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) >> data->mds_offset = offset; >> >> /* Perform an asynchronous read to ds */ >> - nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, >> + nfs_initiate_read(ds_clnt, data, >> &filelayout_read_call_ops, RPC_TASK_SOFTCONN); >> return PNFS_ATTEMPTED; >> } >> @@ -568,6 +574,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) >> struct nfs_pgio_header *hdr = data->header; >> struct pnfs_layout_segment *lseg = hdr->lseg; >> struct nfs4_pnfs_ds *ds; >> + struct rpc_clnt *ds_clnt; >> loff_t offset = data->args.offset; >> u32 j, idx; >> struct nfs_fh *fh; >> @@ -578,6 +585,11 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) >> ds = nfs4_fl_prepare_ds(lseg, idx); >> if (!ds) >> return PNFS_NOT_ATTEMPTED; >> + >> + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); >> + if (IS_ERR(ds_clnt)) >> + return PNFS_NOT_ATTEMPTED; >> + >> dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", >> __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, >> offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); >> @@ -595,7 +607,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) >> data->args.offset = filelayout_get_dserver_offset(lseg, offset); >> >> /* Perform an asynchronous write */ >> - nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, >> + nfs_initiate_write(ds_clnt, data, >> &filelayout_write_call_ops, sync, >> RPC_TASK_SOFTCONN); >> return PNFS_ATTEMPTED; >> @@ -1105,16 +1117,19 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) >> { >> struct pnfs_layout_segment *lseg = data->lseg; >> struct nfs4_pnfs_ds *ds; >> + struct rpc_clnt *ds_clnt; >> u32 idx; >> struct nfs_fh *fh; >> >> idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); >> ds = nfs4_fl_prepare_ds(lseg, idx); >> - if (!ds) { >> - prepare_to_resend_writes(data); >> - filelayout_commit_release(data); >> - return -EAGAIN; >> - } >> + if (!ds) >> + goto out_err; >> + >> + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode); >> + if (IS_ERR(ds_clnt)) >> + goto out_err; >> + >> dprintk("%s ino %lu, how %d cl_count %d\n", __func__, >> data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); >> data->commit_done_cb = filelayout_commit_done_cb; >> @@ -1123,9 +1138,13 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) >> fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); >> if (fh) >> data->args.fh = fh; >> - return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data, >> + return nfs_initiate_commit(ds_clnt, data, >> &filelayout_commit_call_ops, how, >> RPC_TASK_SOFTCONN); >> +out_err: >> + prepare_to_resend_writes(data); >> + filelayout_commit_release(data); >> + return -EAGAIN; >> } >> >> static int >> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h >> index d221243..4d476e7 100644 >> --- a/include/linux/nfs_fs_sb.h >> +++ b/include/linux/nfs_fs_sb.h >> @@ -56,6 +56,7 @@ struct nfs_client { >> struct rpc_cred *cl_machine_cred; >> >> #if IS_ENABLED(CONFIG_NFS_V4) >> + struct list_head cl_ds_clients; /* auth flavor data servers */ >> u64 cl_clientid; /* constant */ >> nfs4_verifier cl_confirm; /* Clientid verifier */ >> unsigned long cl_state; > > -- > Trond Myklebust > Linux NFS client maintainer > > NetApp > Trond.Myklebust@xxxxxxxxxx > www.netapp.com -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html