On Mon, 2013-07-08 at 09:58 +1000, NeilBrown wrote: > > This patch adds a "nosharetransport" option to allow two different > mounts from the same server to use different transports. > If the mounts use NFSv4, or are of the same filesystem, then > "nosharecache" must be used as well. Won't this interfere with the recently added NFSv4 trunking detection? Also, how will it work with NFSv4.1 sessions? The server will usually require a BIND_CONN_TO_SESSION when new TCP connections attempt to attach to an existing session. > There are at least two circumstances where it might be desirable > to use separate transports: > > 1/ If the NFS server can get into a state where it will ignore > requests for one filesystem while servicing requests for another, > then using separate connections for the separate filesystems can > stop problems with one affecting access to the other. > > This is particularly relevant for NetApp filers where one filesystem > has been "suspended". Requests to that filesystem will be dropped > (rather than the more correct NFS3ERR_JUKEBOX). This currently > interferes with other filesystems. This is a known issue that really needs to be fixed on the server, not on the client. As far as I know, work is already underway to fix this. > 2/ If a very fast network is used with a many-processor client, a > single TCP connection can present a bottleneck which reduces total > throughput. Using multiple TCP connections (one per mount) removes > the bottleneck. > An alternate workaround is to configure multiple virtual IP > addresses on the server and mount each filesystem from a different > IP. This is effective (throughput goes up) but an unnecessary > administrative burden. As I understand it, using multiple simultaneous TCP connections between the same endpoints also adds a risk that the congestion windows will interfere. Do you have numbers to back up the claim of a performance improvement? 
The other issue I can think of is that for NFS versions < 4.1, this may cause the server to allocate more resources per client in the form of replay caches etc. > Signed-off-by: NeilBrown <neilb@xxxxxxx> > > --- > Is this a good idea? Bad idea? Have I missed something important? > > NeilBrown > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > index c513b0c..64e3f39 100644 > --- a/fs/nfs/client.c > +++ b/fs/nfs/client.c > @@ -403,8 +403,13 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat > const struct sockaddr *sap = data->addr; > struct nfs_net *nn = net_generic(data->net, nfs_net_id); > > + if (test_bit(NFS_CS_NO_SHARE, &data->init_flags)) > + return NULL; > + > list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { > const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; > + if (test_bit(NFS_CS_NO_SHARE,&clp->cl_flags)) > + continue; > /* Don't match clients that failed to initialise properly */ > if (clp->cl_cons_state < 0) > continue; > @@ -753,6 +758,8 @@ static int nfs_init_server(struct nfs_server *server, > data->timeo, data->retrans); > if (data->flags & NFS_MOUNT_NORESVPORT) > set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); > + if (data->flags & NFS_MOUNT_NOSHARE_XPRT) > + set_bit(NFS_CS_NO_SHARE, &cl_init.init_flags); > if (server->options & NFS_OPTION_MIGRATION) > set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); > > diff --git a/fs/nfs/super.c b/fs/nfs/super.c > index 2d7525f..d9141d8 100644 > --- a/fs/nfs/super.c > +++ b/fs/nfs/super.c > @@ -88,6 +88,7 @@ enum { > Opt_acl, Opt_noacl, > Opt_rdirplus, Opt_nordirplus, > Opt_sharecache, Opt_nosharecache, > + Opt_sharetransport, Opt_nosharetransport, > Opt_resvport, Opt_noresvport, > Opt_fscache, Opt_nofscache, > Opt_migration, Opt_nomigration, > @@ -146,6 +147,8 @@ static const match_table_t nfs_mount_option_tokens = { > { Opt_nordirplus, "nordirplus" }, > { Opt_sharecache, "sharecache" }, > { Opt_nosharecache, "nosharecache" }, > + { 
Opt_sharetransport, "sharetransport"}, > + { Opt_nosharetransport, "nosharetransport"}, > { Opt_resvport, "resvport" }, > { Opt_noresvport, "noresvport" }, > { Opt_fscache, "fsc" }, > @@ -634,6 +637,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, > { NFS_MOUNT_NOACL, ",noacl", "" }, > { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, > { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, > + { NFS_MOUNT_NOSHARE_XPRT, ",nosharetransport", ""}, > { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, > { 0, NULL, NULL } > }; > @@ -1239,6 +1243,12 @@ static int nfs_parse_mount_options(char *raw, > case Opt_nosharecache: > mnt->flags |= NFS_MOUNT_UNSHARED; > break; > + case Opt_sharetransport: > + mnt->flags &= ~NFS_MOUNT_NOSHARE_XPRT; > + break; > + case Opt_nosharetransport: > + mnt->flags |= NFS_MOUNT_NOSHARE_XPRT; > + break; > case Opt_resvport: > mnt->flags &= ~NFS_MOUNT_NORESVPORT; > break; > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h > index 3b7fa2a..9e9d7d3 100644 > --- a/include/linux/nfs_fs_sb.h > +++ b/include/linux/nfs_fs_sb.h > @@ -41,6 +41,7 @@ struct nfs_client { > #define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */ > #define NFS_CS_MIGRATION 2 /* - transparent state migr */ > #define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ > +#define NFS_CS_NO_SHARE 4 /* - don't share across mounts */ > struct sockaddr_storage cl_addr; /* server identifier */ > size_t cl_addrlen; > char * cl_hostname; /* hostname of server */ > diff --git a/include/uapi/linux/nfs_mount.h b/include/uapi/linux/nfs_mount.h > index 576bddd..81c49ff 100644 > --- a/include/uapi/linux/nfs_mount.h > +++ b/include/uapi/linux/nfs_mount.h > @@ -73,5 +73,6 @@ struct nfs_mount_data { > > #define NFS_MOUNT_LOCAL_FLOCK 0x100000 > #define NFS_MOUNT_LOCAL_FCNTL 0x200000 > +#define NFS_MOUNT_NOSHARE_XPRT 0x400000 > > #endif -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@xxxxxxxxxx www.netapp.com 
��.n��������+%������w��{.n�����{��w���jg��������ݢj����G�������j:+v���w�m������w�������h�����٥