From: Nagendra S Tomar <natomar@xxxxxxxxxxxxx> Add a new mount option, ncpolicy=roundrobin|hash, which allows the user to select the nconnect policy for the given mount. It defaults to roundrobin. We store the user-selected policy in the nfs_client structure and pass it down to the RPC client (struct rpc_clnt), where the transport selection can be done accordingly. Also add a new function pointer, p_fhhash, to struct rpc_procinfo. It can be supplied to obtain the target file's hash for the given RPC, which is then used to bind the RPCs for a file to a single xprt. Signed-off-by: Nagendra S Tomar <natomar@xxxxxxxxxxxxx> --- fs/nfs/client.c | 3 +++ fs/nfs/fs_context.c | 26 ++++++++++++++++++++++++++ fs/nfs/internal.h | 2 ++ fs/nfs/nfs3client.c | 4 +++- fs/nfs/nfs4client.c | 14 +++++++++++--- fs/nfs/super.c | 7 ++++++- include/linux/nfs_fs_sb.h | 1 + include/linux/sunrpc/clnt.h | 15 +++++++++++++++ net/sunrpc/clnt.c | 34 ++++++++++++++++++++++++++++------ 9 files changed, 95 insertions(+), 11 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ff5c4d0d6d13..5c2809d8368a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_proto = cl_init->proto; clp->cl_nconnect = cl_init->nconnect; + clp->cl_ncpolicy = cl_init->ncpolicy; clp->cl_net = get_net(cl_init->net); clp->cl_principal = "*"; @@ -506,6 +507,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, .net = clp->cl_net, .protocol = clp->cl_proto, .nconnect = clp->cl_nconnect, + .ncpolicy = clp->cl_ncpolicy, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, .timeout = cl_init->timeparms, @@ -678,6 +680,7 @@ static int nfs_init_server(struct nfs_server *server, .timeparms = &timeparms, .cred = server->cred, .nconnect = ctx->nfs_server.nconnect, + .ncpolicy = ctx->nfs_server.ncpolicy, .init_flags = (1UL << NFS_CS_REUSEPORT), }; struct nfs_client *clp; diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c 
index 971a9251c1d9..7bb8f1c8356f 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -60,6 +60,7 @@ enum nfs_param { Opt_mountvers, Opt_namelen, Opt_nconnect, + Opt_ncpolicy, Opt_port, Opt_posix, Opt_proto, @@ -127,6 +128,18 @@ static const struct constant_table nfs_param_enums_write[] = { {} }; +enum { + Opt_ncpolicy_roundrobin, + Opt_ncpolicy_hash, +}; + +static const struct constant_table nfs_param_enums_ncpolicy[] = { + { "hash", Opt_ncpolicy_hash }, + { "roundrobin", Opt_ncpolicy_roundrobin }, + { "rr", Opt_ncpolicy_roundrobin }, + {} +}; + static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_flag_no("ac", Opt_ac), fsparam_u32 ("acdirmax", Opt_acdirmax), @@ -158,6 +171,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_u32 ("mountvers", Opt_mountvers), fsparam_u32 ("namlen", Opt_namelen), fsparam_u32 ("nconnect", Opt_nconnect), + fsparam_enum ("ncpolicy", Opt_ncpolicy, nfs_param_enums_ncpolicy), fsparam_string("nfsvers", Opt_vers), fsparam_u32 ("port", Opt_port), fsparam_flag_no("posix", Opt_posix), @@ -749,6 +763,18 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, goto out_of_bounds; ctx->nfs_server.nconnect = result.uint_32; break; + case Opt_ncpolicy: + switch (result.uint_32) { + case Opt_ncpolicy_roundrobin: + ctx->nfs_server.ncpolicy = ncpolicy_roundrobin; + break; + case Opt_ncpolicy_hash: + ctx->nfs_server.ncpolicy = ncpolicy_hash; + break; + default: + goto out_invalid_value; + } + break; case Opt_lookupcache: switch (result.uint_32) { case Opt_lookupcache_all: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7b644d6c09e4..e6ca664d7e91 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -67,6 +67,7 @@ struct nfs_client_initdata { int proto; u32 minorversion; unsigned int nconnect; + enum ncpolicy ncpolicy; struct net *net; const struct rpc_timeout *timeparms; const struct cred *cred; @@ -120,6 +121,7 @@ struct nfs_fs_context { int port; unsigned short protocol; unsigned 
short nconnect; + enum ncpolicy ncpolicy; unsigned short export_path_len; } nfs_server; diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 5601e47360c2..f8a648f7492a 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -102,8 +102,10 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; - if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) + if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) { cl_init.nconnect = mds_clp->cl_nconnect; + cl_init.ncpolicy = mds_clp->cl_ncpolicy; + } if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 889a9f4c0310..c967c214129a 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -863,6 +863,7 @@ static int nfs4_set_client(struct nfs_server *server, const char *ip_addr, int proto, const struct rpc_timeout *timeparms, u32 minorversion, unsigned int nconnect, + enum ncpolicy ncpolicy, struct net *net) { struct nfs_client_initdata cl_init = { @@ -881,8 +882,10 @@ static int nfs4_set_client(struct nfs_server *server, if (minorversion == 0) __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); - if (proto == XPRT_TRANSPORT_TCP) + if (proto == XPRT_TRANSPORT_TCP) { cl_init.nconnect = nconnect; + cl_init.ncpolicy = ncpolicy; + } if (server->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); @@ -950,8 +953,10 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; - if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) + if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) { cl_init.nconnect = mds_clp->cl_nconnect; + cl_init.ncpolicy = mds_clp->cl_ncpolicy; + } if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); @@ -1120,6 +1125,7 @@ static int nfs4_init_server(struct nfs_server 
*server, struct fs_context *fc) &timeparms, ctx->minorversion, ctx->nfs_server.nconnect, + ctx->nfs_server.ncpolicy, fc->net_ns); if (error < 0) return error; @@ -1209,6 +1215,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, + parent_client->cl_ncpolicy, parent_client->cl_net); if (!error) goto init_server; @@ -1224,6 +1231,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, + parent_client->cl_ncpolicy, parent_client->cl_net); if (error < 0) goto error; @@ -1321,7 +1329,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_proto, clnt->cl_timeout, clp->cl_minorversion, - clp->cl_nconnect, net); + clp->cl_nconnect, clp->cl_ncpolicy, net); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); if (error != 0) { nfs_server_insert_lists(server); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 94885c6f8f54..8719be70051b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -481,8 +481,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); rcu_read_unlock(); - if (clp->cl_nconnect > 0) + if (clp->cl_nconnect > 0) { seq_printf(m, ",nconnect=%u", clp->cl_nconnect); + if (clp->cl_ncpolicy == ncpolicy_roundrobin) + seq_puts(m, ",ncpolicy=roundrobin"); + else if (clp->cl_ncpolicy == ncpolicy_hash) + seq_puts(m, ",ncpolicy=hash"); + } if (version == 4) { if (nfss->port != NFS_PORT) seq_printf(m, ",port=%u", nfss->port); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6f76b32a0238..737f4d231e23 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -62,6 +62,7 @@ struct nfs_client { u32 
cl_minorversion;/* NFSv4 minorversion */ unsigned int cl_nconnect; /* Number of connections */ + enum ncpolicy cl_ncpolicy; /* nconnect policy */ const char * cl_principal; /* used for machine cred */ #if IS_ENABLED(CONFIG_NFS_V4) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 02e7a5863d28..aa1c1706f4d5 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -28,6 +28,15 @@ #include <net/ipv6.h> #include <linux/sunrpc/xprtmultipath.h> +/* + * Policies for controlling distribution of RPC requests over multiple + * nconnect connections. + */ +enum ncpolicy { + ncpolicy_roundrobin, // Select roundrobin. + ncpolicy_hash, // Select based on target filehandle hash. +}; + struct rpc_inode; /* @@ -40,6 +49,7 @@ struct rpc_clnt { struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ + enum ncpolicy cl_ncpolicy; /* nconnect policy */ const struct rpc_procinfo *cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ cl_vers, /* RPC version number */ @@ -101,6 +111,8 @@ struct rpc_version { unsigned int *counts; /* call counts */ }; +typedef u32 (*getfhhash_t)(const void *obj); + /* * Procedure information */ @@ -108,6 +120,7 @@ struct rpc_procinfo { u32 p_proc; /* RPC procedure number */ kxdreproc_t p_encode; /* XDR encode function */ kxdrdproc_t p_decode; /* XDR decode function */ + getfhhash_t p_fhhash; /* Returns target fh hash */ unsigned int p_arglen; /* argument hdr length (u32) */ unsigned int p_replen; /* reply hdr length (u32) */ unsigned int p_timer; /* Which RTT timer to use */ @@ -129,6 +142,7 @@ struct rpc_create_args { u32 version; rpc_authflavor_t authflavor; u32 nconnect; + enum ncpolicy ncpolicy; unsigned long flags; char *client_name; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ @@ -247,4 +261,5 @@ static inline void rpc_task_close_connection(struct rpc_task *task) if (task->tk_xprt) 
xprt_force_disconnect(task->tk_xprt); } + #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1b2a02460601..ed470a75e91d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -410,6 +410,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, } rpc_clnt_set_transport(clnt, xprt, timeout); + clnt->cl_ncpolicy = args->ncpolicy; xprt_iter_init(&clnt->cl_xpi, xps); xprt_switch_put(xps); @@ -640,6 +641,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; new->cl_principal = clnt->cl_principal; + new->cl_ncpolicy = clnt->cl_ncpolicy; return new; out_err: @@ -1053,9 +1055,10 @@ rpc_task_get_first_xprt(struct rpc_clnt *clnt) } static struct rpc_xprt * -rpc_task_get_next_xprt(struct rpc_clnt *clnt) +rpc_task_get_next_xprt(struct rpc_clnt *clnt, u32 hash) { - return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi, 0)); + return rpc_task_get_xprt(clnt, + xprt_iter_get_next(&clnt->cl_xpi, hash)); } static @@ -1065,8 +1068,16 @@ void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) return; if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); - else - task->tk_xprt = rpc_task_get_next_xprt(clnt); + else { + u32 xprt_hint = 0; + + if (clnt->cl_ncpolicy == ncpolicy_hash && + task->tk_msg.rpc_proc->p_fhhash) { + xprt_hint = task->tk_msg.rpc_proc->p_fhhash( + task->tk_msg.rpc_argp); + } + task->tk_xprt = rpc_task_get_next_xprt(clnt, xprt_hint); + } } static @@ -1130,8 +1141,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) if (!RPC_IS_ASYNC(task)) task->tk_flags |= RPC_TASK_CRED_NOREF; - rpc_task_set_client(task, task_setup_data->rpc_client); rpc_task_set_rpc_message(task, task_setup_data->rpc_message); + rpc_task_set_client(task, task_setup_data->rpc_client); if (task->tk_action == NULL) rpc_call_start(task); @@ -1636,6 
+1647,7 @@ call_start(struct rpc_task *task) /* Increment call count (version might not be valid for ping) */ if (clnt->cl_program->version[clnt->cl_vers]) clnt->cl_program->version[clnt->cl_vers]->counts[idx]++; + clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; rpc_task_set_transport(task, clnt); @@ -2888,7 +2900,17 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, connect_timeout, reconnect_timeout); - rpc_xprt_switch_set_roundrobin(xps); + switch (clnt->cl_ncpolicy) { + case ncpolicy_roundrobin: + default: + WARN_ON(clnt->cl_ncpolicy != ncpolicy_roundrobin); + rpc_xprt_switch_set_roundrobin(xps); + break; + case ncpolicy_hash: + rpc_xprt_switch_set_hash(xps); + break; + } + if (setup) { ret = setup(clnt, xps, xprt, data); if (ret != 0)