On Thu, 2021-06-03 at 18:59 -0400, Olga Kornievskaia wrote: > From: Olga Kornievskaia <kolga@xxxxxxxxxx> > > Using sysfs's xprt_state attribute, mark a particular transport > offline. > It will not be picked during the round-robin selection. It's not > allowed > to take the main (1st created transport associated with the > rpc_client) > offline. > > Signed-off-by: Olga Kornievskaia <kolga@xxxxxxxxxx> > --- > include/linux/sunrpc/xprt.h | 2 ++ > net/sunrpc/clnt.c | 1 + > net/sunrpc/sysfs.c | 42 +++++++++++++++++++++++++++++++++-- > -- > net/sunrpc/xprtmultipath.c | 3 ++- > 4 files changed, 43 insertions(+), 5 deletions(-) > > diff --git a/include/linux/sunrpc/xprt.h > b/include/linux/sunrpc/xprt.h > index 13a4eaf385cf..72a858f032c7 100644 > --- a/include/linux/sunrpc/xprt.h > +++ b/include/linux/sunrpc/xprt.h > @@ -293,6 +293,7 @@ struct rpc_xprt { > struct rcu_head rcu; > const struct xprt_class *xprt_class; > struct rpc_sysfs_xprt *xprt_sysfs; > + bool main; /* marked if it's the 1st > transport */ > }; > > #if defined(CONFIG_SUNRPC_BACKCHANNEL) > @@ -426,6 +427,7 @@ > void xprt_release_write(struct rpc_xprt *, > struct rpc_task *); > #define XPRT_BOUND (4) > #define XPRT_BINDING (5) > #define XPRT_CLOSING (6) > +#define XPRT_OFFLINE (7) > #define XPRT_CONGESTED (9) > #define XPRT_CWND_WAIT (10) > #define XPRT_WRITE_SPACE (11) > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c > index 9bf820bad84c..408618765aa5 100644 > --- a/net/sunrpc/clnt.c > +++ b/net/sunrpc/clnt.c > @@ -412,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const > struct rpc_create_args *args, > } > > rpc_clnt_set_transport(clnt, xprt, timeout); > + xprt->main = true; > xprt_iter_init(&clnt->cl_xpi, xps); > xprt_switch_put(xps); > > diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c > index ec06c9257c07..02c918c5061b 100644 > --- a/net/sunrpc/sysfs.c > +++ b/net/sunrpc/sysfs.c > @@ -118,7 +118,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct > kobject *kobj, > struct rpc_xprt *xprt 
= rpc_sysfs_xprt_kobj_get_xprt(kobj); > ssize_t ret; > int locked, connected, connecting, close_wait, bound, > binding, > - closing, congested, cwnd_wait, write_space; > + closing, congested, cwnd_wait, write_space, offline; > > if (!xprt) > return 0; > @@ -136,8 +136,9 @@ static ssize_t rpc_sysfs_xprt_state_show(struct > kobject *kobj, > congested = test_bit(XPRT_CONGESTED, &xprt->state); > cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state); > write_space = test_bit(XPRT_WRITE_SPACE, &xprt- > >state); > + offline = test_bit(XPRT_OFFLINE, &xprt->state); > > - ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s > %s\n", > + ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s > %s %s\n", > locked ? "LOCKED" : "", > connected ? "CONNECTED" : "", > connecting ? "CONNECTING" : "", > @@ -147,7 +148,8 @@ static ssize_t rpc_sysfs_xprt_state_show(struct > kobject *kobj, > closing ? "CLOSING" : "", > congested ? "CONGESTED" : "", > cwnd_wait ? "CWND_WAIT" : "", > - write_space ? "WRITE_SPACE" : ""); > + write_space ? "WRITE_SPACE" : "", > + offline ? "OFFLINE" : ""); > } > > xprt_put(xprt); > @@ -223,6 +225,38 @@ static ssize_t > rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, > goto out; > } > > +static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, > + struct kobj_attribute > *attr, > + const char *buf, size_t > count) > +{ > + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); > + int offline = 0; > + > + if (!xprt) > + return 0; > + > + if (!strncmp(buf, "offline", 7)) > + offline = 1; > + else > + return -EINVAL; > + > + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, > TASK_KILLABLE)) { > + count = -EINTR; > + goto out_put; > + } > + if (offline) { > + if (xprt->main) > + count = -EINVAL; > + else > + set_bit(XPRT_OFFLINE, &xprt->state); > + } Is there any way to put the transport back online? What if, say, the problem with the downed IP address gets fixed? 
> + > + xprt_release_write(xprt, NULL); > +out_put: > + xprt_put(xprt); > + return count; > +} > + > int rpc_sysfs_init(void) > { > rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, > kernel_kobj); > @@ -293,7 +327,7 @@ static struct kobj_attribute rpc_sysfs_xprt_info > = __ATTR(xprt_info, > 0444, rpc_sysfs_xprt_info_show, NULL); > > static struct kobj_attribute rpc_sysfs_xprt_change_state = > __ATTR(xprt_state, > - 0644, rpc_sysfs_xprt_state_show, NULL); > + 0644, rpc_sysfs_xprt_state_show, > rpc_sysfs_xprt_state_change); > > static struct attribute *rpc_sysfs_xprt_attrs[] = { > &rpc_sysfs_xprt_dstaddr.attr, > diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c > index 07e76ae1028a..39551b794b80 100644 > --- a/net/sunrpc/xprtmultipath.c > +++ b/net/sunrpc/xprtmultipath.c > @@ -230,7 +230,8 @@ void xprt_iter_default_rewind(struct > rpc_xprt_iter *xpi) > static > bool xprt_is_active(const struct rpc_xprt *xprt) > { > - return kref_read(&xprt->kref) != 0; > + return (kref_read(&xprt->kref) != 0 && > + !test_bit(XPRT_OFFLINE, &xprt->state)); > } > > static -- Trond Myklebust Linux NFS client maintainer, Hammerspace trond.myklebust@xxxxxxxxxxxxxxx