On Sun, Jun 13, 2021 at 12:16 PM Trond Myklebust <trondmy@xxxxxxxxxxxxxxx> wrote: > > On Thu, 2021-06-03 at 18:59 -0400, Olga Kornievskaia wrote: > > From: Olga Kornievskaia <kolga@xxxxxxxxxx> > > > > Using sysfs's xprt_state attribute, mark a particular transport > > offline. > > It will not be picked during the round-robin selection. It's not > > allowed > > to take the main (1st created transport associated with the > > rpc_client) > > offline. > > > > Signed-off-by: Olga Kornievskaia <kolga@xxxxxxxxxx> > > --- > > include/linux/sunrpc/xprt.h | 2 ++ > > net/sunrpc/clnt.c | 1 + > > net/sunrpc/sysfs.c | 42 +++++++++++++++++++++++++++++++++-- > > -- > > net/sunrpc/xprtmultipath.c | 3 ++- > > 4 files changed, 43 insertions(+), 5 deletions(-) > > > > diff --git a/include/linux/sunrpc/xprt.h > > b/include/linux/sunrpc/xprt.h > > index 13a4eaf385cf..72a858f032c7 100644 > > --- a/include/linux/sunrpc/xprt.h > > +++ b/include/linux/sunrpc/xprt.h > > @@ -293,6 +293,7 @@ struct rpc_xprt { > > struct rcu_head rcu; > > const struct xprt_class *xprt_class; > > struct rpc_sysfs_xprt *xprt_sysfs; > > + bool main; /* marked if it's the 1st > > transport */ > > }; > > > > #if defined(CONFIG_SUNRPC_BACKCHANNEL) > > @@ -426,6 +427,7 @@ > > void xprt_release_write(struct rpc_xprt *, > > struct rpc_task *); > > #define XPRT_BOUND (4) > > #define XPRT_BINDING (5) > > #define XPRT_CLOSING (6) > > +#define XPRT_OFFLINE (7) > > #define XPRT_CONGESTED (9) > > #define XPRT_CWND_WAIT (10) > > #define XPRT_WRITE_SPACE (11) > > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c > > index 9bf820bad84c..408618765aa5 100644 > > --- a/net/sunrpc/clnt.c > > +++ b/net/sunrpc/clnt.c > > @@ -412,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const > > struct rpc_create_args *args, > > } > > > > rpc_clnt_set_transport(clnt, xprt, timeout); > > + xprt->main = true; > > xprt_iter_init(&clnt->cl_xpi, xps); > > xprt_switch_put(xps); > > > > diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c > > index ec06c9257c07..02c918c5061b 100644 > > --- a/net/sunrpc/sysfs.c > > +++ b/net/sunrpc/sysfs.c > > @@ -118,7 +118,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct > > kobject *kobj, > > struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); > > ssize_t ret; > > int locked, connected, connecting, close_wait, bound, > > binding, > > - closing, congested, cwnd_wait, write_space; > > + closing, congested, cwnd_wait, write_space, offline; > > > > if (!xprt) > > return 0; > > @@ -136,8 +136,9 @@ static ssize_t rpc_sysfs_xprt_state_show(struct > > kobject *kobj, > > congested = test_bit(XPRT_CONGESTED, &xprt->state); > > cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state); > > write_space = test_bit(XPRT_WRITE_SPACE, &xprt- > > >state); > > + offline = test_bit(XPRT_OFFLINE, &xprt->state); > > > > - ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s > > %s\n", > > + ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s > > %s %s\n", > > locked ? "LOCKED" : "", > > connected ? "CONNECTED" : "", > > connecting ? "CONNECTING" : "", > > @@ -147,7 +148,8 @@ static ssize_t rpc_sysfs_xprt_state_show(struct > > kobject *kobj, > > closing ? "CLOSING" : "", > > congested ? "CONGESTED" : "", > > cwnd_wait ? "CWND_WAIT" : "", > > - write_space ? "WRITE_SPACE" : ""); > > + write_space ? "WRITE_SPACE" : "", > > + offline ? "OFFLINE" : ""); > > } > > > > xprt_put(xprt); > > @@ -223,6 +225,38 @@ static ssize_t > > rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, > > goto out; > > } > > > > +static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, > > + struct kobj_attribute > > *attr, > > + const char *buf, size_t > > count) > > +{ > > + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); > > + int offline = 0; > > + > > + if (!xprt) > > + return 0; > > + > > + if (!strncmp(buf, "offline", 7)) > > + offline = 1; > > + else > > + return -EINVAL; > > + > > + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, > > TASK_KILLABLE)) { > > + count = -EINTR; > > + goto out_put; > > + } > > + if (offline) { > > + if (xprt->main) > > + count = -EINVAL; > > + else > > + set_bit(XPRT_OFFLINE, &xprt->state); > > + } > > Is there any way to put the transport back online? What say the problem > with the downed IP address gets fixed? I will add this in v2. Originally the thought was that offlining a transport was just a middle step before removing it and not something in its own right. I would like to know if it's appropriate to also then decrement the xps_nactive counter when the xprt is offline? > > > + > > + xprt_release_write(xprt, NULL); > > +out_put: > > + xprt_put(xprt); > > + return count; > > +} > > + > > int rpc_sysfs_init(void) > > { > > rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, > > kernel_kobj); > > @@ -293,7 +327,7 @@ static struct kobj_attribute rpc_sysfs_xprt_info > > = __ATTR(xprt_info, > > 0444, rpc_sysfs_xprt_info_show, NULL); > > > > static struct kobj_attribute rpc_sysfs_xprt_change_state = > > __ATTR(xprt_state, > > - 0644, rpc_sysfs_xprt_state_show, NULL); > > + 0644, rpc_sysfs_xprt_state_show, > > rpc_sysfs_xprt_state_change); > > > > static struct attribute *rpc_sysfs_xprt_attrs[] = { > > &rpc_sysfs_xprt_dstaddr.attr, > > diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c > > index 07e76ae1028a..39551b794b80 100644 > > --- a/net/sunrpc/xprtmultipath.c > > +++ b/net/sunrpc/xprtmultipath.c > > @@ -230,7 +230,8 @@ void xprt_iter_default_rewind(struct > > rpc_xprt_iter *xpi) > > static > > bool xprt_is_active(const struct rpc_xprt *xprt) > > { > > - return kref_read(&xprt->kref) != 0; > > + return (kref_read(&xprt->kref) != 0 && > > + !test_bit(XPRT_OFFLINE, &xprt->state)); > > } > > > > static > > -- > Trond Myklebust > Linux NFS client maintainer, Hammerspace > trond.myklebust@xxxxxxxxxxxxxxx > >