On Wed, 02 Dec 2009 17:47:04 +0800 Mi Jinlong <mijinlong@xxxxxxxxxxxxxx> wrote: > After a client get lock, it's network partition for some reasons. > other client cannot get lock success forever. > > This patch can avoid this problem using rpc_ping to test client's > network when host expired but hold lock. > > If the client's network is partition, server will release client's > lock, other client will get lock success. > > Signed-off-by: mijinlong@xxxxxxxxxxxxxx Yikes! That sounds like it'll make locking subject to the reliability of the network. I don't think that's a good idea. What might be more reasonable is to consider implementing something like the clear_locks command in Solaris. That is, a way for an admin to remove server-side locks held by a client that he knows is never going to come back. With that, this sort of thing at least becomes a willful act... > --- > fs/lockd/host.c | 54 ++++++++++++++++++++++++++++++++++++++++++- > include/linux/sunrpc/clnt.h | 1 + > net/sunrpc/clnt.c | 4 +- > 3 files changed, 56 insertions(+), 3 deletions(-) > > diff --git a/fs/lockd/host.c b/fs/lockd/host.c > index 4600c20..73f6732 100644 > --- a/fs/lockd/host.c > +++ b/fs/lockd/host.c > @@ -31,6 +31,7 @@ static int nrhosts; > static DEFINE_MUTEX(nlm_host_mutex); > > static void nlm_gc_hosts(void); > +static int nlm_test_host(struct nlm_host *); > > struct nlm_lookup_host_info { > const int server; /* search for server|client */ > @@ -550,13 +551,24 @@ nlm_gc_hosts(void) > > for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { > hlist_for_each_entry_safe(host, pos, next, chain, h_hash) { > - if (atomic_read(&host->h_count) || host->h_inuse > + if (atomic_read(&host->h_count) > || time_before(jiffies, host->h_expires)) { > dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", > host->h_name, atomic_read(&host->h_count), > host->h_inuse, host->h_expires); > continue; > } > + > + if (host->h_inuse) { > + if (!nlm_test_host(host)) { > + dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", > + host->h_name, atomic_read(&host->h_count), > + host->h_inuse, host->h_expires); > + continue; > + } > + nlmsvc_free_host_resources(host); > + } > + > dprintk("lockd: delete host %s\n", host->h_name); > hlist_del_init(&host->h_hash); > > @@ -567,3 +579,43 @@ nlm_gc_hosts(void) > > next_gc = jiffies + NLM_HOST_COLLECT; > } > + > +static int > +nlm_test_host(struct nlm_host *host) > +{ > + struct rpc_clnt *clnt; > + > + dprintk("lockd: test host %s\n", host->h_name); > + clnt = host->h_rpcclnt; > + if (clnt == NULL) { > + unsigned long increment = HZ; > + struct rpc_timeout timeparms = { > + .to_initval = increment, > + .to_increment = increment, > + .to_maxval = increment * 3UL, > + .to_retries = 2U, > + }; > + > + struct rpc_create_args args = { > + .protocol = host->h_proto, > + .address = nlm_addr(host), > + .addrsize = host->h_addrlen, > + .saddress = nlm_srcaddr(host), > + .timeout = &timeparms, > + .servername = host->h_name, > + .program = &nlm_program, > + .version = host->h_version, > + .authflavor = RPC_AUTH_UNIX, > + .flags = RPC_CLNT_CREATE_AUTOBIND, > + }; > + > + clnt = rpc_create(&args); > + if (IS_ERR(clnt)) > + return -EIO; > + > + rpc_shutdown_client(clnt); > + return 0; > + } > + > + return rpc_ping(clnt, RPC_TASK_SOFT); > +} Hmm...this runs in lockd's context too, right? While it's waiting for a ping to come back, the server can't service any lock requests. That could really slow down lockd in the event of a network partition. In general, it's best to avoid blocking operations in lockd's context if at all possible. > diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h > index 8ed9642..b221f8f 100644 > --- a/include/linux/sunrpc/clnt.h > +++ b/include/linux/sunrpc/clnt.h > @@ -162,6 +162,7 @@ size_t rpc_pton(const char *, const size_t, > char * rpc_sockaddr2uaddr(const struct sockaddr *); > size_t rpc_uaddr2sockaddr(const char *, const size_t, > struct sockaddr *, const size_t); > +int rpc_ping(struct rpc_clnt *clnt, int flags); > > static inline unsigned short rpc_get_port(const struct sockaddr *sap) > { > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c > index 38829e2..53b9f34 100644 > --- a/net/sunrpc/clnt.c > +++ b/net/sunrpc/clnt.c > @@ -79,7 +79,6 @@ static void call_connect_status(struct rpc_task *task); > > static __be32 *rpc_encode_header(struct rpc_task *task); > static __be32 *rpc_verify_header(struct rpc_task *task); > -static int rpc_ping(struct rpc_clnt *clnt, int flags); > > static void rpc_register_client(struct rpc_clnt *clnt) > { > @@ -1675,7 +1674,7 @@ static struct rpc_procinfo rpcproc_null = { > .p_decode = rpcproc_decode_null, > }; > > -static int rpc_ping(struct rpc_clnt *clnt, int flags) > +int rpc_ping(struct rpc_clnt *clnt, int flags) > { > struct rpc_message msg = { > .rpc_proc = &rpcproc_null, > @@ -1686,6 +1685,7 @@ static int rpc_ping(struct rpc_clnt *clnt, int flags) > put_rpccred(msg.rpc_cred); > return err; > } > +EXPORT_SYMBOL_GPL(rpc_ping); > > struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) > { -- Jeff Layton <jlayton@xxxxxxxxxx> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html