On Fri, 2013-01-18 at 17:03 -0500, an unknown sender wrote: > On Fri, 2013-01-18 at 16:33 -0500, Chuck Lever wrote: > > On Jan 18, 2013, at 4:28 PM, Ben Greear <greearb@xxxxxxxxxxxxxxx> wrote: > > > > > Any chance the STALE_CLIENTID case needs a 'break'? > > > > I don't think so. LEASE_CONFIRM is set, and we want to wake the state renewal thread. > > > > > > > > Twice I've seen kernel crashes after the nfs40_walk_client_list > > > failed (though code comments say it should never fail). > > > > nfs40_walk_client_list() is looking for an nfs_client that is supposed to already be in the nfs_client list. If the search fails, that's a bug. > > > > Eyeball the contents of your nfs_client list. You should find an appropriate nfs_client in there, and then figure out why the search doesn't find it. > > You have considered the fact that the call to > nfs4_proc_setclientid_confirm can potentially return > NFS4ERR_STALE_CLIENTID if the server rebooted while the client was > walking the list? In fact, as far as I can see, the correct behaviour in nfs40_discover_server_trunking() should be to re-issue the setclientid call, and then walk the list again if nfs40_walk_client_list() returns NFS4ERR_STALE_CLIENTID. Something like the attached patch: -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@xxxxxxxxxx www.netapp.com
From 8ae0dc0ae64c9913c90f550d3c6b724313883fff Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> Date: Fri, 18 Jan 2013 17:54:34 -0500 Subject: [PATCH] NFSv4: Fix NFSv4.0 trunking discovery If walking the list in nfs40_walk_client_list fails, then the most likely explanation is that the server rebooted. Ensure that we catch that case by testing our new nfs_client last. Reported-by: Ben Greear <greearb@xxxxxxxxxxxxxxx> Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> Cc: Chuck Lever <chuck.lever@xxxxxxxxxx> Cc: stable@xxxxxxxxxxxxxxx [>=3.7] --- fs/nfs/nfs4client.c | 35 +++++++++++++++++------------------ fs/nfs/nfs4state.c | 22 ++++++++++------------ 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index acc3472..596b303 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -310,6 +310,9 @@ int nfs40_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* Skip the new entry */ + if (pos == new) + continue; /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos" */ if (pos->cl_cons_state < NFS_CS_READY) @@ -332,40 +335,36 @@ int nfs40_walk_client_list(struct nfs_client *new, if (prev) nfs_put_client(prev); + prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); - if (status == 0) { + switch (status) { + case 0: nfs4_swap_callback_idents(pos, new); - nfs_put_client(pos); + prev = NULL; *result = pos; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - return 0; - } - if (status != -NFS4ERR_STALE_CLIENTID) { - nfs_put_client(pos); - dprintk("NFS: <-- %s status = %d, no result\n", - __func__, status); - return status; + default: + goto out; + case -NFS4ERR_STALE_CLIENTID: } spin_lock(&nn->nfs_client_lock); - prev = pos; } + spin_unlock(&nn->nfs_client_lock); /* - * No matching nfs_client found. This should be impossible, - * because the new nfs_client has already been added to - * nfs_client_list by nfs_get_client(). - * - * Don't BUG(), since the caller is holding a mutex. + * No match found. Try to confirm the clientid on "new". */ + *result = new; + status = nfs4_proc_setclientid_confirm(new, &clid, cred); +out: if (prev) nfs_put_client(prev); - spin_unlock(&nn->nfs_client_lock); - pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); - return -NFS4ERR_STALE_CLIENTID; + dprintk("NFS: <-- %s status = %d\n", __func__, status); + return status; } #ifdef CONFIG_NFS_V4_1 diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9448c57..102885d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -129,24 +129,22 @@ int nfs40_discover_server_trunking(struct nfs_client *clp, if (clp->cl_addr.ss_family == AF_INET6) port = nn->nfs_callback_tcpport6; - status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); - if (status != 0) - goto out; - clp->cl_clientid = clid.clientid; - clp->cl_confirm = clid.confirm; + do { + status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); + if (status != 0) + break; + clp->cl_clientid = clid.clientid; + clp->cl_confirm = clid.confirm; - status = nfs40_walk_client_list(clp, result, cred); - switch (status) { - case -NFS4ERR_STALE_CLIENTID: - set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - case 0: + status = nfs40_walk_client_list(clp, result, cred); + } while (status == -NFS4ERR_STALE_CLIENTID); + + if (status == 0) { /* Sustain the lease, even if it's empty. If the clientid4 * goes stale it's of no use for trunking discovery. */ nfs4_schedule_state_renewal(*result); - break; } -out: return status; } -- 1.8.1