As soon as we detect a server failure, we start replication recovery immediately, without waiting for all the commands still outstanding against the failed server to finish. The first error causes us to switch to a different (replicated) server. Any later failures returned by the old server are indistinguishable from failures of the new replicated server, so they trigger a spurious second replication recovery. To avoid this, we add a start time (cl_start_time) to nfs_client. If the nfs_client was started recently enough (less than 2 * cl_timeout->to_initval jiffies ago), we skip replication recovery and just retry the request instead. Signed-off-by: Malahal Naineni <malahal@xxxxxxxxxx> --- fs/nfs/client.c | 1 + fs/nfs/nfs4namespace.c | 23 +++++++++++++++++++++++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 25 insertions(+), 0 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 000ebdb..f0d8d24 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1477,6 +1477,7 @@ int nfs4_clone_client(struct nfs_client *clp, const struct sockaddr *sap, * lose state. */ new->cl_boot_time = clp->cl_boot_time; + new->cl_start_time = jiffies; dprintk("<-- %s moved (%llx:%llx) to nfs_client %p\n", __func__, (unsigned long long)server->fsid.major, diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index ee75e27..617d6bf 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -356,6 +356,29 @@ int nfs4_replace_transport(struct nfs_server *server) unsigned int i; int error; + /* + * As soon as we detect a server failure, we quickly handle + * replication recovery without waiting for all the active + * commands to finish from the failed server. The first error + * would cause us to work with a different (replicated) server. + * + * Any later failures from the old server are indistinguishable + * from the new replicated server. These failures from the old + * server trigger a spurious replication recovery again. To + * avoid this, we add start time to nfs_client. 
If this is a + * recent enough nfs_client, we don't handle replication + * recovery and just retry the request instead. + */ +#define NFS_REPLI_SETTLE(rclient) (2 * (rclient)->cl_timeout->to_initval) + if (time_before(jiffies, server->nfs_client->cl_start_time + + NFS_REPLI_SETTLE(server->client))) { + dprintk("%s() ignoring spurious replication request, " + "current: %lu, client start: %lu, repli_settle: %lu\n", + __func__, jiffies, server->nfs_client->cl_start_time, + NFS_REPLI_SETTLE(server->client)); + return 0; + } + sap = kmalloc(addr_bufsize, GFP_KERNEL); if (sap == NULL) { error = -ENOMEM; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 8c16ec5..9d79a0e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -88,6 +88,7 @@ struct nfs_client { #endif struct server_scope *server_scope; /* from exchange_id */ + unsigned long cl_start_time; }; /* -- 1.7.8.3 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html