[RFC v3 2/2] SUNRPC: Mask XIDs to prevent replay cache collision

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Modify transport behavior so transports mask their XIDs. xid_mask is
the mask which clears the top several bits. masked_xid is the ID
(unique to each transport which belongs to the same NFS mount).

We also removed xprt_init_xid, since the XID is now initialized
through rpc_create (and passed in via xprtargs). Since the
initialization of xid, masked_id and xid_mask occur in
xprt_create_transport, this should work with rpcrdma as well.

Signed-off-by: Bennett Amodio <bamodio@xxxxxxxxxxxxxxx>
---
 include/linux/sunrpc/xprt.h |  5 +++++
 net/sunrpc/clnt.c           |  8 ++++++++
 net/sunrpc/xprt.c           | 14 ++++++--------
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index eab1c74..c80edc4 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -207,6 +207,8 @@ struct rpc_xprt {
  * Multipath
  */
  struct list_head xprt_switch;
+ u32 masked_id;
+ u32 xid_mask;

  /*
  * Connection of transports
@@ -306,6 +308,9 @@ struct xprt_create {
  struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
  struct rpc_xprt_switch *bc_xps;
  unsigned int flags;
+ u32 transport_id;
+ u32 bitmask_len;
+ u32 init_xid;
 };

 struct xprt_class {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index bb39f07..8556370 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -520,6 +520,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
  .addrlen = args->addrsize,
  .servername = args->servername,
  .bc_xprt = args->bc_xprt,
+ .init_xid = prandom_u32(),
  };
  char servername[48];
  struct rpc_clnt *clnt;
@@ -572,6 +573,12 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
  xprtargs.servername = servername;
  }

+ if (args->nconnect)
+ xprtargs.bitmask_len = fls(args->nconnect - 1);
+ else
+ xprtargs.bitmask_len = 0;
+ xprtargs.transport_id = 0;
+
  xprt = xprt_create_transport(&xprtargs);
  if (IS_ERR(xprt))
  return (struct rpc_clnt *)xprt;
@@ -591,6 +598,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
  return clnt;

  for (i = 0; i < args->nconnect - 1; i++) {
+ xprtargs.transport_id += 1;
  if (rpc_clnt_add_xprt(clnt, &xprtargs, NULL, NULL) < 0)
  break;
  }
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3e63c5e..49fd56e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1231,12 +1231,7 @@ void xprt_retry_reserve(struct rpc_task *task)

 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
 {
- return (__force __be32)xprt->xid++;
-}
-
-static inline void xprt_init_xid(struct rpc_xprt *xprt)
-{
- xprt->xid = prandom_u32();
+ return (__force __be32) (xprt->xid++ & xprt->xid_mask) | xprt->masked_id;
 }

 static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
@@ -1334,8 +1329,6 @@ static void xprt_init(struct rpc_xprt *xprt,
struct net *net)
  rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
  rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");

- xprt_init_xid(xprt);
-
  xprt->xprt_net = get_net(net);
 }

@@ -1367,6 +1360,11 @@ struct rpc_xprt *xprt_create_transport(struct
xprt_create *args)
  -PTR_ERR(xprt));
  goto out;
  }
+
+ xprt->xid_mask = 0xffffffff >> args->bitmask_len;
+ xprt->masked_id = args->transport_id << (32 - args->bitmask_len);
+ xprt->xid = args->init_xid;
+
  if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
  xprt->idle_timeout = 0;
  INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
-- 
1.9.1

On Tue, Aug 15, 2017 at 5:48 PM, Bennett Amodio <bamodio@xxxxxxxxxxxxxxx> wrote:
> Enable nconnect mount option and multipathing behavior for NFSv3 and NFSv4.
>
> Signed-off-by: Jui-Yu Chang <juchang@xxxxxxxxxxxxxxx>
> ---
>  fs/nfs/client.c     | 3 +++
>  fs/nfs/nfs4client.c | 2 +-
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 08baa66..8d11a11 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -653,6 +653,9 @@ static int nfs_init_server(struct nfs_server *server,
>   struct nfs_client *clp;
>   int error;
>
> + if (data->nfs_server.protocol == XPRT_TRANSPORT_TCP)
> + cl_init.nconnect = data->nfs_server.nconnect;
> +
>   nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
>   data->timeo, data->retrans);
>   if (data->flags & NFS_MOUNT_NORESVPORT)
> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> index f11dec1..6ef1baa 100644
> --- a/fs/nfs/nfs4client.c
> +++ b/fs/nfs/nfs4client.c
> @@ -849,7 +849,7 @@ static int nfs4_set_client(struct nfs_server *server,
>   };
>   struct nfs_client *clp;
>
> - if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP)
> + if (proto == XPRT_TRANSPORT_TCP)
>   cl_init.nconnect = nconnect;
>   if (server->flags & NFS_MOUNT_NORESVPORT)
>   set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
> --
> 1.9.1
>
> On Tue, Aug 15, 2017 at 5:46 PM, Bennett Amodio <bamodio@xxxxxxxxxxxxxxx> wrote:
>> After seeing Trond’s patches for NFS multipathing on NFSv4.1, we
>> decided to try using the same concept for NFSv3/4.  The primary issue
>> we identified was XID collision in the duplicate request cache (replay
>> cache) for NFSv3/4.  In NFSv3/4, entries are hashed based on XID
>> instead of the slot ID and sequence ID that NFSv4.1 uses.  Since the
>> XIDs are generated by the RPC transports, and Trond’s patches create
>> multiple transports for multipathing, different transports can end up
>> using an overlapping set of XIDs.
>>
>>
>> To fix this, we apply a mask to XIDs. Each transport is constrained to
>> its own segment of the total XID range, and they can never overlap.
>> In terms of loss of entropy, by masking out just enough bits from the
>> XID, we are convinced that the probability of XID wraparound or
>> collision on NFS client restart has not increased to a problematic
>> level (so long as the RPCs are distributed round-robin, as in Trond’s
>> patches).
>>
>>
>> We tested multipathing out and discovered that it enables NFS to get
>> more bandwidth on a bonded interface (instead of using only one
>> physical link, it can use multiple).  Specifically, we tested on a
>> setup where the client was connected to the server via 4 bonded 10Gb/s
>> links.  Without multipathing, the client could only achieve 10Gb/s
>> (using one physical link).  With multipathing, the client was able to
>> achieve a maximum of close to 40Gb/s.
>>
>>
>> However, although the maximum performance was close to 40Gb/s,
>> achieving an average throughput of even 30Gb/s required many
>> connections.  The performance of individual trials had a high
>> variance.  We traced this uneven performance to colliding network
>> paths.  With round-robin distribution of RPCs, no single TCP
>> connection can exceed the performance of the slowest one.  If the
>> connections are distributed unevenly across network paths, some
>> connections can bottleneck others.  To solve this problem, we are
>> currently working on patches to provide load-balancing as an
>> alternative to round-robin for distributing RPCs.
>>
>>
>> To use these patches, you first have to apply Trond's 5 patches
>> (Available at https://www.spinics.net/lists/linux-nfs/msg63368.html).
>> Let us know what you think or if you have any ideas for improving
>> this.
>>
>>
>> Jui-Yu Chang (1):
>>   NFS: Allow multiple connections to NFSv3 and NFSv4.0 servers
>>
>> Bennett Amodio (1):
>>   SUNRPC: Mask XIDs to prevent replay cache collision
>>
>>  fs/nfs/client.c             |  3 +++
>>  fs/nfs/nfs4client.c         |  2 +-
>>  include/linux/sunrpc/xprt.h |  5 +++++
>>  net/sunrpc/clnt.c           |  8 ++++++++
>>  net/sunrpc/xprt.c           | 14 ++++++--------
>>  5 files changed, 23 insertions(+), 9 deletions(-)
>>
>> --
>> 1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux