Re: [PATCH v1 01/14] xprtrdma: Transport fault injection

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Chuck,

Neat idea!  Are servers able to handle client recovery without getting too confused?

Anna

On 05/04/2015 01:56 PM, Chuck Lever wrote:
> It has been exceptionally useful to exercise the logic that handles
> local immediate errors and RDMA connection loss.  To enable
> developers to test this regularly and repeatably, add logic to
> simulate connection loss every so often.
> 
> Fault injection is disabled by default. It is enabled with
> 
>   $ echo xxx | sudo tee /proc/sys/sunrpc/rdma_inject_transport_fault
> 
> where "xxx" is a large positive number of transport method calls
> before a disconnect. A value of several thousand is usually a good
> number that allows reasonable forward progress while still causing a
> lot of connection drops.
> 
> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
> ---
>  net/sunrpc/Kconfig              |   12 ++++++++++++
>  net/sunrpc/xprtrdma/transport.c |   34 ++++++++++++++++++++++++++++++++++
>  net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
>  3 files changed, 47 insertions(+)
> 
> diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
> index 9068e72..329f82c 100644
> --- a/net/sunrpc/Kconfig
> +++ b/net/sunrpc/Kconfig
> @@ -61,6 +61,18 @@ config SUNRPC_XPRT_RDMA_CLIENT
>  
>  	  If unsure, say N.
>  
> +config SUNRPC_XPRT_RDMA_FAULT_INJECTION
> +	bool "RPC over RDMA client fault injection"
> +	depends on SUNRPC_XPRT_RDMA_CLIENT
> +	default N
> +	help
> +	  This option enables fault injection in the xprtrdma module.
> +	  Fault injection is disabled by default. It is enabled with:
> +
> +	    $ echo xxx | sudo tee /proc/sys/sunrpc/rdma_inject_transport_fault
> +
> +	  If unsure, say N.
> +
>  config SUNRPC_XPRT_RDMA_SERVER
>  	tristate "RPC over RDMA Server Support"
>  	depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 54f23b1..fdcb2c7 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -74,6 +74,7 @@ static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
>  static unsigned int xprt_rdma_inline_write_padding;
>  static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
>  		int xprt_rdma_pad_optimize = 1;
> +static unsigned int xprt_rdma_inject_transport_fault;
>  
>  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
>  
> @@ -135,6 +136,13 @@ static struct ctl_table xr_tunables_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec,
>  	},
> +	{
> +		.procname	= "rdma_inject_transport_fault",
> +		.data		= &xprt_rdma_inject_transport_fault,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
>  	{ },
>  };
>  
> @@ -246,6 +254,27 @@ xprt_rdma_connect_worker(struct work_struct *work)
>  	xprt_clear_connecting(xprt);
>  }
>  
> +#if defined CONFIG_SUNRPC_XPRT_RDMA_FAULT_INJECTION
> +static void
> +xprt_rdma_inject_disconnect(struct rpcrdma_xprt *r_xprt)
> +{
> +	if (!xprt_rdma_inject_transport_fault)
> +		return;
> +
> +	if (atomic_dec_return(&r_xprt->rx_inject_count) == 0) {
> +		atomic_set(&r_xprt->rx_inject_count,
> +			   xprt_rdma_inject_transport_fault);
> +		pr_info("rpcrdma: injecting transport disconnect\n");
> +		(void)rdma_disconnect(r_xprt->rx_ia.ri_id);
> +	}
> +}
> +#else
> +static void
> +xprt_rdma_inject_disconnect(struct rpcrdma_xprt *r_xprt)
> +{
> +}
> +#endif
> +
>  /*
>   * xprt_rdma_destroy
>   *
> @@ -405,6 +434,8 @@ xprt_setup_rdma(struct xprt_create *args)
>  	INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
>  			  xprt_rdma_connect_worker);
>  
> +	atomic_set(&new_xprt->rx_inject_count,
> +		   xprt_rdma_inject_transport_fault);
>  	xprt_rdma_format_addresses(xprt);
>  	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
>  	if (xprt->max_payload == 0)
> @@ -515,6 +546,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
>  out:
>  	dprintk("RPC:       %s: size %zd, request 0x%p\n", __func__, size, req);
>  	req->rl_connect_cookie = 0;	/* our reserved value */
> +	xprt_rdma_inject_disconnect(r_xprt);
>  	return req->rl_sendbuf->rg_base;
>  
>  out_rdmabuf:
> @@ -589,6 +621,7 @@ xprt_rdma_free(void *buffer)
>  	}
>  
>  	rpcrdma_buffer_put(req);
> +	xprt_rdma_inject_disconnect(r_xprt);
>  }
>  
>  /*
> @@ -634,6 +667,7 @@ xprt_rdma_send_request(struct rpc_task *task)
>  
>  	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
>  	rqst->rq_bytes_sent = 0;
> +	xprt_rdma_inject_disconnect(r_xprt);
>  	return 0;
>  
>  failed_marshal:
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 78e0b8b..08aee53 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -377,6 +377,7 @@ struct rpcrdma_xprt {
>  	struct rpcrdma_create_data_internal rx_data;
>  	struct delayed_work	rx_connect_worker;
>  	struct rpcrdma_stats	rx_stats;
> +	atomic_t		rx_inject_count;
>  };
>  
>  #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux