Re: [PATCH v2 36/47] nfsd41: sunrpc: Added rpc server-side backchannel handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Mar 28, 2009 at 11:34:21AM +0300, Benny Halevy wrote:
> From: Andy Adamson <andros@xxxxxxxxxx>
> 
> FIXME: bhalevy: write up commit message

?--b.

> 
> Signed-off-by: Mike Sager <sager@xxxxxxxxxx>
> Signed-off-by: Marc Eshel <eshel@xxxxxxxxxxxxxxx>
> Signed-off-by: Benny Halevy <bhalevy@xxxxxxxxxxx>
> 
> When the call direction is a reply, copy the xid and call direction into the
> req->rq_private_buf.head[0].iov_base otherwise rpc_verify_header returns
> rpc_garbage.
> 
> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx>
> Signed-off-by: Benny Halevy <bhalevy@xxxxxxxxxxx>
> ---
>  include/linux/sunrpc/clnt.h    |    1 +
>  include/linux/sunrpc/svcsock.h |    1 +
>  include/linux/sunrpc/xprt.h    |    4 +
>  net/sunrpc/clnt.c              |    1 +
>  net/sunrpc/svcsock.c           |   81 +++++++++++
>  net/sunrpc/xprt.c              |   41 ++++++-
>  net/sunrpc/xprtsock.c          |  286 +++++++++++++++++++++++++++++++++++++++-
>  7 files changed, 405 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
> index 928c547..0b32b00 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -111,6 +111,7 @@ struct rpc_create_args {
>  	rpc_authflavor_t	authflavor;
>  	unsigned long		flags;
>  	char			*client_name;
> +	struct svc_sock		*bc_sock;	/* NFSv4.1 backchannel */
>  };
>  
>  /* Values for "flags" field */
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index 483e103..e8fda10 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -28,6 +28,7 @@ struct svc_sock {
>  	/* private TCP part */
>  	u32			sk_reclen;	/* length of record */
>  	u32			sk_tcplen;	/* current read length */
> +	struct rpc_xprt	       *sk_bc_xprt;	/* NFSv4.1 backchannel xprt */
>  };
>  
>  /*
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index 11fc71d..e9ffee3 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -174,6 +174,9 @@ struct rpc_xprt {
>  	spinlock_t		reserve_lock;	/* lock slot table */
>  	u32			xid;		/* Next XID value to use */
>  	struct rpc_task *	snd_task;	/* Task blocked in send */
> +#if defined(CONFIG_NFSD_V4_1)
> +	struct svc_sock		*bc_sock;	/* NFSv4.1 backchannel */
> +#endif /* CONFIG_NFSD_V4_1 */
>  	struct list_head	recv;
>  
>  	struct {
> @@ -197,6 +200,7 @@ struct xprt_create {
>  	struct sockaddr *	srcaddr;	/* optional local address */
>  	struct sockaddr *	dstaddr;	/* remote peer address */
>  	size_t			addrlen;
> +	struct svc_sock		*bc_sock;	/* NFSv4.1 backchannel */
>  };
>  
>  struct xprt_class {
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 836f15c..0a8f2d0 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -266,6 +266,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
>  		.srcaddr = args->saddress,
>  		.dstaddr = args->address,
>  		.addrlen = args->addrsize,
> +		.bc_sock = args->bc_sock,
>  	};
>  	char servername[48];
>  
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 7a2a90f..90b2232 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -49,6 +49,7 @@
>  #include <linux/sunrpc/msg_prot.h>
>  #include <linux/sunrpc/svcsock.h>
>  #include <linux/sunrpc/stats.h>
> +#include <linux/sunrpc/xprt.h>
>  
>  #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
>  
> @@ -790,6 +791,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  	int		len;
>  	struct kvec *vec;
>  	int pnum, vlen;
> +#if defined(CONFIG_NFSD_V4_1)
> +	struct rpc_rqst *req = NULL;
> +#endif
>  
>  	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
>  		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
> @@ -856,12 +860,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  	len = svsk->sk_reclen;
>  	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
>  
> +	/*
> +	 * We have enough data for the whole tcp record. Let's try and read the
> +	 * first 8 bytes to get the xid and the call direction. We can use this
> +	 * to figure out if this is a call or a reply to a callback. If
> +	 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
> +	 * In that case, don't bother with the calldir and just read the data.
> +	 * It will be rejected in svc_process.
> +	 */
> +
>  	vec = rqstp->rq_vec;
>  	vec[0] = rqstp->rq_arg.head[0];
>  	vlen = PAGE_SIZE;
> +
> +	if (len >= 8) {
> +		u32 *p;
> +		u32 xid;
> +		u32 calldir;
> +
> +		len = svc_recvfrom(rqstp, vec, 1, 8);
> +		if (len < 0)
> +			goto error;
> +
> +		p = (u32 *)rqstp->rq_arg.head[0].iov_base;
> +		xid = *p++;
> +		calldir = *p;
> +
> +#if defined(CONFIG_NFSD_V4_1)
> +		if (calldir) {
> +			/* REPLY */
> +			if (svsk->sk_bc_xprt)
> +				req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid);
> +			if (req) {
> +				memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
> +					sizeof(struct xdr_buf));
> +				/* copy the xid and call direction */
> +				memcpy(req->rq_private_buf.head[0].iov_base,
> +					rqstp->rq_arg.head[0].iov_base, 8);
> +				vec[0] = req->rq_private_buf.head[0];
> +			} else
> +				printk(KERN_NOTICE
> +					"%s: Got unrecognized reply: "
> +					"calldir 0x%x sk_bc_xprt %p xid %08x\n",
> +					__func__, ntohl(calldir),
> +					svsk->sk_bc_xprt, xid);
> +		}
> +
> +		if (!calldir || !req)
> +			vec[0] = rqstp->rq_arg.head[0];
> +
> +#else /* CONFIG_NFSD_V4_1 */
> +		vec[0] = rqstp->rq_arg.head[0];
> +#endif /* CONFIG_NFSD_V4_1 */
> +		vec[0].iov_base += 8;
> +		vec[0].iov_len -= 8;
> +		len = svsk->sk_reclen - 8;
> +		vlen -= 8;
> +	}
> +
>  	pnum = 1;
>  	while (vlen < len) {
> +#if defined(CONFIG_NFSD_V4_1)
> +		vec[pnum].iov_base = (req) ?
> +			page_address(req->rq_private_buf.pages[pnum - 1]) :
> +			page_address(rqstp->rq_pages[pnum]);
> +#else /* CONFIG_NFSD_V4_1 */
>  		vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]);
> +#endif /* CONFIG_NFSD_V4_1 */
>  		vec[pnum].iov_len = PAGE_SIZE;
>  		pnum++;
>  		vlen += PAGE_SIZE;
> @@ -873,6 +938,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  	if (len < 0)
>  		goto error;
>  
> +	/*
> +	 * Account for the 8 bytes we read earlier
> +	 */
> +	len += 8;
> +
> +#if defined(CONFIG_NFSD_V4_1)
> +	if (req) {
> +		xprt_complete_rqst(req->rq_task, len);
> +		len = 0;
> +		goto out;
> +	}
> +#endif /* CONFIG_NFSD_V4_1 */
>  	dprintk("svc: TCP complete record (%d bytes)\n", len);
>  	rqstp->rq_arg.len = len;
>  	rqstp->rq_arg.page_base = 0;
> @@ -886,6 +963,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  	rqstp->rq_xprt_ctxt   = NULL;
>  	rqstp->rq_prot	      = IPPROTO_TCP;
>  
> +#if defined(CONFIG_NFSD_V4_1)
> +out:
> +#endif /* CONFIG_NFSD_V4_1 */
> +
>  	/* Reset TCP read info */
>  	svsk->sk_reclen = 0;
>  	svsk->sk_tcplen = 0;
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 62098d1..7fac90c 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -998,6 +998,27 @@ void xprt_release(struct rpc_task *task)
>  	spin_unlock(&xprt->reserve_lock);
>  }
>  
> +/*
> + * The autoclose function for the back channel
> + *
> + * The callback channel should never close the channel,
> + * let the forechannel do that.
> + */
> +static void bc_autoclose(struct work_struct *work)
> +{
> +	return;
> +}
> +
> +
> +/*
> + * The autodisconnect routine for the back channel. We never disconnect
> + */
> +static void
> +bc_init_autodisconnect(unsigned long data)
> +{
> +	return;
> +}
> +
>  /**
>   * xprt_create_transport - create an RPC transport
>   * @args: rpc transport creation arguments
> @@ -1034,9 +1055,16 @@ found:
>  
>  	INIT_LIST_HEAD(&xprt->free);
>  	INIT_LIST_HEAD(&xprt->recv);
> -	INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
> -	setup_timer(&xprt->timer, xprt_init_autodisconnect,
> -			(unsigned long)xprt);
> +	if (args->bc_sock) {
> +		INIT_WORK(&xprt->task_cleanup, bc_autoclose);
> +		setup_timer(&xprt->timer, bc_init_autodisconnect,
> +			    (unsigned long)xprt);
> +	} else {
> +		INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
> +		setup_timer(&xprt->timer, xprt_init_autodisconnect,
> +			    (unsigned long)xprt);
> +	}
> +
>  	xprt->last_used = jiffies;
>  	xprt->cwnd = RPC_INITCWND;
>  	xprt->bind_index = 0;
> @@ -1056,6 +1084,13 @@ found:
>  	dprintk("RPC:       created transport %p with %u slots\n", xprt,
>  			xprt->max_reqs);
>  
> +	/*
> +	 * Since we don't want connections for the backchannel, we set
> +	 * the xprt status to connected
> +	 */
> +	if (args->bc_sock)
> +		xprt_set_connected(xprt);
> +
>  	return xprt;
>  }
>  
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 29c71e6..f90c7c3 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -32,6 +32,7 @@
>  #include <linux/tcp.h>
>  #include <linux/sunrpc/clnt.h>
>  #include <linux/sunrpc/sched.h>
> +#include <linux/sunrpc/svcsock.h>
>  #include <linux/sunrpc/xprtsock.h>
>  #include <linux/file.h>
>  
> @@ -1884,6 +1885,221 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
>  			xprt->stat.bklog_u);
>  }
>  
> +#if defined(CONFIG_NFSD_V4_1)
> +/*
> + * The connect worker for the backchannel
> + * This should never be called as we should never need to connect
> + */
> +static void bc_connect_worker(struct work_struct *work)
> +{
> +	BUG();
> +}
> +
> +/*
> + * The set_port routine of the rpc_xprt_ops. This is related to the portmapper
> + * and should never be called
> + */
> +
> +static void bc_set_port(struct rpc_xprt *xprt, unsigned short port)
> +{
> +	BUG();
> +}
> +
> +/*
> + * The connect routine for the backchannel rpc_xprt ops
> + * Again, should never be called!
> + */
> +
> +static void bc_connect(struct rpc_task *task)
> +{
> +	BUG();
> +}
> +
> +struct rpc_buffer {
> +	size_t	len;
> +	char	data[];
> +};
> +/*
> + * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason
> + * we allocate pages instead doing a kmalloc like rpc_malloc is because we want
> + * to use the server side send routines.
> + */
> +void *bc_malloc(struct rpc_task *task, size_t size)
> +{
> +	struct page *page;
> +	struct rpc_buffer *buf;
> +
> +	BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer));
> +	page = alloc_page(GFP_KERNEL);
> +
> +	if (!page)
> +		return NULL;
> +
> +	buf = page_address(page);
> +	buf->len = PAGE_SIZE;
> +
> +	return buf->data;
> +}
> +
> +/*
> + * Free the space allocated in the bc_alloc routine
> + */
> +void bc_free(void *buffer)
> +{
> +	struct rpc_buffer *buf;
> +
> +	if (!buffer)
> +		return;
> +
> +	buf = container_of(buffer, struct rpc_buffer, data);
> +	free_pages((unsigned long)buf, get_order(buf->len));
> +}
> +
> +/*
> + * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
> + * held. Borrows heavily from svc_tcp_sendto and xs_tcp_semd_request.
> + */
> +static int bc_sendto(struct rpc_rqst *req)
> +{
> +	int total_len;
> +	int len;
> +	int size;
> +	int result;
> +	struct xdr_buf *xbufp = &req->rq_snd_buf;
> +	struct page **pages = xbufp->pages;
> +	unsigned int flags = MSG_MORE;
> +	unsigned int pglen = xbufp->page_len;
> +	size_t base = xbufp->page_base;
> +	struct rpc_xprt *xprt = req->rq_xprt;
> +	struct sock_xprt *transport =
> +				container_of(xprt, struct sock_xprt, xprt);
> +	struct socket *sock = transport->sock;
> +
> +	total_len = xbufp->len;
> +
> +	/*
> +	 * Set up the rpc header and record marker stuff
> +	 */
> +	xs_encode_tcp_record_marker(xbufp);
> +
> +	/*
> +	 * The RPC message is divided into 3 pieces:
> +	 * - The header: This is what most of the smaller RPC messages consist
> +	 *   of. Often the whole message is in this.
> +	 *
> +	 *   - xdr->pages: This is a list of pages that contain data, for
> +	 *   example in a write request or while using rpcsec gss
> +	 *
> +	 *   - The tail: This is the rest of the rpc message
> +	 *
> +	 *  First we send the header, then the pages and then finally the tail.
> +	 *  The code borrows heavily from svc_sendto.
> +	 */
> +
> +	/*
> +	 * Send the head
> +	 */
> +	if (total_len == xbufp->head[0].iov_len)
> +		flags = 0;
> +
> +	len = sock->ops->sendpage(sock, virt_to_page(xbufp->head[0].iov_base),
> +			(unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK,
> +			xbufp->head[0].iov_len, flags);
> +
> +	if (len != xbufp->head[0].iov_len)
> +		goto out;
> +
> +	/*
> +	 * send page data
> +	 *
> +	 * Check the amount of data to be sent. If it is less than the
> +	 * remaining page, then send it else send the current page
> +	 */
> +
> +	size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
> +	while (pglen > 0) {
> +		if (total_len == size)
> +			flags = 0;
> +		result = sock->ops->sendpage(sock, *pages, base, size, flags);
> +		if (result > 0)
> +			len += result;
> +		if (result != size)
> +			goto out;
> +		total_len -= size;
> +		pglen -= size;
> +		size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
> +		base = 0;
> +		pages++;
> +	}
> +	/*
> +	 * send tail
> +	 */
> +	if (xbufp->tail[0].iov_len) {
> +		result = sock->ops->sendpage(sock,
> +			xbufp->tail[0].iov_base,
> +			(unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK,
> +			xbufp->tail[0].iov_len,
> +			0);
> +
> +		if (result > 0)
> +			len += result;
> +	}
> +out:
> +	if (len != xbufp->len)
> +		printk(KERN_NOTICE "Error sending entire callback!\n");
> +
> +	return len;
> +}
> +
> +/*
> + * The send routine. Borrows from svc_send
> + */
> +static int bc_send_request(struct rpc_task *task)
> +{
> +	struct rpc_rqst *req = task->tk_rqstp;
> +	struct rpc_xprt *bc_xprt = req->rq_xprt;
> +	struct svc_xprt	*xprt;
> +	struct svc_sock         *svsk;
> +	u32                     len;
> +
> +	dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
> +	/*
> +	 * Get the server socket associated with this callback xprt
> +	 */
> +	svsk = bc_xprt->bc_sock;
> +	xprt = &svsk->sk_xprt;
> +
> +	mutex_lock(&xprt->xpt_mutex);
> +	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
> +		len = -ENOTCONN;
> +	else
> +		len = bc_sendto(req);
> +	mutex_unlock(&xprt->xpt_mutex);
> +
> +	return 0;
> +
> +}
> +
> +/*
> + * The close routine. Since this is client initiated, we do nothing
> + */
> +
> +static void bc_close(struct rpc_xprt *xprt)
> +{
> +	return;
> +}
> +
> +/*
> + * The xprt destroy routine. Again, because this connection is client
> + * initiated, we do nothing
> + */
> +
> +static void bc_destroy(struct rpc_xprt *xprt)
> +{
> +	return;
> +}
> +#endif /* CONFIG_NFSD_V4_1 */
> +
>  static struct rpc_xprt_ops xs_udp_ops = {
>  	.set_buffer_size	= xs_udp_set_buffer_size,
>  	.reserve_xprt		= xprt_reserve_xprt_cong,
> @@ -1917,6 +2133,26 @@ static struct rpc_xprt_ops xs_tcp_ops = {
>  	.print_stats		= xs_tcp_print_stats,
>  };
>  
> +#if defined(CONFIG_NFSD_V4_1)
> +/*
> + * The rpc_xprt_ops for the server backchannel
> + */
> +
> +static struct rpc_xprt_ops bc_tcp_ops = {
> +	.reserve_xprt		= xprt_reserve_xprt,
> +	.release_xprt		= xprt_release_xprt,
> +	.set_port		= bc_set_port,
> +	.connect		= bc_connect,
> +	.buf_alloc		= bc_malloc,
> +	.buf_free		= bc_free,
> +	.send_request		= bc_send_request,
> +	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
> +	.close			= bc_close,
> +	.destroy		= bc_destroy,
> +	.print_stats		= xs_tcp_print_stats,
> +};
> +#endif /* CONFIG_NFSD_V4_1 */
> +
>  static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
>  				      unsigned int slot_table_size)
>  {
> @@ -2049,13 +2285,31 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
>  	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
>  	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
>  
> -	xprt->bind_timeout = XS_BIND_TO;
> -	xprt->connect_timeout = XS_TCP_CONN_TO;
> -	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
> -	xprt->idle_timeout = XS_IDLE_DISC_TO;
> +#ifdef CONFIG_NFSD_V4_1
> +	if (args->bc_sock) {
> +		/* backchannel */
> +		xprt_set_bound(xprt);
> +		INIT_DELAYED_WORK(&transport->connect_worker,
> +				  bc_connect_worker);
> +		xprt->bind_timeout = 0;
> +		xprt->connect_timeout = 0;
> +		xprt->reestablish_timeout = 0;
> +		xprt->idle_timeout = (~0);
>  
> -	xprt->ops = &xs_tcp_ops;
> -	xprt->timeout = &xs_tcp_default_timeout;
> +		/*
> +		 * The backchannel uses the same socket connection as the
> +		 * forechannel
> +		 */
> +		xprt->bc_sock = args->bc_sock;
> +		xprt->bc_sock->sk_bc_xprt = xprt;
> +		transport->sock = xprt->bc_sock->sk_sock;
> +		transport->inet = xprt->bc_sock->sk_sk;
> +
> +		xprt->ops = &bc_tcp_ops;
> +
> +		goto next;
> +	}
> +#endif /* CONFIG_NFSD_V4_1 */
>  
>  	switch (addr->sa_family) {
>  	case AF_INET:
> @@ -2063,13 +2317,31 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
>  			xprt_set_bound(xprt);
>  
>  		INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
> -		xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
>  		break;
>  	case AF_INET6:
>  		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
>  			xprt_set_bound(xprt);
>  
>  		INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
> +		break;
> +	}
> +	xprt->bind_timeout = XS_BIND_TO;
> +	xprt->connect_timeout = XS_TCP_CONN_TO;
> +	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
> +	xprt->idle_timeout = XS_IDLE_DISC_TO;
> +
> +	xprt->ops = &xs_tcp_ops;
> +
> +#ifdef CONFIG_NFSD_V4_1
> +next:
> +#endif /* CONFIG_NFSD_V4_1 */
> +	xprt->timeout = &xs_tcp_default_timeout;
> +
> +	switch (addr->sa_family) {
> +	case AF_INET:
> +		xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
> +		break;
> +	case AF_INET6:
>  		xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
>  		break;
>  	default:
> -- 
> 1.6.2.1
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux