Re: [PATCH v3 13/14] SUNRPC: vsock svcsock support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2017-06-30 at 14:23 +0100, Stefan Hajnoczi wrote:
> Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
> ---
>  net/sunrpc/svcsock.c | 214 +++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 190 insertions(+), 24 deletions(-)
> 
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index e67b097..86cce8f 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -42,7 +42,9 @@
>  #include <net/udp.h>
>  #include <net/tcp.h>
>  #include <net/tcp_states.h>
> +#include <net/af_vsock.h>
>  #include <linux/uaccess.h>
> +#include <asm/uaccess.h>
>  #include <asm/ioctls.h>
>  #include <trace/events/skb.h>
>  
> @@ -64,7 +66,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
>  static int		svc_udp_recvfrom(struct svc_rqst *);
>  static int		svc_udp_sendto(struct svc_rqst *);
>  static void		svc_sock_detach(struct svc_xprt *);
> -static void		svc_tcp_sock_detach(struct svc_xprt *);
> +static void		svc_common_sock_detach(struct svc_xprt *);
>  static void		svc_sock_free(struct svc_xprt *);
>  
>  static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
> @@ -810,9 +812,9 @@ static void svc_tcp_state_change(struct sock *sk)
>  }
>  
>  /*
> - * Accept a TCP connection
> + * Accept an incoming connection
>   */
> -static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
> +static struct svc_xprt *svc_common_accept(struct svc_xprt *xprt)
>  {
>  	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>  	struct sockaddr_storage addr;
> @@ -824,7 +826,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
>  	int		err, slen;
>  	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
>  
> -	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
> +	dprintk("svc: %s %p sock %p\n", __func__, svsk, sock);
>  	if (!sock)
>  		return NULL;
>  
> @@ -877,7 +879,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
>  	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
>  	err = kernel_getsockname(newsock, sin, &slen);
>  	if (unlikely(err < 0)) {
> -		dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
> +		dprintk("%s: kernel_getsockname error %d\n", __func__, -err);
>  		slen = offsetof(struct sockaddr, sa_data);
>  	}
>  	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
> @@ -1131,7 +1133,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  		rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
>  
>  	rqstp->rq_xprt_ctxt   = NULL;
> -	rqstp->rq_prot	      = IPPROTO_TCP;
> +	rqstp->rq_prot	      = IPPROTO_TCP; /* TODO vsock should either use 0 or IPPROTO_MAX */
>  	if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
>  		set_bit(RQ_LOCAL, &rqstp->rq_flags);
>  	else
> @@ -1200,13 +1202,13 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
>  }
>  
>  /*
> - * Setup response header. TCP has a 4B record length field.
> + * Setup response header. Byte stream transports have a 4B record length field.
>   */
> -static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
> +static void svc_stream_prep_reply_hdr(struct svc_rqst *rqstp)
>  {
>  	struct kvec *resv = &rqstp->rq_res.head[0];
>  
> -	/* tcp needs a space for the record length... */
> +	/* space for the record length... */
>  	svc_putnl(resv, 0);
>  }
>  
> @@ -1232,7 +1234,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
>  	.xpo_create = svc_bc_create_socket,
>  	.xpo_detach = svc_bc_tcp_sock_detach,
>  	.xpo_free = svc_bc_sock_free,
> -	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> +	.xpo_prep_reply_hdr = svc_stream_prep_reply_hdr,
>  	.xpo_secure_port = svc_sock_secure_port,
>  };
>  
> @@ -1244,6 +1246,145 @@ static struct svc_xprt_class svc_tcp_bc_class = {
>  };
>  
>  #ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +/*
> + * A data_ready event on a listening socket means there's a connection
> + * pending. Do not use state_change as a substitute for it.
> + */
> +static void svc_vsock_listen_data_ready(struct sock *sk)
> +{
> +	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
> +	wait_queue_head_t *wq;
> +
> +	dprintk("svc: socket %p AF_VSOCK (listen) state change %d\n",
> +		sk, sk->sk_state);
> +
> +	/*
> +	 * This callback may called twice when a new connection
> +	 * is established as a child socket inherits everything
> +	 * from a parent VSOCK_SS_LISTEN socket.
> +	 * 1) data_ready method of the parent socket will be called
> +	 *    when one of child sockets become SS_CONNECTED.
> +	 * 2) data_ready method of the child socket may be called
> +	 *    when it receives data before the socket is accepted.
> +	 * In case of 2, we should ignore it silently.
> +	 */
> +	if (sk->sk_state == VSOCK_SS_LISTEN) {
> +		if (svsk) {
> +			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
> +			svc_xprt_enqueue(&svsk->sk_xprt);
> +		} else
> +			printk("svc: socket %p: no user data\n", sk);
> +	}
> +
> +	wq = sk_sleep(sk);
> +	if (wq && waitqueue_active(wq))
> +		wake_up_interruptible_all(wq);
> +}
> +
> +/*
> + * A state change on a connected socket means it's dying or dead.
> + */
> +static void svc_vsock_state_change(struct sock *sk)
> +{
> +	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
> +	wait_queue_head_t *wq = sk_sleep(sk);
> +
> +	dprintk("svc: socket %p AF_VSOCK (connected) state change %d (svsk %p)\n",
> +		sk, sk->sk_state, sk->sk_user_data);
> +
> +	if (!svsk)
> +		printk("svc: socket %p: no user data\n", sk);
> +	else {
> +		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
> +		svc_xprt_enqueue(&svsk->sk_xprt);
> +	}
> +	if (wq && waitqueue_active(wq))
> +		wake_up_interruptible_all(wq);
> +}
> +
> +static void svc_vsock_data_ready(struct sock *sk)
> +{
> +	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
> +	wait_queue_head_t *wq = sk_sleep(sk);
> +
> +	dprintk("svc: socket %p AF_VSOCK data ready (svsk %p)\n",
> +		sk, sk->sk_user_data);
> +	if (svsk) {
> +		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
> +		svc_xprt_enqueue(&svsk->sk_xprt);
> +	}
> +	if (wq && waitqueue_active(wq))
> +		wake_up_interruptible(wq);
> +}
> +
> +static int svc_vsock_has_wspace(struct svc_xprt *xprt)
> +{
> +	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
> +
> +	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
> +		return 1;
> +	return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +}
> +
> +static struct svc_xprt *svc_vsock_create(struct svc_serv *serv,
> +					 struct net *net,
> +					 struct sockaddr *sa, int salen,
> +					 int flags)
> +{
> +	return svc_create_socket(serv, 0, net, sa, salen, flags);
> +}
> +
> +static struct svc_xprt_ops svc_vsock_ops = {
> +	.xpo_create = svc_vsock_create,
> +	.xpo_recvfrom = svc_tcp_recvfrom,
> +	.xpo_sendto = svc_tcp_sendto,
> +	.xpo_release_rqst = svc_release_skb,
> +	.xpo_detach = svc_common_sock_detach,
> +	.xpo_free = svc_sock_free,
> +	.xpo_prep_reply_hdr = svc_stream_prep_reply_hdr,
> +	.xpo_has_wspace = svc_vsock_has_wspace,
> +	.xpo_accept = svc_common_accept,
> +	.xpo_secure_port = svc_sock_secure_port,
> +};
> +
> +static struct svc_xprt_class svc_vsock_class = {
> +	.xcl_name = "vsock",
> +	.xcl_owner = THIS_MODULE,
> +	.xcl_ops = &svc_vsock_ops,
> +	.xcl_max_payload = RPCSVC_MAXPAYLOAD,
> +	.xcl_ident = XPRT_TRANSPORT_VSOCK,
> +};
> +
> +static void svc_vsock_init(struct svc_sock *svsk, struct svc_serv *serv)
> +{
> +	struct sock *sk = svsk->sk_sk;
> +
> +	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_vsock_class,
> +		      &svsk->sk_xprt, serv);
> +	set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
> +	set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
> +	if (sk->sk_state == VSOCK_SS_LISTEN) {
> +		dprintk("setting up AF_VSOCK socket for listening\n");
> +		set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
> +		sk->sk_data_ready = svc_vsock_listen_data_ready;
> +		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
> +	} else {
> +		dprintk("setting up AF_VSOCK socket for reading\n");
> +		sk->sk_state_change = svc_vsock_state_change;
> +		sk->sk_data_ready = svc_vsock_data_ready;
> +		sk->sk_write_space = svc_write_space;
> +
> +		svsk->sk_reclen = 0;
> +		svsk->sk_tcplen = 0;
> +		svsk->sk_datalen = 0;
> +		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
> +
> +		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
> +		if (sk->sk_state != SS_CONNECTED)
> +			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
> +	}
> +}
> +
>  static void svc_bc_vsock_sock_detach(struct svc_xprt *xprt)
>  {
>  }
> @@ -1252,7 +1393,7 @@ static struct svc_xprt_ops svc_vsock_bc_ops = {
>  	.xpo_create = svc_bc_create_socket,
>  	.xpo_detach = svc_bc_vsock_sock_detach,
>  	.xpo_free = svc_bc_sock_free,
> -	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> +	.xpo_prep_reply_hdr = svc_stream_prep_reply_hdr,
>  	.xpo_secure_port = svc_sock_secure_port,
>  };
>  
> @@ -1294,11 +1435,11 @@ static struct svc_xprt_ops svc_tcp_ops = {
>  	.xpo_recvfrom = svc_tcp_recvfrom,
>  	.xpo_sendto = svc_tcp_sendto,
>  	.xpo_release_rqst = svc_release_skb,
> -	.xpo_detach = svc_tcp_sock_detach,
> +	.xpo_detach = svc_common_sock_detach,
>  	.xpo_free = svc_sock_free,
> -	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> +	.xpo_prep_reply_hdr = svc_stream_prep_reply_hdr,
>  	.xpo_has_wspace = svc_tcp_has_wspace,
> -	.xpo_accept = svc_tcp_accept,
> +	.xpo_accept = svc_common_accept,
>  	.xpo_secure_port = svc_sock_secure_port,
>  	.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
>  };
> @@ -1315,6 +1456,9 @@ void svc_init_xprt_sock(void)
>  {
>  	svc_reg_xprt_class(&svc_tcp_class);
>  	svc_reg_xprt_class(&svc_udp_class);
> +#ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +	svc_reg_xprt_class(&svc_vsock_class);
> +#endif
>  	svc_init_bc_xprt_sock();
>  }
>  
> @@ -1322,6 +1466,9 @@ void svc_cleanup_xprt_sock(void)
>  {
>  	svc_unreg_xprt_class(&svc_tcp_class);
>  	svc_unreg_xprt_class(&svc_udp_class);
> +#ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +	svc_unreg_xprt_class(&svc_vsock_class);
> +#endif
>  	svc_cleanup_bc_xprt_sock();
>  }
>  
> @@ -1417,6 +1564,10 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
>  	/* Initialize the socket */
>  	if (sock->type == SOCK_DGRAM)
>  		svc_udp_init(svsk, serv);
> +#ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +	else if (inet->sk_family == AF_VSOCK)
> +		svc_vsock_init(svsk, serv);
> +#endif
>  	else
>  		svc_tcp_init(svsk, serv);

The above conditional is a bit of a mess and doesn't really handle the
case where the upper layer might pass it something non-sensical (i.e.
SOCK_DGRAM + AF_VSOCK).

I think this should vet both values and return ERR_PTR(-EINVAL) if it's
not right. Maybe a switch statement on the address family and then check
the sock->type in each? We can afford to code a little defensively here.


>  
> @@ -1468,13 +1619,22 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
>  
>  	if (!so)
>  		return err;
> -	err = -EAFNOSUPPORT;
> -	if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
> -		goto out;
> -	err =  -EPROTONOSUPPORT;
> -	if (so->sk->sk_protocol != IPPROTO_TCP &&
> -	    so->sk->sk_protocol != IPPROTO_UDP)
> +	err = -EPROTONOSUPPORT;
> +	switch (so->sk->sk_family) {
> +	case PF_INET:
> +	case PF_INET6:
> +		if (so->sk->sk_protocol != IPPROTO_TCP &&
> +		    so->sk->sk_protocol != IPPROTO_UDP)
> +			goto out;
> +		break;
> +	case PF_VSOCK:
> +		if (so->sk->sk_protocol != 0)
> +			goto out;
> +		break;
> +	default:
> +		err = -EAFNOSUPPORT;
>  		goto out;
> +	}
>  	err = -EISCONN;
>  	if (so->state > SS_UNCONNECTED)
>  		goto out;
> @@ -1521,7 +1681,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
>  			serv->sv_program->pg_name, protocol,
>  			__svc_print_addr(sin, buf, sizeof(buf)));
>  
> -	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
> +	if (sin->sa_family != AF_VSOCK &&
> +	    protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
>  		printk(KERN_WARNING "svc: only UDP and TCP "
>  				"sockets supported\n");
>  		return ERR_PTR(-EINVAL);
> @@ -1535,6 +1696,11 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
>  	case AF_INET:
>  		family = PF_INET;
>  		break;
> +#ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +	case AF_VSOCK:
> +		family = PF_VSOCK;
> +		break;
> +#endif
>  	default:
>  		return ERR_PTR(-EINVAL);
>  	}
> @@ -1566,7 +1732,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
>  	if (error < 0)
>  		goto bummer;
>  
> -	if (protocol == IPPROTO_TCP) {
> +	if (type == SOCK_STREAM) {
>  		if ((error = kernel_listen(sock, 64)) < 0)
>  			goto bummer;
>  	}
> @@ -1607,11 +1773,11 @@ static void svc_sock_detach(struct svc_xprt *xprt)
>  /*
>   * Disconnect the socket, and reset the callbacks
>   */
> -static void svc_tcp_sock_detach(struct svc_xprt *xprt)
> +static void svc_common_sock_detach(struct svc_xprt *xprt)
>  {
>  	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>  
> -	dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk);
> +	dprintk("svc: %s(%p)\n", __func__, svsk);
>  
>  	svc_sock_detach(xprt);
>  

-- 
Jeff Layton <jlayton@xxxxxxxxxx>
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux