> On Mar 21, 2023, at 7:43 AM, Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote: >> From: Chuck Lever <chuck.lever@xxxxxxxxxx> >> >> This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel >> NFS server. If the client requests RPC-with-TLS and the user space >> handshake agent is running, the server will set up a TLS session. >> >> There are no policy settings yet. For example, the server cannot >> yet require the use of RPC-with-TLS to access its data. >> >> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> >> --- >> include/linux/sunrpc/svc_xprt.h | 5 ++ >> include/linux/sunrpc/svcsock.h | 2 + >> include/trace/events/sunrpc.h | 16 ++++++- >> net/sunrpc/svc_xprt.c | 5 ++ >> net/sunrpc/svcauth_unix.c | 11 ++++- >> net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++ >> 6 files changed, 125 insertions(+), 5 deletions(-) >> >> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h >> index 775368802762..867479204840 100644 >> --- a/include/linux/sunrpc/svc_xprt.h >> +++ b/include/linux/sunrpc/svc_xprt.h >> @@ -27,7 +27,7 @@ struct svc_xprt_ops { >> void (*xpo_detach)(struct svc_xprt *); >> void (*xpo_free)(struct svc_xprt *); >> void (*xpo_kill_temp_xprt)(struct svc_xprt *); >> - void (*xpo_start_tls)(struct svc_xprt *); >> + void (*xpo_handshake)(struct svc_xprt *xprt); >> }; >> >> struct svc_xprt_class { >> @@ -70,6 +70,9 @@ struct svc_xprt { >> #define XPT_LOCAL 12 /* connection from loopback interface */ >> #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ >> #define XPT_CONG_CTRL 14 /* has congestion control */ >> +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ >> +#define XPT_TLS_SESSION 16 /* transport-layer security established */ >> +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ >> >> struct svc_serv *xpt_server; /* service for transport */ >> atomic_t xpt_reserved; /* space on outq that is rsvd */ >> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h >> index bcc555c7ae9c..1175e1c38bac 100644 >> --- a/include/linux/sunrpc/svcsock.h >> +++ b/include/linux/sunrpc/svcsock.h >> @@ -38,6 +38,8 @@ struct svc_sock { >> /* Number of queued send requests */ >> atomic_t sk_sendqlen; >> >> + struct completion sk_handshake_done; >> + >> struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ >> }; >> >> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h >> index cf286a0a17d0..2667a8db4811 100644 >> --- a/include/trace/events/sunrpc.h >> +++ b/include/trace/events/sunrpc.h >> @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency, >> { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ >> { BIT(XPT_LOCAL), "LOCAL" }, \ >> { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ >> - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) >> + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ >> + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ >> + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ >> + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) >> >> TRACE_EVENT(svc_xprt_create_err, >> TP_PROTO( >> @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close); >> DEFINE_SVC_XPRT_EVENT(detach); >> DEFINE_SVC_XPRT_EVENT(free); >> >> +#define DEFINE_SVC_TLS_EVENT(name) \ >> + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ >> + TP_PROTO(const struct svc_xprt *xprt), \ >> + TP_ARGS(xprt)) >> + >> +DEFINE_SVC_TLS_EVENT(start); >> +DEFINE_SVC_TLS_EVENT(upcall); >> +DEFINE_SVC_TLS_EVENT(unavailable); >> +DEFINE_SVC_TLS_EVENT(not_started); >> +DEFINE_SVC_TLS_EVENT(timed_out); >> + >> TRACE_EVENT(svc_xprt_accept, >> TP_PROTO( >> const struct svc_xprt *xprt, >> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c >> index ba629297da4e..b68c04dbf876 100644 >> --- a/net/sunrpc/svc_xprt.c >> +++ b/net/sunrpc/svc_xprt.c >> @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) >> >> if (xpt_flags & BIT(XPT_BUSY)) >> return false; >> - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) >> + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) >> return true; >> if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { >> if (xprt->xpt_ops->xpo_has_wspace(xprt) && >> @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) >> module_put(xprt->xpt_class->xcl_owner); >> } >> svc_xprt_received(xprt); >> + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { >> + xprt->xpt_ops->xpo_handshake(xprt); >> + svc_xprt_received(xprt); >> } else if (svc_xprt_reserve_slot(rqstp, xprt)) { >> /* XPT_DATA|XPT_DEFERRED case: */ >> dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", >> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c >> index 983c5891cb56..374995201df4 100644 >> --- a/net/sunrpc/svcauth_unix.c >> +++ b/net/sunrpc/svcauth_unix.c >> @@ -17,8 +17,9 @@ >> #include <net/ipv6.h> >> #include <linux/kernel.h> >> #include <linux/user_namespace.h> >> -#define RPCDBG_FACILITY RPCDBG_AUTH >> +#include <trace/events/sunrpc.h> >> >> +#define RPCDBG_FACILITY RPCDBG_AUTH >> >> #include "netns.h" >> >> @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) >> { >> struct xdr_stream *xdr = &rqstp->rq_arg_stream; >> struct svc_cred *cred = &rqstp->rq_cred; >> + struct svc_xprt *xprt = rqstp->rq_xprt; >> u32 flavor, len; >> void *body; >> __be32 *p; >> @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) >> if (cred->cr_group_info == NULL) >> return SVC_CLOSE; >> >> - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { >> + if (xprt->xpt_ops->xpo_handshake) { >> p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); >> if (!p) >> return SVC_CLOSE; >> + trace_svc_tls_start(xprt); >> *p++ = rpc_auth_null; >> *p++ = cpu_to_be32(8); >> memcpy(p, "STARTTLS", 8); >> + >> + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >> + svc_xprt_enqueue(xprt); >> } else { >> + trace_svc_tls_unavailable(xprt); >> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, >> RPC_AUTH_NULL, NULL, 0) < 0) >> return SVC_CLOSE; >> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c >> index b6df73cb706a..16ba8d6ab20e 100644 >> --- a/net/sunrpc/svcsock.c >> +++ b/net/sunrpc/svcsock.c >> @@ -44,9 +44,11 @@ >> #include <net/tcp.h> >> #include <net/tcp_states.h> >> #include <net/tls.h> >> +#include <net/handshake.h> >> #include <linux/uaccess.h> >> #include <linux/highmem.h> >> #include <asm/ioctls.h> >> +#include <linux/key.h> >> >> #include <linux/sunrpc/types.h> >> #include <linux/sunrpc/clnt.h> >> @@ -64,6 +66,7 @@ >> >> #define RPCDBG_FACILITY RPCDBG_SVCXPRT >> >> +#define SVC_HANDSHAKE_TO (20U * HZ) >> >> static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, >> int flags); >> @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk) >> rmb(); >> svsk->sk_odata(sk); >> trace_svcsock_data_ready(&svsk->sk_xprt, 0); >> + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) >> + return; >> if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) >> svc_xprt_enqueue(&svsk->sk_xprt); >> } >> @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) >> sock_no_linger(svsk->sk_sock->sk); >> } >> >> +/** >> + * svc_tcp_handshake_done - Handshake completion handler >> + * @data: address of xprt to wake >> + * @status: status of handshake >> + * @peerid: serial number of key containing the remote peer's identity >> + * >> + * If a security policy is specified as an export option, we don't >> + * have a specific export here to check. So we set a "TLS session >> + * is present" flag on the xprt and let an upper layer enforce local >> + * security policy. >> + */ >> +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) >> +{ >> + struct svc_xprt *xprt = data; >> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >> + >> + if (!status) { >> + if (peerid != TLS_NO_PEERID) >> + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); >> + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); >> + } >> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >> + complete_all(&svsk->sk_handshake_done); >> +} >> + >> +/** >> + * svc_tcp_handshake - Perform a transport-layer security handshake >> + * @xprt: connected transport endpoint >> + * >> + */ >> +static void svc_tcp_handshake(struct svc_xprt *xprt) >> +{ >> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >> + struct tls_handshake_args args = { >> + .ta_sock = svsk->sk_sock, >> + .ta_done = svc_tcp_handshake_done, >> + .ta_data = xprt, >> + }; >> + int ret; >> + >> + trace_svc_tls_upcall(xprt); >> + >> + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); >> + init_completion(&svsk->sk_handshake_done); >> + smp_wmb(); >> + >> + ret = tls_server_hello_x509(&args, GFP_KERNEL); >> + if (ret) { >> + trace_svc_tls_not_started(xprt); >> + goto out_failed; >> + } >> + >> + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, >> + SVC_HANDSHAKE_TO); > > Just curious: is this 20s timeout mandated by the spec? The spec doesn't mandate a timeout. I simply wanted to guarantee forward progress. > It seems like a long time to block a kernel thread if so. It's about the same as the client side timeout, fwiw. > Do we need to be concerned > with DoS attacks here? Could a client initiate handshakes and then stop > communicating, forcing the server to tie up threads with these 20s > waits? I think a malicious client can do all kinds of similar things already. Do you have a particular timeout value in mind, or is there some other mechanism we can use to better bullet- proof this aspect of the handshake? I'm open to suggestion. >> + if (ret <= 0) { >> + if (tls_handshake_cancel(svsk->sk_sock)) { >> + trace_svc_tls_timed_out(xprt); >> + goto out_close; >> + } >> + } >> + >> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { >> + trace_svc_tls_unavailable(xprt); >> + goto out_close; >> + } >> + >> + /* >> + * Mark the transport ready in case the remote sent RPC >> + * traffic before the kernel received the handshake >> + * completion downcall. >> + */ >> + set_bit(XPT_DATA, &xprt->xpt_flags); >> + svc_xprt_enqueue(xprt); >> + return; >> + >> +out_close: >> + set_bit(XPT_CLOSE, &xprt->xpt_flags); >> +out_failed: >> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >> + set_bit(XPT_DATA, &xprt->xpt_flags); >> + svc_xprt_enqueue(xprt); >> +} >> + >> /* >> * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo >> */ >> @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { >> .xpo_has_wspace = svc_tcp_has_wspace, >> .xpo_accept = svc_tcp_accept, >> .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, >> + .xpo_handshake = svc_tcp_handshake, >> }; >> >> static struct svc_xprt_class svc_tcp_class = { >> @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt) >> { >> struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >> >> + tls_handshake_cancel(svsk->sk_sock); >> + >> if (svsk->sk_sock->file) >> sockfd_put(svsk->sk_sock); >> else >> >> > > -- > Jeff Layton <jlayton@xxxxxxxxxx> -- Chuck Lever