[PATCH net-next 2/4] net/smc: handle sockopt TCP_NODELAY

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Ursula Braun <ubraun@xxxxxxxxxxxxxxxxxx>

TCP sockopts must not interfere with the CLC handshake on the
CLC socket. Therefore, we defer some of them till the CLC
handshake has completed, like resetting TCP_NODELAY.

While touching setsockopt, the TCP_FASTOPEN sockopts are
ignored, since SMC-connection setup is based on the TCP
three-way-handshake.

Signed-off-by: Ursula Braun <ubraun@xxxxxxxxxxxxxxxxxx>
---
 net/smc/af_smc.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/smc.h    |   4 ++
 2 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5f8046c62d90..96f4d182f998 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -377,6 +377,22 @@ static void smc_link_save_peer_info(struct smc_link *link,
 	link->peer_mtu = clc->qp_mtu;
 }
 
+/* apply sockopts deferred until the CLC handshake has completed */
+static void smc_apply_deferred_sockopts(struct smc_sock *smc)
+{
+	struct smc_sock *opt_smc = smc->listen_smc ? smc->listen_smc : smc;
+	int val = 0; /* TCP rejects optlen < sizeof(int) with -EINVAL */
+
+	if (opt_smc->deferred_nodelay_reset) {
+		kernel_setsockopt(smc->clcsock, SOL_TCP, TCP_NODELAY,
+				  (char *)&val, sizeof(val));
+		/* clear only the own flag; the one of a listen sock must
+		 * stay set for connections accepted later
+		 */
+		smc->deferred_nodelay_reset = 0;
+	}
+}
+
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc)
 {
@@ -506,6 +522,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
 	smc_tx_init(smc);
 
 out_connected:
+	smc_apply_deferred_sockopts(smc);
 	smc_copy_sock_settings_to_clc(smc);
 	if (smc->sk.sk_state == SMC_INIT)
 		smc->sk.sk_state = SMC_ACTIVE;
@@ -908,6 +925,7 @@ static void smc_listen_work(struct work_struct *work)
 	mutex_unlock(&smc_create_lgr_pending);
 
 out_connected:
+	smc_apply_deferred_sockopts(new_smc);
 	sk_refcnt_debug_inc(newsmcsk);
 	if (newsmcsk->sk_state == SMC_INIT)
 		newsmcsk->sk_state = SMC_ACTIVE;
@@ -1280,9 +1298,60 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 {
 	struct sock *sk = sock->sk;
 	struct smc_sock *smc;
+	int val;
 
 	smc = smc_sk(sk);
+	if (smc->use_fallback || level != SOL_TCP)
+		goto clcsock;
+
+	/* level SOL_TCP */
+	switch (optname) {
+	case TCP_CONGESTION:
+	case TCP_ULP:
+		/* sockopts without integer value; do not apply to SMC */
+		goto clcsock;
+	default:
+		break;
+	}
 
+	if (optlen < sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	lock_sock(sk);
+	switch (optname) {
+	case TCP_NODELAY:
+		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+			release_sock(sk);
+			goto clcsock;
+		}
+		/* for the CLC-handshake TCP_NODELAY is desired;
+		 * in case of fallback to TCP, a nodelay reset is
+		 * triggered afterwards.
+		 */
+		if (val)
+			smc->deferred_nodelay_reset = 0;
+		else
+			smc->deferred_nodelay_reset = 1;
+		break;
+	case TCP_FASTOPEN:
+	case TCP_FASTOPEN_CONNECT:
+	case TCP_FASTOPEN_KEY:
+	case TCP_FASTOPEN_NO_COOKIE:
+		/* ignore these options; 3-way handshake shouldn't be
+		 * bypassed with SMC
+		 */
+		break;
+	default:
+		/* apply option to the CLC socket */
+		release_sock(sk);
+		goto clcsock;
+	}
+	release_sock(sk);
+	return 0;
+
+clcsock:
 	/* generic setsockopts reaching us here always apply to the
 	 * CLC socket
 	 */
@@ -1293,10 +1362,41 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 static int smc_getsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, int __user *optlen)
 {
+	struct sock *sk = sock->sk;
 	struct smc_sock *smc;
+	int val, len;
 
-	smc = smc_sk(sock->sk);
-	/* socket options apply to the CLC socket */
+	smc = smc_sk(sk);
+
+	if (smc->use_fallback || level != SOL_TCP)
+		goto clcsock;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+	len = min_t(unsigned int, len, sizeof(int));
+	if (len < 0) /* NOTE(review): never true after unsigned min_t() */
+		return -EINVAL;
+
+	/* level SOL_TCP: a still deferred option is answered by SMC itself */
+	switch (optname) {
+	case TCP_NODELAY:
+		if (smc->deferred_nodelay_reset)
+			val = 0;
+		else
+			goto clcsock;
+		break;
+	default:
+		goto clcsock;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+	return 0;
+
+clcsock:
+	/* all other socket options are answered by the CLC socket */
 	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
 					     optval, optlen);
 }
@@ -1387,6 +1487,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
 	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
 	struct smc_sock *smc;
 	struct sock *sk;
+	u8 val = 1;
 	int rc;
 
 	rc = -ESOCKTNOSUPPORT;
@@ -1412,6 +1513,10 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
 		sk_common_release(sk);
 		goto out;
 	}
+	/* clc handshake should run with disabled Nagle algorithm */
+	kernel_setsockopt(smc->clcsock, SOL_TCP, TCP_NODELAY, &val,
+			  sizeof(val));
+	smc->deferred_nodelay_reset = 1; /* TCP_NODELAY is not the default */
 	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
 	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
 
diff --git a/net/smc/smc.h b/net/smc/smc.h
index e4829a2f46ba..6dfc1c90bed2 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -185,6 +185,10 @@ struct smc_sock {				/* smc sock container */
 						 * started, waiting for unsent
 						 * data to be sent
 						 */
+	u8			deferred_nodelay_reset : 1;
+						/* defer Nagle after CLC
+						 * handshake
+						 */
 };
 
 static inline struct smc_sock *smc_sk(const struct sock *sk)
-- 
2.13.5

--
To unsubscribe from this list: send the line "unsubscribe linux-s390" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Kernel Development]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite Info]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Linux Media]     [Device Mapper]

  Powered by Linux