This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this option is enabled or eBPF program is attached, we will be able to migrate child sockets from a listener to another in the same reuseport group after close() or shutdown() syscalls. Signed-off-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxxxx> Reviewed-by: Benjamin Herrenschmidt <benh@xxxxxxxxxx> --- Documentation/networking/ip-sysctl.rst | 20 ++++++++++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ 3 files changed, 30 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c2ecc9894fd0..8e92f9b28aad 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -732,6 +732,26 @@ tcp_syncookies - INTEGER network connections you can set this knob to 2 to enable unconditionally generation of syncookies. +tcp_migrate_req - INTEGER + The incoming connection is tied to a specific listening socket when + the initial SYN packet is received during the three-way handshake. + When a listener is closed, in-flight request sockets during the + handshake and established sockets in the accept queue are aborted. + + If the listener has SO_REUSEPORT enabled, other listeners on the + same port should have been able to accept such connections. This + option makes it possible to migrate such child sockets to another + listener after close() or shutdown(). + + Default: 0 + + Note that the source and destination listeners MUST have the same + settings at the socket API level. If different applications listen + on the same port, disable this option or attach the + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE type of eBPF program to select + the correct socket by bpf_sk_select_reuseport() or to cancel + migration by returning SK_DROP. + tcp_fastopen - INTEGER Enable TCP Fast Open (RFC7413) to send and accept data in the opening SYN packet. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f6af8d96d3c6..fd63c91addcc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -126,6 +126,7 @@ struct netns_ipv4 { u8 sysctl_tcp_syn_retries; u8 sysctl_tcp_synack_retries; u8 sysctl_tcp_syncookies; + u8 sysctl_tcp_migrate_req; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a62934b9f15a..7bb013fcbf5f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -940,6 +940,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, }, #endif + { + .procname = "tcp_migrate_req", + .data = &init_net.ipv4.sysctl_tcp_migrate_req, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, { .procname = "tcp_reordering", .data = &init_net.ipv4.sysctl_tcp_reordering, -- 2.30.2