Allow the default load factor for the connection and service tables to be configured. Signed-off-by: Julian Anastasov <ja@xxxxxx> --- Documentation/networking/ipvs-sysctl.rst | 33 +++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 72 ++++++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst index 3fb5fa142eef..ee9f70f446b4 100644 --- a/Documentation/networking/ipvs-sysctl.rst +++ b/Documentation/networking/ipvs-sysctl.rst @@ -29,6 +29,30 @@ backup_only - BOOLEAN If set, disable the director function while the server is in backup mode to avoid packet loops for DR/TUN methods. +conn_lfactor - INTEGER + -4 - default + Valid range: -8 (larger table) .. 8 (smaller table) + + Controls the sizing of the connection hash table based on the + load factor (number of connections per table buckets): + 2^conn_lfactor = nodes / buckets + As result, the table grows if load increases and shrinks when + load decreases in the range of 2^8 - 2^conn_tab_bits (module + parameter). + The value is a shift count where negative values select + buckets = (connection hash nodes << -value) while positive + values select buckets = (connection hash nodes >> value). The + negative values reduce the collisions and reduce the time for + lookups but increase the table size. Positive values will + tolerate load above 100% when using smaller table is + preferred with the cost of more collisions. If using NAT + connections consider decreasing the value with one because + they add two nodes in the hash table. + + Example: + -4: grow if load goes above 6% (buckets = nodes * 16) + 2: grow if load goes above 400% (buckets = nodes / 4) + conn_reuse_mode - INTEGER 1 - default @@ -219,6 +243,15 @@ secure_tcp - INTEGER The value definition is the same as that of drop_entry and drop_packet. +svc_lfactor - INTEGER + -3 - default + Valid range: -8 (larger table) .. 8 (smaller table) + + Controls the sizing of the service hash table based on the + load factor (number of services per table buckets). The table + will grow and shrink in the range of 2^4 - 2^20. + See conn_lfactor for explanation. + sync_threshold - vector of 2 INTEGERs: sync_threshold, sync_period default 3 50 diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b1c638f83559..a0666dc998fb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2431,6 +2431,60 @@ static int ipvs_proc_run_estimation(struct ctl_table *table, int write, return ret; } +static int ipvs_proc_conn_lfactor(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + if (val < -8 || val > 8) { + ret = -EINVAL; + } else { + *valp = val; + if (rcu_dereference_protected(ipvs->conn_tab, 1)) + mod_delayed_work(system_unbound_wq, + &ipvs->conn_resize_work, 0); + } + } + return ret; +} + +static int ipvs_proc_svc_lfactor(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + if (val < -8 || val > 8) { + ret = -EINVAL; + } else { + *valp = val; + if (rcu_dereference_protected(ipvs->svc_table, 1)) + mod_delayed_work(system_unbound_wq, + &ipvs->svc_resize_work, 0); + } + } + return ret; +} + /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) * Do not change order or insert new entries without @@ -2619,6 +2673,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = ipvs_proc_est_nice, }, + { + .procname = "conn_lfactor", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ipvs_proc_conn_lfactor, + }, + { + .procname = "svc_lfactor", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ipvs_proc_svc_lfactor, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -4856,6 +4922,12 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx].extra2 = ipvs; tbl[idx++].data = &ipvs->sysctl_est_nice; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_conn_lfactor; + + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_svc_lfactor; + #ifdef CONFIG_IP_VS_DEBUG /* Global sysctls must be ro in non-init netns */ if (!net_eq(net, &init_net)) -- 2.44.0