On Mon, Aug 31, 2009 at 02:22:26PM +0200, Patrick McHardy wrote: > Simon Horman wrote: > > A pointed out by Shin Hong, IPVS doesn't always use atomic operations > > in an atomic manner. While this seems unlikely to be manifest in > > strange behaviour, it seems appropriate to clean this up. > > > > Cc: 홍신 shin hong <hongshin@xxxxxxxxx> > > Signed-off-by: Simon Horman <horms@xxxxxxxxxxxx> > > Applied, thanks. > > > if (af == AF_INET && > > (ip_vs_sync_state & IP_VS_STATE_MASTER) && > > (((cp->protocol != IPPROTO_TCP || > > cp->state == IP_VS_TCP_S_ESTABLISHED) && > > - (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] > > + (pkts % sysctl_ip_vs_sync_threshold[1] > > It seems that proc_do_sync_threshold() should check whether this value > is zero. The current checks also look racy since incorrect values are > first updated, then overwritten again. Hi, I'm wondering if an approach along the lines of the following is valid. The idea is that the value in the ctl_table is essentially a scratch value that is used by the parser and then copied into ip_vs_sync_threshold if it is valid. I'm concerned that there are atomicity issues surrounding writing ip_vs_sync_threshold while there might be readers. diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 98978e7..28d0c4f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -774,8 +774,8 @@ extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, extern int sysctl_ip_vs_cache_bypass; extern int sysctl_ip_vs_expire_nodest_conn; extern int sysctl_ip_vs_expire_quiescent_template; -extern int sysctl_ip_vs_sync_threshold[2]; extern int sysctl_ip_vs_nat_icmp_send; +extern int ip_vs_sync_threshold[2]; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b95699f..f3572b6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1362,8 +1362,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, (ip_vs_sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ip_vs_sync_threshold[1] == ip_vs_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fba2892..8a9ff21 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -76,6 +76,11 @@ static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); /* number of virtual services */ static int ip_vs_num_services = 0; +/* threshold handling */ +static int ip_vs_sync_threshold_min = 0; +static int ip_vs_sync_threshold_max = INT_MAX; +int ip_vs_sync_threshold[2] = { 3, 50 }; + /* sysctl variables */ static int sysctl_ip_vs_drop_entry = 0; static int sysctl_ip_vs_drop_packet = 0; @@ -85,7 +90,7 @@ static int sysctl_ip_vs_am_droprate = 10; int sysctl_ip_vs_cache_bypass = 0; int sysctl_ip_vs_expire_nodest_conn = 0; int sysctl_ip_vs_expire_quiescent_template = 0; -int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; +static int sysctl_ip_vs_sync_threshold[2]; int sysctl_ip_vs_nat_icmp_send = 0; @@ -1521,17 +1526,12 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; - int val[2]; int rc; - /* backup the value first */ - memcpy(val, valp, sizeof(val)); - - rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); - if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { - /* Restore the correct value */ - memcpy(valp, val, sizeof(val)); - } + rc = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + if (write && (valp[0] < valp[1])) + memcpy(ip_vs_sync_threshold, valp, + sizeof(ip_vs_sync_threshold)); return rc; } @@ -1698,6 +1698,8 @@ static struct ctl_table vs_vars[] = { .maxlen = sizeof(sysctl_ip_vs_sync_threshold), .mode = 0644, .proc_handler = proc_do_sync_threshold, + .extra1 = &ip_vs_sync_threshold_min, + .extra2 = &ip_vs_sync_threshold_max, }, { .procname = "nat_icmp_send", diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e177f0d..d3322fb 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -438,7 +438,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + atomic_set(&cp->in_pkts, ip_vs_sync_threshold[0]); cp->state = state; cp->old_state = cp->state; /* -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html