On Tue, Jan 27, 2009 at 03:53:14PM -0800, Stephen Hemminger wrote: > Replace existing reader/writer lock with Read-Copy-Update to > elminate the overhead of a read lock on each incoming packet. > This should reduce the overhead of iptables especially on SMP > systems. > > The previous code used a reader-writer lock for two purposes. > The first was to ensure that the xt_table_info reference was not in > process of being changed. Since xt_table_info is only freed via one > routine, it was a direct conversion to RCU. > > The other use of the reader-writer lock was to to block changes > to counters while they were being read. This synchronization was > fixed by the previous patch. But still need to make sure table info > isn't going away. There are a couple of harmless races in reading > the counters, but we always give a valid values they just aren't > exactly frozen in time across all cpu's. Looks good from an RCU perspective! Reviewed-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx> > Signed-off-by: Stephen Hemminger <shemminger@xxxxxxxxxx> > > > --- > include/linux/netfilter/x_tables.h | 10 ++++++-- > net/ipv4/netfilter/arp_tables.c | 12 +++++----- > net/ipv4/netfilter/ip_tables.c | 12 +++++----- > net/ipv6/netfilter/ip6_tables.c | 12 +++++----- > net/netfilter/x_tables.c | 44 ++++++++++++++++++++++++++----------- > 5 files changed, 58 insertions(+), 32 deletions(-) > > --- a/include/linux/netfilter/x_tables.h 2009-01-27 15:33:10.791377313 -0800 > +++ b/include/linux/netfilter/x_tables.h 2009-01-27 15:46:26.024128742 -0800 > @@ -352,8 +352,8 @@ struct xt_table > /* What hooks you will enter on */ > unsigned int valid_hooks; > > - /* Lock for the curtain */ > - rwlock_t lock; > + /* Lock for curtain */ > + spinlock_t lock; > > /* Man behind the curtain... */ > struct xt_table_info *private; > @@ -383,6 +383,12 @@ struct xt_table_info > unsigned int hook_entry[NF_INET_NUMHOOKS]; > unsigned int underflow[NF_INET_NUMHOOKS]; > > + /* For the dustman... */ > + union { > + struct rcu_head rcu; > + struct work_struct work; > + }; > + > /* ipt_entry tables: one per CPU */ > /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ > void *entries[1]; > --- a/net/ipv4/netfilter/arp_tables.c 2009-01-27 15:45:34.566650540 -0800 > +++ b/net/ipv4/netfilter/arp_tables.c 2009-01-27 15:46:26.024128742 -0800 > @@ -237,8 +237,8 @@ unsigned int arpt_do_table(struct sk_buf > indev = in ? in->name : nulldevname; > outdev = out ? out->name : nulldevname; > > - read_lock_bh(&table->lock); > - private = table->private; > + rcu_read_lock_bh(); > + private = rcu_dereference(table->private); > table_base = (void *)private->entries[smp_processor_id()]; > e = get_entry(table_base, private->hook_entry[hook]); > back = get_entry(table_base, private->underflow[hook]); > @@ -313,7 +313,7 @@ unsigned int arpt_do_table(struct sk_buf > e = (void *)e + e->next_offset; > } > } while (!hotdrop); > - read_unlock_bh(&table->lock); > + rcu_read_unlock_bh(); > > if (hotdrop) > return NF_DROP; > @@ -1154,8 +1154,8 @@ static int do_add_counters(struct net *n > goto free; > } > > - write_lock_bh(&t->lock); > - private = t->private; > + rcu_read_lock_bh(); > + private = rcu_dereference(t->private); > if (private->number != num_counters) { > ret = -EINVAL; > goto unlock_up_free; > @@ -1170,7 +1170,7 @@ static int do_add_counters(struct net *n > paddc, > &i); > unlock_up_free: > - write_unlock_bh(&t->lock); > + rcu_read_unlock_bh(); > xt_table_unlock(t); > module_put(t->me); > free: > --- a/net/ipv4/netfilter/ip_tables.c 2009-01-27 15:45:05.766673246 -0800 > +++ b/net/ipv4/netfilter/ip_tables.c 2009-01-27 15:46:26.024128742 -0800 > @@ -347,9 +347,9 @@ ipt_do_table(struct sk_buff *skb, > mtpar.family = tgpar.family = NFPROTO_IPV4; > tgpar.hooknum = hook; > > - read_lock_bh(&table->lock); > + rcu_read_lock_bh(); > IP_NF_ASSERT(table->valid_hooks & (1 << hook)); > - private = table->private; > + private = rcu_dereference(table->private); > table_base = (void *)private->entries[smp_processor_id()]; > e = get_entry(table_base, private->hook_entry[hook]); > > @@ -447,7 +447,7 @@ ipt_do_table(struct sk_buff *skb, > } > } while (!hotdrop); > > - read_unlock_bh(&table->lock); > + rcu_read_unlock_bh(); > > #ifdef DEBUG_ALLOW_ALL > return NF_ACCEPT; > @@ -1399,8 +1399,8 @@ do_add_counters(struct net *net, void __ > goto free; > } > > - write_lock_bh(&t->lock); > - private = t->private; > + rcu_read_lock_bh(); > + private = rcu_dereference(t->private); > if (private->number != num_counters) { > ret = -EINVAL; > goto unlock_up_free; > @@ -1415,7 +1415,7 @@ do_add_counters(struct net *net, void __ > paddc, > &i); > unlock_up_free: > - write_unlock_bh(&t->lock); > + rcu_read_unlock_bh(); > xt_table_unlock(t); > module_put(t->me); > free: > --- a/net/ipv6/netfilter/ip6_tables.c 2009-01-27 15:45:22.262639173 -0800 > +++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-27 15:46:26.028128353 -0800 > @@ -373,9 +373,9 @@ ip6t_do_table(struct sk_buff *skb, > mtpar.family = tgpar.family = NFPROTO_IPV6; > tgpar.hooknum = hook; > > - read_lock_bh(&table->lock); > + rcu_read_lock_bh(); > IP_NF_ASSERT(table->valid_hooks & (1 << hook)); > - private = table->private; > + private = rcu_dereference(table->private); > table_base = (void *)private->entries[smp_processor_id()]; > e = get_entry(table_base, private->hook_entry[hook]); > > @@ -476,7 +476,7 @@ ip6t_do_table(struct sk_buff *skb, > #ifdef CONFIG_NETFILTER_DEBUG > ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; > #endif > - read_unlock_bh(&table->lock); > + rcu_read_unlock_bh(); > > #ifdef DEBUG_ALLOW_ALL > return NF_ACCEPT; > @@ -1430,8 +1430,8 @@ do_add_counters(struct net *net, void __ > goto free; > } > > - write_lock_bh(&t->lock); > - private = t->private; > + rcu_read_lock_bh(); > + private = rcu_dereference(t->private); > if (private->number != num_counters) { > ret = -EINVAL; > goto unlock_up_free; > @@ -1446,7 +1446,7 @@ do_add_counters(struct net *net, void __ > paddc, > &i); > unlock_up_free: > - write_unlock_bh(&t->lock); > + rcu_read_unlock_bh(); > xt_table_unlock(t); > module_put(t->me); > free: > --- a/net/netfilter/x_tables.c 2009-01-27 15:14:06.004743434 -0800 > +++ b/net/netfilter/x_tables.c 2009-01-27 15:48:07.194667206 -0800 > @@ -611,17 +611,37 @@ struct xt_table_info *xt_alloc_table_inf > } > EXPORT_SYMBOL(xt_alloc_table_info); > > -void xt_free_table_info(struct xt_table_info *info) > +/* callback to do free for vmalloc'd case */ > +static void xt_free_table_info_work(struct work_struct *arg) > +{ > + struct xt_table_info *info = container_of(arg, struct xt_table_info, work); > + unsigned int cpu; > + > + for_each_possible_cpu(cpu) > + vfree(info->entries[cpu]); > + kfree(info); > +} > + > +/* callback to do free after all cpu's are done */ > +static void xt_free_table_info_rcu(struct rcu_head *arg) > { > - int cpu; > + struct xt_table_info *info = container_of(arg, struct xt_table_info, rcu); > > - for_each_possible_cpu(cpu) { > - if (info->size <= PAGE_SIZE) > + if (info->size <= PAGE_SIZE) { > + unsigned int cpu; > + for_each_possible_cpu(cpu) > kfree(info->entries[cpu]); > - else > - vfree(info->entries[cpu]); > + kfree(info); > + } else { > + /* can't safely call vfree in current context */ > + INIT_WORK(&info->work, xt_free_table_info_work); > + schedule_work(&info->work); > } > - kfree(info); > +} > + > +void xt_free_table_info(struct xt_table_info *info) > +{ > + call_rcu(&info->rcu, xt_free_table_info_rcu); > } > EXPORT_SYMBOL(xt_free_table_info); > > @@ -671,20 +691,20 @@ xt_replace_table(struct xt_table *table, > struct xt_table_info *oldinfo, *private; > > /* Do the substitution. */ > - write_lock_bh(&table->lock); > + spin_lock_bh(&table->lock); > private = table->private; > /* Check inside lock: is the old number correct? */ > if (num_counters != private->number) { > duprintf("num_counters != table->private->number (%u/%u)\n", > num_counters, private->number); > - write_unlock_bh(&table->lock); > + spin_unlock_bh(&table->lock); > *error = -EAGAIN; > return NULL; > } > oldinfo = private; > - table->private = newinfo; > + rcu_assign_pointer(table->private, newinfo); > newinfo->initial_entries = oldinfo->initial_entries; > - write_unlock_bh(&table->lock); > + spin_unlock_bh(&table->lock); > > return oldinfo; > } > @@ -719,7 +739,7 @@ struct xt_table *xt_register_table(struc > > /* Simplifies replace_table code. */ > table->private = bootstrap; > - rwlock_init(&table->lock); > + spin_lock_init(&table->lock); > > if (!xt_replace_table(table, 0, newinfo, &ret)) > goto unlock; > > -- > > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html