Re: [PATCH 10/14] netfilter: ipset: Introduce RCU locking in the list type

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2 Dec 2014, Pablo Neira Ayuso wrote:

> On Sun, Nov 30, 2014 at 07:57:01PM +0100, Jozsef Kadlecsik wrote:
> > Signed-off-by: Jozsef Kadlecsik <kadlec@xxxxxxxxxxxxxxxxx>
> > ---
> >  net/netfilter/ipset/ip_set_list_set.c | 386 ++++++++++++++++------------------
> >  1 file changed, 182 insertions(+), 204 deletions(-)
> > 
> > diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
> > index f8f6828..323115a 100644
> > --- a/net/netfilter/ipset/ip_set_list_set.c
> > +++ b/net/netfilter/ipset/ip_set_list_set.c
> > @@ -9,6 +9,7 @@
> >  
> >  #include <linux/module.h>
> >  #include <linux/ip.h>
> > +#include <linux/rculist.h>
> >  #include <linux/skbuff.h>
> >  #include <linux/errno.h>
> >  
> > @@ -27,6 +28,8 @@ MODULE_ALIAS("ip_set_list:set");
> >  
> >  /* Member elements  */
> >  struct set_elem {
> > +	struct rcu_head rcu;
> > +	struct list_head list;
> 
> I think rcu_barrier() in the module removal path is missing to make
> sure call_rcu() is called before the module is gone.

The module can be removed only when there isn't a single set of the given 
type. That means there are no elements to be removed by kfree_rcu(). 
Therefore I think rcu_barrier() is not required in the module removal 
path.

> > @@ -91,13 +88,9 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
> >  {
> >  	struct list_set *map = set->data;
> >  	struct set_elem *e;
> > -	u32 i;
> >  	int ret;
> >  
> > -	for (i = 0; i < map->size; i++) {
> > -		e = list_set_elem(set, map, i);
> > -		if (e->id == IPSET_INVALID_ID)
> > -			return 0;
> > +	list_for_each_entry_rcu(e, &map->members, list) {
> 
> >From net/netfilter/ipset/ip_set_core.c I can see this kadd() will be
> called under spin_lock_bh(), so you can just use
> list_for_each_entry(). The _rcu() variant protects the reader side,
> but this code is only invoked from the writer side (no changes are
> guaranteed to happen there).

Yes, you are right! I'll correct it.

> >  static int
> >  list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
> > @@ -282,60 +240,82 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
> >  {
> >  	struct list_set *map = set->data;
> >  	struct set_adt_elem *d = value;
> > -	struct set_elem *e;
> > +	struct set_elem *e, *n, *prev, *next;
> >  	bool flag_exist = flags & IPSET_FLAG_EXIST;
> > -	u32 i, ret = 0;
> >  
> >  	if (SET_WITH_TIMEOUT(set))
> >  		set_cleanup_entries(set);
> >  
> > -	/* Check already added element */
> > -	for (i = 0; i < map->size; i++) {
> > -		e = list_set_elem(set, map, i);
> > -		if (e->id == IPSET_INVALID_ID)
> > -			goto insert;
> > -		else if (e->id != d->id)
> > +	/* Find where to add the new entry */
> > +	n = prev = next = NULL;
> > +	list_for_each_entry(e, &map->members, list) {
> > +		if (SET_WITH_TIMEOUT(set) &&
> > +		    ip_set_timeout_expired(ext_timeout(e, set)))
> >  			continue;
> > -
> > -		if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) ||
> > -		    (d->before < 0 &&
> > -		     (i == 0 || !id_eq(set, i - 1, d->refid))))
> > -			/* Before/after doesn't match */
> > +		else if (d->id == e->id)
> > +			n = e;
> > +		else if (d->before == 0 || e->id != d->refid)
> > +			continue;
> > +		else if (d->before > 0)
> > +			next = e;
> > +		else
> > +			prev = e;
> > +	}
> > +	/* Re-add already existing element */
> > +	if (n) {
> > +		if ((d->before > 0 && !next) ||
> > +		    (d->before < 0 && !prev))
> >  			return -IPSET_ERR_REF_EXIST;
> >  		if (!flag_exist)
> > -			/* Can't re-add */
> >  			return -IPSET_ERR_EXIST;
> >  		/* Update extensions */
> > -		ip_set_ext_destroy(set, e);
> > +		ip_set_ext_destroy(set, n);
> > +		list_set_init_extensions(set, ext, n);
> >  
> > -		if (SET_WITH_TIMEOUT(set))
> > -			ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
> > -		if (SET_WITH_COUNTER(set))
> > -			ip_set_init_counter(ext_counter(e, set), ext);
> > -		if (SET_WITH_COMMENT(set))
> > -			ip_set_init_comment(ext_comment(e, set), ext);
> > -		if (SET_WITH_SKBINFO(set))
> > -			ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
> >  		/* Set is already added to the list */
> >  		ip_set_put_byindex(map->net, d->id);
> >  		return 0;
> >  	}
> > -insert:
> > -	ret = -IPSET_ERR_LIST_FULL;
> > -	for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
> > -		e = list_set_elem(set, map, i);
> > -		if (e->id == IPSET_INVALID_ID)
> > -			ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
> > -				: list_set_add(set, i, d, ext);
> > -		else if (e->id != d->refid)
> > -			continue;
> > -		else if (d->before > 0)
> > -			ret = list_set_add(set, i, d, ext);
> > -		else if (i + 1 < map->size)
> > -			ret = list_set_add(set, i + 1, d, ext);
> > +	/* Add new entry */
> > +	if (d->before == 0) {
> > +		/* Append  */
> > +		n = list_empty(&map->members) ? NULL :
> > +		    list_last_entry(&map->members, struct set_elem, list);
> > +	} else if (d->before > 0) {
> > +		/* Insert after next element */
> > +		if (!list_is_last(&next->list, &map->members))
> > +			n = list_next_entry(next, list);
> > +	} else {
> > +		/* Insert before prev element */
> > +		if (prev->list.prev != &map->members)
> > +			n = list_prev_entry(prev, list);
> >  	}
> > -
> > -	return ret;
> > +	/* Can we replace a timed out entry? */
> > +	if (n != NULL &&
> > +	    !(SET_WITH_TIMEOUT(set) &&
> > +	      ip_set_timeout_expired(ext_timeout(n, set))))
> > +		n =  NULL;
> > +
> > +	e = kzalloc(set->dsize, GFP_KERNEL);
> > +	if (!e)
> > +		return -ENOMEM;
> > +	e->id = d->id;
> > +	INIT_LIST_HEAD(&e->list);
> > +	list_set_init_extensions(set, ext, e);
> > +	if (n)
> > +		list_set_replace(set, e, n);
> > +	else if (next)
> > +		list_add_tail_rcu(&e->list, &next->list);
> > +	else if (prev)
> > +		list_add_rcu(&e->list, &prev->list);
> > +	else
> > +		list_add_tail_rcu(&e->list, &map->members);
> > +	spin_unlock_bh(&set->lock);
> > +
> > +	synchronize_rcu_bh();
> 
> I suspect you don't need this. What is your intention here?

Here the userspace adds/deletes/replaces an element in the list type of 
set and in the meantime the kernel module can traverse the same linked 
list. In the replace case we remove and delete the old entry, therefore 
the call to synchronize_rcu_bh(). That could be called from a condition 
then, to express the case.

Best regards,
Jozsef
-
E-mail  : kadlec@xxxxxxxxxxxxxxxxx, kadlecsik.jozsef@xxxxxxxxxxxxx
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Netfitler Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux