Re: [PATCH v2] ipset: list:set: Decrease refcount synchronously on deletion and replace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Stefano,

On Sun, 8 Jul 2018, Stefano Brivio wrote:

> Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
> when flush/dump set in parallel") postponed decreasing set
> reference counters to the RCU callback.
> 
> An 'ipset del' command can terminate before the RCU grace period
> is elapsed, and if sets are listed before then, the reference
> counter shown in userspace will be wrong:
> 
>  # ipset create h hash:ip; ipset create l list:set; ipset add l
>  # ipset del l h; ipset list h
>  Name: h
>  Type: hash:ip
>  Revision: 4
>  Header: family inet hashsize 1024 maxelem 65536
>  Size in memory: 88
>  References: 1
>  Number of entries: 0
>  Members:
>  # sleep 1; ipset list h
>  Name: h
>  Type: hash:ip
>  Revision: 4
>  Header: family inet hashsize 1024 maxelem 65536
>  Size in memory: 88
>  References: 0
>  Number of entries: 0
>  Members:
> 
> Fix this by making the reference count update synchronous again.
> 
> As a result, when sets are listed, ip_set_name_byindex() might
> now fetch a set whose reference count is already zero. Instead
> of relying on the reference count to protect against concurrent
> set renaming, grab the netlink refcount while copying the name,
> and avoid renaming if the netlink refcount is taken, in a
> similar way as it's already done for swap and delete operations.
> 
> This also reverts commit db268d4dfdb9 ("ipset: remove unused
> function __ip_set_get_netlink") and restores that function
> since we now need it.
> 
> Reported-by: Li Shuang <shuali@xxxxxxxxxx>
> Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
> Signed-off-by: Stefano Brivio <sbrivio@xxxxxxxxxx>
> ---
> v2: As pointed out by Joszef, we can't assume that the nfnl mutex
>     is taken while dumping, so we need some explicit locking
>     to protect names of list:set members against changes
> 
>  include/linux/netfilter/ipset/ip_set.h |  2 +-
>  net/netfilter/ipset/ip_set_core.c      | 31 +++++++++++++++++++++----------
>  net/netfilter/ipset/ip_set_list_set.c  | 17 +++++++++++------
>  3 files changed, 33 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
> index 34fc80f3eb90..1d100efe74ec 100644
> --- a/include/linux/netfilter/ipset/ip_set.h
> +++ b/include/linux/netfilter/ipset/ip_set.h
> @@ -314,7 +314,7 @@ enum {
>  extern ip_set_id_t ip_set_get_byname(struct net *net,
>  				     const char *name, struct ip_set **set);
>  extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
> -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
> +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
>  extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
>  extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
>  
> diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
> index bc4bd247bb7d..4062c0396a02 100644
> --- a/net/netfilter/ipset/ip_set_core.c
> +++ b/net/netfilter/ipset/ip_set_core.c
> @@ -527,6 +527,15 @@ __ip_set_put(struct ip_set *set)
>  /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
>   * a separate reference counter
>   */
> +static inline void
> +__ip_set_get_netlink(struct ip_set *set)
> +{
> +	write_lock_bh(&ip_set_ref_lock);
> +	BUG_ON(set->ref_netlink != 0);
> +	set->ref_netlink++;
> +	write_unlock_bh(&ip_set_ref_lock);
> +}
> +
>  static inline void
>  __ip_set_put_netlink(struct ip_set *set)
>  {
> @@ -693,21 +702,19 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
>  EXPORT_SYMBOL_GPL(ip_set_put_byindex);
>  
>  /* Get the name of a set behind a set index.
> - * We assume the set is referenced, so it does exist and
> - * can't be destroyed. The set cannot be renamed due to
> - * the referencing either.
> - *
> + * Set itself is protected by RCU, but its name isn't: to protect against
> + * renaming, grab its netlink refcount (see ip_set_rename()) and copy it.
>   */
> -const char *
> -ip_set_name_byindex(struct net *net, ip_set_id_t index)
> +void
> +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
>  {
> -	const struct ip_set *set = ip_set_rcu_get(net, index);
> +	struct ip_set *set = ip_set_rcu_get(net, index);
>  
>  	BUG_ON(!set);
> -	BUG_ON(set->ref == 0);
>  
> -	/* Referenced, so it's safe */
> -	return set->name;
> +	__ip_set_get_netlink(set);
> +	strncpy(name, set->name, IPSET_MAXNAMELEN);
> +	__ip_set_put_netlink(set);
>  }
>  EXPORT_SYMBOL_GPL(ip_set_name_byindex);

These are not required: when we call ip_set_name_byindex(), the set is 
still referenced (set->ref cannot be zero), because it's the member of the
list type of set.
  
> @@ -1158,6 +1165,10 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
>  		ret = -IPSET_ERR_REFERENCED;
>  		goto out;
>  	}
> +	if (set->ref_netlink != 0) {
> +		ret = -EBUSY;
> +		goto out;
> +	}

This is also not required: the set is protected by the normal reference 
counter, so it cannot be renamed. When the set is just deleted from the 
list type of set, it can again be renamed :-).
  
>  	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
>  	for (i = 0; i < inst->ip_set_max; i++) {
> diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
> index 072a658fde04..3a39f20ba6fd 100644
> --- a/net/netfilter/ipset/ip_set_list_set.c
> +++ b/net/netfilter/ipset/ip_set_list_set.c
> @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
>  {
>  	struct set_elem *e = container_of(rcu, struct set_elem, rcu);
>  	struct ip_set *set = e->set;
> -	struct list_set *map = set->data;
>  
> -	ip_set_put_byindex(map->net, e->id);
>  	ip_set_ext_destroy(set, e);
>  	kfree(e);
>  }
> @@ -158,14 +156,20 @@ __list_set_del_rcu(struct rcu_head * rcu)
>  static inline void
>  list_set_del(struct ip_set *set, struct set_elem *e)
>  {
> +	struct list_set *map = set->data;
> +
>  	set->elements--;
> +	ip_set_put_byindex(map->net, e->id);
>  	list_del_rcu(&e->list);
>  	call_rcu(&e->rcu, __list_set_del_rcu);
>  }
>  
>  static inline void
> -list_set_replace(struct set_elem *e, struct set_elem *old)
> +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
>  {
> +	struct list_set *map = set->data;
> +
> +	ip_set_put_byindex(map->net, old->id);
>  	list_replace_rcu(&old->list, &e->list);
>  	call_rcu(&old->rcu, __list_set_del_rcu);
>  }
> @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
>  	INIT_LIST_HEAD(&e->list);
>  	list_set_init_extensions(set, ext, e);
>  	if (n)
> -		list_set_replace(e, n);
> +		list_set_replace(set, e, n);
>  	else if (next)
>  		list_add_tail_rcu(&e->list, &next->list);
>  	else if (prev)

I believe the only modifications which are required are up till this point 
in ip_set_list_set.c: ip_set_put_byindex(), moved outside of the 
call_rcu() call. However, the ip_set_put_byindex() calls should be placed 
after list_del_rcu() and list_replace_rcu().

> @@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
>  	const struct list_set *map = set->data;
>  	struct nlattr *atd, *nested;
>  	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
> +	char name[IPSET_MAXNAMELEN];
>  	struct set_elem *e;
>  	int ret = 0;
>  
> @@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
>  		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
>  		if (!nested)
>  			goto nla_put_failure;
> -		if (nla_put_string(skb, IPSET_ATTR_NAME,
> -				   ip_set_name_byindex(map->net, e->id)))
> +		ip_set_name_byindex(map->net, e->id, name);
> +		if (nla_put_string(skb, IPSET_ATTR_NAME, name))
>  			goto nla_put_failure;
>  		if (ip_set_put_extensions(skb, set, e, true))
>  			goto nla_put_failure;

And these lines are not required either.

Best regards,
Jozsef
-
E-mail  : kadlec@xxxxxxxxxxxxxxxxx, kadlecsik.jozsef@xxxxxxxxxxxxx
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Netfitler Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux