Re: [PATCH v3] ipset: list:set: Decrease refcount synchronously on deletion and replace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Stefano,

On Sat, 14 Jul 2018, Stefano Brivio wrote:

> Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
> when flush/dump set in parallel") postponed decreasing set
> reference counters to the RCU callback.
> 
> An 'ipset del' command can terminate before the RCU grace period
> is elapsed, and if sets are listed before then, the reference
> counter shown in userspace will be wrong:
> 
>  # ipset create h hash:ip; ipset create l list:set; ipset add l
>  # ipset del l h; ipset list h
>  Name: h
>  Type: hash:ip
>  Revision: 4
>  Header: family inet hashsize 1024 maxelem 65536
>  Size in memory: 88
>  References: 1
>  Number of entries: 0
>  Members:
>  # sleep 1; ipset list h
>  Name: h
>  Type: hash:ip
>  Revision: 4
>  Header: family inet hashsize 1024 maxelem 65536
>  Size in memory: 88
>  References: 0
>  Number of entries: 0
>  Members:
> 
> Fix this by making the reference count update synchronous again.
> 
> As a result, when sets are listed, ip_set_name_byindex() might
> now fetch a set whose reference count is already zero. Instead
> of relying on the reference count to protect against concurrent
> set renaming, grab ip_set_ref_lock as reader and copy the name,
> while holding the same lock in ip_set_rename() as writer
> instead.

Thanks, patch is applied in the ipset git tree.

Best regards,
Jozsef

> Reported-by: Li Shuang <shuali@xxxxxxxxxx>
> Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
> Signed-off-by: Stefano Brivio <sbrivio@xxxxxxxxxx>
> ---
> v3:
>   - As suggested by Jozsef, don't hold ref_netlink in
>     ip_set_name_byindex(), grabbing ip_set_ref_lock as reader is
>     enough, if we hold ip_set_ref_lock as writer in ip_set_rename()
>   - Don't add __ip_set_get_netlink() back, we don't need it anymore
>   - Also as pointed out by Jozsef, in list_set_del() and
>     list_set_replace(), we should decrease the reference counter only
>     after we're done removing the set from or swapping it in the list
> 
> v2: As pointed out by Jozsef, we can't assume that the nfnl mutex
>     is taken while dumping, so we need some explicit locking
>     to protect names of list:set members against changes
> 
>  include/linux/netfilter/ipset/ip_set.h |  2 +-
>  net/netfilter/ipset/ip_set_core.c      | 23 +++++++++++------------
>  net/netfilter/ipset/ip_set_list_set.c  | 17 +++++++++++------
>  3 files changed, 23 insertions(+), 19 deletions(-)
> 
> diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
> index 34fc80f3eb90..1d100efe74ec 100644
> --- a/include/linux/netfilter/ipset/ip_set.h
> +++ b/include/linux/netfilter/ipset/ip_set.h
> @@ -314,7 +314,7 @@ enum {
>  extern ip_set_id_t ip_set_get_byname(struct net *net,
>  				     const char *name, struct ip_set **set);
>  extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
> -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
> +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
>  extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
>  extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
>  
> diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
> index bc4bd247bb7d..fa15a831aeee 100644
> --- a/net/netfilter/ipset/ip_set_core.c
> +++ b/net/netfilter/ipset/ip_set_core.c
> @@ -693,21 +693,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
>  EXPORT_SYMBOL_GPL(ip_set_put_byindex);
>  
>  /* Get the name of a set behind a set index.
> - * We assume the set is referenced, so it does exist and
> - * can't be destroyed. The set cannot be renamed due to
> - * the referencing either.
> - *
> + * Set itself is protected by RCU, but its name isn't: to protect against
> + * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
> + * name.
>   */
> -const char *
> -ip_set_name_byindex(struct net *net, ip_set_id_t index)
> +void
> +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
>  {
> -	const struct ip_set *set = ip_set_rcu_get(net, index);
> +	struct ip_set *set = ip_set_rcu_get(net, index);
>  
>  	BUG_ON(!set);
> -	BUG_ON(set->ref == 0);
>  
> -	/* Referenced, so it's safe */
> -	return set->name;
> +	read_lock_bh(&ip_set_ref_lock);
> +	strncpy(name, set->name, IPSET_MAXNAMELEN);
> +	read_unlock_bh(&ip_set_ref_lock);
>  }
>  EXPORT_SYMBOL_GPL(ip_set_name_byindex);
>  
> @@ -1153,7 +1152,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
>  	if (!set)
>  		return -ENOENT;
>  
> -	read_lock_bh(&ip_set_ref_lock);
> +	write_lock_bh(&ip_set_ref_lock);
>  	if (set->ref != 0) {
>  		ret = -IPSET_ERR_REFERENCED;
>  		goto out;
> @@ -1170,7 +1169,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
>  	strncpy(set->name, name2, IPSET_MAXNAMELEN);
>  
>  out:
> -	read_unlock_bh(&ip_set_ref_lock);
> +	write_unlock_bh(&ip_set_ref_lock);
>  	return ret;
>  }
>  
> diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
> index 072a658fde04..4eef55da0878 100644
> --- a/net/netfilter/ipset/ip_set_list_set.c
> +++ b/net/netfilter/ipset/ip_set_list_set.c
> @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
>  {
>  	struct set_elem *e = container_of(rcu, struct set_elem, rcu);
>  	struct ip_set *set = e->set;
> -	struct list_set *map = set->data;
>  
> -	ip_set_put_byindex(map->net, e->id);
>  	ip_set_ext_destroy(set, e);
>  	kfree(e);
>  }
> @@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu)
>  static inline void
>  list_set_del(struct ip_set *set, struct set_elem *e)
>  {
> +	struct list_set *map = set->data;
> +
>  	set->elements--;
>  	list_del_rcu(&e->list);
> +	ip_set_put_byindex(map->net, e->id);
>  	call_rcu(&e->rcu, __list_set_del_rcu);
>  }
>  
>  static inline void
> -list_set_replace(struct set_elem *e, struct set_elem *old)
> +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
>  {
> +	struct list_set *map = set->data;
> +
>  	list_replace_rcu(&old->list, &e->list);
> +	ip_set_put_byindex(map->net, old->id);
>  	call_rcu(&old->rcu, __list_set_del_rcu);
>  }
>  
> @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
>  	INIT_LIST_HEAD(&e->list);
>  	list_set_init_extensions(set, ext, e);
>  	if (n)
> -		list_set_replace(e, n);
> +		list_set_replace(set, e, n);
>  	else if (next)
>  		list_add_tail_rcu(&e->list, &next->list);
>  	else if (prev)
> @@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
>  	const struct list_set *map = set->data;
>  	struct nlattr *atd, *nested;
>  	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
> +	char name[IPSET_MAXNAMELEN];
>  	struct set_elem *e;
>  	int ret = 0;
>  
> @@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
>  		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
>  		if (!nested)
>  			goto nla_put_failure;
> -		if (nla_put_string(skb, IPSET_ATTR_NAME,
> -				   ip_set_name_byindex(map->net, e->id)))
> +		ip_set_name_byindex(map->net, e->id, name);
> +		if (nla_put_string(skb, IPSET_ATTR_NAME, name))
>  			goto nla_put_failure;
>  		if (ip_set_put_extensions(skb, set, e, true))
>  			goto nla_put_failure;
> -- 
> 2.15.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

-
E-mail  : kadlec@xxxxxxxxxxxxxxxxx, kadlecsik.jozsef@xxxxxxxxxxxxx
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Netfitler Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux