Hi Stefano, On Sat, 14 Jul 2018, Stefano Brivio wrote: > Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash > when flush/dump set in parallel") postponed decreasing set > reference counters to the RCU callback. > > An 'ipset del' command can terminate before the RCU grace period > is elapsed, and if sets are listed before then, the reference > counter shown in userspace will be wrong: > > # ipset create h hash:ip; ipset create l list:set; ipset add l > # ipset del l h; ipset list h > Name: h > Type: hash:ip > Revision: 4 > Header: family inet hashsize 1024 maxelem 65536 > Size in memory: 88 > References: 1 > Number of entries: 0 > Members: > # sleep 1; ipset list h > Name: h > Type: hash:ip > Revision: 4 > Header: family inet hashsize 1024 maxelem 65536 > Size in memory: 88 > References: 0 > Number of entries: 0 > Members: > > Fix this by making the reference count update synchronous again. > > As a result, when sets are listed, ip_set_name_byindex() might > now fetch a set whose reference count is already zero. Instead > of relying on the reference count to protect against concurrent > set renaming, grab ip_set_ref_lock as reader and copy the name, > while holding the same lock in ip_set_rename() as writer > instead. Thanks, patch is applied in the ipset git tree. Best regards, Jozsef > Reported-by: Li Shuang <shuali@xxxxxxxxxx> > Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") > Signed-off-by: Stefano Brivio <sbrivio@xxxxxxxxxx> > --- > v3: > - As suggested by Jozsef, don't hold ref_netlink in > ip_set_name_byindex(), grabbing ip_set_ref_lock as reader is > enough, if we hold ip_set_ref_lock as writer in ip_set_rename() > - Don't add __ip_set_get_netlink() back, we don't need it anymore > - Also as pointed out by Jozsef, in list_set_del() and > list_set_replace(), we should decrease the reference counter only > after we're done removing the set from or swapping it in the list > > v2: As pointed out by Jozsef, we can't assume that the nfnl mutex > is taken while dumping, so we need some explicit locking > to protect names of list:set members against changes > > include/linux/netfilter/ipset/ip_set.h | 2 +- > net/netfilter/ipset/ip_set_core.c | 23 +++++++++++------------ > net/netfilter/ipset/ip_set_list_set.c | 17 +++++++++++------ > 3 files changed, 23 insertions(+), 19 deletions(-) > > diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h > index 34fc80f3eb90..1d100efe74ec 100644 > --- a/include/linux/netfilter/ipset/ip_set.h > +++ b/include/linux/netfilter/ipset/ip_set.h > @@ -314,7 +314,7 @@ enum { > extern ip_set_id_t ip_set_get_byname(struct net *net, > const char *name, struct ip_set **set); > extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); > -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); > +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); > extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); > extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); > > diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c > index bc4bd247bb7d..fa15a831aeee 100644 > --- a/net/netfilter/ipset/ip_set_core.c > +++ b/net/netfilter/ipset/ip_set_core.c > @@ -693,21 +693,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) > EXPORT_SYMBOL_GPL(ip_set_put_byindex); > > /* Get the name of a set behind a set index. > - * We assume the set is referenced, so it does exist and > - * can't be destroyed. The set cannot be renamed due to > - * the referencing either. > - * > + * Set itself is protected by RCU, but its name isn't: to protect against > + * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the > + * name. > */ > -const char * > -ip_set_name_byindex(struct net *net, ip_set_id_t index) > +void > +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name) > { > - const struct ip_set *set = ip_set_rcu_get(net, index); > + struct ip_set *set = ip_set_rcu_get(net, index); > > BUG_ON(!set); > - BUG_ON(set->ref == 0); > > - /* Referenced, so it's safe */ > - return set->name; > + read_lock_bh(&ip_set_ref_lock); > + strncpy(name, set->name, IPSET_MAXNAMELEN); > + read_unlock_bh(&ip_set_ref_lock); > } > EXPORT_SYMBOL_GPL(ip_set_name_byindex); > > @@ -1153,7 +1152,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, > if (!set) > return -ENOENT; > > - read_lock_bh(&ip_set_ref_lock); > + write_lock_bh(&ip_set_ref_lock); > if (set->ref != 0) { > ret = -IPSET_ERR_REFERENCED; > goto out; > @@ -1170,7 +1169,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, > strncpy(set->name, name2, IPSET_MAXNAMELEN); > > out: > - read_unlock_bh(&ip_set_ref_lock); > + write_unlock_bh(&ip_set_ref_lock); > return ret; > } > > diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c > index 072a658fde04..4eef55da0878 100644 > --- a/net/netfilter/ipset/ip_set_list_set.c > +++ b/net/netfilter/ipset/ip_set_list_set.c > @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu) > { > struct set_elem *e = container_of(rcu, struct set_elem, rcu); > struct ip_set *set = e->set; > - struct list_set *map = set->data; > > - ip_set_put_byindex(map->net, e->id); > ip_set_ext_destroy(set, e); > kfree(e); > } > @@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu) > static inline void > list_set_del(struct ip_set *set, struct set_elem *e) > { > + struct list_set *map = set->data; > + > set->elements--; > list_del_rcu(&e->list); > + ip_set_put_byindex(map->net, e->id); > call_rcu(&e->rcu, __list_set_del_rcu); > } > > static inline void > -list_set_replace(struct set_elem *e, struct set_elem *old) > +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) > { > + struct list_set *map = set->data; > + > list_replace_rcu(&old->list, &e->list); > + ip_set_put_byindex(map->net, old->id); > call_rcu(&old->rcu, __list_set_del_rcu); > } > > @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, > INIT_LIST_HEAD(&e->list); > list_set_init_extensions(set, ext, e); > if (n) > - list_set_replace(e, n); > + list_set_replace(set, e, n); > else if (next) > list_add_tail_rcu(&e->list, &next->list); > else if (prev) > @@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set, > const struct list_set *map = set->data; > struct nlattr *atd, *nested; > u32 i = 0, first = cb->args[IPSET_CB_ARG0]; > + char name[IPSET_MAXNAMELEN]; > struct set_elem *e; > int ret = 0; > > @@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set, > nested = ipset_nest_start(skb, IPSET_ATTR_DATA); > if (!nested) > goto nla_put_failure; > - if (nla_put_string(skb, IPSET_ATTR_NAME, > - ip_set_name_byindex(map->net, e->id))) > + ip_set_name_byindex(map->net, e->id, name); > + if (nla_put_string(skb, IPSET_ATTR_NAME, name)) > goto nla_put_failure; > if (ip_set_put_extensions(skb, set, e, true)) > goto nla_put_failure; > -- > 2.15.1 > > -- > To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > - E-mail : kadlec@xxxxxxxxxxxxxxxxx, kadlecsik.jozsef@xxxxxxxxxxxxx PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences H-1525 Budapest 114, POB. 49, Hungary -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html