To avoid allocation of the conntrack extension area when possible, the default behaviour was changed to only allocate the event extension if a userspace program is subscribed to a notification group. Problem is that while 'conntrack -E' does enable the event allocation behind the scenes, 'conntrack -E expect' does not: no expectation events are delivered unless user sets "net.netfilter.nf_conntrack_events" back to 1 (always on). Fix the autodetection to also consider EXP type group. We need to track the 6 event groups (3+3, new/update/destroy for events and for expectations each) independently, else we'd disable events again if an expectation group becomes empty while there is still an active event group. Fixes: 2794cdb0b97b ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist") Reported-by: Yi Chen <yiche@xxxxxxxxxx> Signed-off-by: Florian Westphal <fw@xxxxxxxxx> --- include/net/netns/conntrack.h | 2 +- net/netfilter/nfnetlink.c | 83 ++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 13 deletions(-) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0677cd3de034..c396a3862e80 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -95,7 +95,7 @@ struct nf_ip_net { struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS - bool ctnetlink_has_listener; + u8 ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c24b1240908f..9c44518cb70f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); static unsigned int nfnetlink_pernet_id __read_mostly; +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static DEFINE_SPINLOCK(nfnl_grp_active_lock); +#endif + struct nfnl_net { struct sock *nfnl; }; @@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb) netlink_rcv_skb(skb, nfnetlink_rcv_msg); } +static void nfnetlink_bind_event(struct net *net, unsigned int group) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + int type, group_bit; + u8 v; + + /* All NFNLGRP_CONNTRACK_* group bits fit into u8. + * The other groups are not relevant and can be ignored. + */ + if (group >= 8) + return; + + type = nfnl_group2type[group]; + + switch (type) { + case NFNL_SUBSYS_CTNETLINK: + break; + case NFNL_SUBSYS_CTNETLINK_EXP: + break; + default: + return; + } + + group_bit = (1 << group); + + spin_lock(&nfnl_grp_active_lock); + v = READ_ONCE(net->ct.ctnetlink_has_listener); + if ((v & group_bit) == 0) { + v |= group_bit; + + /* read concurrently without nfnl_grp_active_lock held. */ + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); + } + + spin_unlock(&nfnl_grp_active_lock); +#endif +} + static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; @@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group) if (!ss) request_module_nowait("nfnetlink-subsys-%d", type); -#ifdef CONFIG_NF_CONNTRACK_EVENTS - if (type == NFNL_SUBSYS_CTNETLINK) { - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - WRITE_ONCE(net->ct.ctnetlink_has_listener, true); - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); - } -#endif + nfnetlink_bind_event(net, group); return 0; } static void nfnetlink_unbind(struct net *net, int group) { #ifdef CONFIG_NF_CONNTRACK_EVENTS + int type, group_bit; + if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) return; - if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) { - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - if (!nfnetlink_has_listeners(net, group)) - WRITE_ONCE(net->ct.ctnetlink_has_listener, false); - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + type = nfnl_group2type[group]; + + switch (type) { + case NFNL_SUBSYS_CTNETLINK: + break; + case NFNL_SUBSYS_CTNETLINK_EXP: + break; + default: + return; + } + + /* ctnetlink_has_listener is u8 */ + if (group >= 8) + return; + + group_bit = (1 << group); + + spin_lock(&nfnl_grp_active_lock); + if (!nfnetlink_has_listeners(net, group)) { + u8 v = READ_ONCE(net->ct.ctnetlink_has_listener); + + v &= ~group_bit; + + /* read concurrently without nfnl_grp_active_lock held. */ + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); } + spin_unlock(&nfnl_grp_active_lock); #endif } -- 2.35.1