Re: [PATCH] netfilter: remove extra timer from ecache extension

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Nov 30, 2012 at 09:16:05PM +0100, Florian Westphal wrote:
> This brings the (per-conntrack) ecache extension back to 24 bytes in
> size (was 112 byte on x86_64 with lockdep on).

I like this.

> Instead we use a per-ns tasklet to re-trigger event delivery.
> When we enqueue a ct entry into the dying list, the tasklet
> is scheduled.
> 
> The tasklet will then deliver up to 20 entries.  It will
> re-sched itself if not all the pending events could be delivered.

I would like to give a test to this patch in my testbed.

And I wonder if we can make it better with some timer-based garbage
collector that randomly / adaptively runs to give tries to deliver
events.

I remember that insisting too often in the delivery of missed events
does not make any good.

> While at it, dying list handling is moved into ecache.c, since its
> only revlevant if ct events are enabled.

I like this too. That code really belongs where you moved it.

> Signed-off-by: Florian Westphal <fw@xxxxxxxxx>
> ---
>  Note: Conflicts with "improve conntrack object traceability".
> 
>  The patch assumes the dying list only contains entries where the delete
>  event has not been delivered yet.
> 
>  With that patch, all conntracks are put on the dying list, including
>  those who are about to be free'd.
> 
>  I THINK that this is fixable by skipping dying-list entries with
>  IPS_DYING_BIT set.  However, this will increase the tasket workload.

I think you will mostly find entries that are waiting for its event to
be delivered. So playing with IPS_DYING_BIT seems the right way to go
to me.

>
>  Any other ideas?
> 
>  include/net/netfilter/nf_conntrack.h        |    1 -
>  include/net/netfilter/nf_conntrack_ecache.h |    9 +++-
>  include/net/netns/conntrack.h               |    5 +-
>  net/netfilter/nf_conntrack_core.c           |   58 -------------------
>  net/netfilter/nf_conntrack_ecache.c         |   80 +++++++++++++++++++++++---
>  5 files changed, 82 insertions(+), 71 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
> index f1494fe..e1cc862 100644
> --- a/include/net/netfilter/nf_conntrack.h
> +++ b/include/net/netfilter/nf_conntrack.h
> @@ -182,7 +182,6 @@ __nf_conntrack_find(struct net *net, u16 zone,
>  
>  extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
>  extern void nf_ct_delete_from_lists(struct nf_conn *ct);
> -extern void nf_ct_insert_dying_list(struct nf_conn *ct);
>  
>  extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
>  
> diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
> index 5654d29..4b31c00 100644
> --- a/include/net/netfilter/nf_conntrack_ecache.h
> +++ b/include/net/netfilter/nf_conntrack_ecache.h
> @@ -18,7 +18,6 @@ struct nf_conntrack_ecache {
>  	u16 ctmask;		/* bitmask of ct events to be delivered */
>  	u16 expmask;		/* bitmask of expect events to be delivered */
>  	u32 portid;		/* netlink portid of destroyer */
> -	struct timer_list timeout;
>  };
>  
>  static inline struct nf_conntrack_ecache *
> @@ -207,6 +206,8 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event,
>  	nf_ct_expect_event_report(event, exp, 0, 0);
>  }
>  
> +extern void nf_ct_insert_dying_list(struct nf_conn *ct);
> +extern void nf_ct_release_dying_list(struct net *net);
>  extern int nf_conntrack_ecache_init(struct net *net);
>  extern void nf_conntrack_ecache_fini(struct net *net);
>  
> @@ -232,6 +233,12 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
>   					     u32 portid,
>   					     int report) {}
>  
> +static inline void nf_ct_insert_dying_list(struct nf_conn *ct)
> +{
> +	WARN_ON_ONCE(1); /* never called when CONFIG_NF_CONNTRACK_EVENTS=n */
> +}
> +static inline void nf_ct_release_dying_list(struct net *net) {}
> +
>  static inline int nf_conntrack_ecache_init(struct net *net)
>  {
>  	return 0;
> diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
> index a1d83cc..0cef968 100644
> --- a/include/net/netns/conntrack.h
> +++ b/include/net/netns/conntrack.h
> @@ -4,6 +4,7 @@
>  #include <linux/list.h>
>  #include <linux/list_nulls.h>
>  #include <linux/atomic.h>
> +#include <linux/interrupt.h>
>  #include <linux/netfilter/nf_conntrack_tcp.h>
>  
>  struct ctl_table_header;
> @@ -71,11 +72,13 @@ struct netns_ct {
>  	struct hlist_head	*expect_hash;
>  	struct hlist_nulls_head	unconfirmed;
>  	struct hlist_nulls_head	dying;
> +#ifdef CONFIG_NF_CONNTRACK_EVENTS
> +	struct tasklet_struct dying_tasklet;
> +#endif
>  	struct ip_conntrack_stat __percpu *stat;
>  	struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
>  	struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
>  	int			sysctl_events;
> -	unsigned int		sysctl_events_retry_timeout;
>  	int			sysctl_acct;
>  	int			sysctl_tstamp;
>  	int			sysctl_checksum;
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index 0f241be..09c9046 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -251,49 +251,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
>  }
>  EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
>  
> -static void death_by_event(unsigned long ul_conntrack)
> -{
> -	struct nf_conn *ct = (void *)ul_conntrack;
> -	struct net *net = nf_ct_net(ct);
> -	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
> -
> -	BUG_ON(ecache == NULL);
> -
> -	if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
> -		/* bad luck, let's retry again */
> -		ecache->timeout.expires = jiffies +
> -			(random32() % net->ct.sysctl_events_retry_timeout);
> -		add_timer(&ecache->timeout);
> -		return;
> -	}
> -	/* we've got the event delivered, now it's dying */
> -	set_bit(IPS_DYING_BIT, &ct->status);
> -	spin_lock(&nf_conntrack_lock);
> -	hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
> -	spin_unlock(&nf_conntrack_lock);
> -	nf_ct_put(ct);
> -}
> -
> -void nf_ct_insert_dying_list(struct nf_conn *ct)
> -{
> -	struct net *net = nf_ct_net(ct);
> -	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
> -
> -	BUG_ON(ecache == NULL);
> -
> -	/* add this conntrack to the dying list */
> -	spin_lock_bh(&nf_conntrack_lock);
> -	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
> -			     &net->ct.dying);
> -	spin_unlock_bh(&nf_conntrack_lock);
> -	/* set a new timer to retry event delivery */
> -	setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
> -	ecache->timeout.expires = jiffies +
> -		(random32() % net->ct.sysctl_events_retry_timeout);
> -	add_timer(&ecache->timeout);
> -}
> -EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
> -
>  static void death_by_timeout(unsigned long ul_conntrack)
>  {
>  	struct nf_conn *ct = (void *)ul_conntrack;
> @@ -1311,21 +1268,6 @@ void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
>  }
>  EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
>  
> -static void nf_ct_release_dying_list(struct net *net)
> -{
> -	struct nf_conntrack_tuple_hash *h;
> -	struct nf_conn *ct;
> -	struct hlist_nulls_node *n;
> -
> -	spin_lock_bh(&nf_conntrack_lock);
> -	hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
> -		ct = nf_ct_tuplehash_to_ctrack(h);
> -		/* never fails to remove them, no listeners at this point */
> -		nf_ct_kill(ct);
> -	}
> -	spin_unlock_bh(&nf_conntrack_lock);
> -}
> -
>  static int untrack_refs(void)
>  {
>  	int cnt = 0, cpu;
> diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
> index de9781b..5bc0403 100644
> --- a/net/netfilter/nf_conntrack_ecache.c
> +++ b/net/netfilter/nf_conntrack_ecache.c
> @@ -27,6 +27,38 @@
>  
>  static DEFINE_MUTEX(nf_ct_ecache_mutex);
>  
> +static void dying_tasklet_retry_events(unsigned long ctnetns)
> +{
> +	struct netns_ct *ctnet = (void *) ctnetns;
> +	struct nf_conntrack_tuple_hash *h;
> +	struct hlist_nulls_node *n;
> +	struct nf_conn *ct;
> +	int err = 0;
> +	int budget = 20;
> +
> +	hlist_nulls_for_each_entry(h, n, &ctnet->dying, hnnode) {
> +		ct = nf_ct_tuplehash_to_ctrack(h);
> +
> +		err = nf_conntrack_event(IPCT_DESTROY, ct);
> +		if (err)
> +			break;
> +		spin_lock_bh(&nf_conntrack_lock);
> +		hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
> +		spin_unlock_bh(&nf_conntrack_lock);
> +
> +		/* we've got the event delivered, now it's dying */
> +		set_bit(IPS_DYING_BIT, &ct->status);
> +
> +		nf_ct_put(ct);
> +		if (--budget == 0)
> +			break;
> +	}
> +
> +	/* err or budget exhausted? -> dying list not empty -- resched. */
> +	if (err || budget == 0)
> +		tasklet_schedule(&ctnet->dying_tasklet);
> +}
> +
>  /* deliver cached events and clear cache entry - must be called with locally
>   * disabled softirqs */
>  void nf_ct_deliver_cached_events(struct nf_conn *ct)
> @@ -81,6 +113,39 @@ out_unlock:
>  }
>  EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
>  
> +void nf_ct_insert_dying_list(struct nf_conn *ct)
> +{
> +	struct net *net = nf_ct_net(ct);
> +
> +	/* add this conntrack to the dying list */
> +	spin_lock_bh(&nf_conntrack_lock);
> +	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
> +			     &net->ct.dying);
> +	spin_unlock_bh(&nf_conntrack_lock);
> +
> +	/* retry event delivery */
> +	tasklet_schedule(&net->ct.dying_tasklet);
> +}
> +EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
> +
> +void nf_ct_release_dying_list(struct net *net)
> +{
> +	struct nf_conntrack_tuple_hash *h;
> +	struct nf_conn *ct;
> +	struct hlist_nulls_node *n;
> +
> +	/* no listeners at this point */
> +	hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
> +		ct = nf_ct_tuplehash_to_ctrack(h);
> +
> +		spin_lock_bh(&nf_conntrack_lock);
> +		hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
> +		spin_unlock_bh(&nf_conntrack_lock);
> +
> +		nf_ct_put(ct);
> +	}
> +}
> +
>  int nf_conntrack_register_notifier(struct net *net,
>  				   struct nf_ct_event_notifier *new)
>  {
> @@ -155,7 +220,6 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
>  
>  #define NF_CT_EVENTS_DEFAULT 1
>  static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
> -static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
>  
>  #ifdef CONFIG_SYSCTL
>  static struct ctl_table event_sysctl_table[] = {
> @@ -166,13 +230,6 @@ static struct ctl_table event_sysctl_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec,
>  	},
> -	{
> -		.procname	= "nf_conntrack_events_retry_timeout",
> -		.data		= &init_net.ct.sysctl_events_retry_timeout,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_jiffies,
> -	},
>  	{}
>  };
>  #endif /* CONFIG_SYSCTL */
> @@ -194,7 +251,6 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
>  		goto out;
>  
>  	table[0].data = &net->ct.sysctl_events;
> -	table[1].data = &net->ct.sysctl_events_retry_timeout;
>  
>  	net->ct.event_sysctl_header =
>  		register_net_sysctl(net, "net/netfilter", table);
> @@ -234,7 +290,9 @@ int nf_conntrack_ecache_init(struct net *net)
>  	int ret;
>  
>  	net->ct.sysctl_events = nf_ct_events;
> -	net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
> +
> +	tasklet_init(&net->ct.dying_tasklet, dying_tasklet_retry_events,
> +						(unsigned long) &net->ct);
>  
>  	if (net_eq(net, &init_net)) {
>  		ret = nf_ct_extend_register(&event_extend);
> @@ -260,6 +318,8 @@ out_extend_register:
>  
>  void nf_conntrack_ecache_fini(struct net *net)
>  {
> +	tasklet_kill(&net->ct.dying_tasklet);
> +
>  	nf_conntrack_event_fini_sysctl(net);
>  	if (net_eq(net, &init_net))
>  		nf_ct_extend_unregister(&event_extend);
> -- 
> 1.7.8.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Netfitler Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux