Re: [RFC PATCH] netfilter: call synchronize_net only once from nf_register_net_hooks

Giuseppe Scrivano <gscrivan@xxxxxxxxxx> wrote:
> Florian Westphal <fw@xxxxxxxxx> writes:
> 
> > Giuseppe Scrivano <gscrivan@xxxxxxxxxx> wrote:
> >> SELinux, if enabled, registers 6 netfilter hooks for each new network
> >> namespace.  Instead of calling synchronize_net for each new hook,
> >> call it once after all the hooks are added.  The net benefit on an SMP
> >> machine with two cores is that creating a new network namespace takes
> >> about 40% less time.
> >
> > but this needs more work.
> >
> >> Signed-off-by: Giuseppe Scrivano <gscrivan@xxxxxxxxxx>
> >> ---
> >>  net/netfilter/core.c | 15 ++++++++++++---
> >>  1 file changed, 12 insertions(+), 3 deletions(-)
> >> 
> >> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
> >> index 52cd2901a097..beeb0b36f429 100644
> >> --- a/net/netfilter/core.c
> >> +++ b/net/netfilter/core.c
> >> @@ -252,7 +252,7 @@ static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const
> >>  	return NULL;
> >>  }
> >>  
> >> -int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
> >> +static int __nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
> >
> > Change this to return struct nf_hook_entries *
> 
> Thanks for the quick review.  Are you fine with changing it to:
> 
> static int __nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg, struct nf_hook_entries **old)

You can use ERR_PTR/PTR_ERR instead.
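
Something like this (untested, just to illustrate the calling convention;
the body is basically the current nf_register_net_hook with the free
removed, so treat the helper names as a sketch):

static struct nf_hook_entries *
__nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
	struct nf_hook_entries *p, *new_hooks;
	struct nf_hook_entries __rcu **pp;

	pp = nf_hook_entry_head(net, reg);
	if (!pp)
		return ERR_PTR(-EINVAL);

	mutex_lock(&nf_hook_mutex);
	p = nf_entry_dereference(*pp);
	new_hooks = nf_hook_entries_grow(p, reg);
	if (!IS_ERR(new_hooks))
		rcu_assign_pointer(*pp, new_hooks);
	mutex_unlock(&nf_hook_mutex);

	if (IS_ERR(new_hooks))
		return ERR_CAST(new_hooks);

	/* old entries (may be NULL); caller frees them after a single
	 * synchronize_net() or via call_rcu, instead of once per hook
	 */
	return p;
}

The caller checks IS_ERR()/PTR_ERR() on the return value and collects
the old entries for freeing; no extra output parameter needed.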

However, I suggest you try to go with call_rcu to get rid of all of the
synchronize_net() calls. I don't even see why it's still needed for the
nfqueue case, provided we invoke the nfqueue drop handler first.

PoC example, untested:

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -77,6 +77,11 @@ struct nf_hook_entry {
 	void				*priv;
 };
 
+struct nf_hook_entries_rcu_head {
+	struct rcu_head head;
+	void	*allocation;
+};
+
 struct nf_hook_entries {
 	u16				num_hook_entries;
 	/* padding */
@@ -87,10 +92,13 @@ struct nf_hook_entries {
 	 * This is not part of struct nf_hook_entry since its only
 	 * needed in slow path (hook register/unregister).
 	 *
-	 * const struct nf_hook_ops     *orig_ops[]
+	 * const struct nf_hook_ops     *orig_ops[];
+	 * struct nf_hook_entries_rcu_head;
 	 */
 };
 
+void nf_hook_entries_free_rcu(struct nf_hook_entries *e);
+
 static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e)
 {
 	unsigned int n = e->num_hook_entries;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -69,22 +69,61 @@ static DEFINE_MUTEX(nf_hook_mutex);
 #define nf_entry_dereference(e) \
 	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
+static struct nf_hook_entries_rcu_head *
+nf_hook_entries_rcu_head_get(struct nf_hook_entries *e)
+{
+	unsigned int num = e->num_hook_entries;
+	struct nf_hook_entries_rcu_head *head;
+	struct nf_hook_ops **ops;
+
+	ops = nf_hook_entries_get_hook_ops(e);
+	head = (void *)&ops[num];
+
+	return head;
+}
+
 static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 {
 	struct nf_hook_entries *e;
 	size_t alloc = sizeof(*e) +
 		       sizeof(struct nf_hook_entry) * num +
-		       sizeof(struct nf_hook_ops *) * num;
+		       sizeof(struct nf_hook_ops *) * num +
+		       sizeof(struct nf_hook_entries_rcu_head);
 
 	if (num == 0)
 		return NULL;
 
 	e = kvzalloc(alloc, GFP_KERNEL);
-	if (e)
+	if (e) {
+		struct nf_hook_entries_rcu_head *head;
+
 		e->num_hook_entries = num;
+
+		head = nf_hook_entries_rcu_head_get(e);
+		head->allocation = e;
+	}
 	return e;
 }
 
+static void __nf_hook_entries_free(struct rcu_head *h)
+{
+	struct nf_hook_entries_rcu_head *head;
+
+	head = container_of(h, struct nf_hook_entries_rcu_head, head);
+	kvfree(head->allocation);
+}
+
+void nf_hook_entries_free_rcu(struct nf_hook_entries *e)
+{
+	struct nf_hook_entries_rcu_head *head;
+
+	head = nf_hook_entries_rcu_head_get(e);
+	if (WARN_ON(e != head->allocation))
+		return;
+
+	call_rcu(&head->head, __nf_hook_entries_free);
+}
+
 static unsigned int accept_all(void *priv,
 			       struct sk_buff *skb,
 			       const struct nf_hook_state *state)
@@ -291,9 +330,9 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 #ifdef HAVE_JUMP_LABEL
 	static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
-	synchronize_net();
 	BUG_ON(p == new_hooks);
-	kvfree(p);
+	if (p)
+		nf_hook_entries_free_rcu(p);
 	return 0;
 }
 EXPORT_SYMBOL(nf_register_net_hook);
@@ -341,7 +380,6 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
 	struct nf_hook_entries __rcu **pp;
 	struct nf_hook_entries *p;
-	unsigned int nfq;
 
 	pp = nf_hook_entry_head(net, reg);
 	if (!pp)
@@ -362,13 +400,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 	if (!p)
 		return;
 
-	synchronize_net();
-
-	/* other cpu might still process nfqueue verdict that used reg */
-	nfq = nf_queue_nf_hook_drop(net);
-	if (nfq)
-		synchronize_net();
-	kvfree(p);
+	nf_queue_nf_hook_drop(net);
+	nf_hook_entries_free_rcu(p);
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);
 
@@ -395,63 +428,10 @@ EXPORT_SYMBOL(nf_register_net_hooks);
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 			     unsigned int hookcount)
 {
-	struct nf_hook_entries *to_free[16], *p;
-	struct nf_hook_entries __rcu **pp;
-	unsigned int i, j, n;
-
-	mutex_lock(&nf_hook_mutex);
-	for (i = 0; i < hookcount; i++) {
-		pp = nf_hook_entry_head(net, &reg[i]);
-		if (!pp)
-			continue;
-
-		p = nf_entry_dereference(*pp);
-		if (WARN_ON_ONCE(!p))
-			continue;
-		__nf_unregister_net_hook(p, &reg[i]);
-	}
-	mutex_unlock(&nf_hook_mutex);
-
-	do {
-		n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
-
-		mutex_lock(&nf_hook_mutex);
-
-		for (i = 0, j = 0; i < hookcount && j < n; i++) {
-			pp = nf_hook_entry_head(net, &reg[i]);
-			if (!pp)
-				continue;
-
-			p = nf_entry_dereference(*pp);
-			if (!p)
-				continue;
-
-			to_free[j] = __nf_hook_entries_try_shrink(pp);
-			if (to_free[j])
-				++j;
-		}
-
-		mutex_unlock(&nf_hook_mutex);
-
-		if (j) {
-			unsigned int nfq;
-
-			synchronize_net();
-
-			/* need 2nd synchronize_net() if nfqueue is used, skb
-			 * can get reinjected right before nf_queue_hook_drop()
-			 */
-			nfq = nf_queue_nf_hook_drop(net);
-			if (nfq)
-				synchronize_net();
-
-			for (i = 0; i < j; i++)
-				kvfree(to_free[i]);
-		}
+	unsigned int i;
 
-		reg += n;
-		hookcount -= n;
-	} while (hookcount > 0);
+	for (i = 0; i < hookcount; i++)
+		nf_unregister_net_hook(net, &reg[i]);
 }
 EXPORT_SYMBOL(nf_unregister_net_hooks);
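
For clarity, the resulting single allocation would look like this
(sketch only, nothing beyond what the diff above adds):

	struct nf_hook_entries            /* num_hook_entries + padding */
	struct nf_hook_entry[num]         /* hook fn + priv, fast path */
	struct nf_hook_ops *[num]         /* orig_ops, slow path only */
	struct nf_hook_entries_rcu_head   /* rcu_head + pointer back to the
	                                     allocation, freed with one kvfree
	                                     from the rcu callback */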
 