Re: [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Eric Dumazet wrote:
Le mercredi 09 juin 2010 à 18:30 +0200, Patrick McHardy a écrit :
Eric Dumazet wrote:
@@ -121,16 +128,14 @@ clusterip_config_find_get(__be32 clusterip, int entry)
 {
 	struct clusterip_config *c;
- read_lock_bh(&clusterip_lock);
+	rcu_read_lock_bh();
 	c = __clusterip_config_find(clusterip);
-	if (!c) {
-		read_unlock_bh(&clusterip_lock);
-		return NULL;
+	if (c) {
+		atomic_inc(&c->refcount);
+		if (entry)
+			atomic_inc(&c->entries);
Shouldn't this be using atomic_inc_not_zero() to avoid races with
clusterip_config_entry_put()?


Ah yes, I should stop doing patches today !

Hmm, I am not sure current code is correct ?

clusterip_config_find_get() can return a struct clusterip_config pointer
to an entry just that was given to clusterip_config_entry_put()
(c->entries == 0)

Then we access c->dev, while clusterip_config_entry_put() did a
dev_put(c->dev) on it ?

In the clusterip_tg_check() case we hold a "entries" reference, so
this can't happen. In arp_mangle() the device pointer is only used
for comparison (except for the pr_debug statement), so at least it
won't crash.

I wonder why this isn't simply using the refcount for both rule
entries and other references.

So following 'standard rcu lookup' works, but race is still there ?

static inline struct clusterip_config *
clusterip_config_find_get(__be32 clusterip, int entry)
{
        struct clusterip_config *c;

        rcu_read_lock_bh();
        c = __clusterip_config_find(clusterip);
        if (c) {
                if (unlikely(!atomic_inc_not_zero(&c->refcount)))
                        c  = NULL;
                else if (entry)
                        atomic_inc(&c->entries);
        }
        rcu_read_unlock_bh();

        return c;
}


updated patch for reference :

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f91c94b..ecd77b1 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -53,12 +53,13 @@ struct clusterip_config {
 #endif
 	enum clusterip_hashmode hash_mode;	/* which hashing mode */
 	u_int32_t hash_initval;			/* hash initialization */
+	struct rcu_head rcu;
 };
static LIST_HEAD(clusterip_configs); /* clusterip_lock protects the clusterip_configs list */
-static DEFINE_RWLOCK(clusterip_lock);
+static DEFINE_SPINLOCK(clusterip_lock);
#ifdef CONFIG_PROC_FS
 static const struct file_operations clusterip_proc_fops;
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
 	atomic_inc(&c->refcount);
 }
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
+{
+	kfree(container_of(head, struct clusterip_config, rcu));
+}
+
 static inline void
 clusterip_config_put(struct clusterip_config *c)
 {
 	if (atomic_dec_and_test(&c->refcount))
-		kfree(c);
+		call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
 }
/* decrease the count of entries using/referencing this config. If last
@@ -84,10 +91,10 @@ clusterip_config_put(struct clusterip_config *c)
 static inline void
 clusterip_config_entry_put(struct clusterip_config *c)
 {
-	write_lock_bh(&clusterip_lock);
+	spin_lock_bh(&clusterip_lock);
 	if (atomic_dec_and_test(&c->entries)) {
 		list_del(&c->list);
-		write_unlock_bh(&clusterip_lock);
+		spin_unlock_bh(&clusterip_lock);
dev_mc_del(c->dev, c->clustermac);
 		dev_put(c->dev);
@@ -100,7 +107,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 #endif
 		return;
 	}
-	write_unlock_bh(&clusterip_lock);
+	spin_unlock_bh(&clusterip_lock);
 }
static struct clusterip_config *
@@ -108,7 +115,7 @@ __clusterip_config_find(__be32 clusterip)
 {
 	struct clusterip_config *c;
- list_for_each_entry(c, &clusterip_configs, list) {
+	list_for_each_entry_rcu(c, &clusterip_configs, list) {
 		if (c->clusterip == clusterip)
 			return c;
 	}
@@ -121,16 +128,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
 {
 	struct clusterip_config *c;
- read_lock_bh(&clusterip_lock);
+	rcu_read_lock_bh();
 	c = __clusterip_config_find(clusterip);
-	if (!c) {
-		read_unlock_bh(&clusterip_lock);
-		return NULL;
+	if (c) {
+		if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+			c  = NULL;
+		else if (entry)
+			atomic_inc(&c->entries);
 	}
-	atomic_inc(&c->refcount);
-	if (entry)
-		atomic_inc(&c->entries);
-	read_unlock_bh(&clusterip_lock);
+	rcu_read_unlock_bh();
return c;
 }
@@ -181,9 +187,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 	}
 #endif
- write_lock_bh(&clusterip_lock);
+	spin_lock_bh(&clusterip_lock);
 	list_add(&c->list, &clusterip_configs);
-	write_unlock_bh(&clusterip_lock);
+	spin_unlock_bh(&clusterip_lock);
return c;
 }
@@ -733,6 +739,9 @@ static void __exit clusterip_tg_exit(void)
 #endif
 	nf_unregister_hook(&cip_arp_ops);
 	xt_unregister_target(&clusterip_tg_reg);
+
+	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+	rcu_barrier_bh();
 }
module_init(clusterip_tg_init);


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Netfitler Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux