On Mon, Jun 09, 2003 at 12:32:12PM +1000, herbert wrote: > > Forget about that one, there is a race around the resolver where dead > objects can come back. flow_shrink_cache is also unprotected. OK, this patch should fix both of those problems. -- Debian GNU/Linux 3.0 is out! ( http://www.debian.org/ ) Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Index: kernel-source-2.5/include/net/flow.h =================================================================== RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/include/net/flow.h,v retrieving revision 1.1.1.3 diff -u -r1.1.1.3 flow.h --- kernel-source-2.5/include/net/flow.h 27 May 2003 08:38:39 -0000 1.1.1.3 +++ kernel-source-2.5/include/net/flow.h 9 Jun 2003 00:21:19 -0000 @@ -82,11 +82,14 @@ #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 +typedef void (*flow_obj_destroy_t)(void *); typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, - void **objp, atomic_t **obj_refp); + void **objp, atomic_t **obj_refp, + flow_obj_destroy_t *destroyer); extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver); +extern void flow_cache_flush(void *object); extern atomic_t flow_cache_genid; #endif Index: kernel-source-2.5/include/net/xfrm.h =================================================================== RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/include/net/xfrm.h,v retrieving revision 1.3 diff -u -r1.3 xfrm.h --- kernel-source-2.5/include/net/xfrm.h 7 Jun 2003 09:36:28 -0000 1.3 +++ kernel-source-2.5/include/net/xfrm.h 9 Jun 2003 00:21:19 -0000 @@ -349,7 +349,7 @@ atomic_inc(&policy->refcnt); } -extern void __xfrm_policy_destroy(struct xfrm_policy *policy); +extern void __xfrm_policy_destroy(void *policy); static inline void xfrm_pol_put(struct xfrm_policy *policy) { Index: kernel-source-2.5/net/core/flow.c =================================================================== RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/net/core/flow.c,v retrieving revision 1.3 diff -u -r1.3 flow.c --- kernel-source-2.5/net/core/flow.c 2 Jun 2003 10:55:50 -0000 1.3 +++ kernel-source-2.5/net/core/flow.c 9 Jun 2003 03:53:50 -0000 @@ -12,17 +12,33 @@ #include <linux/random.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> #include <net/flow.h> #include <asm/atomic.h> +enum { + /* alive and healthy */ + FLE_STATE_ALIVE, + /* possessed by a ghost: an rcu will perform exorcism */ + FLE_STATE_POSSESSED, + /* dead but still in one piece: candidate for resurrection or burial */ + FLE_STATE_DEAD, + /* dead and buried */ + FLE_STATE_BURIED, +}; + struct flow_cache_entry { struct flow_cache_entry *next; u16 family; u8 dir; + u8 state; struct flowi key; u32 genid; void *object; atomic_t *object_ref; + flow_obj_destroy_t object_destroy; + struct rcu_head rcu; }; atomic_t flow_cache_genid = ATOMIC_INIT(0); @@ -33,6 +49,7 @@ static kmem_cache_t *flow_cachep; static int flow_lwm, flow_hwm; +static spinlock_t flow_cache_lock = SPIN_LOCK_UNLOCKED; struct flow_percpu_info { int hash_rnd_recalc; @@ -65,6 +82,7 @@ struct flow_cache_entry *fle, **flp; int i; + spin_lock(&flow_cache_lock); for (i = 0; i < flow_hash_size; i++) { int k = 0; @@ -75,12 +93,17 @@ } while ((fle = *flp) != NULL) { *flp = fle->next; + flow_count(cpu)--; + if (fle->state >= FLE_STATE_POSSESSED) { + fle->state = FLE_STATE_BURIED; + continue; + } if (fle->object) atomic_dec(fle->object_ref); kmem_cache_free(flow_cachep, fle); - flow_count(cpu)--; } } + spin_unlock(&flow_cache_lock); } static void flow_cache_shrink(int cpu) @@ -139,21 +162,29 @@ return 0; } +static inline void flow_object_put(struct flow_cache_entry *fle) +{ + if (atomic_dec_and_test(fle->object_ref)) + fle->object_destroy(fle->object); +} + void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { - struct flow_cache_entry *fle, **head; + struct flow_cache_entry *fle, **flp, **head; unsigned int hash; int cpu; local_bh_disable(); + rcu_read_lock(); + cpu = smp_processor_id(); if (flow_hash_rnd_recalc(cpu)) flow_new_hash_rnd(cpu); hash = flow_hash_code(key, cpu); head = &flow_table[(cpu << flow_hash_shift) + hash]; - for (fle = *head; fle; fle = fle->next) { + for (flp = head; (fle = *flp) != NULL; flp = &fle->next) { if (fle->family == family && fle->dir == dir && flow_key_compare(key, &fle->key) == 0) { @@ -162,6 +193,8 @@ if (ret) atomic_inc(fle->object_ref); + + rcu_read_unlock(); local_bh_enable(); return ret; @@ -170,46 +203,113 @@ } } + if (!fle) { + if (flow_count(cpu) > flow_hwm) + flow_cache_shrink(cpu); + + fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); + if (fle) { + fle->next = *head; + fle->family = family; + fle->dir = dir; + memcpy(&fle->key, key, sizeof(*key)); + flow_count(cpu)++; + } + } + { void *obj; atomic_t *obj_ref; + flow_obj_destroy_t obj_destroy; + unsigned genid; - resolver(key, family, dir, &obj, &obj_ref); +again: + genid = atomic_read(&flow_cache_genid); + resolver(key, family, dir, &obj, &obj_ref, &obj_destroy); if (fle) { - fle->genid = atomic_read(&flow_cache_genid); + unsigned state = FLE_STATE_ALIVE; - if (fle->object) - atomic_dec(fle->object_ref); + spin_lock(&flow_cache_lock); + if (fle->next == *head) + *head = fle; + else { + if (fle->object) + flow_object_put(fle); + if (fle->state >= FLE_STATE_POSSESSED) + state = FLE_STATE_POSSESSED; + } + + fle->state = state; fle->object = obj; fle->object_ref = obj_ref; + fle->object_destroy = obj_destroy; + + spin_unlock(&flow_cache_lock); + + fle->genid = atomic_read(&flow_cache_genid); + if (fle->genid != genid) + goto again; + if (obj) atomic_inc(fle->object_ref); - } else { - if (flow_count(cpu) > flow_hwm) - flow_cache_shrink(cpu); - - fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); - if (fle) { - fle->next = *head; - *head = fle; - fle->family = family; - fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); - fle->genid = atomic_read(&flow_cache_genid); - fle->object = obj; - fle->object_ref = obj_ref; - if (obj) - atomic_inc(fle->object_ref); - - flow_count(cpu)++; - } } + + rcu_read_unlock(); local_bh_enable(); return obj; } +} + +static void flow_drop_ref(void *data) +{ + struct flow_cache_entry *fle = data; + unsigned state; + + spin_lock_bh(&flow_cache_lock); + state = fle->state; + fle->state = FLE_STATE_ALIVE; + if (state >= FLE_STATE_DEAD) { + flow_object_put(fle); + fle->object = NULL; + } + spin_unlock_bh(&flow_cache_lock); + + if (state < FLE_STATE_BURIED) + return; + + kmem_cache_free(flow_cachep, fle); +} + +void flow_cache_flush(void *object) +{ + int i; + + spin_lock_bh(&flow_cache_lock); + + for (i = 0; i < NR_CPUS * flow_hash_size; i++) { + struct flow_cache_entry *fle, **flp; + + flp = &flow_table[i]; + for (; (fle = *flp) != NULL; flp = &fle->next) { + unsigned state; + + if (fle->object != object) + continue; + + state = fle->state; + fle->state = FLE_STATE_DEAD; + + if (state > FLE_STATE_ALIVE) + continue; + + call_rcu(&fle->rcu, flow_drop_ref, fle); + } + } + + spin_unlock_bh(&flow_cache_lock); } static int __init flow_cache_init(void) Index: kernel-source-2.5/net/xfrm/xfrm_policy.c =================================================================== RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/net/xfrm/xfrm_policy.c,v retrieving revision 1.5 diff -u -r1.5 xfrm_policy.c --- kernel-source-2.5/net/xfrm/xfrm_policy.c 7 Jun 2003 09:36:28 -0000 1.5 +++ kernel-source-2.5/net/xfrm/xfrm_policy.c 9 Jun 2003 00:21:19 -0000 @@ -180,8 +180,10 @@ /* Destroy xfrm_policy: descendant resources must be released to this moment. */ -void __xfrm_policy_destroy(struct xfrm_policy *policy) +void __xfrm_policy_destroy(void *data) { + struct xfrm_policy *policy = data; + if (!policy->dead) BUG(); @@ -216,6 +218,8 @@ if (del_timer(&policy->timer)) atomic_dec(&policy->refcnt); + flow_cache_flush(policy); + out: write_unlock_bh(&policy->lock); } @@ -388,8 +392,9 @@ /* Find policy to apply to this flow. */ -void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, + void **objp, atomic_t **obj_refp, + flow_obj_destroy_t *obj_destroyp) { struct xfrm_policy *pol; @@ -408,8 +413,10 @@ } } read_unlock_bh(&xfrm_policy_lock); - if ((*objp = (void *) pol) != NULL) + if ((*objp = (void *) pol) != NULL) { *obj_refp = &pol->refcnt; + *obj_destroyp = __xfrm_policy_destroy; + } } struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)