On Mon, Jun 09, 2003 at 07:29:34PM +1000, herbert wrote:
>
> The references will disappear after the next rcu transition which is not
> dependent on packets flowing through the engine.

OK, here is the patch with no destructors.

I've changed the calling convention of xfrm_policy_kill.  It will now
drop the last reference for you, asynchronously.

Cheers,
-- 
Debian GNU/Linux 3.0 is out! ( http://www.debian.org/ )
Email:  Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
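For illustration, this is roughly what the new convention means for a caller
(sketch only, taken from the xfrm_policy_byid() hunk in the patch below):

	/* Old convention: the caller killed the policy and then had to
	 * drop its own reference explicitly. */
	xp = xfrm_policy_byid(0, index, 1);
	if (xp) {
		xfrm_policy_kill(xp);
		xfrm_pol_put(xp);
	}

	/* New convention: xfrm_policy_kill() queues the policy on the GC
	 * list and the work queue drops the last reference asynchronously,
	 * so the caller must not call xfrm_pol_put() afterwards. */
	xp = xfrm_policy_byid(0, index, 1);
	if (xp)
		xfrm_policy_kill(xp);
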
Index: kernel-source-2.5/include/net/flow.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/include/net/flow.h,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 flow.h
--- kernel-source-2.5/include/net/flow.h	27 May 2003 08:38:39 -0000	1.1.1.3
+++ kernel-source-2.5/include/net/flow.h	9 Jun 2003 09:25:22 -0000
@@ -87,6 +87,7 @@
 
 extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 			       flow_resolve_t resolver);
+extern void flow_cache_flush(void *object);
 
 extern atomic_t flow_cache_genid;
 #endif
Index: kernel-source-2.5/include/net/xfrm.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/include/net/xfrm.h,v
retrieving revision 1.3
diff -u -r1.3 xfrm.h
--- kernel-source-2.5/include/net/xfrm.h	7 Jun 2003 09:36:28 -0000	1.3
+++ kernel-source-2.5/include/net/xfrm.h	9 Jun 2003 09:43:31 -0000
@@ -266,6 +266,7 @@
 struct xfrm_policy
 {
 	struct xfrm_policy	*next;
+	struct list_head	list;
 
 	/* This lock only affects elements except for entry. */
 	rwlock_t		lock;
Index: kernel-source-2.5/net/core/flow.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/net/core/flow.c,v
retrieving revision 1.3
diff -u -r1.3 flow.c
--- kernel-source-2.5/net/core/flow.c	2 Jun 2003 10:55:50 -0000	1.3
+++ kernel-source-2.5/net/core/flow.c	9 Jun 2003 09:24:30 -0000
@@ -12,17 +12,32 @@
 #include <linux/random.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
 #include <net/flow.h>
 #include <asm/atomic.h>
 
+enum {
+	/* alive and healthy */
+	FLE_STATE_ALIVE,
+	/* possessed by a ghost: an rcu will perform exorcism */
+	FLE_STATE_POSSESSED,
+	/* dead but still in one piece: candidate for resurrection or burial */
+	FLE_STATE_DEAD,
+	/* dead and buried */
+	FLE_STATE_BURIED,
+};
+
 struct flow_cache_entry {
 	struct flow_cache_entry	*next;
 	u16			family;
 	u8			dir;
+	u8			state;
 	struct flowi		key;
 	u32			genid;
 	void			*object;
 	atomic_t		*object_ref;
+	struct rcu_head		rcu;
 };
 
 atomic_t flow_cache_genid = ATOMIC_INIT(0);
@@ -33,6 +48,7 @@
 static kmem_cache_t *flow_cachep;
 
 static int flow_lwm, flow_hwm;
+static spinlock_t flow_cache_lock = SPIN_LOCK_UNLOCKED;
 
 struct flow_percpu_info {
 	int hash_rnd_recalc;
@@ -65,6 +81,7 @@
 	struct flow_cache_entry *fle, **flp;
 	int i;
 
+	spin_lock(&flow_cache_lock);
 	for (i = 0; i < flow_hash_size; i++) {
 		int k = 0;
 
@@ -75,12 +92,17 @@
 		}
 		while ((fle = *flp) != NULL) {
 			*flp = fle->next;
+			flow_count(cpu)--;
+			if (fle->state >= FLE_STATE_POSSESSED) {
+				fle->state = FLE_STATE_BURIED;
+				continue;
+			}
 			if (fle->object)
 				atomic_dec(fle->object_ref);
 			kmem_cache_free(flow_cachep, fle);
-			flow_count(cpu)--;
 		}
 	}
+	spin_unlock(&flow_cache_lock);
 }
 
 static void flow_cache_shrink(int cpu)
@@ -142,18 +164,20 @@
 void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
-	struct flow_cache_entry *fle, **head;
+	struct flow_cache_entry *fle, **flp, **head;
 	unsigned int hash;
 	int cpu;
 
 	local_bh_disable();
+	rcu_read_lock();
+
 	cpu = smp_processor_id();
 	if (flow_hash_rnd_recalc(cpu))
 		flow_new_hash_rnd(cpu);
 	hash = flow_hash_code(key, cpu);
 
 	head = &flow_table[(cpu << flow_hash_shift) + hash];
-	for (fle = *head; fle; fle = fle->next) {
+	for (flp = head; (fle = *flp) != NULL; flp = &fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
 		    flow_key_compare(key, &fle->key) == 0) {
@@ -162,6 +186,8 @@
 
 			if (ret)
 				atomic_inc(fle->object_ref);
+
+			rcu_read_unlock();
 			local_bh_enable();
 
 			return ret;
@@ -170,46 +196,112 @@
 		}
 	}
 
+	if (!fle) {
+		if (flow_count(cpu) > flow_hwm)
+			flow_cache_shrink(cpu);
+
+		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
+		if (fle) {
+			fle->next = *head;
+			fle->family = family;
+			fle->dir = dir;
+			memcpy(&fle->key, key, sizeof(*key));
+			flow_count(cpu)++;
+		}
+	}
+
 	{
 		void *obj;
 		atomic_t *obj_ref;
+		unsigned genid;
 
+again:
+		genid = atomic_read(&flow_cache_genid);
 		resolver(key, family, dir, &obj, &obj_ref);
 
 		if (fle) {
-			fle->genid = atomic_read(&flow_cache_genid);
+			unsigned state = FLE_STATE_ALIVE;
 
-			if (fle->object)
-				atomic_dec(fle->object_ref);
+			spin_lock(&flow_cache_lock);
+
+			if (fle->next == *head)
+				*head = fle;
+			else {
+				if (fle->object)
+					atomic_dec(fle->object_ref);
+				if (fle->state >= FLE_STATE_POSSESSED)
+					state = FLE_STATE_POSSESSED;
+			}
+			fle->state = state;
 			fle->object = obj;
 			fle->object_ref = obj_ref;
+
+			spin_unlock(&flow_cache_lock);
+
+			fle->genid = atomic_read(&flow_cache_genid);
+			if (fle->genid != genid)
+				goto again;
+
 			if (obj)
 				atomic_inc(fle->object_ref);
-		} else {
-			if (flow_count(cpu) > flow_hwm)
-				flow_cache_shrink(cpu);
-
-			fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
-			if (fle) {
-				fle->next = *head;
-				*head = fle;
-				fle->family = family;
-				fle->dir = dir;
-				memcpy(&fle->key, key, sizeof(*key));
-				fle->genid = atomic_read(&flow_cache_genid);
-				fle->object = obj;
-				fle->object_ref = obj_ref;
-				if (obj)
-					atomic_inc(fle->object_ref);
-
-				flow_count(cpu)++;
-			}
 		}
+
+		rcu_read_unlock();
 		local_bh_enable();
 
 		return obj;
 	}
+}
+
+static void flow_drop_ref(void *data)
+{
+	struct flow_cache_entry *fle = data;
+	unsigned state;
+
+	spin_lock_bh(&flow_cache_lock);
+	state = fle->state;
+	fle->state = FLE_STATE_ALIVE;
+	if (state >= FLE_STATE_DEAD) {
+		atomic_dec(fle->object_ref);
+		fle->object = NULL;
+	}
+	spin_unlock_bh(&flow_cache_lock);
+
+	if (state < FLE_STATE_BURIED)
+		return;
+
+	kmem_cache_free(flow_cachep, fle);
+}
+
+void flow_cache_flush(void *object)
+{
+	int i;
+
+	spin_lock_bh(&flow_cache_lock);
+
+	for (i = 0; i < NR_CPUS * flow_hash_size; i++) {
+		struct flow_cache_entry *fle, **flp;
+
+		flp = &flow_table[i];
+		for (; (fle = *flp) != NULL; flp = &fle->next) {
+			unsigned state;
+
+			if (fle->object != object)
+				continue;
+
+			state = fle->state;
+			fle->state = FLE_STATE_DEAD;
+
+			if (state > FLE_STATE_ALIVE)
+				continue;
+
+			call_rcu(&fle->rcu, flow_drop_ref, fle);
+		}
+	}
+
+	spin_unlock_bh(&flow_cache_lock);
+	synchronize_kernel();
 }
 
 static int __init flow_cache_init(void)
Index: kernel-source-2.5/net/key/af_key.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/net/key/af_key.c,v
retrieving revision 1.4
diff -u -r1.4 af_key.c
--- kernel-source-2.5/net/key/af_key.c	7 Jun 2003 09:36:28 -0000	1.4
+++ kernel-source-2.5/net/key/af_key.c	9 Jun 2003 08:42:11 -0000
@@ -2016,7 +2016,6 @@
 out:
 	if (xp) {
 		xfrm_policy_kill(xp);
-		xfrm_pol_put(xp);
 	}
 	return err;
 }
@@ -2060,7 +2059,8 @@
 	if (xp) {
 		if (hdr->sadb_msg_type == SADB_X_SPDDELETE2)
 			xfrm_policy_kill(xp);
-		xfrm_pol_put(xp);
+		else
+			xfrm_pol_put(xp);
 	}
 	return err;
 }
Index: kernel-source-2.5/net/xfrm/xfrm_policy.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/net/xfrm/xfrm_policy.c,v
retrieving revision 1.5
diff -u -r1.5 xfrm_policy.c
--- kernel-source-2.5/net/xfrm/xfrm_policy.c	7 Jun 2003 09:36:28 -0000	1.5
+++ kernel-source-2.5/net/xfrm/xfrm_policy.c	9 Jun 2003 09:43:49 -0000
@@ -15,6 +15,9 @@
 
 #include <linux/config.h>
 #include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
@@ -29,6 +32,11 @@
 
 kmem_cache_t *xfrm_dst_cache;
 
+static struct work_struct xfrm_policy_gc_work;
+static struct list_head xfrm_policy_gc_list =
+	LIST_HEAD_INIT(xfrm_policy_gc_list);
+static spinlock_t xfrm_policy_gc_lock = SPIN_LOCK_UNLOCKED;
+
 int xfrm_register_type(struct xfrm_type *type, unsigned short family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -152,7 +160,6 @@
 	xp = xfrm_policy_byid(0, index, 1);
 	if (xp) {
 		xfrm_policy_kill(xp);
-		xfrm_pol_put(xp);
 	}
 }
 
@@ -194,27 +201,56 @@
 	kfree(policy);
 }
 
+static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+
+	while ((dst = policy->bundles) != NULL) {
+		policy->bundles = dst->next;
+		dst_free(dst);
+	}
+
+	if (del_timer(&policy->timer))
+		atomic_dec(&policy->refcnt);
+
+	if (atomic_read(&policy->refcnt) > 1)
+		flow_cache_flush(policy);
+
+	xfrm_pol_put(policy);
+}
+
+static void xfrm_policy_gc_task(void *data)
+{
+	struct xfrm_policy *policy;
+	struct list_head *entry, *tmp;
+	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+
+	spin_lock_bh(&xfrm_policy_gc_lock);
+	list_splice_init(&xfrm_policy_gc_list, &gc_list);
+	spin_unlock_bh(&xfrm_policy_gc_lock);
+
+	list_for_each_safe(entry, tmp, &gc_list) {
+		policy = list_entry(entry, struct xfrm_policy, list);
+		xfrm_policy_gc_kill(policy);
+	}
+}
+
 /* Rule must be locked. Release descentant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
  */
 void xfrm_policy_kill(struct xfrm_policy *policy)
 {
-	struct dst_entry *dst;
-
 	write_lock_bh(&policy->lock);
 	if (policy->dead)
 		goto out;
 
 	policy->dead = 1;
 
-	while ((dst = policy->bundles) != NULL) {
-		policy->bundles = dst->next;
-		dst_free(dst);
-	}
-
-	if (del_timer(&policy->timer))
-		atomic_dec(&policy->refcnt);
+	spin_lock(&xfrm_policy_gc_lock);
+	list_add(&policy->list, &xfrm_policy_gc_list);
+	spin_unlock(&xfrm_policy_gc_lock);
+	schedule_work(&xfrm_policy_gc_work);
 
 out:
 	write_unlock_bh(&policy->lock);
@@ -282,7 +318,6 @@
 
 	if (delpol) {
 		xfrm_policy_kill(delpol);
-		xfrm_pol_put(delpol);
 	}
 	return 0;
 }
@@ -344,7 +379,6 @@
 	write_unlock_bh(&xfrm_policy_lock);
 
 	xfrm_policy_kill(xp);
-	xfrm_pol_put(xp);
 
 	write_lock_bh(&xfrm_policy_lock);
 }
@@ -388,8 +422,8 @@
 
 /* Find policy to apply to this flow. */
 
-void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
-			void **objp, atomic_t **obj_refp)
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+			       void **objp, atomic_t **obj_refp)
 {
 	struct xfrm_policy *pol;
 
@@ -469,7 +503,6 @@
 
 	if (old_pol) {
 		xfrm_policy_kill(old_pol);
-		xfrm_pol_put(old_pol);
 	}
 	return 0;
 }
@@ -516,7 +549,6 @@
 	write_unlock_bh(&xfrm_policy_lock);
 
 	xfrm_policy_kill(pol);
-	xfrm_pol_put(pol);
 }
 
 /* Resolve list of templates for the flow, given policy. */
@@ -1135,6 +1167,8 @@
 					      NULL, NULL);
 	if (!xfrm_dst_cache)
 		panic("XFRM: failed to allocate xfrm_dst_cache\n");
+
+	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
 }
 
 void __init xfrm_init(void)
Index: kernel-source-2.5/net/xfrm/xfrm_user.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.5/net/xfrm/xfrm_user.c,v
retrieving revision 1.3
diff -u -r1.3 xfrm_user.c
--- kernel-source-2.5/net/xfrm/xfrm_user.c	7 Jun 2003 09:36:28 -0000	1.3
+++ kernel-source-2.5/net/xfrm/xfrm_user.c	9 Jun 2003 08:42:38 -0000
@@ -784,9 +784,8 @@
 					  NETLINK_CB(skb).pid,
 					  MSG_DONTWAIT);
 		}
+		xfrm_pol_put(xp);
 	}
-
-	xfrm_pol_put(xp);
 
 	return err;
 }