From: "David S. Miller" <davem@redhat.com> Date: Thu, 22 May 2003 01:58:15 -0700 (PDT) Alexey, I will try to make something... Simon (and others who want to benchmark :-), give this patch below a try. It applies cleanly to both 2.4.x and 2.5.x kernels. Alexey, note the funny inaccurate comment found here; it totally invalidates the "fast computer" comment found a few lines below it. Actually, much of this code wants some major cleanups. It is even quite costly to do these "u32 struct" things, especially on RISC. Alexey no longer makes major surgery in this area, so they may be undone. :) The next experiment can be to reimplement fn_hash() as: #include <linux/jhash.h> static fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz) { u32 h = ntohl(key.datum)>>(32 - fz->fz_order); h = jhash_1word(h, 0); h &= FZ_HASHMASK(fz); return *(fn_hash_idx_t*)&h; } or something like that. This assumes we find some problems with hash distribution when using a huge number of routes. Someone will need to add fib_hash lookup statistics in order to determine this. Anyway, testers, please let us know the results. Note you must have CONFIG_IP_ROUTE_LARGE_TABLES (and thus CONFIG_IP_ADVANCED_ROUTER) in order to even make use of this stuff. Thanks. 
--- net/ipv4/fib_hash.c.~1~ Thu May 22 02:47:17 2003 +++ net/ipv4/fib_hash.c Thu May 22 03:27:12 2003 @@ -89,7 +89,7 @@ int fz_nent; /* Number of entries */ int fz_divisor; /* Hash divisor */ - u32 fz_hashmask; /* (1<<fz_divisor) - 1 */ + u32 fz_hashmask; /* (fz_divisor - 1) */ #define FZ_HASHMASK(fz) ((fz)->fz_hashmask) int fz_order; /* Zone order */ @@ -149,7 +149,30 @@ static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED; -#define FZ_MAX_DIVISOR 1024 +#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct fib_node *)) + +static unsigned long size_to_order(unsigned long size) +{ + unsigned long order; + + for (order = 0; order < MAX_ORDER; order++) { + if ((PAGE_SIZE << order) >= size) + break; + } + return order; +} + +static struct fib_node **fz_hash_alloc(int divisor) +{ + unsigned long size = divisor * sizeof(struct fib_node *); + + if (divisor <= 1024) { + return kmalloc(size, GFP_KERNEL); + } else { + return (struct fib_node **) + __get_free_pages(GFP_KERNEL, size_to_order(size)); + } +} #ifdef CONFIG_IP_ROUTE_LARGE_TABLES @@ -174,6 +197,15 @@ } } +static void fz_hash_free(struct fib_node **hash, int divisor) +{ + if (divisor <= 1024) + kfree(hash); + else + free_pages((unsigned long) hash, + size_to_order(divisor * sizeof(struct fib_node *))); +} + static void fn_rehash_zone(struct fn_zone *fz) { struct fib_node **ht, **old_ht; @@ -185,24 +217,30 @@ switch (old_divisor) { case 16: new_divisor = 256; - new_hashmask = 0xFF; break; case 256: new_divisor = 1024; - new_hashmask = 0x3FF; break; default: - printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); - return; + if ((old_divisor << 1) > FZ_MAX_DIVISOR) { + printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); + return; + } + new_divisor = (old_divisor << 1); + break; } + + new_hashmask = (new_divisor - 1); + #if RT_CACHE_DEBUG >= 2 printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor); #endif - ht = kmalloc(new_divisor*sizeof(struct fib_node*), 
GFP_KERNEL); + ht = fz_hash_alloc(new_divisor); if (ht) { memset(ht, 0, new_divisor*sizeof(struct fib_node*)); + write_lock_bh(&fib_hash_lock); old_ht = fz->fz_hash; fz->fz_hash = ht; @@ -210,7 +248,8 @@ fz->fz_divisor = new_divisor; fn_rebuild_zone(fz, old_ht, old_divisor); write_unlock_bh(&fib_hash_lock); - kfree(old_ht); + + fz_hash_free(old_ht, old_divisor); } } #endif /* CONFIG_IP_ROUTE_LARGE_TABLES */ @@ -233,12 +272,11 @@ memset(fz, 0, sizeof(struct fn_zone)); if (z) { fz->fz_divisor = 16; - fz->fz_hashmask = 0xF; } else { fz->fz_divisor = 1; - fz->fz_hashmask = 0; } - fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL); + fz->fz_hashmask = (fz->fz_divisor - 1); + fz->fz_hash = fz_hash_alloc(fz->fz_divisor); if (!fz->fz_hash) { kfree(fz); return NULL; @@ -468,7 +506,7 @@ return err; #ifdef CONFIG_IP_ROUTE_LARGE_TABLES - if (fz->fz_nent > (fz->fz_divisor<<2) && + if (fz->fz_nent > (fz->fz_divisor<<1) && fz->fz_divisor < FZ_MAX_DIVISOR && (z==32 || (1<<z) > fz->fz_divisor)) fn_rehash_zone(fz); - To unsubscribe from this list: send the line "unsubscribe linux-net" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html