Solving scalability issue: for chain list "name" searching. Functions: iptcc_find_label(), iptc_is_chain(). Testing if a chain exist, requires a linearly walk of linked list with chain-names (doing a strcmp(3) in each step). Giving a worst-case runtime of O(n) where n is the number of chains. Why is this important to fix?! If only called once, this should not be a big concern, even-though the string compares are expensive. The performance issue arise with many chains for example; when using "iptables-restore", or when listing all "iptables -nL" rules, or when using CPAN IPTables::libiptc. Having 50k chains, the rule listing, with the command: "./iptables -nL > /dev/null", Without patch it takes approximately 5 minutes, With the patch it takes 0.5 seconds. Listing without patch: real 4m49.426s user 4m37.993s sys 0m0.280s Listing with patch: real 0m0.558s user 0m0.484s sys 0m0.064s How is it solved?! The issue is solved introducing a new data structure, that allow us to do binary search of chain names. Thus, reducing the worst-case runtime to O(log n). Being more specific: The new data structure is called "chain index", which is an array with pointers into the chain list, with CHAIN_INDEX_BUCKET_LEN spacing. This facilitates the ability to speedup chain list searching, by find a more optimal starting points when searching the linked list. The runtime complexity is actually also affected by this "bucket" size concept. Thus, O(log(n/k) + k) where k is CHAIN_INDEX_BUCKET_LEN. A nice property of the chain index, is that the "bucket" list length is max CHAIN_INDEX_BUCKET_LEN (when just build, inserts will change this). Oppose to hashing, where the "bucket" list length can vary a lot. Signed-off-by: Jesper Dangaard Brouer <hawk@xxxxxxx> --- libiptc/libiptc.c | 418 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 414 insertions(+), 4 deletions(-) diff --git a/libiptc/libiptc.c b/libiptc/libiptc.c index b4d865e..9160735 100644 --- a/libiptc/libiptc.c +++ b/libiptc/libiptc.c @@ -40,6 +40,12 @@ #define DEBUGP_C(x, args...) #endif +#ifdef DEBUG +#define debug(x, args...) fprintf(stderr, x, ## args) +#else +#define debug(x, args...) +#endif + #ifndef IPT_LIB_DIR #define IPT_LIB_DIR "/usr/local/lib/iptables" #endif @@ -134,6 +140,9 @@ STRUCT_TC_HANDLE unsigned int num_chains; /* number of user defined chains */ + struct chain_head **chain_index; /* array for fast chain list access*/ + unsigned int chain_index_sz;/* size of chain index array */ + STRUCT_GETINFO info; STRUCT_GET_ENTRIES *entries; }; @@ -266,6 +275,302 @@ iptcb_ent_is_hook_entry(STRUCT_ENTRY *e, TC_HANDLE_T h) /********************************************************************** + * Chain index (cache utility) functions + ********************************************************************** + * The chain index is an array with pointers into the chain list, with + * CHAIN_INDEX_BUCKET_LEN spacing. This facilitates the ability to + * speedup chain list searching, by find a more optimal starting + * points when searching the linked list. + * + * The starting point can be found fast by using a binary search of + * the chain index. Thus, reducing the previous search complexity of + * O(n) to O(log(n/k) + k) where k is CHAIN_INDEX_BUCKET_LEN. + * + * A nice property of the chain index, is that the "bucket" list + * length is max CHAIN_INDEX_BUCKET_LEN (when just build, inserts will + * change this). Oppose to hashing, where the "bucket" list length can + * vary a lot. + */ +#ifndef CHAIN_INDEX_BUCKET_LEN +#define CHAIN_INDEX_BUCKET_LEN 40 +#endif + +/* Another nice property of the chain index is that inserting/creating + * chains in chain list don't change the correctness of the chain + * index, it only causes longer lists in the buckets. + * + * To mitigate the performance penalty of longer bucket lists and the + * penalty of rebuilding, the chain index is rebuild only when + * CHAIN_INDEX_INSERT_MAX chains has been added. + */ +#ifndef CHAIN_INDEX_INSERT_MAX +#define CHAIN_INDEX_INSERT_MAX 355 +#endif + +static inline unsigned int iptcc_is_builtin(struct chain_head *c); + + +/* Use binary search in the chain index array, to find a chain_head + * pointer closest to the place of the searched name element. + * + * Notes that, binary search (obviously) requires that the chain list + * is sorted by name. + */ +static struct list_head * +iptcc_bsearch_chain_index(const char *name, unsigned int *index, TC_HANDLE_T handle) +{ + unsigned int pos, end; + int res; + + struct list_head *list_pos; + list_pos=&handle->chains; + + /* Check for empty array, e.g. no user defined chains */ + if (handle->chain_index_sz == 0) { + debug("WARNING: handle->chain_index_sz == 0\n"); + return list_pos; + } + + /* Init */ + end = handle->chain_index_sz; + pos = end / 2; + + debug("bsearch Find chain:%s (pos:%d end:%d)\n", name, pos, end); + + /* Loop */ + loop: + if (!handle->chain_index[pos]) { + fprintf(stderr, "ERROR: NULL pointer chain_index[%d]\n", pos); + return &handle->chains; /* Be safe, return orig start pos */ + } + + res = strcmp(name, handle->chain_index[pos]->name); + list_pos = &handle->chain_index[pos]->list; + (*index)=pos; + + debug("bsearch Index[%d] name:%s res:%d ", + pos, handle->chain_index[pos]->name, res); + + if (res == 0) { /* Found element, by direct hit */ + debug("[found] Direct hit pos:%d end:%d\n", pos, end); + return list_pos; + } else if (res < 0) { /* Too far, jump back */ + end = pos; + pos = pos / 2; + + /* Exit case: First element of array */ + if (end == 0) { + debug("[found] Reached first array elem (end%d)\n",end); + return list_pos; + } + debug("jump back to pos:%d (end:%d)\n", pos, end); + goto loop; + } else if (res > 0 ){ /* Not far enough, jump forward */ + + /* Exit case: Last element of array */ + if (pos == handle->chain_index_sz-1) { + debug("[found] Last array elem (end:%d)\n", end); + return list_pos; + } + + /* Exit case: Next index less, thus elem in this list section */ + res = strcmp(name, handle->chain_index[pos+1]->name); + if (res < 0) { + debug("[found] closest list (end:%d)\n", end); + return list_pos; + } + + pos = (pos+end)/2; + debug("jump forward to pos:%d (end:%d)\n", pos, end); + goto loop; + } + + return list_pos; +} + +#ifdef DEBUG +/* Trivial linear search of chain index. Function used for verifying + the output of bsearch function */ +static struct list_head * +iptcc_linearly_search_chain_index(const char *name, TC_HANDLE_T handle) +{ + unsigned int i=0; + int res=0; + + struct list_head *list_pos; + list_pos = &handle->chains; + + if (handle->chain_index_sz) + list_pos = &handle->chain_index[0]->list; + + /* Linearly walk of chain index array */ + + for (i=0; i < handle->chain_index_sz; i++) { + if (handle->chain_index[i]) { + res = strcmp(handle->chain_index[i]->name, name); + if (res > 0) + break; // One step too far + list_pos = &handle->chain_index[i]->list; + if (res == 0) + break; // Direct hit + } + } + + return list_pos; +} +#endif + +static int iptcc_chain_index_alloc(TC_HANDLE_T h) +{ + unsigned int list_length = CHAIN_INDEX_BUCKET_LEN; + unsigned int array_elems; + unsigned int array_mem; + + /* Allocate memory for the chain index array */ + array_elems = (h->num_chains / list_length) + + (h->num_chains % list_length ? 1 : 0); + array_mem = sizeof(h->chain_index) * array_elems; + + debug("Alloc Chain index, elems:%d mem:%d bytes\n", + array_elems, array_mem); + + h->chain_index = malloc(array_mem); + if (!h->chain_index) { + h->chain_index_sz = 0; + return -ENOMEM; + } + memset(h->chain_index, 0, array_mem); + h->chain_index_sz = array_elems; + + return 1; +} + +static void iptcc_chain_index_free(TC_HANDLE_T h) +{ + h->chain_index_sz = 0; + free(h->chain_index); +} + + +#ifdef DEBUG +static void iptcc_chain_index_dump(TC_HANDLE_T h) +{ + unsigned int i = 0; + + /* Dump: contents of chain index array */ + for (i=0; i < h->chain_index_sz; i++) { + if (h->chain_index[i]) { + fprintf(stderr, "Chain index[%d].name: %s\n", + i, h->chain_index[i]->name); + } + } +} +#endif + +/* Build the chain index */ +static int iptcc_chain_index_build(TC_HANDLE_T h) +{ + unsigned int list_length = CHAIN_INDEX_BUCKET_LEN; + unsigned int chains = 0; + unsigned int cindex = 0; + struct chain_head *c; + + /* Build up the chain index array here */ + debug("Building chain index\n"); + + debug("Number of user defined chains:%d bucket_sz:%d array_sz:%d\n", + h->num_chains, list_length, h->chain_index_sz); + + if (h->chain_index_sz == 0) + return 0; + + list_for_each_entry(c, &h->chains, list) { + + /* Issue: The index array needs to start after the + * builtin chains, as they are not sorted */ + if (!iptcc_is_builtin(c)) { + cindex=chains / list_length; + + /* Safe guard, break out on array limit, this + * is useful if chains are added and array is + * rebuild, without realloc of memory. */ + if (cindex >= h->chain_index_sz) + break; + + if ((chains % list_length)== 0) { + debug("\nIndex[%d] Chains:", cindex); + h->chain_index[cindex] = c; + } + chains++; + } + debug("%s, ", c->name); + } + debug("\n"); + + return 1; +} + +static int iptcc_chain_index_rebuild(TC_HANDLE_T h) +{ + debug("REBUILD chain index array\n"); + iptcc_chain_index_free(h); + if ((iptcc_chain_index_alloc(h)) < 0) + return -ENOMEM; + iptcc_chain_index_build(h); + return 1; +} + +/* Delete chain (pointer) from index array. Removing an element from + * the chain list only affects the chain index array, if the chain + * index points-to/uses that list pointer. + * + * There are different strategies, the simple and safe is to rebuild + * the chain index every time. The more advanced is to update the + * array index to point to the next element, but that requires some + * house keeping and boundry checks. The advanced is implemented, as + * the simple approach behaves badly when all chains are deleted + * because list_for_each processing will always hit the first chain + * index, thus causing a rebuild for every chain. + */ +static int iptcc_chain_index_delete_chain(struct chain_head *c, TC_HANDLE_T h) +{ + struct list_head *index_ptr, *index_ptr2, *next; + struct chain_head *c2; + unsigned int index, index2; + + index_ptr = iptcc_bsearch_chain_index(c->name, &index, h); + + debug("Del chain[%s] c->list:%p index_ptr:%p\n", + c->name, &c->list, index_ptr); + + /* Save the next pointer */ + next = c->list.next; + list_del(&c->list); + + if (index_ptr == &c->list) { /* Chain used as index ptr */ + + /* See if its possible to avoid a rebuild, by shifting + * to next pointer. Its possible if the next pointer + * is located in the same index bucket. + */ + c2 = list_entry(next, struct chain_head, list); + index_ptr2 = iptcc_bsearch_chain_index(c2->name, &index2, h); + if (index != index2) { + /* Rebuild needed */ + return iptcc_chain_index_rebuild(h); + } else { + /* Avoiding rebuild */ + debug("Update cindex[%d] with next ptr name:[%s]\n", + index, c2->name); + h->chain_index[index]=c2; + return 0; + } + } + return 0; +} + + +/********************************************************************** * iptc cache utility functions (iptcc_*) **********************************************************************/ @@ -322,21 +627,81 @@ iptcc_find_chain_by_offset(TC_HANDLE_T handle, unsigned int offset) return NULL; } + /* Returns chain head if found, otherwise NULL. */ static struct chain_head * iptcc_find_label(const char *name, TC_HANDLE_T handle) { struct list_head *pos; + struct list_head *list_start_pos; + unsigned int i=0; + int res; if (list_empty(&handle->chains)) return NULL; + /* First look at builtin chains */ list_for_each(pos, &handle->chains) { struct chain_head *c = list_entry(pos, struct chain_head, list); + if (!iptcc_is_builtin(c)) + break; if (!strcmp(c->name, name)) return c; } + /* Find a smart place to start the search via chain index */ + //list_start_pos = iptcc_linearly_search_chain_index(name, handle); + list_start_pos = iptcc_bsearch_chain_index(name, &i, handle); + + /* Handel if bsearch bails out early */ + if (list_start_pos == &handle->chains) { + list_start_pos = pos; + } +#ifdef DEBUG + else { + /* Verify result of bsearch against linearly index search */ + struct list_head *test_pos; + struct chain_head *test_c, *tmp_c; + test_pos = iptcc_linearly_search_chain_index(name, handle); + if (list_start_pos != test_pos) { + debug("BUG in chain_index search\n"); + test_c=list_entry(test_pos, struct chain_head,list); + tmp_c =list_entry(list_start_pos,struct chain_head,list); + debug("Verify search found:\n"); + debug(" Chain:%s\n", test_c->name); + debug("BSearch found:\n"); + debug(" Chain:%s\n", tmp_c->name); + exit(42); + } + } +#endif + + /* Initial/special case, no user defined chains */ + if (handle->num_chains == 0) + return NULL; + + /* Start searching through the chain list */ + list_for_each(pos, list_start_pos->prev) { + struct chain_head *c = list_entry(pos, struct chain_head, list); + res = strcmp(c->name, name); + debug("List search name:%s == %s res:%d\n", name, c->name, res); + if (res==0) + return c; + + /* We can stop earlier as we know list is sorted */ + if (res>0 && !iptcc_is_builtin(c)) { /* Walked too far*/ + debug(" Not in list, walked too far, sorted list\n"); + return NULL; + } + + /* Stop on wrap around, if list head is reached */ + if (pos == &handle->chains) { + debug("Stop, list head reached\n"); + return NULL; + } + } + + debug("List search NOT found name:%s\n", name); return NULL; } @@ -397,14 +762,37 @@ static int __iptcc_p_del_policy(TC_HANDLE_T h, unsigned int num) static inline void iptc_insert_chain(TC_HANDLE_T h, struct chain_head *c) { struct chain_head *tmp; + struct list_head *list_start_pos; + unsigned int i=1; + + /* Find a smart place to start the insert search */ + list_start_pos = iptcc_bsearch_chain_index(c->name, &i, h); + + /* Handle the case, where chain.name is smaller than index[0] */ + if (i==0 && strcmp(c->name, h->chain_index[0]->name) <= 0) { + h->chain_index[0] = c; /* Update chain index head */ + list_start_pos = h->chains.next; + debug("Update chain_index[0] with %s\n", c->name); + } + + /* Handel if bsearch bails out early */ + if (list_start_pos == &h->chains) { + list_start_pos = h->chains.next; + } /* sort only user defined chains */ if (!c->hooknum) { - list_for_each_entry(tmp, &h->chains, list) { + list_for_each_entry(tmp, list_start_pos->prev, list) { if (!tmp->hooknum && strcmp(c->name, tmp->name) <= 0) { list_add(&c->list, tmp->list.prev); return; } + + /* Stop if list head is reached */ + if (&tmp->list == &h->chains) { + debug("Insert, list head reached add to tail\n"); + break; + } } } @@ -565,6 +953,11 @@ static int parse_table(TC_HANDLE_T h) ENTRY_ITERATE(h->entries->entrytable, h->entries->size, cache_add_entry, h, &prev, &num); + /* Build the chain index, used for chain list search speedup */ + if ((iptcc_chain_index_alloc(h)) < 0) + return -ENOMEM; + iptcc_chain_index_build(h); + /* Second pass: fixup parsed data from first pass */ list_for_each_entry(c, &h->chains, list) { struct rule_head *r; @@ -910,6 +1303,8 @@ TC_FREE(TC_HANDLE_T *h) free(c); } + iptcc_chain_index_free(*h); + free((*h)->entries); free(*h); @@ -1809,6 +2204,20 @@ TC_CREATE_CHAIN(const IPT_CHAINLABEL chain, TC_HANDLE_T *handle) DEBUGP("Creating chain `%s'\n", chain); iptc_insert_chain(*handle, c); /* Insert sorted */ + /* Inserting chains don't change the correctness of the chain + * index (except if its smaller than index[0], but that + * handled by iptc_insert_chain). It only causes longer lists + * in the buckets. Thus, only rebuild chain index when the + * capacity is exceed with CHAIN_INDEX_INSERT_MAX chains. + */ + int capacity = (*handle)->chain_index_sz * CHAIN_INDEX_BUCKET_LEN; + int exceeded = ((((*handle)->num_chains)-capacity)); + if (exceeded > CHAIN_INDEX_INSERT_MAX) { + debug("Capacity(%d) exceeded(%d) rebuild (chains:%d)\n", + capacity, exceeded, (*handle)->num_chains); + iptcc_chain_index_rebuild(*handle); + } + set_changed(*handle); return 1; @@ -1875,11 +2284,12 @@ TC_DELETE_CHAIN(const IPT_CHAINLABEL chain, TC_HANDLE_T *handle) if (c == (*handle)->chain_iterator_cur) iptcc_chain_iterator_advance(*handle); - list_del(&c->list); - free(c); - (*handle)->num_chains--; /* One user defined chain deleted */ + //list_del(&c->list); /* Done in iptcc_chain_index_delete_chain() */ + iptcc_chain_index_delete_chain(c, *handle); + free(c); + DEBUGP("chain `%s' deleted\n", chain); set_changed(*handle); -- 1.5.3 - To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html