On Wed, 2008-11-12 at 20:48 +0000, Alan Jenkins wrote: > Kay Sievers wrote: > > On Wed, Nov 12, 2008 at 19:05, Alan Jenkins <alan-jenkins@xxxxxxxxxxxxxx> wrote: > >> Kay Sievers wrote: > >>> On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@xxxxxxxxxxxxxx> wrote: > >>>> Kay Sievers wrote: > >>>>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@xxxxxxxx> wrote: > >>>>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@xxxxxxxxxxxxxx> wrote: > >>>>>> > >>>>>>> On my Ubuntu installation this removes 15k of duplicate strings, > >>>>>>> using a temporary index of about 25k. > >>>>>>> > >>>>>> Great. That looks nice. > >>>>>> > >>>>>> Thats's the diff of the rule dump before and after the patch: > >>>>>> ... > >>>>>> -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer > >>>>>> -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings > >>>>>> +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer > >>>>>> +[] used 40512 bytes of string index nodes (844 * 48 bytes) > >>>>>> +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings > >>>>>> > >>>>>> > >>>>> I split the nodes and the childs in two independent arrays, so we got > >>>>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots > >>>>> with the huge rules set here. Unlimited childs in the index removes > >>>>> another 3 kB of duplicates, and the temporary index seems also a bit > >>>>> smaller: > >>>>> shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer > >>>>> used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes > >>>>> child links) > >>>>> > >>>>> Would be great, if you can check if it still works for you as expected. :) > >>>>> > >>>>> > >>>> Did you have a particular reason to keep the trie_root array? Now > >>>> there's no fixed limit on children, you could just use trie[1] as the > >>>> root node. Remove the special case for depth == 0. And initialize it's > >>>> value and length to 0, then you can remove the special case for len == 0. > >>>> > >>>> > >>> No special reason, I thought about that too, but it was already 5am, > >>> and I was unable to think it through. :) > >>> > >>> Sounds nice to do that, did you try already, have a patch? > >>> > >>> > >> No, sorry :). > >> > > > > Ah, now by looking at it, maybe the then needed linear search for the > > key in the root is not as good as the plain root array index? > > > Mmm. Ok, without the root array add_string() takes twice as long, which > increases the total rules-loading time by 10%. (user time measured by > cachegrind). Let's leave it. Hmm, now I liked the idea. :) How about this? It has only a single array again, and no root, and no child limits. Seems to work fine, but, it looks somehow too simple now. :) rules use 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer temporary index used 21520 bytes (1076 * 20 bytes) diff --git a/udev/udev-rules.c b/udev/udev-rules.c index 1f28e4f..80c61b5 100644 --- a/udev/udev-rules.c +++ b/udev/udev-rules.c @@ -41,17 +41,16 @@ struct uid_gid { }; }; -struct trie_child { - unsigned int next_idx; - unsigned int node_idx; - unsigned char key; -}; - struct trie_node { + /* this nodes child list */ unsigned int child_idx; + /* the next child of our parent node's child list */ + unsigned int next_child_idx; + /* this nodes last child (shortcut) */ unsigned int last_child_idx; unsigned int value_off; unsigned short value_len; + unsigned char key; }; struct udev_rules { @@ -70,13 +69,9 @@ struct udev_rules { unsigned int buf_count; /* during rule parsing, strings are indexed to find duplicates */ - unsigned int *trie_root; struct trie_node *trie_nodes; unsigned int trie_nodes_cur; unsigned int trie_nodes_max; - struct trie_child *trie_childs; - unsigned int trie_childs_cur; - unsigned int trie_childs_max; /* during rule parsing, uid/gid lookup results are cached */ struct uid_gid *uids; @@ -453,6 +448,7 @@ static int add_string(struct udev_rules *rules, const char *str) unsigned short len; unsigned int depth; unsigned int off; + struct trie_node *parent; len = strlen(str); @@ -460,37 +456,31 @@ static int add_string(struct udev_rules *rules, const char *str) if (len == 0) return 0; - /* descend root - start from last character of str */ + /* walk trie, start from last character of str to find matching tails */ + node_idx = 0; key = str[len - 1]; - node_idx = rules->trie_root[key]; - depth = 0; - - /* descend suffix trie */ - if (node_idx > 0) { - while (1) { - struct trie_node *node; - unsigned int child_idx; - - node = &rules->trie_nodes[node_idx]; - depth++; - off = node->value_off + node->value_len - len; - - /* match against current node */ - if (depth == len || (node->value_len >= len && memcmp(&rules->buf[off], str, len) == 0)) - return off; - - /* lookup child node */ - key = str[len - 1 - depth]; - child_idx = node->child_idx; - while (child_idx > 0) { - if (rules->trie_childs[child_idx].key == key) - break; - child_idx = rules->trie_childs[child_idx].next_idx; - } - if (child_idx == 0) + for (depth = 0; depth <= len; depth++) { + struct trie_node *node; + unsigned int child_idx; + + node = &rules->trie_nodes[node_idx]; + off = node->value_off + node->value_len - len; + + /* match against current node */ + if (depth == len || (node->value_len >= len && memcmp(&rules->buf[off], str, len) == 0)) + return off; + + /* lookup child node */ + key = str[len - 1 - depth]; + child_idx = node->child_idx; + while (child_idx > 0) { + if (rules->trie_nodes[child_idx].key == key) break; - node_idx = rules->trie_childs[child_idx].node_idx; + child_idx = rules->trie_nodes[child_idx].next_child_idx; } + if (child_idx == 0) + break; + node_idx = child_idx; } /* string not found, add it */ @@ -515,25 +505,6 @@ static int add_string(struct udev_rules *rules, const char *str) rules->trie_nodes_max += add; } - /* grow trie childs if needed */ - if (rules->trie_childs_cur >= rules->trie_childs_max) { - struct trie_child *childs; - unsigned int add; - - /* double the buffer size */ - add = rules->trie_childs_max; - if (add < 8) - add = 8; - - childs = realloc(rules->trie_childs, (rules->trie_childs_max + add) * sizeof(struct trie_child)); - if (childs == NULL) - return -1; - dbg(rules->udev, "extend trie childs from %u to %u\n", - rules->trie_childs_max, rules->trie_childs_max + add); - rules->trie_childs = childs; - rules->trie_childs_max += add; - } - /* get new node */ new_node_idx = rules->trie_nodes_cur; rules->trie_nodes_cur++; @@ -542,36 +513,20 @@ static int add_string(struct udev_rules *rules, const char *str) new_node->value_len = len; new_node->child_idx = 0; new_node->last_child_idx = 0; + new_node->next_child_idx = 0; + new_node->key = key; - if (depth == 0) { - /* add node to root */ - rules->trie_root[key] = new_node_idx; + /* append child link to list of childs of parent */ + parent = &rules->trie_nodes[node_idx]; + if (parent->child_idx == 0) { + parent->child_idx = new_node_idx; } else { - /* add node to parent */ - struct trie_node *parent; - struct trie_child *new_child; - unsigned int new_child_idx; - - /* get new child link for list of childs of parent */ - new_child_idx = rules->trie_childs_cur; - rules->trie_childs_cur++; - new_child = &rules->trie_childs[new_child_idx]; - new_child->next_idx = 0; - new_child->node_idx = new_node_idx; - new_child->key = key; - - /* append child link to list of childs of parent */ - parent = &rules->trie_nodes[node_idx]; - if (parent->child_idx == 0) { - parent->child_idx = new_child_idx; - } else { - struct trie_child *last_child; + struct trie_node *last_child; - last_child = &rules->trie_childs[parent->last_child_idx]; - last_child->next_idx = new_child_idx; - } - parent->last_child_idx = new_child_idx; + last_child = &rules->trie_nodes[parent->last_child_idx]; + last_child->next_child_idx = new_node_idx; } + parent->last_child_idx = new_node_idx; return off; } @@ -1766,18 +1721,10 @@ struct udev_rules *udev_rules_new(struct udev *udev, int resolve_names) if (rules->trie_nodes == NULL) return NULL; rules->trie_nodes_max = PREALLOC_TRIE; - /* offset 0 is reserved for the null trie node */ + /* offset 0 is the trie root */ + memset(rules->trie_nodes, 0x00, sizeof(struct trie_node)); rules->trie_nodes_cur = 1; - rules->trie_childs = malloc(PREALLOC_TRIE * sizeof(struct trie_child)); - if (rules->trie_childs == NULL) - return NULL; - rules->trie_childs_max = PREALLOC_TRIE; - /* offset 0 is reserved for the null child node */ - rules->trie_childs_cur = 1; - - rules->trie_root = calloc(UCHAR_MAX + 1, sizeof(unsigned short)); - if (udev_get_rules_path(udev) != NULL) { /* custom rules location for testing */ add_matching_files(udev, &file_list, udev_get_rules_path(udev), ".rules"); @@ -1890,22 +1837,15 @@ struct udev_rules *udev_rules_new(struct udev *udev, int resolve_names) } info(udev, "rules use %zu bytes tokens (%u * %zu bytes), %zu bytes buffer\n", rules->token_max * sizeof(struct token), rules->token_max, sizeof(struct token), rules->buf_max); - info(udev, "temporary index used %zu bytes (%u * %zu bytes nodes, %u * %zu bytes child links)\n", - rules->trie_nodes_cur * sizeof(struct trie_node) + rules->trie_childs_cur * sizeof(struct trie_child), - rules->trie_nodes_cur, sizeof(struct trie_node), - rules->trie_childs_cur, sizeof(struct trie_child)); + info(udev, "temporary index used %zu bytes (%u * %zu bytes)\n", + rules->trie_nodes_cur * sizeof(struct trie_node), + rules->trie_nodes_cur, sizeof(struct trie_node)); /* cleanup trie */ free(rules->trie_nodes); rules->trie_nodes = NULL; rules->trie_nodes_cur = 0; rules->trie_nodes_max = 0; - free(rules->trie_childs); - rules->trie_childs = NULL; - rules->trie_childs_cur = 0; - rules->trie_childs_max = 0; - free(rules->trie_root); - rules->trie_root = NULL; /* cleanup uid/gid cache */ free(rules->uids); @@ -1928,8 +1868,6 @@ void udev_rules_unref(struct udev_rules *rules) free(rules->tokens); free(rules->buf); free(rules->trie_nodes); - free(rules->trie_childs); - free(rules->trie_root); free(rules->uids); free(rules->gids); free(rules); -- To unsubscribe from this list: send the line "unsubscribe linux-hotplug" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html