Re: [PATCH] udevd: de-duplicate strings in rules

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2008-11-12 at 20:48 +0000, Alan Jenkins wrote:
> Kay Sievers wrote:
> > On Wed, Nov 12, 2008 at 19:05, Alan Jenkins <alan-jenkins@xxxxxxxxxxxxxx> wrote:   
> >> Kay Sievers wrote:
> >>> On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@xxxxxxxxxxxxxx> wrote:
> >>>> Kay Sievers wrote:
> >>>>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@xxxxxxxx> wrote:
> >>>>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@xxxxxxxxxxxxxx> wrote:
> >>>>>>
> >>>>>>> On my Ubuntu installation this removes 15k of duplicate strings,
> >>>>>>> using a temporary index of about 25k.
> >>>>>>>               
> >>>>>> Great. That looks nice.
> >>>>>>
> >>>>>> Thats's the diff of the rule dump before and after the patch:
> >>>>>>  ...
> >>>>>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
> >>>>>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
> >>>>>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
> >>>>>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
> >>>>>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
> >>>>>>
> >>>>>>             
> >>>>> I split the nodes and the childs in two independent arrays, so we got
> >>>>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
> >>>>> with the huge rules set here. Unlimited childs in the index removes
> >>>>> another 3 kB of duplicates, and the temporary index seems also a bit
> >>>>> smaller:
> >>>>>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
> >>>>>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
> >>>>> child links)
> >>>>>
> >>>>> Would be great, if you can check if it still works for you as expected. :)
> >>>>>
> >>>>>           
> >>>> Did you have a particular reason to keep the trie_root array?  Now
> >>>> there's no fixed limit on children, you could just use trie[1] as the
> >>>> root node.  Remove the special case for depth == 0.  And initialize it's
> >>>> value and length to 0, then you can remove the special case for len == 0.
> >>>>
> >>>>         
> >>> No special reason, I thought about that too, but it was already 5am,
> >>> and I was unable to think it through. :)
> >>>
> >>> Sounds nice to do that, did you try already, have a patch?
> >>>
> >>>       
> >> No, sorry :).
> >>     
> >
> > Ah, now by looking at it, maybe the then needed linear search for the
> > key in the root is not as good as the plain root array index?
> >   
> Mmm.  Ok, without the root array add_string() takes twice as long, which 
> increases the total rules-loading time by 10%.  (user time measured by 
> cachegrind).  Let's leave it.

Hmm, now I liked the idea. :)

How about this? It has only a single array again, and no root, and no
child limits. Seems to work fine, but, it looks somehow too simple
now. :)

  rules use 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
  temporary index used 21520 bytes (1076 * 20 bytes)


diff --git a/udev/udev-rules.c b/udev/udev-rules.c
index 1f28e4f..80c61b5 100644
--- a/udev/udev-rules.c
+++ b/udev/udev-rules.c
@@ -41,17 +41,16 @@ struct uid_gid {
 	};
 };
 
-struct trie_child {
-	unsigned int next_idx;
-	unsigned int node_idx;
-	unsigned char key;
-};
-
 struct trie_node {
+	/* this nodes child list */
 	unsigned int child_idx;
+	/* the next child of our parent node's child list */
+	unsigned int next_child_idx;
+	/* this nodes last child (shortcut) */
 	unsigned int last_child_idx;
 	unsigned int value_off;
 	unsigned short value_len;
+	unsigned char key;
 };
 
 struct udev_rules {
@@ -70,13 +69,9 @@ struct udev_rules {
 	unsigned int buf_count;
 
 	/* during rule parsing, strings are indexed to find duplicates */
-	unsigned int *trie_root;
 	struct trie_node *trie_nodes;
 	unsigned int trie_nodes_cur;
 	unsigned int trie_nodes_max;
-	struct trie_child *trie_childs;
-	unsigned int trie_childs_cur;
-	unsigned int trie_childs_max;
 
 	/* during rule parsing, uid/gid lookup results are cached */
 	struct uid_gid *uids;
@@ -453,6 +448,7 @@ static int add_string(struct udev_rules *rules, const char *str)
 	unsigned short len;
 	unsigned int depth;
 	unsigned int off;
+	struct trie_node *parent;
 
 	len = strlen(str);
 
@@ -460,37 +456,31 @@ static int add_string(struct udev_rules *rules, const char *str)
 	if (len == 0)
 		return 0;
 
-	/* descend root - start from last character of str */
+	/* walk trie, start from last character of str to find matching tails */
+	node_idx = 0;
 	key = str[len - 1];
-	node_idx = rules->trie_root[key];
-	depth = 0;
-
-	/* descend suffix trie */
-	if (node_idx > 0) {
-		while (1) {
-			struct trie_node *node;
-			unsigned int child_idx;
-
-			node = &rules->trie_nodes[node_idx];
-			depth++;
-			off = node->value_off + node->value_len - len;
-
-			/* match against current node */
-			if (depth == len || (node->value_len >= len && memcmp(&rules->buf[off], str, len) == 0))
-				return off;
-
-			/* lookup child node */
-			key = str[len - 1 - depth];
-			child_idx = node->child_idx;
-			while (child_idx > 0) {
-				if (rules->trie_childs[child_idx].key == key)
-					break;
-				child_idx = rules->trie_childs[child_idx].next_idx;
-			}
-			if (child_idx == 0)
+	for (depth = 0; depth <= len; depth++) {
+		struct trie_node *node;
+		unsigned int child_idx;
+
+		node = &rules->trie_nodes[node_idx];
+		off = node->value_off + node->value_len - len;
+
+		/* match against current node */
+		if (depth == len || (node->value_len >= len && memcmp(&rules->buf[off], str, len) == 0))
+			return off;
+
+		/* lookup child node */
+		key = str[len - 1 - depth];
+		child_idx = node->child_idx;
+		while (child_idx > 0) {
+			if (rules->trie_nodes[child_idx].key == key)
 				break;
-			node_idx = rules->trie_childs[child_idx].node_idx;
+			child_idx = rules->trie_nodes[child_idx].next_child_idx;
 		}
+		if (child_idx == 0)
+			break;
+		node_idx = child_idx;
 	}
 
 	/* string not found, add it */
@@ -515,25 +505,6 @@ static int add_string(struct udev_rules *rules, const char *str)
 		rules->trie_nodes_max += add;
 	}
 
-	/* grow trie childs if needed */
-	if (rules->trie_childs_cur >= rules->trie_childs_max) {
-		struct trie_child *childs;
-		unsigned int add;
-
-		/* double the buffer size */
-		add = rules->trie_childs_max;
-		if (add < 8)
-			add = 8;
-
-		childs = realloc(rules->trie_childs, (rules->trie_childs_max + add) * sizeof(struct trie_child));
-		if (childs == NULL)
-			return -1;
-		dbg(rules->udev, "extend trie childs from %u to %u\n",
-		    rules->trie_childs_max, rules->trie_childs_max + add);
-		rules->trie_childs = childs;
-		rules->trie_childs_max += add;
-	}
-
 	/* get new node */
 	new_node_idx = rules->trie_nodes_cur;
 	rules->trie_nodes_cur++;
@@ -542,36 +513,20 @@ static int add_string(struct udev_rules *rules, const char *str)
 	new_node->value_len = len;
 	new_node->child_idx = 0;
 	new_node->last_child_idx = 0;
+	new_node->next_child_idx = 0;
+	new_node->key = key;
 
-	if (depth == 0) {
-		/* add node to root */
-		rules->trie_root[key] = new_node_idx;
+	/* append child link to list of childs of parent */
+	parent = &rules->trie_nodes[node_idx];
+	if (parent->child_idx == 0) {
+		parent->child_idx = new_node_idx;
 	} else {
-		/* add node to parent */
-		struct trie_node *parent;
-		struct trie_child *new_child;
-		unsigned int new_child_idx;
-
-		/* get new child link for list of childs of parent */
-		new_child_idx = rules->trie_childs_cur;
-		rules->trie_childs_cur++;
-		new_child = &rules->trie_childs[new_child_idx];
-		new_child->next_idx = 0;
-		new_child->node_idx = new_node_idx;
-		new_child->key = key;
-
-		/* append child link to list of childs of parent */
-		parent = &rules->trie_nodes[node_idx];
-		if (parent->child_idx == 0) {
-			parent->child_idx = new_child_idx;
-		} else {
-			struct trie_child *last_child;
+		struct trie_node *last_child;
 
-			last_child = &rules->trie_childs[parent->last_child_idx];
-			last_child->next_idx = new_child_idx;
-		}
-		parent->last_child_idx = new_child_idx;
+		last_child = &rules->trie_nodes[parent->last_child_idx];
+		last_child->next_child_idx = new_node_idx;
 	}
+	parent->last_child_idx = new_node_idx;
 	return off;
 }
 
@@ -1766,18 +1721,10 @@ struct udev_rules *udev_rules_new(struct udev *udev, int resolve_names)
 	if (rules->trie_nodes == NULL)
 		return NULL;
 	rules->trie_nodes_max = PREALLOC_TRIE;
-	/* offset 0 is reserved for the null trie node */
+	/* offset 0 is the trie root */
+	memset(rules->trie_nodes, 0x00, sizeof(struct trie_node));
 	rules->trie_nodes_cur = 1;
 
-	rules->trie_childs = malloc(PREALLOC_TRIE * sizeof(struct trie_child));
-	if (rules->trie_childs == NULL)
-		return NULL;
-	rules->trie_childs_max = PREALLOC_TRIE;
-	/* offset 0 is reserved for the null child node */
-	rules->trie_childs_cur = 1;
-
-	rules->trie_root = calloc(UCHAR_MAX + 1, sizeof(unsigned short));
-
 	if (udev_get_rules_path(udev) != NULL) {
 		/* custom rules location for testing */
 		add_matching_files(udev, &file_list, udev_get_rules_path(udev), ".rules");
@@ -1890,22 +1837,15 @@ struct udev_rules *udev_rules_new(struct udev *udev, int resolve_names)
 	}
 	info(udev, "rules use %zu bytes tokens (%u * %zu bytes), %zu bytes buffer\n",
 	     rules->token_max * sizeof(struct token), rules->token_max, sizeof(struct token), rules->buf_max);
-	info(udev, "temporary index used %zu bytes (%u * %zu bytes nodes, %u * %zu bytes child links)\n",
-	     rules->trie_nodes_cur * sizeof(struct trie_node) + rules->trie_childs_cur * sizeof(struct trie_child),
-	     rules->trie_nodes_cur, sizeof(struct trie_node),
-	     rules->trie_childs_cur, sizeof(struct trie_child));
+	info(udev, "temporary index used %zu bytes (%u * %zu bytes)\n",
+	     rules->trie_nodes_cur * sizeof(struct trie_node),
+	     rules->trie_nodes_cur, sizeof(struct trie_node));
 
 	/* cleanup trie */
 	free(rules->trie_nodes);
 	rules->trie_nodes = NULL;
 	rules->trie_nodes_cur = 0;
 	rules->trie_nodes_max = 0;
-	free(rules->trie_childs);
-	rules->trie_childs = NULL;
-	rules->trie_childs_cur = 0;
-	rules->trie_childs_max = 0;
-	free(rules->trie_root);
-	rules->trie_root = NULL;
 
 	/* cleanup uid/gid cache */
 	free(rules->uids);
@@ -1928,8 +1868,6 @@ void udev_rules_unref(struct udev_rules *rules)
 	free(rules->tokens);
 	free(rules->buf);
 	free(rules->trie_nodes);
-	free(rules->trie_childs);
-	free(rules->trie_root);
 	free(rules->uids);
 	free(rules->gids);
 	free(rules);



--
To unsubscribe from this list: send the line "unsubscribe linux-hotplug" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel]     [Linux DVB]     [Asterisk Internet PBX]     [DCCP]     [Netdev]     [X.org]     [Util Linux NG]     [Fedora Women]     [ALSA Devel]     [Linux USB]

  Powered by Linux