[REJECT 4/3] http-walker: use hashmap to reduce list scan

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



For the sake of documentation, I worked on this patch but I
don't there was a measurable improvement (hard to tell with
variable network conditions) and it increased memory usage to
around 380M.

I wanted to reduce the list scanning in fill_active_slot() by
deleting during iteration, but I'm not sure it helps since the
loop in that is nowhere near as bad as the prefetch() insertion
loop fixed in 3/3.

list_for_each in fetch_object() also tends hit the first object
in the list when iterating, so there's no improvement I see
with this patch.

-----8<------
Subject: [PATCH] http-walker: use hashmap to reduce list scan

We can reduce list walking in fill_active_slot by deleting items
as we walk through the object request queue of pending objects.

However, we still need to maintain a mapping of live objects
for fetch_object, so introduce the use of a hashmap to keep
track of all live object requests in O(1) average time.

Signed-off-by: Eric Wong <e@xxxxxxxxx>
---
 http-walker.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/http-walker.c b/http-walker.c
index 0b24255..8d27707 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -3,6 +3,7 @@
 #include "walker.h"
 #include "http.h"
 #include "list.h"
+#include "hashmap.h"
 
 struct alt_base {
 	char *base;
@@ -19,6 +20,7 @@ enum object_request_state {
 };
 
 struct object_request {
+	struct hashmap_entry ent;
 	struct walker *walker;
 	unsigned char sha1[20];
 	struct alt_base *repo;
@@ -43,6 +45,7 @@ struct walker_data {
 };
 
 static LIST_HEAD(object_queue_head);
+static struct hashmap *object_requests;
 
 static void fetch_alternates(struct walker *walker, const char *base);
 
@@ -114,7 +117,11 @@ static void release_object_request(struct object_request *obj_req)
 	if (obj_req->req !=NULL && obj_req->req->localfile != -1)
 		error("fd leakage in release: %d", obj_req->req->localfile);
 
-	list_del(&obj_req->node);
+	/* XXX seems unnecessary with list_del in fill_active_slot */
+	if (!list_empty(&obj_req->node))
+		list_del(&obj_req->node);
+
+	hashmap_remove(object_requests, obj_req, obj_req->sha1);
 	free(obj_req);
 }
 
@@ -127,6 +134,8 @@ static int fill_active_slot(struct walker *walker)
 	list_for_each_safe(pos, tmp, head) {
 		obj_req = list_entry(pos, struct object_request, node);
 		if (obj_req->state == WAITING) {
+			/* _init so future list_del is idempotent */
+			list_del_init(pos);
 			if (has_sha1_file(obj_req->sha1))
 				obj_req->state = COMPLETE;
 			else {
@@ -145,6 +154,8 @@ static void prefetch(struct walker *walker, unsigned char *sha1)
 	struct walker_data *data = walker->data;
 
 	newreq = xmalloc(sizeof(*newreq));
+	hashmap_entry_init(&newreq->ent, sha1hash(sha1));
+	hashmap_add(object_requests, &newreq->ent);
 	newreq->walker = walker;
 	hashcpy(newreq->sha1, sha1);
 	newreq->repo = data->alt;
@@ -435,15 +446,12 @@ static int fetch_object(struct walker *walker, unsigned char *sha1)
 {
 	char *hex = sha1_to_hex(sha1);
 	int ret = 0;
-	struct object_request *obj_req = NULL;
+	struct object_request *obj_req;
+	struct hashmap_entry key;
 	struct http_object_request *req;
-	struct list_head *pos, *head = &object_queue_head;
 
-	list_for_each(pos, head) {
-		obj_req = list_entry(pos, struct object_request, node);
-		if (!hashcmp(obj_req->sha1, sha1))
-			break;
-	}
+	hashmap_entry_init(&key, sha1hash(sha1));
+	obj_req = hashmap_get(object_requests, &key, sha1);
 	if (obj_req == NULL)
 		return error("Couldn't find request for %s in the queue", hex);
 
@@ -553,6 +561,12 @@ static void cleanup(struct walker *walker)
 	}
 }
 
+static int obj_req_cmp(const struct object_request *e1,
+		const struct object_request *e2, const unsigned char *sha1)
+{
+	return hashcmp(e1->sha1, sha1 ? sha1 : e2->sha1);
+}
+
 struct walker *get_http_walker(const char *url)
 {
 	char *s;
@@ -580,5 +594,10 @@ struct walker *get_http_walker(const char *url)
 	add_fill_function(walker, (int (*)(void *)) fill_active_slot);
 #endif
 
+	if (!object_requests) {
+		object_requests = xmalloc(sizeof(*object_requests));
+		hashmap_init(object_requests, (hashmap_cmp_fn)obj_req_cmp, 0);
+	}
+
 	return walker;
 }
-- 
EW
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]