For the sake of documentation, I worked on this patch but I don't there was a measurable improvement (hard to tell with variable network conditions) and it increased memory usage to around 380M. I wanted to reduce the list scanning in fill_active_slot() by deleting during iteration, but I'm not sure it helps since the loop in that is nowhere near as bad as the prefetch() insertion loop fixed in 3/3. list_for_each in fetch_object() also tends hit the first object in the list when iterating, so there's no improvement I see with this patch. -----8<------ Subject: [PATCH] http-walker: use hashmap to reduce list scan We can reduce list walking in fill_active_slot by deleting items as we walk through the object request queue of pending objects. However, we still need to maintain a mapping of live objects for fetch_object, so introduce the use of a hashmap to keep track of all live object requests in O(1) average time. Signed-off-by: Eric Wong <e@xxxxxxxxx> --- http-walker.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/http-walker.c b/http-walker.c index 0b24255..8d27707 100644 --- a/http-walker.c +++ b/http-walker.c @@ -3,6 +3,7 @@ #include "walker.h" #include "http.h" #include "list.h" +#include "hashmap.h" struct alt_base { char *base; @@ -19,6 +20,7 @@ enum object_request_state { }; struct object_request { + struct hashmap_entry ent; struct walker *walker; unsigned char sha1[20]; struct alt_base *repo; @@ -43,6 +45,7 @@ struct walker_data { }; static LIST_HEAD(object_queue_head); +static struct hashmap *object_requests; static void fetch_alternates(struct walker *walker, const char *base); @@ -114,7 +117,11 @@ static void release_object_request(struct object_request *obj_req) if (obj_req->req !=NULL && obj_req->req->localfile != -1) error("fd leakage in release: %d", obj_req->req->localfile); - list_del(&obj_req->node); + /* XXX seems unnecessary with list_del in fill_active_slot */ + if (!list_empty(&obj_req->node)) + list_del(&obj_req->node); + + hashmap_remove(object_requests, obj_req, obj_req->sha1); free(obj_req); } @@ -127,6 +134,8 @@ static int fill_active_slot(struct walker *walker) list_for_each_safe(pos, tmp, head) { obj_req = list_entry(pos, struct object_request, node); if (obj_req->state == WAITING) { + /* _init so future list_del is idempotent */ + list_del_init(pos); if (has_sha1_file(obj_req->sha1)) obj_req->state = COMPLETE; else { @@ -145,6 +154,8 @@ static void prefetch(struct walker *walker, unsigned char *sha1) struct walker_data *data = walker->data; newreq = xmalloc(sizeof(*newreq)); + hashmap_entry_init(&newreq->ent, sha1hash(sha1)); + hashmap_add(object_requests, &newreq->ent); newreq->walker = walker; hashcpy(newreq->sha1, sha1); newreq->repo = data->alt; @@ -435,15 +446,12 @@ static int fetch_object(struct walker *walker, unsigned char *sha1) { char *hex = sha1_to_hex(sha1); int ret = 0; - struct object_request *obj_req = NULL; + struct object_request *obj_req; + struct hashmap_entry key; struct http_object_request *req; - struct list_head *pos, *head = &object_queue_head; - list_for_each(pos, head) { - obj_req = list_entry(pos, struct object_request, node); - if (!hashcmp(obj_req->sha1, sha1)) - break; - } + hashmap_entry_init(&key, sha1hash(sha1)); + obj_req = hashmap_get(object_requests, &key, sha1); if (obj_req == NULL) return error("Couldn't find request for %s in the queue", hex); @@ -553,6 +561,12 @@ static void cleanup(struct walker *walker) } } +static int obj_req_cmp(const struct object_request *e1, + const struct object_request *e2, const unsigned char *sha1) +{ + return hashcmp(e1->sha1, sha1 ? sha1 : e2->sha1); +} + struct walker *get_http_walker(const char *url) { char *s; @@ -580,5 +594,10 @@ struct walker *get_http_walker(const char *url) add_fill_function(walker, (int (*)(void *)) fill_active_slot); #endif + if (!object_requests) { + object_requests = xmalloc(sizeof(*object_requests)); + hashmap_init(object_requests, (hashmap_cmp_fn)obj_req_cmp, 0); + } + return walker; } -- EW -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html