$ time ./git rev-list HEAD --count 44271 print_time_spent_object: measure time needed for resolving hash collisions This shows that we roughly spent a third of the time in resolving hash collisions: I am using linux.git to measure: $ time git rev-list --objects --count HEAD >/dev/null print_time_spent_resolving_hash_collisions 9:445591664 real 0m31.733s user 0m31.220s sys 0m0.463s For fun I reverted 9a414486d9f0: $ time git rev-list --objects --count HEAD >/dev/null print_time_spent_resolving_hash_collisions 14:338605504 real 0m37.008s user 0m36.524s sys 0m0.423s The 17% that Jeff measured in there, equals to 1-31.733/37.008 = 0.14 in these measurements. The time spent in collision resolving went down by 1/3 on itself (14.33s to 9.44s), so there is still room for improvement. Signed-off-by: Stefan Beller <sbeller@xxxxxxxxxx> --- According to Jeff, sending patches that don't get accepted is the new hype! builtin/rev-list.c | 2 ++ object.c | 36 ++++++++++++++++++++++++++++++++++++ object.h | 2 ++ 3 files changed, 40 insertions(+) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 8479f6e..d0e5922 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -412,5 +412,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) printf("%d\n", revs.count_left + revs.count_right); } + print_time_spent_resolving_hash_collisions(); + return 0; } diff --git a/object.c b/object.c index 67d9a9e..e9e73e0 100644 --- a/object.c +++ b/object.c @@ -5,9 +5,40 @@ #include "commit.h" #include "tag.h" +#include <time.h> + static struct object **obj_hash; static int nr_objs, obj_hash_size; +struct timespec caching = {0, 0}; + +void diff(struct timespec *start, struct timespec *end, struct timespec *dst) +{ + if ((end->tv_nsec-start->tv_nsec)<0) { + dst->tv_sec = end->tv_sec-start->tv_sec-1; + dst->tv_nsec = 1000000000+end->tv_nsec-start->tv_nsec; + } else { + dst->tv_sec = end->tv_sec-start->tv_sec; + dst->tv_nsec = end->tv_nsec-start->tv_nsec; + } +} + +void add_time_to(struct timespec *dst, struct timespec *addend) +{ + dst->tv_sec += addend->tv_sec; + dst->tv_nsec += addend->tv_nsec; + while (dst->tv_nsec > 1000000000) { + dst->tv_nsec -= 1000000000; + dst->tv_sec ++; + } +} + +void print_time_spent_resolving_hash_collisions(void) +{ + fprintf(stderr, "print_time_spent_resolving_hash_collisions %ld:%9ld\n", + (long)caching.tv_sec, (long)caching.tv_nsec); +} + unsigned int get_max_object_index(void) { return obj_hash_size; @@ -86,11 +117,13 @@ struct object *lookup_object(const unsigned char *sha1) { unsigned int i, first; struct object *obj; + struct timespec time1, time2, t_diff; if (!obj_hash) return NULL; first = i = hash_obj(sha1, obj_hash_size); + clock_gettime(CLOCK_MONOTONIC, &time1); while ((obj = obj_hash[i]) != NULL) { if (!hashcmp(sha1, obj->oid.hash)) break; @@ -98,6 +131,9 @@ struct object *lookup_object(const unsigned char *sha1) if (i == obj_hash_size) i = 0; } + clock_gettime(CLOCK_MONOTONIC, &time2); + diff(&time1, &time2, &t_diff); + add_time_to(&caching, &t_diff); if (obj && i != first) { /* * Move object to where we started to look for it so diff --git a/object.h b/object.h index f8b6442..ee6ab3a 100644 --- a/object.h +++ b/object.h @@ -56,6 +56,8 @@ extern const char *typename(unsigned int type); extern int type_from_string_gently(const char *str, ssize_t, int gentle); #define type_from_string(str) type_from_string_gently(str, -1, 0) +void print_time_spent_resolving_hash_collisions(void); + /* * Return the current number of buckets in the object hashmap. */ -- 2.10.0.130.gef2bcd7.dirty