In a repository with a large number of refs, calling lstat for non-existent loose objects makes `git fetch` slow. This patch stores the existing loose objects in a hashmap beforehand and uses it to check for existence instead of calling lstat. With this patch, the number of lstat calls in `git fetch` is reduced from 411412 to 13794 for the chromium repository. I timed `git fetch` with quickfetch disabled on the chromium repository 3 times on Linux with an SSD. * with this patch 8.105s 8.309s 7.640s avg: 8.018s * master 12.287s 11.175s 12.227s avg: 11.896s On my MacBook Air, which has a slower lstat: * with this patch 14.501s * master 1m16.027s `git fetch` on a slow disk will be greatly improved. Signed-off-by: Takuto Ikuta <tikuta@xxxxxxxxxxxx> --- cache.h | 2 ++ fetch-pack.c | 22 +++++++++++++++++++--- sha1_file.c | 3 +++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/cache.h b/cache.h index d06932ed0..db38db40e 100644 --- a/cache.h +++ b/cache.h @@ -1773,6 +1773,8 @@ struct object_info { #define OBJECT_INFO_SKIP_CACHED 4 /* Do not retry packed storage after checking packed and loose storage */ #define OBJECT_INFO_QUICK 8 +/* Do not check loose object */ +#define OBJECT_INFO_SKIP_LOOSE 16 extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); /* diff --git a/fetch-pack.c b/fetch-pack.c index d97461296..1658487f7 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -711,6 +711,15 @@ static void mark_alternate_complete(struct object *obj) mark_complete(&obj->oid); } +static int add_loose_objects_to_set(const struct object_id *oid, + const char *path, + void *data) +{ + struct oidset* set = (struct oidset*)(data); + oidset_insert(set, oid); + return 0; +} + static int everything_local(struct fetch_pack_args *args, struct ref **refs, struct ref **sought, int nr_sought) @@ -719,16 +728,21 @@ static int everything_local(struct fetch_pack_args *args, int retval; int old_save_commit_buffer = save_commit_buffer; timestamp_t cutoff = 0; + struct
oidset loose_oid_set = OIDSET_INIT; + + for_each_loose_object(add_loose_objects_to_set, &loose_oid_set, 0); save_commit_buffer = 0; for (ref = *refs; ref; ref = ref->next) { struct object *o; + unsigned int flag = OBJECT_INFO_QUICK; - if (!has_object_file_with_flags(&ref->old_oid, - OBJECT_INFO_QUICK)) - continue; + if (!oidset_contains(&loose_oid_set, &ref->old_oid)) + flag |= OBJECT_INFO_SKIP_LOOSE; + if (!has_object_file_with_flags(&ref->old_oid, flag)) + continue; o = parse_object(&ref->old_oid); if (!o) continue; @@ -744,6 +758,8 @@ static int everything_local(struct fetch_pack_args *args, } } + oidset_clear(&loose_oid_set); + if (!args->no_dependents) { if (!args->deepen) { for_each_ref(mark_complete_oid, NULL); diff --git a/sha1_file.c b/sha1_file.c index 1b94f39c4..c903cbcec 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1262,6 +1262,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, if (find_pack_entry(real, &e)) break; + if (flags & OBJECT_INFO_SKIP_LOOSE) + return -1; + /* Most likely it's a loose object. */ if (!sha1_loose_object_info(real, oi, flags)) return 0; -- 2.16.2