"git rev-list --objects v1.8.4" time is reduced from 29s to 10s with this patch. But it still has a long way to go to catch up with v2 (4s). Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> --- The problem I see with decode_entries() is that given n copy sequences, it re-reads the same base n times. 30+ copy sequences are not unusual at all with git.git. I'm thinking of adding a cache to deal with one-base trees, which is all we have now. If we know in advance what base a tree needs without parsing the tree, we could unpack from base up like we do with ref-deltas. Because in this case we know the base is always flat, we could have a more efficient decode_entries that only goes through the base once. I want to get the timing down to as close as possible to v2 before adding a v4-aware interface. A pack cache is an idea Jeff has been cooking for a while. Maybe we could merge his work into pack v4 or require it when pack v4 is finally merged to 'next'. packv4-parse.c | 17 +++++++++++++++-- packv4-parse.h | 2 ++ sha1_file.c | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/packv4-parse.c b/packv4-parse.c index 5002f42..b8855b0 100644 --- a/packv4-parse.c +++ b/packv4-parse.c @@ -415,8 +415,20 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs, unsigned int nb_entries; const unsigned char *src, *scp; off_t copy_objoffset = 0; + const void *cached = NULL; + unsigned long cached_size, cached_v4_size; + + if (hdr) /* we need offset point at obj header */ + cached = get_cached_v4_tree(p, offset, + &cached_size, &cached_v4_size); + + if (cached) { + src = cached; + avail = cached_v4_size; + hdr = 0; + } else + src = use_pack(p, w_curs, offset, &avail); - src = use_pack(p, w_curs, offset, &avail); scp = src; if (hdr) { @@ -452,7 +464,8 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs, while (count) { unsigned int what; - if (avail < 20) { + /* fixme: need to put back the out-of-bound check when 
cached == 1 */ + if (!cached && avail < 20) { src = use_pack(p, w_curs, offset, &avail); if (avail < 20) return -1; diff --git a/packv4-parse.h b/packv4-parse.h index 647b73c..f584c31 100644 --- a/packv4-parse.h +++ b/packv4-parse.h @@ -16,6 +16,8 @@ unsigned long pv4_unpack_object_header_buffer(const unsigned char *base, unsigned long *sizep); const unsigned char *get_sha1ref(struct packed_git *p, const unsigned char **bufp); +const void *get_cached_v4_tree(struct packed_git *p, off_t base_offset, + unsigned long *size, unsigned long *v4_size); void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs, off_t offset, unsigned long size); diff --git a/sha1_file.c b/sha1_file.c index b176316..82570be 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1967,6 +1967,20 @@ static int in_delta_base_cache(struct packed_git *p, off_t base_offset) return eq_delta_base_cache_entry(ent, p, base_offset); } +const void *get_cached_v4_tree(struct packed_git *p, off_t base_offset, + unsigned long *size, unsigned long *v4_size) +{ + struct delta_base_cache_entry *ent; + ent = get_delta_base_cache_entry(p, base_offset); + + if (!eq_delta_base_cache_entry(ent, p, base_offset) || + ent->type != OBJ_PV4_TREE) + return NULL; + *size = ent->size; + *v4_size = ent->v4_size; + return ent->data; +} + static void clear_delta_base_cache_entry(struct delta_base_cache_entry *ent) { ent->data = NULL; -- 1.8.2.83.gc99314b -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html