On Sun, 15 Sep 2013, Duy Nguyen wrote: > On Sat, Sep 14, 2013 at 11:22 AM, Nicolas Pitre <nico@xxxxxxxxxxx> wrote: > > The cache is currently updated by the caller. The caller may ask for a > > copy of 2 entries from a base object, but that base object may itself > > copy those objects from somewhere else in a larger chunk. > > > > Let's consider this example: > > > > tree A > > ------ > > 0 (0) copy 2 entries from tree B starting at entry 0 > > 1 (2) copy 1 entry from tree B starting at entry 3 > > > > tree B > > ------ > > 0 (0) copy 6 entries from tree C starting at entry 0 > > 1 (6) entry "foo.txt" > > 2 (7) entry "bar.txt" > > > > Right now, the code calls decode_entries() to decode 2 entries from tree > > B but those entries are part of a copy from tree C. When that call > > returns, the cache is updated as if tree B entry #2 would start at > > offset 1 but this is wrong because offset 0 in tree B covers 6 entries > > and therefore offset 1 is for entry #6. > > > > So this needs a rethink. > > I've given it some thought and see no simple/efficient way do it when > 2+ depth is involved. Ideally tree A should refer to tree C directly > for the first two entries, but in general we can't enforce that a copy > sequence must refer to non-copy sequences only. Caching flattened tree > B up until the 6th entry may help, but then there's no need to cache > offsets anymore because we could just cache tree A.. Well... for the case where tree A should refer to tree C directly, that should be optimized by packv4-create/pack-objects. But as far as my offset cache is concerned, I came around to rethink it. I managed to write decent and logical code this time. The speed-up is significant even without any tuning. Here it is: commit aa43ec18956a2c835112f086a0a59d7fbc35a021 Author: Nicolas Pitre <nico@xxxxxxxxxxx> Date: Fri Sep 13 20:56:31 2013 -0400 packv4-parse.c: add tree offset caching It is a common pattern to see a tree object copy a few entries from another tree object, possibly from a non zero offset, then provide a few entries of its own just to go back to the previous tree object to copy some more entries. Each time this happens, the tree object being copied has to be parsed from the beginning over again up to the desired offset which is rather wasteful. Let's introduce a tree offset cache to record the entry position and offset when tree parsing stops so a subsequent copy from the same tree object can be resumed without having to start from the beginning all the time. Without doing further tuning on this cache, performing "git rev-list --all --objects | wc -l" on my copy of git.git went from 14.5s down to 6.6s. Signed-off-by: Nicolas Pitre <nico@xxxxxxxxxxx> diff --git a/packv4-parse.c b/packv4-parse.c index 38c22b0..b8af702 100644 --- a/packv4-parse.c +++ b/packv4-parse.c @@ -378,6 +378,40 @@ static int copy_canonical_tree_entries(struct packed_git *p, off_t offset, return 0; } +/* ordering is so that member alignment takes the least amount of space */ +struct pv4_tree_cache { + off_t base_offset; + off_t offset; + off_t last_copy_base; + struct packed_git *p; + unsigned int pos; + unsigned int nb_entries; +}; + +#define CACHE_SIZE 256 +static struct pv4_tree_cache pv4_tree_cache[CACHE_SIZE]; + +static struct pv4_tree_cache *get_tree_offset_cache(struct packed_git *p, off_t base_offset) +{ + struct pv4_tree_cache *c; + unsigned long hash; + + hash = (unsigned long)p + (unsigned long)base_offset; + hash += (hash >> 8) + (hash >> 16); + hash %= CACHE_SIZE; + + c = &pv4_tree_cache[hash]; + if (c->p != p || c->base_offset != base_offset) { + c->p = p; + c->base_offset = base_offset; + c->offset = 0; + c->last_copy_base = 0; + c->pos = 0; + c->nb_entries = 0; + } + return c; +} + static int tree_entry_prefix(unsigned char *buf, unsigned long size, const unsigned char *path, int path_len, unsigned mode) @@ -405,39 +439,72 @@ static int tree_entry_prefix(unsigned char *buf, unsigned long size, } static int decode_entries(struct packed_git *p, struct pack_window **w_curs, - off_t offset, unsigned int start, unsigned int count, - unsigned char **dstp, unsigned long *sizep, - int parse_hdr) + off_t obj_offset, unsigned int start, unsigned int count, + unsigned char **dstp, unsigned long *sizep) { unsigned long avail; - unsigned int nb_entries; const unsigned char *src, *scp; - off_t copy_objoffset = 0; + unsigned int curpos; + off_t offset, copy_objoffset; + struct pv4_tree_cache *c; + + c = get_tree_offset_cache(p, obj_offset); + if (count && start < c->nb_entries && start >= c->pos && + count <= c->nb_entries - start) { + offset = c->offset; + copy_objoffset = c->last_copy_base; + curpos = c->pos; + start -= curpos; + src = NULL; + avail = 0; + } else { + unsigned int nb_entries; - src = use_pack(p, w_curs, offset, &avail); - scp = src; + src = use_pack(p, w_curs, obj_offset, &avail); + scp = src; - if (parse_hdr) { /* we need to skip over the object header */ while (*scp & 128) if (++scp - src >= avail - 20) return -1; + /* is this a canonical tree object? */ - if ((*scp & 0xf) == OBJ_TREE) + if ((*scp & 0xf) == OBJ_TREE) { + offset = obj_offset + (scp - src); return copy_canonical_tree_entries(p, offset, start, count, dstp, sizep); + } + /* let's still make sure this is actually a pv4 tree */ if ((*scp++ & 0xf) != OBJ_PV4_TREE) return -1; - } - nb_entries = decode_varint(&scp); - if (scp == src || start > nb_entries || count > nb_entries - start) - return -1; - offset += scp - src; - avail -= scp - src; - src = scp; + nb_entries = decode_varint(&scp); + if (!count) + count = nb_entries; + if (!nb_entries || start > nb_entries || + count > nb_entries - start) + return -1; + + curpos = 0; + copy_objoffset = 0; + offset = obj_offset + scp - src; + avail -= scp - src; + src = scp; + + /* + * If this is a partial copy, let's (re)initialize a cache + * entry to speed things up if the remaining of this tree + * is needed in the future. + */ + if (start + count < nb_entries) { + c->offset = offset; + c->pos = 0; + c->nb_entries = nb_entries; + c->last_copy_base = 0; + } + } while (count) { unsigned int what; @@ -464,6 +531,7 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs, else while (*scp++ & 128); start--; + curpos++; } else if (!(what & 1) && start == 0) { /* * This is an actual tree entry to recreate. @@ -485,6 +553,7 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs, *dstp += len + 20; *sizep -= len + 20; count--; + curpos++; } else if (what & 1) { /* * Copy from another tree object. @@ -522,25 +591,34 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs, nth_packed_object_offset(p, index - 1); } } - if (!copy_objoffset) - return -1; copy_count >>= 1; + if (!copy_count || !copy_objoffset) + return -1; if (start >= copy_count) { start -= copy_count; + curpos += copy_count; } else { int ret; + copy_count -= start; copy_start += start; - start = 0; - if (copy_count > count) + if (copy_count > count) { copy_count = count; - count -= copy_count; - ret = decode_entries(p, w_curs, - copy_objoffset, copy_start, copy_count, - dstp, sizep, 1); + count = 0; + scp = src; + } else { + count -= copy_count; + curpos += start + copy_count; + start = 0; + } + + ret = decode_entries(p, w_curs, copy_objoffset, + copy_start, copy_count, + dstp, sizep); if (ret) return ret; + /* force pack window readjustment */ avail = scp - src; } @@ -551,27 +629,30 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs, src = scp; } + /* + * Update the cache if we didn't run through the entire tree. + * We have to "get" it again as a recursion into decode_entries() + * could have invalidated what we obtained initially. + */ + c = get_tree_offset_cache(p, obj_offset); + if (curpos < c->nb_entries) { + c->pos = curpos; + c->offset = offset; + c->last_copy_base = copy_objoffset; + } + return 0; } void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs, - off_t offset, unsigned long size) + off_t obj_offset, unsigned long size) { - unsigned long avail; - unsigned int nb_entries; unsigned char *dst, *dcp; - const unsigned char *src, *scp; int ret; - src = use_pack(p, w_curs, offset, &avail); - scp = src; - nb_entries = decode_varint(&scp); - if (scp == src) - return NULL; - dst = xmallocz(size); dcp = dst; - ret = decode_entries(p, w_curs, offset, 0, nb_entries, &dcp, &size, 0); + ret = decode_entries(p, w_curs, obj_offset, 0, 0, &dcp, &size); if (ret < 0 || size != 0) { free(dst); return NULL; diff --git a/packv4-parse.h b/packv4-parse.h index d674a3f..b437159 100644 --- a/packv4-parse.h +++ b/packv4-parse.h @@ -20,6 +20,6 @@ const unsigned char *get_sha1ref(struct packed_git *p, void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs, off_t offset, unsigned long size); void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs, - off_t offset, unsigned long size); + off_t obj_offset, unsigned long size); #endif diff --git a/sha1_file.c b/sha1_file.c index 038e22e..e98eb8b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2178,7 +2178,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, type -= 8; break; case OBJ_PV4_TREE: - data = pv4_get_tree(p, &w_curs, curpos, size); + data = pv4_get_tree(p, &w_curs, obj_offset, size); type -= 8; break; case OBJ_COMMIT: Nicolas -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html