Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> writes: > @@ -1749,16 +1750,16 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, > * number of bytes to be stripped from the end of the previous name, > * and the bytes to append to the result, to come up with its name. > */ > - int expand_name_field = version == 4; > + int expand_name_field = version >= 4; The code can be lazy like this, instead of being more descriptive to say "version 4 or 5", because we won't accept version 6 or later anyway. Which is OK, I guess. > if (flags & CE_EXTENDED) { > - struct ondisk_cache_entry_extended *ondisk2; > + const struct ondisk_cache_entry_extended *ondisk2 = mmap; > int extended_flags; > - ondisk2 = (struct ondisk_cache_entry_extended *)ondisk; > + > extended_flags = get_be16(&ondisk2->flags2) << 16; > /* We do not yet understand any bit out of CE_EXTENDED_FLAGS */ > if (extended_flags & ~CE_EXTENDED_FLAGS) This part may be a good clean-up regardless. > @@ -1820,6 +1821,113 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, > return ce; > } > > +enum same_value_bit { > + DELTA_FORMAT = 1 << 0, > + SAME_CTIME = 1 << 1, /* only covers sec, not nsec */ > + SAME_MTIME = 1 << 2, /* only covers sec, not nsec */ > + SAME_DEV = 1 << 3, > + SAME_INO = 1 << 4, > + SAME_MODE = 1 << 5, > + SAME_UID = 1 << 6, > + SAME_GID = 1 << 7, > + SAME_FLAGS = 1 << 7 > +}; Hmph, really? > +static struct cache_entry no_previous_ce; > + > +static uintmax_t decode_varoffset(const unsigned char **bufp, uintmax_t prev) > +{ > + uintmax_t val = decode_varint(bufp); You'd need to check for two error conditions: (1) !val, which indicates an overflow of the varint, and (2) bufp, after decoding, having over-read the mmapped index file. We may want to improve decode_varint() API so that we can detect truncated data (i.e. (2)) more reliably without first reading too much. Loose error checking like this would make good targets for fuzz tests, I suspect. > + return val & 1 ? 
prev - (val >> 1) : prev + (val >> 1); > +} So, the LSB is used for sign, and the magnitude is shifted by one? OK. > +static uintmax_t decode_varoffset_same(const unsigned char **bufp, uintmax_t prev, > + int same_flag) > +{ > + return same_flag ? prev : decode_varoffset(bufp, prev); > +} > + > +static uintmax_t decode_varint_same(const unsigned char **bufp, uintmax_t prev, > + int same_flag) > +{ > + return same_flag ? prev : decode_varint(bufp); > +} Likewise about two error conditions. > @@ -1967,12 +2075,18 @@ static unsigned long load_cache_entry_block(struct index_state *istate, > unsigned long src_offset = start_offset; > > for (i = offset; i < offset + nr; i++) { > - struct ondisk_cache_entry *disk_ce; > struct cache_entry *ce; > unsigned long consumed; > > - disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset); > - ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, &consumed, previous_ce); > + if (istate->version <= 4) > + ce = create_from_disk(ce_mem_pool, istate->version, > + mmap + src_offset, &consumed, > + previous_ce); > + else > + ce = create_from_disk_v5(ce_mem_pool, > + mmap + src_offset, > + &consumed, > + previous_ce); This goes directly against the spirit of "create_from_disk()" internal API, doesn't it? It takes istate->version because it is an implementation detail of that function how bytes at &mmap[src_offset] are consumed, possibly using previous_ce information. IOW, I think the version dependent switch should go inside that function and not in this loop. > +static int ce_write_varint(git_hash_ctx *c, int fd, uintmax_t value) > +{ > + unsigned char varint[16]; We may want to do something about these "16". > +static int ce_write_varoffset(git_hash_ctx *c, int fd, uintmax_t next, uintmax_t prev) > +{ > + unsigned char varint[16];