Re: [PATCH 23/23] initial pack index v3 support on the read side

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Aug 27, 2013 at 11:26 AM, Nicolas Pitre <nico@xxxxxxxxxxx> wrote:
> A bit crud but good enough for now.

I wonder if we should keep a short SHA-1 table in .idx. An entry in
the original .idx v1 table is <SHA-1>+<offset> then offset moved out
to make the table more compact for binary search. I observe that we
don't always need 20 byte SHA-1 to uniquely identify an entry in a
pack, so the SHA-1 table could be split in two: one table contain the
first part of SHA-1, long enough to identify any entry in the pack;
the second table contains either full SHA-1 or the remaining part.
Binary search is done on the first table, if matched, full sha-1 from
the second table is compared. We already have the second sha-1 table
in .pack v4. We could add the first table in .idx v3.

On linux-2.6 even in one-pack configuration, we only need the first 6
bytes of sha-1 to identify an object. That would cut the bsearch sha-1
table to 1/4 full sha-1 table size.

>
> Signed-off-by: Nicolas Pitre <nico@xxxxxxxxxxx>
> ---
>  cache.h     |  1 +
>  sha1_file.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
>  2 files changed, 52 insertions(+), 7 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index b6634c4..63066a1 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1018,6 +1018,7 @@ extern struct packed_git {
>         off_t pack_size;
>         const void *index_data;
>         size_t index_size;
> +       const unsigned char *sha1_table;
>         uint32_t num_objects;
>         uint32_t num_bad_objects;
>         unsigned char *bad_object_sha1;
> diff --git a/sha1_file.c b/sha1_file.c
> index c2020d0..e9c54f4 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -504,7 +504,7 @@ static int check_packed_git_idx(const char *path,  struct packed_git *p)
>         hdr = idx_map;
>         if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
>                 version = ntohl(hdr->idx_version);
> -               if (version < 2 || version > 2) {
> +               if (version < 2 || version > 3) {
>                         munmap(idx_map, idx_size);
>                         return error("index file %s is version %"PRIu32
>                                      " and is not supported by this binary"
> @@ -539,12 +539,13 @@ static int check_packed_git_idx(const char *path,  struct packed_git *p)
>                         munmap(idx_map, idx_size);
>                         return error("wrong index v1 file size in %s", path);
>                 }
> -       } else if (version == 2) {
> +       } else if (version == 2 || version == 3) {
> +               unsigned long min_size, max_size;
>                 /*
>                  * Minimum size:
>                  *  - 8 bytes of header
>                  *  - 256 index entries 4 bytes each
> -                *  - 20-byte sha1 entry * nr
> +                *  - 20-byte sha1 entry * nr (version 2 only)
>                  *  - 4-byte crc entry * nr
>                  *  - 4-byte offset entry * nr
>                  *  - 20-byte SHA1 of the packfile
> @@ -553,8 +554,10 @@ static int check_packed_git_idx(const char *path,  struct packed_git *p)
>                  * variable sized table containing 8-byte entries
>                  * for offsets larger than 2^31.
>                  */
> -               unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
> -               unsigned long max_size = min_size;
> +               min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
> +               if (version != 2)
> +                       min_size -= nr*20;
> +               max_size = min_size;
>                 if (nr)
>                         max_size += (nr - 1)*8;
>                 if (idx_size < min_size || idx_size > max_size) {
> @@ -573,6 +576,36 @@ static int check_packed_git_idx(const char *path,  struct packed_git *p)
>                 }
>         }
>
> +       if (version >= 3) {
> +               /* the SHA1 table is located in the main pack file */
> +               void *pack_map;
> +               struct pack_header *pack_hdr;
> +
> +               fd = git_open_noatime(p->pack_name);
> +               if (fd < 0) {
> +                       munmap(idx_map, idx_size);
> +                       return error("unable to open %s", p->pack_name);
> +               }
> +               if (fstat(fd, &st) != 0 || xsize_t(st.st_size) < 12 + nr*20) {
> +                       close(fd);
> +                       munmap(idx_map, idx_size);
> +                       return error("size of %s is wrong", p->pack_name);
> +               }
> +               pack_map = xmmap(NULL, 12 + nr*20, PROT_READ, MAP_PRIVATE, fd, 0);
> +               close(fd);
> +               pack_hdr = pack_map;
> +               if (pack_hdr->hdr_signature != htonl(PACK_SIGNATURE) ||
> +                   pack_hdr->hdr_version != htonl(4) ||
> +                   pack_hdr->hdr_entries != htonl(nr)) {
> +                       munmap(idx_map, idx_size);
> +                       munmap(pack_map, 12 + nr*20);
> +                       return error("packfile for %s doesn't match expectations", path);
> +               }
> +               p->sha1_table = pack_map;
> +               p->sha1_table += 12;
> +       } else
> +               p->sha1_table = NULL;
> +
>         p->index_version = version;
>         p->index_data = idx_map;
>         p->index_size = idx_size;
> @@ -697,6 +730,10 @@ void close_pack_index(struct packed_git *p)
>                 munmap((void *)p->index_data, p->index_size);
>                 p->index_data = NULL;
>         }
> +       if (p->sha1_table) {
> +               munmap((void *)(p->sha1_table - 12), 12 + p->num_objects * 20);
> +               p->sha1_table = NULL;
> +       }
>  }
>
>  /*
> @@ -808,7 +845,7 @@ static int open_packed_git_1(struct packed_git *p)
>                 return error("file %s is far too short to be a packfile", p->pack_name);
>         if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
>                 return error("file %s is not a GIT packfile", p->pack_name);
> -       if (!pack_version_ok(hdr.hdr_version))
> +       if (!pack_version_ok(hdr.hdr_version) && hdr.hdr_version != htonl(4))
>                 return error("packfile %s is version %"PRIu32" and not"
>                         " supported (try upgrading GIT to a newer version)",
>                         p->pack_name, ntohl(hdr.hdr_version));
> @@ -2226,9 +2263,12 @@ const unsigned char *nth_packed_object_sha1(struct packed_git *p,
>         index += 4 * 256;
>         if (p->index_version == 1) {
>                 return index + 24 * n + 4;
> -       } else {
> +       } else if (p->index_version == 2) {
>                 index += 8;
>                 return index + 20 * n;
> +       } else {
> +               index = p->sha1_table;
> +               return index + 20 * n;
>         }
>  }
>
> @@ -2241,6 +2281,8 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
>         } else {
>                 uint32_t off;
>                 index += 8 + p->num_objects * (20 + 4);
> +               if (p->index_version != 2)
> +                       index -= p->num_objects * 20;
>                 off = ntohl(*((uint32_t *)(index + 4 * n)));
>                 if (!(off & 0x80000000))
>                         return off;
> @@ -2281,6 +2323,8 @@ off_t find_pack_entry_one(const unsigned char *sha1,
>                 stride = 24;
>                 index += 4;
>         }
> +       if (p->index_version > 2)
> +               index = p->sha1_table;
>
>         if (debug_lookup)
>                 printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
> --
> 1.8.4.22.g54757b7
>
> --
> To unsubscribe from this list: send the line "unsubscribe git" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]