Commentary: I included the pack directory of the MIDX file as a FLEX_ARRAY
at the end of the midxed_git struct, similar to how the pack name appears
at the end of the packed_git struct. A colleague mentioned this pattern is
confusing and possibly dangerous so I should consider changing it. If there
is no strong reason for this, then I will modify the struct before the v1
patch to use a char*.

-- >8 --

Add a "--read" subcommand to the midx builtin to report summary information
on the head MIDX file or a MIDX file specified by the supplied "--midx-id"
parameter.

This subcommand is used by t5318-midx.sh to verify the indexed objects are
as expected.

Signed-off-by: Derrick Stolee <dstolee@xxxxxxxxxxxxx>
---
 Documentation/git-midx.txt |  23 +++++-
 builtin/midx.c             |  73 +++++++++++++++++
 midx.c                     | 187 +++++++++++++++++++++++++++++++++++++++++++++
 midx.h                     |  58 ++++++++++++++
 t/t5318-midx.sh            |  79 ++++++++++++++-----
 5 files changed, 397 insertions(+), 23 deletions(-)

diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt
index 01f79cbba5..3eeed1d969 100644
--- a/Documentation/git-midx.txt
+++ b/Documentation/git-midx.txt
@@ -9,7 +9,7 @@ git-midx - Write and verify multi-pack-indexes (MIDX files).
 SYNOPSIS
 --------
 [verse]
-'git midx' --write <options> [--pack-dir <pack_dir>]
+'git midx' [--write|--read] <options> [--pack-dir <pack_dir>]
 
 DESCRIPTION
 -----------
@@ -22,9 +22,18 @@ OPTIONS
 	Use given directory for the location of packfiles, pack-indexes,
 	and MIDX files.
 
+--read::
+	If specified, read a midx file specified by the midx-head file
+	and output basic details about the midx file. (Cannot be combined
+	with --write.)
+
+--midx-id <oid>::
+	If specified with --read, use the given oid to read midx-[oid].midx
+	instead of using midx-head.
 --write::
 	If specified, write a new midx file to the pack directory using
 	the packfiles present. Outputs the hash of the result midx file.
+	(Cannot be combined with --read.)
 
 --update-head::
 	If specified with --write, update the midx-head file to point to
@@ -58,6 +67,18 @@ $ git midx --write --update-head
 $ git midx --write --pack-dir ../../alt/pack/
 ---------------------------------------------------------
 
+* Read the current midx-head.
++
+-----------------------------------------------
+$ git midx --read
+-----------------------------------------------
+
+* Read a specific MIDX file in the local .git folder.
++
+--------------------------------------------------------------------
+$ git midx --read --midx-id 3e50d982a2257168c7fd0ff12ffe5cf6af38c74e
+--------------------------------------------------------------------
+
 CONFIGURATION
 -------------
 
diff --git a/builtin/midx.c b/builtin/midx.c
index 84ce6588a2..ee9234583d 100644
--- a/builtin/midx.c
+++ b/builtin/midx.c
@@ -16,12 +16,74 @@ static char const * const builtin_midx_usage[] = {
 
 static struct opts_midx {
 	const char *pack_dir;
+	int read;
+	const char *midx_id;
 	int write;
 	int update_head;
 	int has_existing;
 	struct object_id old_midx_oid;
 } opts;
 
+/*
+ * Print a summary of one MIDX file to stdout: header fields, object
+ * count, the chunks that are present, the pack names and the pack
+ * directory. The file is chosen by --midx-id when that holds a
+ * full-length hex id, and by the midx-head file otherwise.
+ *
+ * Dies if the id does not parse, if there is no midx-head, or if the
+ * MIDX file cannot be loaded. Returns 0 otherwise.
+ */
+static int midx_read(void)
+{
+	struct object_id midx_oid;
+	struct midxed_git *midx;
+	uint32_t i;
+
+	if (opts.midx_id && strlen(opts.midx_id) == GIT_MAX_HEXSZ) {
+		if (get_oid_hex(opts.midx_id, &midx_oid))
+			die("Invalid midx-id: %s", opts.midx_id);
+	} else if (!get_midx_head_oid(opts.pack_dir, &midx_oid)) {
+		die("No midx-head exists.");
+	}
+
+	/* NULL here means the file could not be opened or stat'ed. */
+	midx = get_midxed_git(opts.pack_dir, &midx_oid);
+	if (!midx)
+		die("Failed to read the midx file in %s.", opts.pack_dir);
+
+	printf("header: %08x %x %d %d %d %d %"PRIu32"\n",
+	       ntohl(midx->hdr->midx_signature),
+	       ntohl(midx->hdr->midx_version),
+	       midx->hdr->hash_version,
+	       midx->hdr->hash_len,
+	       midx->hdr->num_base_midx,
+	       midx->hdr->num_chunks,
+	       ntohl(midx->hdr->num_packs));
+	printf("num_objects: %"PRIu32"\n", midx->num_objects);
+	printf("chunks:");
+
+	if (midx->chunk_pack_lookup)
+		printf(" pack_lookup");
+	if (midx->chunk_pack_names)
+		printf(" pack_names");
+	if (midx->chunk_oid_fanout)
+		printf(" oid_fanout");
+	if (midx->chunk_oid_lookup)
+		printf(" oid_lookup");
+	if (midx->chunk_object_offsets)
+		printf(" object_offsets");
+	if (midx->chunk_large_offsets)
+		printf(" large_offsets");
+	printf("\n");
+
+	printf("pack_names:\n");
+	for (i = 0; i < midx->num_packs; i++)
+		printf("%s\n", midx->pack_names[i]);
+
+	printf("pack_dir: %s\n", midx->pack_dir);
+	return 0;
+}
+
 static int build_midx_from_packs(
 	const char *pack_dir,
 	const char **pack_names, uint32_t nr_packs,
@@ -187,6 +249,12 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
 		{ OPTION_STRING, 'p', "pack-dir", &opts.pack_dir,
 			N_("dir"),
 			N_("The pack directory containing set of packfile and pack-index pairs.") },
+		OPT_BOOL('r', "read", &opts.read,
+			N_("read midx file")),
+		{ OPTION_STRING, 'M', "midx-id", &opts.midx_id,
+			N_("oid"),
+			N_("An OID for a specific midx file in the pack-dir."),
+			PARSE_OPT_OPTARG, NULL, (intptr_t) "" },
 		OPT_BOOL('w', "write", &opts.write,
 			N_("write midx file")),
 		OPT_BOOL('u', "update-head", &opts.update_head,
@@ -205,6 +273,9 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
 			     builtin_midx_options,
 			     builtin_midx_usage, 0);
 
+	if (opts.write + opts.read > 1)
+		usage_with_options(builtin_midx_usage, builtin_midx_options);
+
 	if (!opts.pack_dir) {
 		struct strbuf path = STRBUF_INIT;
 		strbuf_addstr(&path, get_object_directory());
@@ -214,6 +285,8 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
 
 	opts.has_existing = !!get_midx_head_oid(opts.pack_dir, &opts.old_midx_oid);
 
+	if (opts.read)
+		return midx_read();
 	if (opts.write)
 		return midx_write();
 
diff --git a/midx.c b/midx.c
index f4178c1b81..c631be451f 100644
--- a/midx.c
+++ b/midx.c
@@ -65,6 +65,193 @@ struct object_id *get_midx_head_oid(const char *pack_dir,
 	return oid;
 }
 
+/*
+ * Allocate a midxed_git followed by 'extra' bytes for the pack_dir
+ * member. Only the fixed-size part is zeroed; midx_fd starts at -1.
+ */
+static struct midxed_git *alloc_midxed_git(size_t extra)
+{
+	struct midxed_git *m = xmalloc(st_add(sizeof(*m), extra));
+	memset(m, 0, sizeof(*m));
+	m->midx_fd = -1;
+
+	return m;
+}
+
+/*
+ * Map 'midx_file' and describe it with a newly allocated midxed_git
+ * that also records a copy of 'pack_dir'.
+ *
+ * Returns NULL if the file cannot be opened or stat'ed. Dies if the
+ * file is too small, if the signature or version is wrong, if a chunk
+ * id is unrecognized, if a chunk offset lies past the end of the file,
+ * or if a chunk that is dereferenced here is missing.
+ *
+ * On success the mapping and the file descriptor are left open and are
+ * stored in the returned struct.
+ */
+static struct midxed_git *load_midxed_git_one(const char *midx_file, const char *pack_dir)
+{
+	void *midx_map;
+	const unsigned char *data;
+	struct pack_midx_header *hdr;
+	size_t midx_size, packs_len;
+	struct stat st;
+	uint32_t i, signature, version;
+	struct midxed_git *midx;
+	int fd = git_open(midx_file);
+
+	if (fd < 0)
+		return NULL;
+	if (fstat(fd, &st)) {
+		close(fd);
+		return NULL;
+	}
+	midx_size = xsize_t(st.st_size);
+
+	if (midx_size < 16 + 8 * 5 + 4 * 256 + GIT_MAX_RAWSZ) {
+		close(fd);
+		die("midx file %s is too small", midx_file);
+	}
+	midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	data = (const unsigned char *)midx_map;
+
+	hdr = midx_map;
+
+	/* Read these now: hdr points into the map and dies with it. */
+	signature = ntohl(hdr->midx_signature);
+	version = ntohl(hdr->midx_version);
+
+	if (signature != MIDX_SIGNATURE) {
+		munmap(midx_map, midx_size);
+		close(fd);
+		die("MIDX signature %X does not match signature %X",
+		    signature, MIDX_SIGNATURE);
+	}
+
+	if (version != MIDX_VERSION) {
+		munmap(midx_map, midx_size);
+		close(fd);
+		die("MIDX version %X does not match version %X",
+		    version, MIDX_VERSION);
+	}
+
+	/* The loop below reads num_chunks + 1 table entries of 12 bytes. */
+	if (midx_size < sizeof(*hdr) + 12 * ((size_t)hdr->num_chunks + 1)) {
+		munmap(midx_map, midx_size);
+		close(fd);
+		die("midx file %s is too small for its chunk table", midx_file);
+	}
+
+	midx = alloc_midxed_git(strlen(pack_dir) + 1);
+
+	midx->hdr = hdr;
+	midx->midx_fd = fd;
+	midx->data = midx_map;
+	midx->data_len = midx_size;
+
+	for (i = 0; i <= hdr->num_chunks; i++) {
+		uint32_t chunk_id = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i));
+		uint64_t chunk_offset1 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 4));
+		uint32_t chunk_offset2 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 8));
+		uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2;
+
+		if (sizeof(data) == 4 && chunk_offset >> 32) {
+			munmap(midx_map, midx_size);
+			close(fd);
+			die(_("unable to memory-map in 32-bit address space"));
+		}
+
+		/* Each chunk pointer below is data + chunk_offset. */
+		if (chunk_offset > midx_size) {
+			munmap(midx_map, midx_size);
+			close(fd);
+			die("MIDX chunk offset is past the end of %s", midx_file);
+		}
+
+		switch (chunk_id) {
+		case MIDX_CHUNKID_PACKLOOKUP:
+			midx->chunk_pack_lookup = data + chunk_offset;
+			break;
+
+		case MIDX_CHUNKID_PACKNAMES:
+			midx->chunk_pack_names = data + chunk_offset;
+			break;
+
+		case MIDX_CHUNKID_OIDFANOUT:
+			midx->chunk_oid_fanout = data + chunk_offset;
+			break;
+
+		case MIDX_CHUNKID_OIDLOOKUP:
+			midx->chunk_oid_lookup = data + chunk_offset;
+			break;
+
+		case MIDX_CHUNKID_OBJECTOFFSETS:
+			midx->chunk_object_offsets = data + chunk_offset;
+			break;
+
+		case MIDX_CHUNKID_LARGEOFFSETS:
+			midx->chunk_large_offsets = data + chunk_offset;
+			break;
+
+		case 0:
+			break;
+
+		default:
+			munmap(midx_map, midx_size);
+			close(fd);
+			die("unrecognized MIDX chunk id: %08x", chunk_id);
+		}
+	}
+
+	/* num_objects is read out of the fanout chunk just below. */
+	if (!midx->chunk_oid_fanout) {
+		munmap(midx_map, midx_size);
+		close(fd);
+		die("MIDX file %s has no OID fanout chunk", midx_file);
+	}
+
+	midx->num_objects = ntohl(*((uint32_t*)(midx->chunk_oid_fanout + 255 * 4)));
+	midx->num_packs = ntohl(midx->hdr->num_packs);
+
+	packs_len = st_mult(sizeof(struct packed_git*), midx->num_packs);
+
+	if (packs_len) {
+		/* The pack names are located through both of these chunks. */
+		if (!midx->chunk_pack_lookup || !midx->chunk_pack_names) {
+			munmap(midx_map, midx_size);
+			close(fd);
+			die("MIDX file %s has no pack lookup or pack names chunk", midx_file);
+		}
+
+		ALLOC_ARRAY(midx->packs, midx->num_packs);
+		ALLOC_ARRAY(midx->pack_names, midx->num_packs);
+		memset(midx->packs, 0, packs_len);
+
+		for (i = 0; i < midx->num_packs; i++) {
+			uint32_t name_offset = ntohl(*(uint32_t*)(midx->chunk_pack_lookup + 4 * i));
+			midx->pack_names[i] = (const char*)(midx->chunk_pack_names + name_offset);
+		}
+	}
+
+	strcpy(midx->pack_dir, pack_dir);
+	return midx;
+}
+
+/*
+ * Load the MIDX file that get_midx_filename_oid() names for 'oid' in
+ * 'pack_dir'. Returns whatever load_midxed_git_one() returns, which is
+ * NULL when the file cannot be opened or stat'ed.
+ */
+struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *oid)
+{
+	struct midxed_git *m;
+	char *fname = get_midx_filename_oid(pack_dir, oid);
+	m = load_midxed_git_one(fname, pack_dir);
+	free(fname);
+	return m;
+}
+
 struct pack_midx_details_internal {
 	uint32_t pack_int_id;
 	uint32_t internal_offset;
diff --git a/midx.h b/midx.h
index 9d9ab85261..92b74e49db 100644
--- a/midx.h
+++ b/midx.h
@@ -27,6 +27,64 @@ struct pack_midx_header {
 	uint32_t num_packs;
 };
 
+struct midxed_git {
+	struct midxed_git *next;
+
+	int midx_fd;
+
+	/* the mmap'd data for the midx file */
+	const unsigned char *data;
+	size_t data_len;
+
+	/* points into the mmap'd data */
+	struct pack_midx_header *hdr;
+
+	/* can construct filename from obj_dir + "/packs/midx-" + oid + ".midx" */
+	struct object_id oid;
+
+	/* derived from the fanout chunk */
+	uint32_t num_objects;
+
+	/* converted number of packs */
+	uint32_t num_packs;
+
+	/* hdr->num_packs * 4 bytes */
+	const unsigned char *chunk_pack_lookup;
+	const unsigned char *chunk_pack_names;
+
+	/* 256 * 4 bytes */
+	const unsigned char *chunk_oid_fanout;
+
+	/* num_objects * hdr->hash_len bytes */
+	const unsigned char *chunk_oid_lookup;
+
+	/* num_objects * 8 bytes */
+	const unsigned char *chunk_object_offsets;
+
+	/*
+	 * 8 bytes per large offset.
+	 * (Optional: may be null.)
+	 */
+	const unsigned char *chunk_large_offsets;
+
+	/*
+	 * Points into mmap'd data storing the pack filenames.
+	 */
+	const char **pack_names;
+
+	/*
+	 * Store an array of pack-pointers. If NULL, then the
+	 * pack has not been loaded yet. The array indices
+	 * correspond to the pack_int_ids from the midx storage.
+	 */
+	struct packed_git **packs;
+
+	/* something like ".git/objects/pack" */
+	char pack_dir[FLEX_ARRAY]; /* more */
+};
+
+extern struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *oid);
+
 /*
  * Write a single MIDX file storing the given entries for the
  * given list of packfiles. If midx_name is null, then a temp
diff --git a/t/t5318-midx.sh b/t/t5318-midx.sh
index b66efcdce9..2e52389442 100755
--- a/t/t5318-midx.sh
+++ b/t/t5318-midx.sh
@@ -26,11 +26,27 @@ test_expect_success 'create objects' \
      git commit -m "test data 1" &&
      git branch commit1 HEAD'
 
+_midx_read_expect() {
+	cat >expect <<- EOF
+	header: 4d494458 1 1 20 0 5 $1
+	num_objects: $2
+	chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets
+	pack_names:
+	$(ls $3 | grep pack | grep -v idx | sort)
+	pack_dir: $3
+	EOF
+}
+
 test_expect_success 'write-midx from index version 1' \
     'pack1=$(git rev-list --all --objects | git pack-objects --index-version=1 ${packdir}/test-1) &&
      midx1=$(git midx --write) &&
      test_path_is_file ${packdir}/midx-${midx1}.midx &&
-     test_path_is_missing ${packdir}/midx-head'
+     test_path_is_missing ${packdir}/midx-head &&
+     _midx_read_expect \
+         "1" "102" \
+         "${packdir}" &&
+     git midx --read --midx-id=${midx1} >output &&
+     cmp output expect'
 
 test_expect_success 'write-midx from index version 2' \
     'rm "${packdir}/test-1-${pack1}.pack" &&
@@ -38,12 +54,17 @@ test_expect_success 'write-midx from index version 2' \
      midx2=$(git midx --write --update-head) &&
      test_path_is_file ${packdir}/midx-${midx2}.midx &&
      test_path_is_file ${packdir}/midx-head &&
-     test $(cat ${packdir}/midx-head) = "$midx2"'
+     test $(cat ${packdir}/midx-head) = "$midx2" &&
+     _midx_read_expect \
+         "1" "102" \
+         "${packdir}" &&
+     git midx --read> output &&
+     cmp output expect'
 
 test_expect_success 'Create more objects' \
     'for i in $(test_seq 100)
      do
-         echo $i >file-2-$i
+         echo extra-$i >file-2-$i
      done &&
      git add file-* &&
      test_tick &&
@@ -55,28 +76,32 @@ test_expect_success 'write-midx with two packs' \
      midx3=$(git midx --write --update-head) &&
      test_path_is_file ${packdir}/midx-${midx3}.midx &&
      test_path_is_file ${packdir}/midx-head &&
-     test $(cat ${packdir}/midx-head) = "$midx3"'
+     test $(cat ${packdir}/midx-head) = "$midx3" &&
+     _midx_read_expect \
+         "2" "204" \
+         "${packdir}" &&
+     git midx --read >output &&
+     cmp output expect'
 
 test_expect_success 'Add more packs' \
-    'for j in $(test_seq 10)
+    'for i in $(test_seq 10)
      do
-         jjj=$(printf '%03i' $j)
-         test-genrandom "bar" 200 > wide_delta_$jjj &&
-         test-genrandom "baz $jjj" 50 >> wide_delta_$jjj &&
-         test-genrandom "foo"$j 100 > deep_delta_$jjj &&
-         test-genrandom "foo"$(expr $j + 1) 100 >> deep_delta_$jjj &&
-         test-genrandom "foo"$(expr $j + 2) 100 >> deep_delta_$jjj &&
-         echo $jjj >file_$jjj &&
-         test-genrandom "$jjj" 8192 >>file_$jjj &&
-         git update-index --add file_$jjj deep_delta_$jjj wide_delta_$jjj &&
+         iii=$(printf '%03i' $i)
+         test-genrandom "bar" 200 > wide_delta_$iii &&
+         test-genrandom "baz $iii" 50 >> wide_delta_$iii &&
+         test-genrandom "foo"$i 100 > deep_delta_$iii &&
+         test-genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii &&
+         test-genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii &&
+         echo $iii >file_$iii &&
+         test-genrandom "$iii" 8192 >>file_$iii &&
+         git update-index --add file_$iii deep_delta_$iii wide_delta_$iii &&
          { echo 101 && test-genrandom 100 8192; } >file_101 &&
          git update-index --add file_101 &&
-         commit=$(git commit-tree $EMPTY_TREE -p HEAD</dev/null) && {
-             echo $EMPTY_TREE &&
-             git ls-tree $EMPTY_TREE | sed -e "s/.* \\([0-9a-f]*\\) .*/\\1/"
+         tree=$(git write-tree) &&
+         commit=$(git commit-tree $tree -p HEAD</dev/null) && {
+             echo $tree &&
+             git ls-tree $tree | sed -e "s/.* \\([0-9a-f]*\\) .*/\\1/"
          } >obj-list &&
-         echo commit_packs_$j = $commit &&
-         git branch commit_packs_$j $commit &&
          git update-ref HEAD $commit &&
          git pack-objects --index-version=2 ${packdir}/test-pack <obj-list
      done'
@@ -85,7 +110,12 @@ test_expect_success 'write-midx with twelve packs' \
     'midx4=$(git midx --write --update-head) &&
      test_path_is_file ${packdir}/midx-${midx4}.midx &&
      test_path_is_file ${packdir}/midx-head &&
-     test $(cat ${packdir}/midx-head) = "$midx4"'
+     test $(cat ${packdir}/midx-head) = "$midx4" &&
+     _midx_read_expect \
+         "12" "245" \
+         "${packdir}" &&
+     git midx --read >output &&
+     cmp output expect'
 
 test_expect_success 'write-midx with no new packs' \
     'midx5=$(git midx --write --update-head) &&
@@ -100,12 +130,17 @@ test_expect_success 'create bare repo' \
      cd bare &&
      git config core.midx true &&
      git config pack.threads 1 &&
-     baredir=objects/pack'
+     baredir=./objects/pack'
 
 test_expect_success 'write-midx in bare repo' \
     'midxbare=$(git midx --write --update-head) &&
      test_path_is_file ${baredir}/midx-${midxbare}.midx &&
      test_path_is_file ${baredir}/midx-head &&
-     test $(cat ${baredir}/midx-head) = "$midxbare"'
+     test $(cat ${baredir}/midx-head) = "$midxbare" &&
+     _midx_read_expect \
+         "12" "245" \
+         "${baredir}" &&
+     git midx --read >output &&
+     cmp output expect'
 
 test_done
-- 
2.15.0