[PATCH v2 0/4] multi-pack-index: fix verify on large repos

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Version 2 addresses the progress-related concerns raised about the previous
version of the midx verify code.

This version also extends the existing progress.[ch] code and adds a
"sparse" mode that automatically ensures the 100% message is issued.


----------------------------------------------------------------------------

Teach "multi-pack-index verify" to handle cases where the number of
packfiles exceeds the open file handle limit.

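The commit "midx: verify: add midx packfiles to the packed_git list" (patch
3/4 in this version; the first commit in v1) fixes a problem that prevented
the LRU-style close_one_pack() mechanism from working, which caused midx
verify to run out of file descriptors.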

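The commit "midx: verify: group objects by packfile to speed up object
verification" (patch 4/4 in this version; the second commit in v1) teaches
midx verify to sort the set of objects to verify by packfile rather than
verifying them in OID order. This eliminates the need to have more than one
packfile/idx open at the same time.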

With the group-objects-by-packfile commit (patch 4/4 in this version),
runtime on 3600 packfiles went from 12 minutes to 25 seconds.

Thanks, Jeff

Cc: dstolee@xxxxxxxxxxxxx

Jeff Hostetler (4):
  progress: add sparse mode to force 100% complete message
  trace2:data: add trace2 data to midx
  midx: verify: add midx packfiles to the packed_git list
  midx: verify: group objects by packfile to speed up object
    verification

 builtin/multi-pack-index.c |  3 ++
 midx.c                     | 84 +++++++++++++++++++++++++++++++++++---
 packfile.c                 |  2 +-
 packfile.h                 |  2 +
 progress.c                 | 40 ++++++++++++++++--
 progress.h                 |  3 ++
 6 files changed, 124 insertions(+), 10 deletions(-)


base-commit: e902e9bcae2010bc42648c80ab6adc6c5a16a4a5
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-166%2Fjeffhostetler%2Fupstream-midx-verify-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-166/jeffhostetler/upstream-midx-verify-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/166

Range-diff vs v1:

 -:  ---------- > 1:  e1da1f84a8 progress: add sparse mode to force 100% complete message
 3:  2d23bc24b7 = 2:  11c88845e7 trace2:data: add trace2 data to midx
 1:  d1a730df94 = 3:  ced7f1cb34 midx: verify: add midx packfiles to the packed_git list
 2:  86f6b03258 ! 4:  e2dd99911f midx: verify: group objects by packfile to speed up object verification
     @@ -31,13 +31,20 @@
      +	struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a;
      +	struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b;
      +
     -+	if (a->pack_int_id < b->pack_int_id)
     -+		return -1;
     -+	if (a->pack_int_id > b->pack_int_id)
     -+		return 1;
     -+
     -+	return 0;
     ++	return b->pack_int_id - a->pack_int_id;
      +}
     ++
     ++/*
     ++ * Limit calls to display_progress() for performance reasons.
     ++ * The interval here was arbitrarily chosen.
     ++ */
     ++#define SPARSE_PROGRESS_INTERVAL (1 << 12)
     ++#define midx_display_sparse_progress(progress, n) \
     ++	do { \
     ++		uint64_t _n = (n); \
     ++		if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0)	\
     ++			display_progress(progress, _n); \
     ++	} while (0)
      +
       int verify_midx_file(const char *object_dir)
       {
     @@ -48,10 +55,43 @@
       	struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
       	verify_midx_error = 0;
      @@
     + 	if (!m)
     + 		return 0;
     + 
     ++	progress = start_progress(_("Looking for referenced packfiles"),
     ++				  m->num_packs);
     + 	for (i = 0; i < m->num_packs; i++) {
     + 		if (prepare_midx_pack(m, i))
     + 			midx_report("failed to load pack in position %d", i);
     + 
     + 		if (m->packs[i])
     + 			install_packed_git(the_repository, m->packs[i]);
     ++
     ++		display_progress(progress, i + 1);
     + 	}
     ++	stop_progress(&progress);
     + 
     + 	for (i = 0; i < 255; i++) {
     + 		uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
     +@@
     + 				    i, oid_fanout1, oid_fanout2, i + 1);
       	}
       
     - 	progress = start_progress(_("Verifying object offsets"), m->num_objects);
     ++	progress = start_sparse_progress(_("Verifying OID order in MIDX"),
     ++					 m->num_objects - 1);
     + 	for (i = 0; i < m->num_objects - 1; i++) {
     + 		struct object_id oid1, oid2;
     + 
     +@@
     + 		if (oidcmp(&oid1, &oid2) >= 0)
     + 			midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
     + 				    i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
      +
     ++		midx_display_sparse_progress(progress, i + 1);
     + 	}
     ++	stop_progress(&progress);
     + 
     +-	progress = start_progress(_("Verifying object offsets"), m->num_objects);
      +	/*
      +	 * Create an array mapping each object to its packfile id.  Sort it
      +	 * to group the objects by packfile.  Use this permutation to visit
     @@ -63,8 +103,15 @@
      +		pairs[i].pos = i;
      +		pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
      +	}
     ++
     ++	progress = start_sparse_progress(
     ++		_("Sorting objects by packfile"), m->num_objects);
     ++	display_progress(progress, 0); /* TODO: Measure QSORT() progress */
      +	QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
     ++	stop_progress(&progress);
      +
     ++	progress = start_sparse_progress(_("Verifying object offsets"),
     ++					 m->num_objects);
      +	for (k = 0; k < m->num_objects; k++) {
       		struct object_id oid;
       		struct pack_entry e;
     @@ -94,7 +141,7 @@
      +				    pairs[k].pos, oid_to_hex(&oid), m_offset, p_offset);
       
      -		display_progress(progress, i + 1);
     -+		display_progress(progress, k + 1);
     ++		midx_display_sparse_progress(progress, k + 1);
       	}
       	stop_progress(&progress);
       

-- 
gitgitgadget



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux