[PATCH 2/3] midx: verify: group objects by packfile to speed up object verification

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx>

Teach `multi-pack-index verify` to sort the set of objects by
packfile so that only one packfile needs to be open at a time.

This is a performance improvement.  Previously, objects were
verified in OID order.  This essentially requires all packfiles
to be open at the same time.  If the number of packfiles exceeds
the open file limit, packfiles would be LRU-closed and re-opened
many times.

Signed-off-by: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx>
---
 midx.c     | 53 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 packfile.c |  2 +-
 packfile.h |  2 ++
 3 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/midx.c b/midx.c
index d2c39b6d37..0376757390 100644
--- a/midx.c
+++ b/midx.c
@@ -958,9 +958,29 @@ static void midx_report(const char *fmt, ...)
 	va_end(ap);
 }
 
+struct pair_pos_vs_id
+{
+	uint32_t pos;
+	uint32_t pack_int_id;
+};
+
+static int compare_pair_pos_vs_id(const void *_a, const void *_b)
+{
+	struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a;
+	struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b;
+
+	if (a->pack_int_id < b->pack_int_id)
+		return -1;
+	if (a->pack_int_id > b->pack_int_id)
+		return 1;
+
+	return 0;
+}
+
 int verify_midx_file(const char *object_dir)
 {
-	uint32_t i;
+	struct pair_pos_vs_id *pairs = NULL;
+	uint32_t i, k;
 	struct progress *progress;
 	struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
 	verify_midx_error = 0;
@@ -997,15 +1017,36 @@ int verify_midx_file(const char *object_dir)
 	}
 
 	progress = start_progress(_("Verifying object offsets"), m->num_objects);
+
+	/*
+	 * Create an array mapping each object to its packfile id.  Sort it
+	 * to group the objects by packfile.  Use this permutation to visit
+	 * each of the objects and only require 1 packfile to be open at a
+	 * time.
+	 */
+	ALLOC_ARRAY(pairs, m->num_objects);
 	for (i = 0; i < m->num_objects; i++) {
+		pairs[i].pos = i;
+		pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
+	}
+	QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
+
+	for (k = 0; k < m->num_objects; k++) {
 		struct object_id oid;
 		struct pack_entry e;
 		off_t m_offset, p_offset;
 
-		nth_midxed_object_oid(&oid, m, i);
+		if (k > 0 && pairs[k-1].pack_int_id != pairs[k].pack_int_id &&
+		    m->packs[pairs[k-1].pack_int_id])
+		{
+			close_pack_fd(m->packs[pairs[k-1].pack_int_id]);
+			close_pack_index(m->packs[pairs[k-1].pack_int_id]);
+		}
+
+		nth_midxed_object_oid(&oid, m, pairs[k].pos);
 		if (!fill_midx_entry(&oid, &e, m)) {
 			midx_report(_("failed to load pack entry for oid[%d] = %s"),
-				    i, oid_to_hex(&oid));
+				    pairs[k].pos, oid_to_hex(&oid));
 			continue;
 		}
 
@@ -1020,11 +1061,13 @@ int verify_midx_file(const char *object_dir)
 
 		if (m_offset != p_offset)
 			midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
-				    i, oid_to_hex(&oid), m_offset, p_offset);
+				    pairs[k].pos, oid_to_hex(&oid), m_offset, p_offset);
 
-		display_progress(progress, i + 1);
+		display_progress(progress, k + 1);
 	}
 	stop_progress(&progress);
 
+	free(pairs);
+
 	return verify_midx_error;
 }
diff --git a/packfile.c b/packfile.c
index 16bcb75262..d2bcb2f860 100644
--- a/packfile.c
+++ b/packfile.c
@@ -309,7 +309,7 @@ void close_pack_windows(struct packed_git *p)
 	}
 }
 
-static int close_pack_fd(struct packed_git *p)
+int close_pack_fd(struct packed_git *p)
 {
 	if (p->pack_fd < 0)
 		return 0;
diff --git a/packfile.h b/packfile.h
index d70c6d9afb..b1c18504eb 100644
--- a/packfile.h
+++ b/packfile.h
@@ -76,6 +76,8 @@ extern int open_pack_index(struct packed_git *);
  */
 extern void close_pack_index(struct packed_git *);
 
+int close_pack_fd(struct packed_git *p);
+
 extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
 
 extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
-- 
gitgitgadget




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux