[PATCH v2 2/2] packfile: refactor hash search with fanout table

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Subsequent patches will introduce file formats that make use of a fanout
array and a sorted table containing hashes, just like packfiles.
Refactor the hash search in packfile.c into its own function, so that
those patches can make use of it as well.

Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx>
---
 packfile.c    | 18 ++++--------------
 sha1-lookup.c | 28 ++++++++++++++++++++++++++++
 sha1-lookup.h | 22 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/packfile.c b/packfile.c
index 58bdced3b..59648a182 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1712,7 +1712,8 @@ off_t find_pack_entry_one(const unsigned char *sha1,
 {
 	const uint32_t *level1_ofs = p->index_data;
 	const unsigned char *index = p->index_data;
-	unsigned hi, lo, stride;
+	unsigned stride;
+	uint32_t result;
 
 	if (!index) {
 		if (open_pack_index(p))
@@ -1725,8 +1726,6 @@ off_t find_pack_entry_one(const unsigned char *sha1,
 		index += 8;
 	}
 	index += 4 * 256;
-	hi = ntohl(level1_ofs[*sha1]);
-	lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
 	if (p->index_version > 1) {
 		stride = 20;
 	} else {
@@ -1734,17 +1733,8 @@ off_t find_pack_entry_one(const unsigned char *sha1,
 		index += 4;
 	}
 
-	while (lo < hi) {
-		unsigned mi = lo + (hi - lo) / 2;
-		int cmp = hashcmp(index + mi * stride, sha1);
-
-		if (!cmp)
-			return nth_packed_object_offset(p, mi);
-		if (cmp > 0)
-			hi = mi;
-		else
-			lo = mi+1;
-	}
+	if (bsearch_hash(sha1, level1_ofs, index, stride, &result))
+		return nth_packed_object_offset(p, result);
 	return 0;
 }
 
diff --git a/sha1-lookup.c b/sha1-lookup.c
index 4cf3ebd92..8d0b1db3e 100644
--- a/sha1-lookup.c
+++ b/sha1-lookup.c
@@ -99,3 +99,31 @@ int sha1_pos(const unsigned char *sha1, void *table, size_t nr,
 	} while (lo < hi);
 	return -lo-1;
 }
+
+int bsearch_hash(const unsigned char *sha1, const uint32_t *fanout_nbo,
+		 const unsigned char *table, size_t stride, uint32_t *result)
+{
+	uint32_t hi, lo;
+
+	hi = ntohl(fanout_nbo[*sha1]);
+	lo = ((*sha1 == 0x0) ? 0 : ntohl(fanout_nbo[*sha1 - 1]));
+
+	while (lo < hi) {
+		unsigned mi = lo + (hi - lo) / 2;
+		int cmp = hashcmp(table + mi * stride, sha1);
+
+		if (!cmp) {
+			if (result)
+				*result = mi;
+			return 1;
+		}
+		if (cmp > 0)
+			hi = mi;
+		else
+			lo = mi + 1;
+	}
+
+	if (result)
+		*result = lo;
+	return 0;
+}
diff --git a/sha1-lookup.h b/sha1-lookup.h
index cf5314f40..7678b23b3 100644
--- a/sha1-lookup.h
+++ b/sha1-lookup.h
@@ -7,4 +7,26 @@ extern int sha1_pos(const unsigned char *sha1,
 		    void *table,
 		    size_t nr,
 		    sha1_access_fn fn);
+
+/*
+ * Searches for sha1 in table, using the given fanout table to determine the
+ * interval to search, then using binary search. Returns 1 if found, 0 if not.
+ *
+ * Takes the following parameters:
+ *
+ *  - sha1: the hash to search for
+ *  - fanout_nbo: a 256-element array of NETWORK-order 32-bit integers; the
+ *    integer at position i represents the number of elements in table whose
+ *    first byte is less than or equal to i
+ *  - table: a sorted list of hashes with optional extra information in between
+ *  - stride: distance between two consecutive elements in table (should be
+ *    GIT_MAX_RAWSZ or greater)
+ *  - result: if not NULL, this function stores the element index of the
+ *    position found (if the search is successful) or the index of the least
+ *    element that is greater than sha1 (if the search is not successful)
+ *
+ * This function does not verify the validity of the fanout table.
+ */
+int bsearch_hash(const unsigned char *sha1, const uint32_t *fanout_nbo,
+		 const unsigned char *table, size_t stride, uint32_t *result);
 #endif
-- 
2.16.0.rc1.238.g530d649a79-goog




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux