Subsequent patches will introduce file formats that make use of a fanout array and a sorted table containing hashes, just like packfiles. Refactor the hash search in packfile.c into its own function, so that those patches can make use of it as well. Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx> --- packfile.c | 19 +++++-------------- sha1-lookup.c | 24 ++++++++++++++++++++++++ sha1-lookup.h | 21 +++++++++++++++++++++ 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/packfile.c b/packfile.c index 58bdced3b..29f5dc239 100644 --- a/packfile.c +++ b/packfile.c @@ -1712,7 +1712,8 @@ off_t find_pack_entry_one(const unsigned char *sha1, { const uint32_t *level1_ofs = p->index_data; const unsigned char *index = p->index_data; - unsigned hi, lo, stride; + unsigned stride; + int ret; if (!index) { if (open_pack_index(p)) @@ -1725,8 +1726,6 @@ off_t find_pack_entry_one(const unsigned char *sha1, index += 8; } index += 4 * 256; - hi = ntohl(level1_ofs[*sha1]); - lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1])); if (p->index_version > 1) { stride = 20; } else { @@ -1734,17 +1733,9 @@ off_t find_pack_entry_one(const unsigned char *sha1, index += 4; } - while (lo < hi) { - unsigned mi = lo + (hi - lo) / 2; - int cmp = hashcmp(index + mi * stride, sha1); - - if (!cmp) - return nth_packed_object_offset(p, mi); - if (cmp > 0) - hi = mi; - else - lo = mi+1; - } + ret = bsearch_hash(sha1, level1_ofs, index, stride); + if (ret >= 0) + return nth_packed_object_offset(p, ret); return 0; } diff --git a/sha1-lookup.c b/sha1-lookup.c index 4cf3ebd92..d11c5e526 100644 --- a/sha1-lookup.c +++ b/sha1-lookup.c @@ -99,3 +99,27 @@ int sha1_pos(const unsigned char *sha1, void *table, size_t nr, } while (lo < hi); return -lo-1; } + +int bsearch_hash(const unsigned char *sha1, const void *fanout_, + const void *table_, size_t stride) +{ + const uint32_t *fanout = fanout_; + const unsigned char *table = table_; + int hi, lo; + + hi = ntohl(fanout[*sha1]); + lo = ((*sha1 == 0x0) ? 0 : ntohl(fanout[*sha1 - 1])); + + while (lo < hi) { + unsigned mi = lo + (hi - lo) / 2; + int cmp = hashcmp(table + mi * stride, sha1); + + if (!cmp) + return mi; + if (cmp > 0) + hi = mi; + else + lo = mi + 1; + } + return -lo - 1; +} diff --git a/sha1-lookup.h b/sha1-lookup.h index cf5314f40..3c59e9cb1 100644 --- a/sha1-lookup.h +++ b/sha1-lookup.h @@ -7,4 +7,25 @@ extern int sha1_pos(const unsigned char *sha1, void *table, size_t nr, sha1_access_fn fn); + +/* + * Searches for sha1 in table, using the given fanout table to determine the + * interval to search, then using binary search. Returns the element index of + * the position found if successful, -i-1 if not (where i is the index of the + * least element that is greater than sha1). + * + * Takes the following parameters: + * + * - sha1: the hash to search for + * - fanout: a 256-element array of NETWORK-order 32-bit integers; the integer + * at position i represents the number of elements in table whose first byte + * is less than or equal to i + * - table: a sorted list of hashes with optional extra information in between + * - stride: distance between two consecutive elements in table (should be + * GIT_MAX_RAWSZ or greater) + * + * This function does not verify the validity of the fanout table. + */ +extern int bsearch_hash(const unsigned char *sha1, const void *fanout, + const void *table, size_t stride); #endif -- 2.16.0.rc1.238.g530d649a79-goog