[PATCH 08/15] cache: compare the entire buffer for struct object_id

"brian m. carlson" <sandals@xxxxxxxxxxxxxxxxxxxx> · Sat, 10 Apr 2021 15:21:33 +0000

Currently, when we compare two object IDs, we have to take a branch to
determine what the hash size is supposed to be.  The compiler can
optimize well for a single length, but has trouble when there are two
possible lengths.

There is, however, an alternative: we can ensure that we always compare
the full length of the hash buffer, but in turn we must zero the
remainder of the buffer when using SHA-1; otherwise, we'll end up with
incompatible junk at the end of otherwise equivalent object IDs that
will prevent them from matching.  This is an acceptable tradeoff,
because we generally read an object ID in once, but then compare it
against others multiple times.

This latter approach also has some benefits as well: since we will have
annotated every location in which we load an object ID into an instance
of struct object_id, if we want to set the hash algorithm for the object
ID, we can do so at the same time.

Adopt this latter approach, since it provides us greater flexibility and
lets us read and store object IDs for multiple algorithms at once.

Signed-off-by: brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx>
---
 hash.h        | 13 ++++++++++---
 hex.c         |  9 ++++++---
 notes.c       |  3 +++
 object-file.c |  1 +
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/hash.h b/hash.h
index c8f03d8aee..04eba5c56b 100644
--- a/hash.h
+++ b/hash.h
@@ -205,7 +205,7 @@ static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
 
 static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
 {
-	return hashcmp(oid1->hash, oid2->hash);
+	return memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ);
 }
 
 static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
@@ -221,7 +221,7 @@ static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
 
 static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
 {
-	return hasheq(oid1->hash, oid2->hash);
+	return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ);
 }
 
 static inline int is_null_oid(const struct object_id *oid)
@@ -258,7 +258,9 @@ static inline void oidclr(struct object_id *oid)
 
 static inline void oidread(struct object_id *oid, const unsigned char *hash)
 {
-	memcpy(oid->hash, hash, the_hash_algo->rawsz);
+	size_t rawsz = the_hash_algo->rawsz;
+	memcpy(oid->hash, hash, rawsz);
+	memset(oid->hash + rawsz, 0, GIT_MAX_RAWSZ - rawsz);
 }
 
 static inline int is_empty_blob_sha1(const unsigned char *sha1)
@@ -281,6 +283,11 @@ static inline int is_empty_tree_oid(const struct object_id *oid)
 	return oideq(oid, the_hash_algo->empty_tree);
 }
 
+static inline void oid_pad_buffer(struct object_id *oid, const struct git_hash_algo *algop)
+{
+	memset(oid->hash + algop->rawsz, 0, GIT_MAX_RAWSZ - algop->rawsz);
+}
+
 const char *empty_tree_oid_hex(void);
 const char *empty_blob_oid_hex(void);
 
diff --git a/hex.c b/hex.c
index da51e64929..5fa3e71cb9 100644
--- a/hex.c
+++ b/hex.c
@@ -69,7 +69,10 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
 int get_oid_hex_algop(const char *hex, struct object_id *oid,
 		      const struct git_hash_algo *algop)
 {
-	return get_hash_hex_algop(hex, oid->hash, algop);
+	int ret = get_hash_hex_algop(hex, oid->hash, algop);
+	if (!ret)
+		oid_pad_buffer(oid, algop);
+	return ret;
 }
 
 /*
@@ -80,7 +83,7 @@ int get_oid_hex_any(const char *hex, struct object_id *oid)
 {
 	int i;
 	for (i = GIT_HASH_NALGOS - 1; i > 0; i--) {
-		if (!get_hash_hex_algop(hex, oid->hash, &hash_algos[i]))
+		if (!get_oid_hex_algop(hex, oid, &hash_algos[i]))
 			return i;
 	}
 	return GIT_HASH_UNKNOWN;
@@ -95,7 +98,7 @@ int parse_oid_hex_algop(const char *hex, struct object_id *oid,
 			const char **end,
 			const struct git_hash_algo *algop)
 {
-	int ret = get_hash_hex_algop(hex, oid->hash, algop);
+	int ret = get_oid_hex_algop(hex, oid, algop);
 	if (!ret)
 		*end = hex + algop->hexsz;
 	return ret;
diff --git a/notes.c b/notes.c
index a44b25858f..1dfe9e2b9f 100644
--- a/notes.c
+++ b/notes.c
@@ -455,6 +455,8 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
 		CALLOC_ARRAY(l, 1);
 		oidcpy(&l->key_oid, &object_oid);
 		oidcpy(&l->val_oid, &entry.oid);
+		oid_pad_buffer(&l->key_oid, the_hash_algo);
+		oid_pad_buffer(&l->val_oid, the_hash_algo);
 		if (note_tree_insert(t, node, n, l, type,
 				     combine_notes_concatenate))
 			die("Failed to load %s %s into notes tree "
@@ -484,6 +486,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
 				strbuf_addch(&non_note_path, '/');
 			}
 			strbuf_addstr(&non_note_path, entry.path);
+			oid_pad_buffer(&entry.oid, the_hash_algo);
 			add_non_note(t, strbuf_detach(&non_note_path, NULL),
 				     entry.mode, entry.oid.hash);
 		}
diff --git a/object-file.c b/object-file.c
index 3f43c376e7..8e338247cc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2352,6 +2352,7 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
 		if (namelen == the_hash_algo->hexsz - 2 &&
 		    !hex_to_bytes(oid.hash + 1, de->d_name,
 				  the_hash_algo->rawsz - 1)) {
+			oid_pad_buffer(&oid, the_hash_algo);
 			if (obj_cb) {
 				r = obj_cb(&oid, path->buf, data);
 				if (r)