Just an observation here: comparing 256 bytes every time would seem to have one nice bonus side effect and one potentially bad, but vanishingly unlikely, side effect: a 160 byte null hash will now compare equal to a 256 byte null hash (good), but a 160 byte hash extended to 256 bytes will compare equal to a 256 byte hash that just happens to end in 96 bytes of zero (bad, but I would guess, will never actually happen). Chris On Sat, Apr 10, 2021 at 8:23 AM brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx> wrote: > > Currently, when we compare two object IDs, we have to take a branch to > determine what the hash size is supposed to be. The compiler can > optimize well for a single length, but has trouble when there are two > possible lengths. > > There is, however, an alternative: we can ensure that we always compare > the full length of the hash buffer, but in turn we must zero the > remainder of the buffer when using SHA-1; otherwise, we'll end up with > incompatible junk at the end of otherwise equivalent object IDs that > will prevent them from matching. This is an acceptable tradeoff, > because we generally read an object ID in once, but then compare it > against others multiple times. > > This latter approach also has some benefits as well: since we will have > annotated every location in which we load an object ID into an instance > of struct object_id, if we want to set the hash algorithm for the object > ID, we can do so at the same time. > > Adopt this latter approach, since it provides us greater flexibility and > lets us read and store object IDs for multiple algorithms at once. > > Signed-off-by: brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx> > --- > hash.h | 13 ++++++++++--- > hex.c | 9 ++++++--- > notes.c | 3 +++ > object-file.c | 1 + > 4 files changed, 20 insertions(+), 6 deletions(-) > > diff --git a/hash.h b/hash.h > index c8f03d8aee..04eba5c56b 100644 > --- a/hash.h > +++ b/hash.h > @@ -205,7 +205,7 @@ static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2) > > static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2) > { > - return hashcmp(oid1->hash, oid2->hash); > + return memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ); > } > > static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2) > @@ -221,7 +221,7 @@ static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2) > > static inline int oideq(const struct object_id *oid1, const struct object_id *oid2) > { > - return hasheq(oid1->hash, oid2->hash); > + return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ); > } > > static inline int is_null_oid(const struct object_id *oid) > @@ -258,7 +258,9 @@ static inline void oidclr(struct object_id *oid) > > static inline void oidread(struct object_id *oid, const unsigned char *hash) > { > - memcpy(oid->hash, hash, the_hash_algo->rawsz); > + size_t rawsz = the_hash_algo->rawsz; > + memcpy(oid->hash, hash, rawsz); > + memset(oid->hash + rawsz, 0, GIT_MAX_RAWSZ - rawsz); > } > > static inline int is_empty_blob_sha1(const unsigned char *sha1) > @@ -281,6 +283,11 @@ static inline int is_empty_tree_oid(const struct object_id *oid) > return oideq(oid, the_hash_algo->empty_tree); > } > > +static inline void oid_pad_buffer(struct object_id *oid, const struct git_hash_algo *algop) > +{ > + memset(oid->hash + algop->rawsz, 0, GIT_MAX_RAWSZ - algop->rawsz); > +} > + > const char *empty_tree_oid_hex(void); > const char *empty_blob_oid_hex(void); > > diff --git a/hex.c b/hex.c > index da51e64929..5fa3e71cb9 100644 > --- a/hex.c > +++ b/hex.c > @@ -69,7 +69,10 @@ int get_sha1_hex(const char *hex, unsigned char *sha1) > int get_oid_hex_algop(const char *hex, struct object_id *oid, > const struct git_hash_algo *algop) > { > - return get_hash_hex_algop(hex, oid->hash, algop); > + int ret = get_hash_hex_algop(hex, oid->hash, algop); > + if (!ret) > + oid_pad_buffer(oid, algop); > + return ret; > } > > /* > @@ -80,7 +83,7 @@ int get_oid_hex_any(const char *hex, struct object_id *oid) > { > int i; > for (i = GIT_HASH_NALGOS - 1; i > 0; i--) { > - if (!get_hash_hex_algop(hex, oid->hash, &hash_algos[i])) > + if (!get_oid_hex_algop(hex, oid, &hash_algos[i])) > return i; > } > return GIT_HASH_UNKNOWN; > @@ -95,7 +98,7 @@ int parse_oid_hex_algop(const char *hex, struct object_id *oid, > const char **end, > const struct git_hash_algo *algop) > { > - int ret = get_hash_hex_algop(hex, oid->hash, algop); > + int ret = get_oid_hex_algop(hex, oid, algop); > if (!ret) > *end = hex + algop->hexsz; > return ret; > diff --git a/notes.c b/notes.c > index a44b25858f..1dfe9e2b9f 100644 > --- a/notes.c > +++ b/notes.c > @@ -455,6 +455,8 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree, > CALLOC_ARRAY(l, 1); > oidcpy(&l->key_oid, &object_oid); > oidcpy(&l->val_oid, &entry.oid); > + oid_pad_buffer(&l->key_oid, the_hash_algo); > + oid_pad_buffer(&l->val_oid, the_hash_algo); > if (note_tree_insert(t, node, n, l, type, > combine_notes_concatenate)) > die("Failed to load %s %s into notes tree " > @@ -484,6 +486,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree, > strbuf_addch(&non_note_path, '/'); > } > strbuf_addstr(&non_note_path, entry.path); > + oid_pad_buffer(&entry.oid, the_hash_algo); > add_non_note(t, strbuf_detach(&non_note_path, NULL), > entry.mode, entry.oid.hash); > } > diff --git a/object-file.c b/object-file.c > index 3f43c376e7..8e338247cc 100644 > --- a/object-file.c > +++ b/object-file.c > @@ -2352,6 +2352,7 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr, > if (namelen == the_hash_algo->hexsz - 2 && > !hex_to_bytes(oid.hash + 1, de->d_name, > the_hash_algo->rawsz - 1)) { > + oid_pad_buffer(&oid, the_hash_algo); > if (obj_cb) { > r = obj_cb(&oid, path->buf, data); > if (r)