On Fri, Mar 7, 2025 at 6:20 AM Patrick Steinhardt <ps@xxxxxx> wrote: > > The `null_oid()` function returns the object ID that only consists of > zeroes. Naturally, this ID also depends on the hash algorithm used, as > the number of zeroes is different between SHA1 and SHA256. Consequently, > the function returns the hash-algorithm-specific null object ID. > > This is currently done by depending on `the_hash_algo`, which implicitly > makes us depend on `the_repository`. Refactor the function to instead > pass in the hash algorithm for which we want to retrieve the null object > ID. Adapt callsites accordingly by passing in `the_repository`, thus > bubbling up the dependency on that global variable by one layer. > > There are a couple of trivial exceptions for subsystems that already got > rid of `the_repository`. These subsystems instead use the repository > that is available via the calling context: > > - "builtin/grep.c" > - "grep.c" > - "refs/debug.c" > > There are also two non-trivial exceptions: > > - "diff-no-index.c": Here we know that we may not have a repository > initialized at all, so we cannot rely on `the_repository`. Instead, > we adapt `diff_no_index()` to get a `struct git_hash_algo` as > parameter. The only caller is located in "builtin/diff.c", where we > know to call `repo_set_hash_algo()` in case we're running outside of > a Git repository. Consequently, it is fine to continue passing > `the_repository->hash_algo` even in this case. > > - "builtin/ls-files.c": There is an in-flight patch series that drops > `USE_THE_REPOSITORY_VARIABLE` in this file, which causes a semantic > conflict because we use `null_oid()` in `show_submodule()`. The > value is passed to `repo_submodule_init()`, which may use the object > ID to resolve a tree-ish in the superproject from which we want to > read the submodule config. As such, the object ID should refer to an > object in the superproject, and consequently we need to use its hash > algorithm. 
> > This means that we could in theory just not bother about this edge case > at all and just use `the_repository` in "diff-no-index.c". But doing so > would feel misdesigned. Very minor, but this was a bit jarring to me -- shouldn't this paragraph be indented over since it is a continuation of the second bullet above? > Remove the `USE_THE_REPOSITORY_VARIABLE` preprocessor define in > "hash.c". > > Signed-off-by: Patrick Steinhardt <ps@xxxxxx> [...] > diff --git a/merge-ort.c b/merge-ort.c > index 46e78c3ffa6..eb3a834652a 100644 > --- a/merge-ort.c > +++ b/merge-ort.c > @@ -1817,7 +1817,7 @@ static int merge_submodule(struct merge_options *opt, > BUG("submodule deleted on one side; this should be handled outside of merge_submodule()"); > > if ((sub_not_initialized = repo_submodule_init(&subrepo, > - opt->repo, path, null_oid()))) { > + opt->repo, path, null_oid(the_hash_algo)))) { > path_msg(opt, CONFLICT_SUBMODULE_NOT_INITIALIZED, 0, > path, NULL, NULL, NULL, > _("Failed to merge submodule %s (not checked out)"), > @@ -2199,7 +2199,7 @@ static int handle_content_merge(struct merge_options *opt, > two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode)); > > merge_status = merge_3way(opt, path, > - two_way ? null_oid() : &o->oid, > + two_way ? null_oid(the_hash_algo) : &o->oid, > &a->oid, &b->oid, > pathnames, extra_marker_size, > &result_buf); > @@ -2231,7 +2231,7 @@ static int handle_content_merge(struct merge_options *opt, > } else if (S_ISGITLINK(a->mode)) { > int two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode)); > clean = merge_submodule(opt, pathnames[0], > - two_way ? null_oid() : &o->oid, > + two_way ? 
null_oid(the_hash_algo) : &o->oid, > &a->oid, &b->oid, &result->oid); > if (clean < 0) > return -1; > @@ -2739,7 +2739,7 @@ static void apply_directory_rename_modifications(struct merge_options *opt, > assert(!new_ci->match_mask); > new_ci->dirmask = 0; > new_ci->stages[1].mode = 0; > - oidcpy(&new_ci->stages[1].oid, null_oid()); > + oidcpy(&new_ci->stages[1].oid, null_oid(the_hash_algo)); > > /* > * Now that we have the file information in new_ci, make sure > @@ -2752,7 +2752,7 @@ static void apply_directory_rename_modifications(struct merge_options *opt, > continue; > /* zero out any entries related to files */ > ci->stages[i].mode = 0; > - oidcpy(&ci->stages[i].oid, null_oid()); > + oidcpy(&ci->stages[i].oid, null_oid(the_hash_algo)); > } > > /* Now we want to focus on new_ci, so reassign ci to it. */ > @@ -3122,7 +3122,7 @@ static int process_renames(struct merge_options *opt, > if (type_changed) { > /* rename vs. typechange */ > /* Mark the original as resolved by removal */ > - memcpy(&oldinfo->stages[0].oid, null_oid(), > + memcpy(&oldinfo->stages[0].oid, null_oid(the_hash_algo), > sizeof(oldinfo->stages[0].oid)); > oldinfo->stages[0].mode = 0; > oldinfo->filemask &= 0x06; > @@ -3994,7 +3994,7 @@ static int process_entry(struct merge_options *opt, > if (ci->filemask & (1 << i)) > continue; > ci->stages[i].mode = 0; > - oidcpy(&ci->stages[i].oid, null_oid()); > + oidcpy(&ci->stages[i].oid, null_oid(the_hash_algo)); > } > } else if (ci->df_conflict && ci->merged.result.mode != 0) { > /* > @@ -4041,7 +4041,7 @@ static int process_entry(struct merge_options *opt, > continue; > /* zero out any entries related to directories */ > new_ci->stages[i].mode = 0; > - oidcpy(&new_ci->stages[i].oid, null_oid()); > + oidcpy(&new_ci->stages[i].oid, null_oid(the_hash_algo)); > } > > /* > @@ -4163,11 +4163,11 @@ static int process_entry(struct merge_options *opt, > new_ci->merged.result.mode = ci->stages[2].mode; > oidcpy(&new_ci->merged.result.oid, &ci->stages[2].oid); > 
new_ci->stages[1].mode = 0; > - oidcpy(&new_ci->stages[1].oid, null_oid()); > + oidcpy(&new_ci->stages[1].oid, null_oid(the_hash_algo)); > new_ci->filemask = 5; > if ((S_IFMT & b_mode) != (S_IFMT & o_mode)) { > new_ci->stages[0].mode = 0; > - oidcpy(&new_ci->stages[0].oid, null_oid()); > + oidcpy(&new_ci->stages[0].oid, null_oid(the_hash_algo)); > new_ci->filemask = 4; > } > > @@ -4175,11 +4175,11 @@ static int process_entry(struct merge_options *opt, > ci->merged.result.mode = ci->stages[1].mode; > oidcpy(&ci->merged.result.oid, &ci->stages[1].oid); > ci->stages[2].mode = 0; > - oidcpy(&ci->stages[2].oid, null_oid()); > + oidcpy(&ci->stages[2].oid, null_oid(the_hash_algo)); > ci->filemask = 3; > if ((S_IFMT & a_mode) != (S_IFMT & o_mode)) { > ci->stages[0].mode = 0; > - oidcpy(&ci->stages[0].oid, null_oid()); > + oidcpy(&ci->stages[0].oid, null_oid(the_hash_algo)); > ci->filemask = 2; > } > > @@ -4304,7 +4304,7 @@ static int process_entry(struct merge_options *opt, > /* Deleted on both sides */ > ci->merged.is_null = 1; > ci->merged.result.mode = 0; > - oidcpy(&ci->merged.result.oid, null_oid()); > + oidcpy(&ci->merged.result.oid, null_oid(the_hash_algo)); > assert(!ci->df_conflict); > ci->merged.clean = !ci->path_conflict; > } What you have is an improvement since it's at least making things explicit, but these should really be opt->repo->hash_algo. 
> diff --git a/merge-recursive.c b/merge-recursive.c > index 5dfaf32b2c1..d88e61b07a9 100644 > --- a/merge-recursive.c > +++ b/merge-recursive.c > @@ -502,7 +502,7 @@ static int get_tree_entry_if_blob(struct repository *r, > > ret = get_tree_entry(r, tree, path, &dfs->oid, &dfs->mode); > if (S_ISDIR(dfs->mode)) { > - oidcpy(&dfs->oid, null_oid()); > + oidcpy(&dfs->oid, null_oid(the_hash_algo)); > dfs->mode = 0; > } > return ret; > @@ -1238,7 +1238,7 @@ static int merge_submodule(struct merge_options *opt, > if (is_null_oid(b)) > return 0; > > - if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) { > + if (repo_submodule_init(&subrepo, opt->repo, path, null_oid(the_hash_algo))) { > output(opt, 1, _("Failed to merge submodule %s (not checked out)"), path); > return 0; > } > @@ -1698,7 +1698,7 @@ static int handle_file_collision(struct merge_options *opt, > > /* Store things in diff_filespecs for functions that need it */ > null.path = (char *)collide_path; > - oidcpy(&null.oid, null_oid()); > + oidcpy(&null.oid, null_oid(the_hash_algo)); > null.mode = 0; > > if (merge_mode_and_contents(opt, &null, a, b, collide_path, > @@ -2898,14 +2898,14 @@ static int process_renames(struct merge_options *opt, > dst_other.mode = ren1->dst_entry->stages[other_stage].mode; > try_merge = 0; > > - if (oideq(&src_other.oid, null_oid()) && > + if (oideq(&src_other.oid, null_oid(the_hash_algo)) && > ren1->dir_rename_original_type == 'A') { > setup_rename_conflict_info(RENAME_VIA_DIR, > opt, ren1, NULL); > } else if (renamed_to_self) { > setup_rename_conflict_info(RENAME_NORMAL, > opt, ren1, NULL); > - } else if (oideq(&src_other.oid, null_oid())) { > + } else if (oideq(&src_other.oid, null_oid(the_hash_algo))) { > setup_rename_conflict_info(RENAME_DELETE, > opt, ren1, NULL); > } else if ((dst_other.mode == ren1->pair->two->mode) && > @@ -2924,7 +2924,7 @@ static int process_renames(struct merge_options *opt, > 1, /* update_cache */ > 0 /* update_wd */)) > clean_merge = -1; > - 
} else if (!oideq(&dst_other.oid, null_oid())) { > + } else if (!oideq(&dst_other.oid, null_oid(the_hash_algo))) { > /* > * Probably not a clean merge, but it's > * premature to set clean_merge to 0 here, This will textually conflict with my upcoming series to delete merge-recursive.[ch]. Luckily, the conflict will be easy to resolve: once merge-recursive.[ch] is deleted, these changes simply become irrelevant... :-) [...] The rest of the changes looked good to me.