Re: [PATCH v6 05/13] merge-index: libify merge_one_path() and merge_all()

Derrick Stolee <stolee@xxxxxxxxx> · Tue, 5 Jan 2021 10:59:07 -0500

On 11/24/2020 6:53 AM, Alban Gruin wrote:
> The "resolve" and "octopus" merge strategies do not call directly `git
> merge-one-file', they delegate the work to another git command, `git
> merge-index', that will loop over files in the index and call the
> specified command.  Unfortunately, these functions are not part of
> libgit.a, which means that once rewritten, the strategies would still
> have to invoke `merge-one-file' by spawning a new process first.

This is a good thing to do.

> To avoid this, this moves and renames merge_one_path(), merge_all(), and
> their helpers to merge-strategies.c.  They also take a callback to
> dictate what they should do for each file.  For now, to preserve the
> behaviour of `merge-index', only one callback, launching a new process,
> is defined.

I don't think the callback should be in libgit.a, though. The callback
itself should be a static method inside builtin/merge-index.c.

> Signed-off-by: Alban Gruin <alban.gruin@xxxxxxxxx>
> ---
>  builtin/merge-index.c |  77 +++----------------------------
>  merge-strategies.c    | 104 ++++++++++++++++++++++++++++++++++++++++++
>  merge-strategies.h    |  19 ++++++++
>  3 files changed, 130 insertions(+), 70 deletions(-)
> 
> diff --git a/builtin/merge-index.c b/builtin/merge-index.c
> index 38ea6ad6ca..d5e5713b25 100644
> --- a/builtin/merge-index.c
> +++ b/builtin/merge-index.c
> @@ -1,74 +1,11 @@
>  #define USE_THE_INDEX_COMPATIBILITY_MACROS
>  #include "builtin.h"
> -#include "run-command.h"
> -
> -static const char *pgm;
> -static int one_shot, quiet;
> -static int err;
> -
> -static int merge_entry(int pos, const char *path)
> -{
> -	int found;
> -	const char *arguments[] = { pgm, "", "", "", path, "", "", "", NULL };
> -	char hexbuf[4][GIT_MAX_HEXSZ + 1];
> -	char ownbuf[4][60];
> -
> -	if (pos >= active_nr)
> -		die("git merge-index: %s not in the cache", path);
> -	found = 0;
> -	do {
> -		const struct cache_entry *ce = active_cache[pos];
> -		int stage = ce_stage(ce);
> -
> -		if (strcmp(ce->name, path))
> -			break;
> -		found++;
> -		oid_to_hex_r(hexbuf[stage], &ce->oid);
> -		xsnprintf(ownbuf[stage], sizeof(ownbuf[stage]), "%o", ce->ce_mode);
> -		arguments[stage] = hexbuf[stage];
> -		arguments[stage + 4] = ownbuf[stage];
> -	} while (++pos < active_nr);
> -	if (!found)
> -		die("git merge-index: %s not in the cache", path);
> -
> -	if (run_command_v_opt(arguments, 0)) {
> -		if (one_shot)
> -			err++;
> -		else {
> -			if (!quiet)
> -				die("merge program failed");
> -			exit(1);
> -		}
> -	}
> -	return found;
> -}
> -
> -static void merge_one_path(const char *path)
> -{
> -	int pos = cache_name_pos(path, strlen(path));
> -
> -	/*
> -	 * If it already exists in the cache as stage0, it's
> -	 * already merged and there is nothing to do.
> -	 */
> -	if (pos < 0)
> -		merge_entry(-pos-1, path);
> -}
> -
> -static void merge_all(void)
> -{
> -	int i;
> -	for (i = 0; i < active_nr; i++) {
> -		const struct cache_entry *ce = active_cache[i];
> -		if (!ce_stage(ce))
> -			continue;
> -		i += merge_entry(i, ce->name)-1;
> -	}
> -}
> +#include "merge-strategies.h"
>  
>  int cmd_merge_index(int argc, const char **argv, const char *prefix)
>  {
> -	int i, force_file = 0;
> +	int i, force_file = 0, err = 0, one_shot = 0, quiet = 0;
> +	const char *pgm;
>  
>  	/* Without this we cannot rely on waitpid() to tell
>  	 * what happened to our children.
> @@ -98,14 +35,14 @@ int cmd_merge_index(int argc, const char **argv, const char *prefix)
>  				continue;
>  			}
>  			if (!strcmp(arg, "-a")) {
> -				merge_all();
> +				err |= merge_all_index(the_repository, one_shot, quiet,
> +						       merge_one_file_spawn, (void *)pgm);

This hunk makes it look like pgm is uninitialized, but it is set earlier
in cmd_merge_index() (previously referring to the global instance). Good.

> +int merge_one_file_spawn(struct repository *r,
> +			 const struct object_id *orig_blob,
> +			 const struct object_id *our_blob,
> +			 const struct object_id *their_blob, const char *path,
> +			 unsigned int orig_mode, unsigned int our_mode, unsigned int their_mode,
> +			 void *data)
> +{
> +	char oids[3][GIT_MAX_HEXSZ + 1] = {{0}};
> +	char modes[3][10] = {{0}};
> +	const char *arguments[] = { (char *)data, oids[0], oids[1], oids[2],
> +				    path, modes[0], modes[1], modes[2], NULL };
> +
> +	if (orig_blob) {
> +		oid_to_hex_r(oids[0], orig_blob);
> +		xsnprintf(modes[0], sizeof(modes[0]), "%06o", orig_mode);
> +	}
> +
> +	if (our_blob) {
> +		oid_to_hex_r(oids[1], our_blob);
> +		xsnprintf(modes[1], sizeof(modes[1]), "%06o", our_mode);
> +	}
> +
> +	if (their_blob) {
> +		oid_to_hex_r(oids[2], their_blob);
> +		xsnprintf(modes[2], sizeof(modes[2]), "%06o", their_mode);
> +	}
> +
> +	return run_command_v_opt(arguments, 0);
> +}

Yes, this would be better in the builtin code. Better to keep the meaning
of 'data' clear in the context of that file.

> +static int merge_entry(struct repository *r, int quiet, unsigned int pos,
> +		       const char *path, int *err, merge_fn fn, void *data)
> +{
> +	int found = 0;
> +	const struct object_id *oids[3] = {NULL};
> +	unsigned int modes[3] = {0};
> +
> +	do {
> +		const struct cache_entry *ce = r->index->cache[pos];
> +		int stage = ce_stage(ce);
> +
> +		if (strcmp(ce->name, path))
> +			break;
> +		found++;
> +		oids[stage - 1] = &ce->oid;
> +		modes[stage - 1] = ce->ce_mode;
> +	} while (++pos < r->index->cache_nr);
> +	if (!found)
> +		return error(_("%s is not in the cache"), path);
> +
> +	if (fn(r, oids[0], oids[1], oids[2], path,
> +	       modes[0], modes[1], modes[2], data)) {
> +		if (!quiet)
> +			error(_("Merge program failed"));
> +		(*err)++;
> +	}
> +
> +	return found;
> +}
> +
> +int merge_index_path(struct repository *r, int oneshot, int quiet,
> +		     const char *path, merge_fn fn, void *data)
> +{
> +	int pos = index_name_pos(r->index, path, strlen(path)), ret, err = 0;
> +
> +	/*
> +	 * If it already exists in the cache as stage0, it's
> +	 * already merged and there is nothing to do.
> +	 */
> +	if (pos < 0) {
> +		ret = merge_entry(r, quiet || oneshot, -pos - 1, path, &err, fn, data);
> +		if (ret == -1)
> +			return -1;
> +		else if (err)
> +			return 1;
> +	}
> +	return 0;
> +}
> +
> +int merge_all_index(struct repository *r, int oneshot, int quiet,
> +		    merge_fn fn, void *data)
> +{
> +	int err = 0, ret;
> +	unsigned int i;
> +
> +	for (i = 0; i < r->index->cache_nr; i++) {
> +		const struct cache_entry *ce = r->index->cache[i];
> +		if (!ce_stage(ce))
> +			continue;
> +
> +		ret = merge_entry(r, quiet || oneshot, i, ce->name, &err, fn, data);
> +		if (ret > 0)
> +			i += ret - 1;
> +		else if (ret == -1)
> +			return -1;
> +
> +		if (err && !oneshot)
> +			return 1;
> +	}
> +
> +	return err;
> +}

I notice that these methods don't actually use the repository pointer
more than they just use 'r->index'. Should they instead take a
'struct index_state *istate' directly? (I see that the repository is
used later by merge_strategies_resolve(), but not in these.)

If you think it likely that we will need a repository for these methods,
then feel free to ignore me and keep your 'r' pointer.

Thanks,
-Stolee