Re: [PATCH v6 05/13] merge-index: libify merge_one_path() and merge_all()

Alban Gruin <alban.gruin@xxxxxxxxx> · Wed, 6 Jan 2021 00:20:06 +0100

Hi Derrick,

Le 05/01/2021 à 16:59, Derrick Stolee a écrit :
> On 11/24/2020 6:53 AM, Alban Gruin wrote:
>> The "resolve" and "octopus" merge strategies do not call directly `git
>> merge-one-file', they delegate the work to another git command, `git
>> merge-index', that will loop over files in the index and call the
>> specified command.  Unfortunately, these functions are not part of
>> libgit.a, which means that once rewritten, the strategies would still
>> have to invoke `merge-one-file' by spawning a new process first.
> 
> This is a good thing to do.
>  
>> To avoid this, this moves and renames merge_one_path(), merge_all(), and
>> their helpers to merge-strategies.c.  They also take a callback to
>> dictate what they should do for each file.  For now, to preserve the
>> behaviour of `merge-index', only one callback, launching a new process,
>> is defined.
> 
> I don't think the callback should be in libgit.a, though. The callback
> itself should be a static method inside builtin/merge-index.c.
> 

Right.  Modern code should not use this callback -- or the merge-index
builtin once this gets merged.

>> Signed-off-by: Alban Gruin <alban.gruin@xxxxxxxxx>
>> ---
>>  builtin/merge-index.c |  77 +++----------------------------
>>  merge-strategies.c    | 104 ++++++++++++++++++++++++++++++++++++++++++
>>  merge-strategies.h    |  19 ++++++++
>>  3 files changed, 130 insertions(+), 70 deletions(-)
>>
>> diff --git a/builtin/merge-index.c b/builtin/merge-index.c
>> index 38ea6ad6ca..d5e5713b25 100644
>> --- a/builtin/merge-index.c
>> +++ b/builtin/merge-index.c
>> @@ -1,74 +1,11 @@
>>  #define USE_THE_INDEX_COMPATIBILITY_MACROS
>>  #include "builtin.h"
>> -#include "run-command.h"
>> -
>> -static const char *pgm;
>> -static int one_shot, quiet;
>> -static int err;
>> -
>> -static int merge_entry(int pos, const char *path)
>> -{
>> -	int found;
>> -	const char *arguments[] = { pgm, "", "", "", path, "", "", "", NULL };
>> -	char hexbuf[4][GIT_MAX_HEXSZ + 1];
>> -	char ownbuf[4][60];
>> -
>> -	if (pos >= active_nr)
>> -		die("git merge-index: %s not in the cache", path);
>> -	found = 0;
>> -	do {
>> -		const struct cache_entry *ce = active_cache[pos];
>> -		int stage = ce_stage(ce);
>> -
>> -		if (strcmp(ce->name, path))
>> -			break;
>> -		found++;
>> -		oid_to_hex_r(hexbuf[stage], &ce->oid);
>> -		xsnprintf(ownbuf[stage], sizeof(ownbuf[stage]), "%o", ce->ce_mode);
>> -		arguments[stage] = hexbuf[stage];
>> -		arguments[stage + 4] = ownbuf[stage];
>> -	} while (++pos < active_nr);
>> -	if (!found)
>> -		die("git merge-index: %s not in the cache", path);
>> -
>> -	if (run_command_v_opt(arguments, 0)) {
>> -		if (one_shot)
>> -			err++;
>> -		else {
>> -			if (!quiet)
>> -				die("merge program failed");
>> -			exit(1);
>> -		}
>> -	}
>> -	return found;
>> -}
>> -
>> -static void merge_one_path(const char *path)
>> -{
>> -	int pos = cache_name_pos(path, strlen(path));
>> -
>> -	/*
>> -	 * If it already exists in the cache as stage0, it's
>> -	 * already merged and there is nothing to do.
>> -	 */
>> -	if (pos < 0)
>> -		merge_entry(-pos-1, path);
>> -}
>> -
>> -static void merge_all(void)
>> -{
>> -	int i;
>> -	for (i = 0; i < active_nr; i++) {
>> -		const struct cache_entry *ce = active_cache[i];
>> -		if (!ce_stage(ce))
>> -			continue;
>> -		i += merge_entry(i, ce->name)-1;
>> -	}
>> -}
>> +#include "merge-strategies.h"
>>  
>>  int cmd_merge_index(int argc, const char **argv, const char *prefix)
>>  {
>> -	int i, force_file = 0;
>> +	int i, force_file = 0, err = 0, one_shot = 0, quiet = 0;
>> +	const char *pgm;
>>  
>>  	/* Without this we cannot rely on waitpid() to tell
>>  	 * what happened to our children.
>> @@ -98,14 +35,14 @@ int cmd_merge_index(int argc, const char **argv, const char *prefix)
>>  				continue;
>>  			}
>>  			if (!strcmp(arg, "-a")) {
>> -				merge_all();
>> +				err |= merge_all_index(the_repository, one_shot, quiet,
>> +						       merge_one_file_spawn, (void *)pgm);
> 
> This hunk makes it look like pgm is uninitialized, but it is set earlier
> in cmd_merge_index() (previously referring to the global instance). Good.
> 
>> +int merge_one_file_spawn(struct repository *r,
>> +			 const struct object_id *orig_blob,
>> +			 const struct object_id *our_blob,
>> +			 const struct object_id *their_blob, const char *path,
>> +			 unsigned int orig_mode, unsigned int our_mode, unsigned int their_mode,
>> +			 void *data)
>> +{
>> +	char oids[3][GIT_MAX_HEXSZ + 1] = {{0}};
>> +	char modes[3][10] = {{0}};
>> +	const char *arguments[] = { (char *)data, oids[0], oids[1], oids[2],
>> +				    path, modes[0], modes[1], modes[2], NULL };
>> +
>> +	if (orig_blob) {
>> +		oid_to_hex_r(oids[0], orig_blob);
>> +		xsnprintf(modes[0], sizeof(modes[0]), "%06o", orig_mode);
>> +	}
>> +
>> +	if (our_blob) {
>> +		oid_to_hex_r(oids[1], our_blob);
>> +		xsnprintf(modes[1], sizeof(modes[1]), "%06o", our_mode);
>> +	}
>> +
>> +	if (their_blob) {
>> +		oid_to_hex_r(oids[2], their_blob);
>> +		xsnprintf(modes[2], sizeof(modes[2]), "%06o", their_mode);
>> +	}
>> +
>> +	return run_command_v_opt(arguments, 0);
>> +}
> 
> Yes, this would be better in the builtin code. Better to keep the meaning
> of 'data' clear in the context of that file.
> 
>> +static int merge_entry(struct repository *r, int quiet, unsigned int pos,
>> +		       const char *path, int *err, merge_fn fn, void *data)
>> +{
>> +	int found = 0;
>> +	const struct object_id *oids[3] = {NULL};
>> +	unsigned int modes[3] = {0};
>> +
>> +	do {
>> +		const struct cache_entry *ce = r->index->cache[pos];
>> +		int stage = ce_stage(ce);
>> +
>> +		if (strcmp(ce->name, path))
>> +			break;
>> +		found++;
>> +		oids[stage - 1] = &ce->oid;
>> +		modes[stage - 1] = ce->ce_mode;
>> +	} while (++pos < r->index->cache_nr);
>> +	if (!found)
>> +		return error(_("%s is not in the cache"), path);
>> +
>> +	if (fn(r, oids[0], oids[1], oids[2], path,
>> +	       modes[0], modes[1], modes[2], data)) {
>> +		if (!quiet)
>> +			error(_("Merge program failed"));
>> +		(*err)++;
>> +	}
>> +
>> +	return found;
>> +}
>> +
>> +int merge_index_path(struct repository *r, int oneshot, int quiet,
>> +		     const char *path, merge_fn fn, void *data)
>> +{
>> +	int pos = index_name_pos(r->index, path, strlen(path)), ret, err = 0;
>> +
>> +	/*
>> +	 * If it already exists in the cache as stage0, it's
>> +	 * already merged and there is nothing to do.
>> +	 */
>> +	if (pos < 0) {
>> +		ret = merge_entry(r, quiet || oneshot, -pos - 1, path, &err, fn, data);
>> +		if (ret == -1)
>> +			return -1;
>> +		else if (err)
>> +			return 1;
>> +	}
>> +	return 0;
>> +}
>> +
>> +int merge_all_index(struct repository *r, int oneshot, int quiet,
>> +		    merge_fn fn, void *data)
>> +{
>> +	int err = 0, ret;
>> +	unsigned int i;
>> +
>> +	for (i = 0; i < r->index->cache_nr; i++) {
>> +		const struct cache_entry *ce = r->index->cache[i];
>> +		if (!ce_stage(ce))
>> +			continue;
>> +
>> +		ret = merge_entry(r, quiet || oneshot, i, ce->name, &err, fn, data);
>> +		if (ret > 0)
>> +			i += ret - 1;
>> +		else if (ret == -1)
>> +			return -1;
>> +
>> +		if (err && !oneshot)
>> +			return 1;
>> +	}
>> +
>> +	return err;
>> +}
> 
> I notice that these methods don't actually use the repository pointer
> more than they just use 'r->index'. Should they instead take a
> 'struct index_state *istate' directly? (I see that the repository is
> used later by merge_strategies_resolve(), but not in these.)
> 
> If you think it likely that we will need a repository for these methods,
> then feel free to ignore me and keep your 'r' pointer.
> 

Ouch, you're right.  I thought this was necessary because
merge_three_way() wanted a `struct repository *', without noticing that
it was in fact unnecessary, even in my follow-up patch.  I change that.

> Thanks,
> -Stolee
> 

Cheers,
Alban