Alban Gruin <alban.gruin@xxxxxxxxx> writes: > diff --git a/builtin/merge-one-file.c b/builtin/merge-one-file.c > new file mode 100644 > index 0000000000..306a86c2f0 > --- /dev/null > +++ b/builtin/merge-one-file.c > @@ -0,0 +1,85 @@ > +/* > + * Builtin "git merge-one-file" > + * > + * Copyright (c) 2020 Alban Gruin > + * > + * Based on git-merge-one-file.sh, written by Linus Torvalds. > + * > + * This is the git per-file merge utility, called with > + * > + * argv[1] - original file SHA1 (or empty) > + * argv[2] - file in branch1 SHA1 (or empty) > + * argv[3] - file in branch2 SHA1 (or empty) > + * argv[4] - pathname in repository > + * argv[5] - original file mode (or empty) > + * argv[6] - file in branch1 mode (or empty) > + * argv[7] - file in branch2 mode (or empty) > + * > + * Handle some trivial cases. The _really_ trivial cases have been > + * handled already by git read-tree, but that one doesn't do any merges > + * that might change the tree layout. > + */ > + > +#include "cache.h" > +#include "builtin.h" > +#include "lockfile.h" > +#include "merge-strategies.h" > + > +static const char builtin_merge_one_file_usage[] = > + "git merge-one-file <orig blob> <our blob> <their blob> <path> " > + "<orig mode> <our mode> <their mode>\n\n" > + "Blob ids and modes should be empty for missing files."; > + > +int cmd_merge_one_file(int argc, const char **argv, const char *prefix) > +{ > + struct object_id orig_blob, our_blob, their_blob, > + *p_orig_blob = NULL, *p_our_blob = NULL, *p_their_blob = NULL; > + unsigned int orig_mode = 0, our_mode = 0, their_mode = 0, ret = 0; > + struct lock_file lock = LOCK_INIT; > + > + if (argc != 8) > + usage(builtin_merge_one_file_usage); > + > + if (repo_read_index(the_repository) < 0) > + die("invalid index"); > + > + repo_hold_locked_index(the_repository, &lock, LOCK_DIE_ON_ERROR); I do understand why we would want merge_strategies_one_file() helper introduced by this step so that the helper can work in an arbitrary repository (hence taking a pointer to repository structure as one of its parameters). But the "merge-one-file" command will always work in the_repository. I do not see a point in using helpers that can work in an arbitrary repository, like repo_read_index() or repo_hold_locked_index(), in the above. I only see downsides --- it is longer to read, makes readers wonder if there is something tricky involving another repository going on, etc. > + if (!get_oid(argv[1], &orig_blob)) { > + p_orig_blob = &orig_blob; > + orig_mode = strtol(argv[5], NULL, 8); Write a wrapper around strtol(...,...,8) to reduce repetition, and make sure you do not pass NULL as the second parameter to strtol() to always check you parsed the string to the end. > + ret = merge_strategies_one_file(the_repository, > + p_orig_blob, p_our_blob, p_their_blob, argv[4], > + orig_mode, our_mode, their_mode); Here, as I said above, it is perfectly fine to pass the_repository(). > + if (ret) { > + rollback_lock_file(&lock); > + return ret; > + } > + > + return write_locked_index(the_repository->index, &lock, COMMIT_LOCK); Likewise, I do not see much point in saying the_repository->index; the_index is a perfectly fine short-hand. > diff --git a/merge-strategies.c b/merge-strategies.c > new file mode 100644 > index 0000000000..f2af4a894d > --- /dev/null > +++ b/merge-strategies.c > @@ -0,0 +1,199 @@ > +#include "cache.h" > +#include "dir.h" > +#include "ll-merge.h" > +#include "merge-strategies.h" > +#include "xdiff-interface.h" > + > +static int add_to_index_cacheinfo(struct index_state *istate, > + unsigned int mode, > + const struct object_id *oid, const char *path) > +{ > + struct cache_entry *ce; > + int len, option; > + > + if (!verify_path(path, mode)) > + return error(_("Invalid path '%s'"), path); > + > + len = strlen(path); > + ce = make_empty_cache_entry(istate, len); > + > + oidcpy(&ce->oid, oid); > + memcpy(ce->name, path, len); > + ce->ce_flags = create_ce_flags(0); > + ce->ce_namelen = len; > + ce->ce_mode = create_ce_mode(mode); > + if (assume_unchanged) > + ce->ce_flags |= CE_VALID; > + option = ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE; > + if (add_index_entry(istate, ce, option)) > + return error(_("%s: cannot add to the index"), path); > + > + return 0; > +} > + > +static int checkout_from_index(struct index_state *istate, const char *path) > +{ > + struct checkout state = CHECKOUT_INIT; > + struct cache_entry *ce; > + > + state.istate = istate; > + state.force = 1; > + state.base_dir = ""; > + state.base_dir_len = 0; > + > + ce = index_file_exists(istate, path, strlen(path), 0); > + if (checkout_entry(ce, &state, NULL, NULL) < 0) > + return error(_("%s: cannot checkout file"), path); > + return 0; > +} > + > +static int merge_one_file_deleted(struct index_state *istate, > + const struct object_id *orig_blob, > + const struct object_id *our_blob, > + const struct object_id *their_blob, const char *path, > + unsigned int orig_mode, unsigned int our_mode, unsigned int their_mode) > +{ > + if ((our_blob && orig_mode != our_mode) || > + (their_blob && orig_mode != their_mode)) > + return error(_("File %s deleted on one branch but had its " > + "permissions changed on the other."), path); > + > + if (our_blob) { > + printf(_("Removing %s\n"), path); > + > + if (file_exists(path)) > + remove_path(path); > + } > + > + if (remove_file_from_index(istate, path)) > + return error("%s: cannot remove from the index", path); > + return 0; > +} These functions we see above all are now easy to write these days, thanks to the previous work that built many helpers to perform ommon operations (e.g. remove_path()). Reusing them is very good. > +static int do_merge_one_file(struct index_state *istate, > + const struct object_id *orig_blob, > + const struct object_id *our_blob, > + const struct object_id *their_blob, const char *path, > + unsigned int orig_mode, unsigned int our_mode, unsigned int their_mode) > +{ > + int ret, i, dest; > + mmbuffer_t result = {NULL, 0}; > + mmfile_t mmfs[3]; > + struct ll_merge_options merge_opts = {0}; > + struct cache_entry *ce; > + > + if (our_mode == S_IFLNK || their_mode == S_IFLNK) > + return error(_("%s: Not merging symbolic link changes."), path); > + else if (our_mode == S_IFGITLINK || their_mode == S_IFGITLINK) > + return error(_("%s: Not merging conflicting submodule changes."), path); > + > + read_mmblob(mmfs + 1, our_blob); > + read_mmblob(mmfs + 2, their_blob); > + > + if (orig_blob) { > + printf(_("Auto-merging %s\n"), path); > + read_mmblob(mmfs + 0, orig_blob); > + } else { > + printf(_("Added %s in both, but differently.\n"), path); > + read_mmblob(mmfs + 0, &null_oid); > + } > + > + merge_opts.xdl_opts = XDL_MERGE_ZEALOUS_ALNUM; > + ret = ll_merge(&result, path, > + mmfs + 0, "orig", > + mmfs + 1, "our", > + mmfs + 2, "their", > + istate, &merge_opts); > + > + for (i = 0; i < 3; i++) > + free(mmfs[i].ptr); > + > + if (ret > 127 || !orig_blob) > + ret = error(_("content conflict in %s"), path); The original only checked if ret is zero or non-zero; here we require ret to be large. Intended? ll_merge() that called ll_xdl_merge() (i.e. the most common case) would return the value returned from xdl_merge(), which can be -1 when we got an error before calling xdl_do_merge(). xdl_do_merge() in turn can return -1. The most common case returns the value returned from xdl_cleanup_merge(), which is 0 for clean merge, and any positive integer (not clipped to 127 or 128) for conflicted one. > + /* Create the working tree file, using "our tree" version from > + the index, and then store the result of the merge. */ Style. (cf. Documentation/CodingGuidelines). > + ce = index_file_exists(istate, path, strlen(path), 0); > + if (!ce) > + BUG("file is not present in the cache?"); > + > + unlink(path); > + dest = open(path, O_WRONLY | O_CREAT, ce->ce_mode); > + write_in_full(dest, result.ptr, result.size); If open() fails, we write to a bogus file descriptor here. > + close(dest); > + > + free(result.ptr); > + > + if (ret && our_mode != their_mode) > + return error(_("permission conflict: %o->%o,%o in %s"), > + orig_mode, our_mode, their_mode, path); > + if (ret) > + return 1; What is the error returning convention around here? Our usual convention is that 0 signals a success, and negative reports an error. Returning the value returned from add_file_to_index() below, and error() above, are consistent with the convention, but this one returns 1 that is not. When deviating from convention, it needs to be documented for the callers in a comment before the function definition. > + > + return add_file_to_index(istate, path, 0); > +} > +int merge_strategies_one_file(struct repository *r, > + const struct object_id *orig_blob, > + const struct object_id *our_blob, > + const struct object_id *their_blob, const char *path, > + unsigned int orig_mode, unsigned int our_mode, > + unsigned int their_mode) > +{ > + if (orig_blob && > + ((!their_blob && our_blob && oideq(orig_blob, our_blob)) || > + (!our_blob && their_blob && oideq(orig_blob, their_blob)))) > + /* Deleted in both or deleted in one and unchanged in > + the other */ > + return merge_one_file_deleted(r->index, > + orig_blob, our_blob, their_blob, path, > + orig_mode, our_mode, their_mode); > + else if (!orig_blob && our_blob && !their_blob) { > + /* Added in one. The other side did not add and we > + added so there is nothing to be done, except making > + the path merged. */ > + return add_to_index_cacheinfo(r->index, our_mode, our_blob, path); > + } else if (!orig_blob && !our_blob && their_blob) { > + printf(_("Adding %s\n"), path); > + > + if (file_exists(path)) > + return error(_("untracked %s is overwritten by the merge."), path); > + > + if (add_to_index_cacheinfo(r->index, their_mode, their_blob, path)) > + return 1; > + return checkout_from_index(r->index, path); > + } else if (!orig_blob && our_blob && their_blob && > + oideq(our_blob, their_blob)) { > + /* Added in both, identically (check for same > + permissions). */ > + if (our_mode != their_mode) > + return error(_("File %s added identically in both branches, " > + "but permissions conflict %o->%o."), > + path, our_mode, their_mode); > + > + printf(_("Adding %s\n"), path); > + > + if (add_to_index_cacheinfo(r->index, our_mode, our_blob, path)) > + return 1; > + return checkout_from_index(r->index, path); > + } else if (our_blob && their_blob) > + /* Modified in both, but differently. */ > + return do_merge_one_file(r->index, > + orig_blob, our_blob, their_blob, path, > + orig_mode, our_mode, their_mode); > + else { > + char *orig_hex = "", *our_hex = "", *their_hex = ""; > + > + if (orig_blob) > + orig_hex = oid_to_hex(orig_blob); > + if (our_blob) > + our_hex = oid_to_hex(our_blob); > + if (their_blob) > + their_hex = oid_to_hex(their_blob); Prepare three char [] buffers and use oid_to_hex_r() instead, instead of relying that we'd have sufficient number of entries in the rotating buffer. > + return error(_("%s: Not handling case %s -> %s -> %s"), > + path, orig_hex, our_hex, their_hex); > + } > + > + return 0; > +} > diff --git a/merge-strategies.h b/merge-strategies.h > new file mode 100644 > index 0000000000..b527d145c7 > --- /dev/null > +++ b/merge-strategies.h > @@ -0,0 +1,13 @@ > +#ifndef MERGE_STRATEGIES_H > +#define MERGE_STRATEGIES_H > + > +#include "object.h" > + > +int merge_strategies_one_file(struct repository *r, > + const struct object_id *orig_blob, > + const struct object_id *our_blob, > + const struct object_id *their_blob, const char *path, > + unsigned int orig_mode, unsigned int our_mode, > + unsigned int their_mode); > + > +#endif /* MERGE_STRATEGIES_H */ > diff --git a/t/t6415-merge-dir-to-symlink.sh b/t/t6415-merge-dir-to-symlink.sh > index 2eddcc7664..5fb74e39a0 100755 > --- a/t/t6415-merge-dir-to-symlink.sh > +++ b/t/t6415-merge-dir-to-symlink.sh > @@ -94,7 +94,7 @@ test_expect_success SYMLINKS 'a/b was resolved as symlink' ' > test -h a/b > ' > > -test_expect_failure 'do not lose untracked in merge (resolve)' ' > +test_expect_success 'do not lose untracked in merge (resolve)' ' > git reset --hard && > git checkout baseline^0 && > >a/b/c/e &&