Hi Joel, On Wed, 4 Apr 2018, Joel Teichroeb wrote: > Add a bulitin helper for performing stash commands. Converting > all at once proved hard to review, so starting with just apply > let conversion get started without the other command being s/let/lets the/ s/command/commands/ > finished. > > The helper is being implemented as a drop in replacement for > stash so that when it is complete it can simply be renamed and > the shell script deleted. > > Delete the contents of the apply_stash shell function and replace > it with a call to stash--helper apply until pop is also > converted. > > [...] > > diff --git a/builtin/stash--helper.c b/builtin/stash--helper.c > new file mode 100644 > index 0000000000..9d00a9547d > --- /dev/null > +++ b/builtin/stash--helper.c > @@ -0,0 +1,431 @@ > +#include "builtin.h" > +#include "config.h" > +#include "parse-options.h" > +#include "refs.h" > +#include "lockfile.h" > +#include "cache-tree.h" > +#include "unpack-trees.h" > +#include "merge-recursive.h" > +#include "argv-array.h" > +#include "run-command.h" > +#include "dir.h" > +#include "rerere.h" > + > +static const char * const git_stash_helper_usage[] = { Note to other reviewers: while it would appear that our convention is usually to cuddle the `*` with the identifier afterwards (i.e. `char *const` instead of `char * const`), it would *also* appear that we somehow prefer spaces on both sides when it comes to the _usage: $ git grep 'usage\[' builtin/*.c | grep -c ' \* ' 88 $ git grep 'usage\[' builtin/*.c | grep -c ' \*[^ ]' 18 So much to clean up and make more consistent... (not your problem, Joel) > + N_("git stash--helper apply [--index] [-q|--quiet] [<stash>]"), > + NULL > +}; > + > +static const char * const git_stash_helper_apply_usage[] = { > + N_("git stash--helper apply [--index] [-q|--quiet] [<stash>]"), > + NULL > +}; > + > +static const char *ref_stash = "refs/stash"; > +static int quiet; > +static struct strbuf stash_index_path = STRBUF_INIT; > + > +struct stash_info { > + struct object_id w_commit; > + struct object_id b_commit; > + struct object_id i_commit; > + struct object_id u_commit; > + struct object_id w_tree; > + struct object_id b_tree; > + struct object_id i_tree; > + struct object_id u_tree; > + struct strbuf revision; > + int is_stash_ref; > + int has_u; > +}; > + > +static int grab_oid(struct object_id *oid, const char *fmt, const char *rev) > +{ > + struct strbuf buf = STRBUF_INIT; > + int ret; > + > + strbuf_addf(&buf, fmt, rev); > + ret = get_oid(buf.buf, oid); > + strbuf_release(&buf); > + return ret; > +} It could be even more reusable, by using int get_oidf(struct object_id *oid, const char *fmt, ...) { va_list ap; char *p; int ret; va_start(ap, fmt); p = xstrvfmt(fmt, ap); va_end(ap); ret = get_oid(p, oid); free(p); return ret; } Feel free to do this as a preparatory patch (it should probably live next to get_oid()), otherwise there is always an opportunity for a micro-project (or not so micro- one). > +static void free_stash_info(struct stash_info *info) > +{ > + strbuf_release(&info->revision); > +} > + > +static int get_stash_info(struct stash_info *info, int argc, const char **argv) > +{ > + struct strbuf symbolic = STRBUF_INIT; > + int ret; > + const char *revision; > + const char *commit = NULL; > + char *end_of_rev; > + char *expanded_ref; > + struct object_id discard; I think elsewhere we prefer the name "dummy" for variables that are assigned without really needing the values. > + > + if (argc > 1) { > + int i; > + struct strbuf refs_msg = STRBUF_INIT; > + for (i = 0; i < argc; ++i) > + strbuf_addf(&refs_msg, " '%s'", argv[i]); > + > + fprintf_ln(stderr, _("Too many revisions specified:%s"), refs_msg.buf); Note to interested parties: while it may have been okay for a mere Unix shell script to output such a message directly to stderr, in C, we need to use `error()` and start the error message with lower-case. This should *not* be done as part of the conversion to C, which is kind of bug-for-bug. > + strbuf_release(&refs_msg); > + > + return -1; > + } > + > + if (argc == 1) > + commit = argv[0]; It is probably a bit too magical to assign `commit = argv[0]` right away... but it would work, as `argv` is guaranteed to be NULL-terminated. > + > + strbuf_init(&info->revision, 0); > + if (commit == NULL) { We usually use `!commit` instead of `commit == NULL`. It's just the style we use, not a judgement. > + if (!ref_exists(ref_stash)) { > + free_stash_info(info); > + return error(_("No stash entries found.")); Technically, the shell version used `die`. We could use the same here, too, but `error()` seems to be fine, too. However, if you use `error()` here, you should use it also instead of the `fprintf(stderr, ...);` above, for "Too many revisions specified". Also: our convention is to start error messages lower-case because they are prefixed with `error: `. But in the interest of a faithful conversion to C, maybe we should start by using `fprintf(stderr, ...)` first, and only convert to `error()` later (because that will require changes to the test suite, too). > + } > + > + strbuf_addf(&info->revision, "%s@{0}", ref_stash); > + } else if (strspn(commit, "0123456789") == strlen(commit)) { > + strbuf_addf(&info->revision, "%s@{%s}", ref_stash, commit); > + } else { > + strbuf_addstr(&info->revision, commit); > + } > + > + revision = info->revision.buf; > + > + if (get_oid(revision, &info->w_commit)) { > + error(_("%s is not a valid reference"), revision); > + free_stash_info(info); > + return -1; > + } > + > + if (grab_oid(&info->b_commit, "%s^1", revision) || > + grab_oid(&info->w_tree, "%s:", revision) || > + grab_oid(&info->b_tree, "%s^1:", revision) || > + grab_oid(&info->i_tree, "%s^2:", revision)) { > + > + error(_("'%s' is not a stash-like commit"), revision); > + free_stash_info(info); > + return -1; > + } Fans of keeping Git commands in the form of Unix shell scripts, please note that this is a deviation from the shell script, but in a good way: before, if any of those revisions could not be parsed by `git rev-parse`, the error would not have resulted in a fatal error. Instead, IS_STASH_LIKE and IS_STASH_REF would have been unchanged, as would be w_commit, b_commit, w_tree, b_tree and i_tree. The error checking in C is much better for end users. > + info->has_u = !grab_oid(&info->u_tree, "%s^3:", revision); > + > + end_of_rev = strchrnul(revision, '@'); > + strbuf_add(&symbolic, revision, end_of_rev - revision); This is a literal translation of the shell script's `"${REV%@*}"`, but I wonder whether we can do better in C. After all, `refs/stash@my-laptop@{1}` is possibly a valid stash, when I called `git fetch laptop refs/stash:refs/stash@my-laptop` regularly. Let's see. Nope, we really don't have anything in our "API" to peel off reflog suffixes... Another micro-project for the future. Nothing that needs to hold up this patch series. (I am saying this mainly for the benefit of other reviewers...) > + > + ret = dwim_ref(symbolic.buf, symbolic.len, &discard, &expanded_ref); > + strbuf_release(&symbolic); > + switch (ret) { > + case 0: /* Not found, but valid ref */ > + info->is_stash_ref = 0; > + break; > + case 1: > + info->is_stash_ref = !strcmp(expanded_ref, ref_stash); > + break; > + default: /* Invalid or ambiguous */ > + free_stash_info(info); > + return -1; > + } > + > + return 0; > +} > + > +static int reset_tree(struct object_id *i_tree, int update, int reset) > +{ > + struct unpack_trees_options opts; > + int nr_trees = 1; > + struct tree_desc t[MAX_UNPACK_TREES]; > + struct tree *tree; > + struct lock_file lock_file = LOCK_INIT; > + > + read_cache_preload(NULL); > + if (refresh_cache(REFRESH_QUIET)) > + return -1; > + > + hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR); > + > + memset(&opts, 0, sizeof(opts)); > + > + tree = parse_tree_indirect(i_tree); > + if (parse_tree(tree)) > + return -1; > + > + init_tree_desc(t, tree->buffer, tree->size); > + > + opts.head_idx = 1; > + opts.src_index = &the_index; > + opts.dst_index = &the_index; > + opts.merge = 1; > + opts.reset = reset; > + opts.update = update; > + opts.fn = oneway_merge; > + > + if (unpack_trees(nr_trees, t, &opts)) > + return -1; > + > + if (write_locked_index(&the_index, &lock_file, COMMIT_LOCK)) > + return error(_("unable to write new index file")); > + > + return 0; > +} Oh how I wish we had that as API: $ git grep -W unpack_trees \*.c | grep init_tree_desc archive.c- init_tree_desc(&t, args->tree->buffer, args->tree->size); builtin/am.c- init_tree_desc(&t[0], head->buffer, head->size); builtin/am.c- init_tree_desc(&t[1], remote->buffer, remote->size); builtin/am.c- init_tree_desc(&t[0], tree->buffer, tree->size); builtin/checkout.c- init_tree_desc(&tree_desc, tree->buffer, tree->size); builtin/checkout.c- init_tree_desc(&trees[0], tree->buffer, tree->size); builtin/checkout.c- init_tree_desc(&trees[1], tree->buffer, tree->size); builtin/clone.c- init_tree_desc(&t, tree->buffer, tree->size); builtin/commit.c- init_tree_desc(&t, tree->buffer, tree->size); builtin/merge.c- init_tree_desc(t+i, trees[i]->buffer, trees[i]->size); builtin/read-tree.c- init_tree_desc(t+i, tree->buffer, tree->size); diff-lib.c- init_tree_desc(&t, tree->buffer, tree->size); merge-recursive.c- init_tree_desc_from_tree(t+0, common); merge-recursive.c- init_tree_desc_from_tree(t+1, head); merge-recursive.c- init_tree_desc_from_tree(t+2, merge); merge.c- init_tree_desc(t+i, trees[i]->buffer, trees[i]->size); Looks like a lot of duplication to me. (Again, this is just a lament, and a suggestion for a future project, not a micro-project, though). > + > +static int diff_tree_binary(struct strbuf *out, struct object_id *w_commit) > +{ > + struct child_process cp = CHILD_PROCESS_INIT; > + const char *w_commit_hex = oid_to_hex(w_commit); > + > + /* Diff-tree would not be very hard to replace with a native function, > + * however it should be done together with apply_cached. > + */ > + cp.git_cmd = 1; > + argv_array_pushl(&cp.args, "diff-tree", "--binary", NULL); > + argv_array_pushf(&cp.args, "%s^2^..%s^2", w_commit_hex, w_commit_hex); > + > + return pipe_command(&cp, NULL, 0, out, 0, NULL, 0); > +} As I stated before: I think this is good enough for the moment. We can always spend time later to reduce these spawned processes and call in-process functions instead. One teeny tiny step at a time. > +static int apply_cached(struct strbuf *out) > +{ > + struct child_process cp = CHILD_PROCESS_INIT; > + > + /* Apply currently only reads either from stdin or a file, thus > + * apply_all_patches would have to be updated to optionally take a buffer > + */ > + cp.git_cmd = 1; > + argv_array_pushl(&cp.args, "apply", "--cached", NULL); > + return pipe_command(&cp, out->buf, out->len, NULL, 0, NULL, 0); > +} > + > +static int reset_head(const char *prefix) > +{ > + struct argv_array args = ARGV_ARRAY_INIT; > + > + /* Reset is overall quite simple, however there is no current public API > + * for resetting. > + */ > + argv_array_push(&args, "reset"); > + return cmd_reset(args.argc, args.argv, prefix); > +} > + > +static int diff_cached_index(struct strbuf *out, struct object_id *c_tree) > +{ > + struct child_process cp = CHILD_PROCESS_INIT; > + const char *c_tree_hex = oid_to_hex(c_tree); > + > + /* diff-index is very similar to diff-tree above, and should be converted > + * together with update_index > + */ > + cp.git_cmd = 1; > + argv_array_pushl(&cp.args, "diff-index", "--cached", "--name-only", "--diff-filter=A", NULL); We should probably note in the name of this function that we return a list only of the added files. > + argv_array_push(&cp.args, c_tree_hex); > + return pipe_command(&cp, NULL, 0, out, 0, NULL, 0); > +} > + > +static int update_index(struct strbuf *out) > +{ > + struct child_process cp = CHILD_PROCESS_INIT; > + > + /* Update-index is very complicated and may need to have a public function > + * exposed in order to remove this forking > + */ > + cp.git_cmd = 1; > + argv_array_pushl(&cp.args, "update-index", "--add", "--stdin", NULL); > + return pipe_command(&cp, out->buf, out->len, NULL, 0, NULL, 0); > +} > + > +static int restore_untracked(struct object_id *u_tree) > +{ > + struct child_process cp = CHILD_PROCESS_INIT; > + int res; > + > + /* > + * We need to run restore files from a given index, but without affecting > + * the current index, so we use GIT_INDEX_FILE with run_command to fork > + * processes that will not interfere. > + */ > + cp.git_cmd = 1; > + argv_array_push(&cp.args, "read-tree"); > + argv_array_push(&cp.args, oid_to_hex(u_tree)); > + argv_array_pushf(&cp.env_array, "GIT_INDEX_FILE=%s", stash_index_path.buf); > + if (run_command(&cp)) { > + remove_path(stash_index_path.buf); > + return -1; > + } > + > + child_process_init(&cp); > + cp.git_cmd = 1; > + argv_array_pushl(&cp.args, "checkout-index", "--all", NULL); > + argv_array_pushf(&cp.env_array, "GIT_INDEX_FILE=%s", stash_index_path.buf); > + > + res = run_command(&cp); > + remove_path(stash_index_path.buf); > + return res; > +} > + > +static int do_apply_stash(const char *prefix, struct stash_info *info, int index) > +{ > + struct merge_options o; > + struct object_id c_tree; > + struct object_id index_tree; > + const struct object_id *bases[1]; > + int bases_count = 1; It is always 1, maybe not waste a variable on it? ;-) > + struct commit *result; > + int ret; > + int has_index = index; > + > + read_cache_preload(NULL); > + if (refresh_cache(REFRESH_QUIET)) > + return -1; > + > + if (write_cache_as_tree(&c_tree, 0, NULL) || reset_tree(&c_tree, 0, 0)) > + return error(_("Cannot apply a stash in the middle of a merge")); > + > + if (index) { > + if (!oidcmp(&info->b_tree, &info->i_tree) || !oidcmp(&c_tree, &info->i_tree)) { > + has_index = 0; > + } else { > + struct strbuf out = STRBUF_INIT; > + > + if (diff_tree_binary(&out, &info->w_commit)) { > + strbuf_release(&out); > + return -1; > + } > + > + ret = apply_cached(&out); > + strbuf_release(&out); > + if (ret) > + return -1; A more faithful conversion would pipe the output of `git diff-tree --binary` into `git apply --cached`, but I do not think it is worth complicating things here. > + > + discard_cache(); > + read_cache(); $ git grep -A1 discard_cache | grep -c read_cache 10 I guess that's another idiom we use a lot. (Again, micro-project here.) > + if (write_cache_as_tree(&index_tree, 0, NULL)) > + return -1; > + > + reset_head(prefix); > + } > + } > + > + if (info->has_u) { > + if (restore_untracked(&info->u_tree)) > + return error(_("Could not restore untracked files from stash")); > + } > + > + init_merge_options(&o); > + > + o.branch1 = "Updated upstream"; > + o.branch2 = "Stashed changes"; > + > + if (!oidcmp(&info->b_tree, &c_tree)) > + o.branch1 = "Version stash was based on"; > + > + if (quiet) > + o.verbosity = 0; > + > + if (o.verbosity >= 3) > + printf_ln(_("Merging %s with %s"), o.branch1, o.branch2); > + > + bases[0] = &info->b_tree; > + > + ret = merge_recursive_generic(&o, &c_tree, &info->w_tree, bases_count, bases, &result); > + if (ret != 0) { > + rerere(0); > + > + if (index) > + fprintf_ln(stderr, _("Index was not unstashed.")); > + > + return ret; > + } > + > + if (has_index) { > + if (reset_tree(&index_tree, 0, 0)) > + return -1; > + } else { > + struct strbuf out = STRBUF_INIT; > + > + if (diff_cached_index(&out, &c_tree)) { > + strbuf_release(&out); > + return -1; > + } > + > + if (reset_tree(&c_tree, 0, 1)) { > + strbuf_release(&out); > + return -1; > + } > + > + ret = update_index(&out); > + strbuf_release(&out); > + if (ret) > + return -1; > + > + discard_cache(); > + } > + > + if (!quiet) { > + struct argv_array args = ARGV_ARRAY_INIT; > + /* Status is quite simple and could be replaced with calls to wt_status > + * in the future, but it adds complexities which may require more tests > + */ > + argv_array_push(&args, "status"); > + cmd_status(args.argc, args.argv, prefix); > + } This is a change in behavior. The shell script calls `git status` always, and only *suppresses* the output if `--quiet` was passed. I guess the difference in behavior really boils down only to refreshing the index. Maybe we can do that here, too? Something like if (quiet) { if (refresh_cache(REFRESH_QUIET)) warning("could not refresh index"); } else { ... > + > + return 0; > +} > + > +static int apply_stash(int argc, const char **argv, const char *prefix) > +{ > + int index = 0; > + struct stash_info info; > + int ret; > + struct option options[] = { > + OPT__QUIET(&quiet, N_("be quiet, only report errors")), > + OPT_BOOL(0, "index", &index, > + N_("attempt to recreate the index")), > + OPT_END() > + }; > + > + argc = parse_options(argc, argv, prefix, options, > + git_stash_helper_apply_usage, 0); > + > + if (get_stash_info(&info, argc, argv)) > + return -1; > + > + ret = do_apply_stash(prefix, &info, index); This is the only caller of that function. But I guess that `pop` also needs to call it, so I guess it makes sense to keep `do_apply_stash()` separate. > + free_stash_info(&info); > + return ret; > +} > + > +int cmd_stash__helper(int argc, const char **argv, const char *prefix) > +{ > + int result = 0; > + pid_t pid = getpid(); > + const char *index_file; > + > + struct option options[] = { > + OPT_END() > + }; > + > + git_config(git_default_config, NULL); > + > + argc = parse_options(argc, argv, prefix, options, git_stash_helper_usage, > + PARSE_OPT_KEEP_UNKNOWN|PARSE_OPT_KEEP_DASHDASH); Please keep spaces around the pipe symbol. > + index_file = get_index_file(); > + strbuf_addf(&stash_index_path, "%s.stash.%" PRIuMAX, index_file, (uintmax_t)pid); > + > + if (argc < 1) > + usage_with_options(git_stash_helper_usage, options); > + else if (!strcmp(argv[0], "apply")) > + result = apply_stash(argc, argv, prefix); > + else { > + error(_("unknown subcommand: %s"), argv[0]); > + usage_with_options(git_stash_helper_usage, options); > + result = 1; > + } > + > + return result; > +} > diff --git a/git-stash.sh b/git-stash.sh > index 94793c1a91..0b5d1f3743 100755 > --- a/git-stash.sh > +++ b/git-stash.sh > @@ -566,76 +566,11 @@ assert_stash_ref() { > } > > apply_stash () { > - > - assert_stash_like "$@" > - > - git update-index -q --refresh || die "$(gettext "unable to refresh index")" > - > - # current index state > - c_tree=$(git write-tree) || > - die "$(gettext "Cannot apply a stash in the middle of a merge")" > - > - unstashed_index_tree= > - if test -n "$INDEX_OPTION" && test "$b_tree" != "$i_tree" && > - test "$c_tree" != "$i_tree" > - then > - git diff-tree --binary $s^2^..$s^2 | git apply --cached > - test $? -ne 0 && > - die "$(gettext "Conflicts in index. Try without --index.")" > - unstashed_index_tree=$(git write-tree) || > - die "$(gettext "Could not save index tree")" > - git reset > - fi > - > - if test -n "$u_tree" > - then > - GIT_INDEX_FILE="$TMPindex" git read-tree "$u_tree" && > - GIT_INDEX_FILE="$TMPindex" git checkout-index --all && > - rm -f "$TMPindex" || > - die "$(gettext "Could not restore untracked files from stash entry")" > - fi > - > - eval " > - GITHEAD_$w_tree='Stashed changes' && > - GITHEAD_$c_tree='Updated upstream' && > - GITHEAD_$b_tree='Version stash was based on' && > - export GITHEAD_$w_tree GITHEAD_$c_tree GITHEAD_$b_tree > - " > - > - if test -n "$GIT_QUIET" > - then > - GIT_MERGE_VERBOSITY=0 && export GIT_MERGE_VERBOSITY > - fi > - if git merge-recursive $b_tree -- $c_tree $w_tree > - then > - # No conflict > - if test -n "$unstashed_index_tree" > - then > - git read-tree "$unstashed_index_tree" > - else > - a="$TMP-added" && > - git diff-index --cached --name-only --diff-filter=A $c_tree >"$a" && > - git read-tree --reset $c_tree && > - git update-index --add --stdin <"$a" || > - die "$(gettext "Cannot unstage modified files")" > - rm -f "$a" > - fi > - squelch= > - if test -n "$GIT_QUIET" > - then > - squelch='>/dev/null 2>&1' > - fi > - (cd "$START_DIR" && eval "git status $squelch") || : > - else > - # Merge conflict; keep the exit status from merge-recursive > - status=$? > - git rerere > - if test -n "$INDEX_OPTION" > - then > - gettextln "Index was not unstashed." >&2 > - fi > - exit $status > - fi > + cd "$START_DIR" > + git stash--helper apply "$@" > + res=$? > + cd_to_toplevel Is it important to cd to top-level when we are exiting anyway? I guess it is, because we are not exiting right away, necessarily: both pop_stash() and apply_to_branch() call this function. But then, both callers only call drop_stash() afterwards (which does not perform worktree operations)... > + return $res > } > > pop_stash() { > diff --git a/git.c b/git.c > index ceaa58ef40..6ffe6364ac 100644 > --- a/git.c > +++ b/git.c > @@ -466,6 +466,7 @@ static struct cmd_struct commands[] = { > { "show-branch", cmd_show_branch, RUN_SETUP }, > { "show-ref", cmd_show_ref, RUN_SETUP }, > { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, > + { "stash--helper", cmd_stash__helper, RUN_SETUP | NEED_WORK_TREE }, > { "status", cmd_status, RUN_SETUP | NEED_WORK_TREE }, > { "stripspace", cmd_stripspace }, > { "submodule--helper", cmd_submodule__helper, RUN_SETUP | SUPPORT_SUPER_PREFIX}, > -- > 2.16.3 This patch (apart from the minor suggestions I had) looks very good to me already. Thank you so much! Ciao, Dscho