Re: [PATCH v2 3/4] builtin/stash: provide a way to export stashes to a ref

Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> · Thu, 31 Mar 2022 03:56:01 +0200

On Tue, Mar 29 2022, brian m. carlson wrote:

> A common user problem is how to sync in-progress work to another
> machine.  Users currently must use some sort of transfer of the working
> tree, which poses security risks and also necessarily causes the index
> to become dirty.  The experience is suboptimal and frustrating for
> users.
>
> A reasonable idea is to use the stash for this purpose, but the stash is
> stored in the reflog, not in a ref, and as such it cannot be pushed or
> pulled.  This also means that it cannot be saved into a bundle or
> preserved elsewhere, which is a problem when using throwaway development
> environments.
>
> Let's solve this problem by allowing the user to export the stash to a
> ref (or, to just write it into the repository and print the hash, à la
> git commit-tree).  Introduce git stash export, which writes a chain of
> commits where the first parent is always a chain to the previous stash,
> or to a single, empty commit (for the final item) and the second is the
> stash commit normally written to the reflog.
>
> Iterate over each stash from topmost to bottomost, looking up the data
> for each one, and then create the chain from the single empty commit
> back up in reverse order.  Generate a predictable empty commit so our
> behavior is reproducible.  Create a useful commit message, preserving
> the author and committer information, to help users identify stash
> commits when viewing them as normal commits.
>
> If the user has specified specific stashes they'd like to export
> instead, use those instead of iterating over all of the stashes.
>
> As part of this, specifically request quiet behavior when looking up the
> OID for a revision because we will eventually hit a revision that
> doesn't exist and we don't want to die when that occurs.
>
> Signed-off-by: brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx>
> ---
>  Documentation/git-stash.txt |  22 ++++-
>  builtin/stash.c             | 183 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 204 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/git-stash.txt b/Documentation/git-stash.txt
> index 6e15f47525..162110314e 100644
> --- a/Documentation/git-stash.txt
> +++ b/Documentation/git-stash.txt
> @@ -20,6 +20,7 @@ SYNOPSIS
>  'git stash' clear
>  'git stash' create [<message>]
>  'git stash' store [-m|--message <message>] [-q|--quiet] <commit>
> +'git stash' export ( --print | --to-ref <ref> ) [<stash>...]
>  
>  DESCRIPTION
>  -----------
> @@ -151,6 +152,12 @@ store::
>  	reflog.  This is intended to be useful for scripts.  It is
>  	probably not the command you want to use; see "push" above.
>  
> +export ( --print | --to-ref <ref> ) [<stash>...]::
> +

I think this extra \n here isn't needed.

> +static const char * const git_stash_export_usage[] = {
> +	N_("git stash export (--print | --to-ref <ref>) [<stash>...]"),
> +	NULL
> +};
> +
> +

Stray too-much-whitespace.

>  static const char ref_stash[] = "refs/stash";
>  static struct strbuf stash_index_path = STRBUF_INIT;
>  
> @@ -1773,6 +1780,180 @@ static int save_stash(int argc, const char **argv, const char *prefix)
>  	return ret;
>  }
>  
> +static int write_commit_with_parents(struct object_id *out, const struct object_id *oid, struct commit_list *parents)
> +{
> +	size_t author_len, committer_len;
> +	struct commit *this;
> +	const char *orig_author, *orig_committer;
> +	char *author = NULL, *committer = NULL;
> +	const char *buffer;
> +	unsigned long bufsize;
> +	const char *p;
> +	struct strbuf msg = STRBUF_INIT;
> +	int ret = 0;

With this...

> +	this = lookup_commit_reference(the_repository, oid);
> +	buffer = get_commit_buffer(this, &bufsize);
> +	orig_author = find_commit_header(buffer, "author", &author_len);
> +	orig_committer = find_commit_header(buffer, "committer", &committer_len);
> +	p = memmem(buffer, bufsize, "\n\n", 2);
> +
> +	if (!orig_author || !orig_committer || !p) {
> +		error(_("cannot parse commit %s"), oid_to_hex(oid));
> +		goto out;

..isn't this a logic error, shouldn't we return -1 here?

> +	}
> +	/* Jump to message. */
> +	p += 2;
> +	strbuf_addstr(&msg, "git stash: ");
> +	strbuf_add(&msg, p, bufsize - (p - buffer));
> +
> +	author = xmemdupz(orig_author, author_len);
> +	committer = xmemdupz(orig_committer, committer_len);
> +
> +	if (commit_tree_extended(msg.buf, msg.len,
> +				 the_hash_algo->empty_tree, parents,
> +				 out, author, committer,
> +				 NULL, NULL)) {
> +		ret = -1;
> +		error(_("could not write commit"));

better as "ret = error(..."?

> +		goto out;
> +	}
> +out:
> +	strbuf_reset(&msg);
> +	unuse_commit_buffer(this, buffer);
> +	free(author);
> +	free(committer);
> +	return ret;
> +}
> +
> +static int do_export_stash(const char *ref, size_t argc, const char **argv)
> +{
> +	struct object_id base;
> +	struct object_context unused;
> +	struct commit *prev;
> +	struct object_id *items = NULL;
> +	int nitems = 0, nalloc = 0;

Can nalloc be moved into the if=else scopes?

Also shouldn't these be size_t...?

> +	int res = 0;
> +	struct strbuf revision;
> +	const char *author, *committer;
> +
> +	/*
> +	 * This is an arbitrary, fixed date, specifically the one used by git
> +	 * format-patch.  The goal is merely to produce reproducible output.
> +	 */
> +	prepare_fallback_ident("git stash", "git@stash");
> +	author = fmt_ident("git stash", "git@stash", WANT_BLANK_IDENT, "2001-09-17T00:00:00Z", 0);
> +	committer = fmt_ident("git stash", "git@stash", WANT_BLANK_IDENT, "2001-09-17T00:00:00Z", 0);
> +
> +	/* First, we create a single empty commit. */
> +	if (commit_tree_extended(NULL, 0, the_hash_algo->empty_tree, NULL, &base, author, committer, NULL, NULL))

Some very long lines here.

> +		return error(_("unable to write base commit"));
> +
> +	prev = lookup_commit_reference(the_repository, &base);
> +
> +	if (argc) {
> +		/*
> +		 * Find each specified stash, and load data into the array.
> +		 */
> +		for (int i = 0; i < argc; i++) {

...as this is size_t, not int.

> +			ALLOC_GROW_BY(items, nitems, 1, nalloc);
> +			if (parse_revision(&revision, argv[i], 0) ||
> +			    get_oid_with_context(the_repository, revision.buf,
> +						 GET_OID_QUIETLY | GET_OID_GENTLY,
> +						 &items[i], &unused)) {
> +				error(_("unable to find stash entry %s"), argv[i]);
> +				res = -1;

ditto "ret = error(..."
> +				goto out;
> +			}
> +		}
> +	} else {
> +		/*
> +		 * Walk the reflog, finding each stash entry, and load data into the
> +		 * array.
> +		 */
> +		for (int i = 0;; i++) {

Aside from the C99 dependency Junio mentioned, this should also be size_t.

> +	/*
> +	 * Now, create a set of commits identical to the regular stash commits,
> +	 * but where their first parents form a chain to our original empty
> +	 * base commit.
> +	 */
> +	for (int i = nitems - 1; i >= 0; i--) {

Did you spot my "count down" suggestion in
https://lore.kernel.org/git/220311.86bkydi65v.gmgdl@xxxxxxxxxxxxxxxxxxx/
on the v1?

> +		struct commit_list *parents = NULL;
> +		struct commit_list **next = &parents;
> +		struct object_id out;
> +
> +		next = commit_list_append(prev, next);
> +		next = commit_list_append(lookup_commit_reference(the_repository, &items[i]), next);
> +		if (write_commit_with_parents(&out, &items[i], parents)) {

Here we returned -1 from this earlier, I think this would be more
straightforward as:

	res = write_commit_with_parents(...)
	if (res < 0)
		goto out;

> +			res = -1;
> +			goto out;

So one doesn't have to wonder why we're ignoring the error value, and
using -1, but then treating all non-zero as errors.

> +		}
> +		prev = lookup_commit_reference(the_repository, &out);
> +	}
> +	if (ref)
> +		update_ref(NULL, ref, &prev->object.oid, NULL, 0, UPDATE_REFS_DIE_ON_ERR);
> +	else
> +		puts(oid_to_hex(&prev->object.oid));
> +out:
> +	free(items);
> +
> +	return res;
> +}
> +
> +enum export_action {
> +	ACTION_NONE,
> +	ACTION_PRINT,
> +	ACTION_TO_REF,
> +};
> +
> +static int export_stash(int argc, const char **argv, const char *prefix)
> +{
> +	int ret = 0;

It looks like we don't need to initialize this.

> +	const char *ref = NULL;
> +	enum export_action action = ACTION_NONE;
> +	struct option options[] = {
> +		OPT_CMDMODE(0, "print", &action,
> +			    N_("print the object ID instead of writing it to a ref"),
> +			    ACTION_PRINT),
> +		OPT_CMDMODE(0, "to-ref", &action,
> +			    N_("save the data to the given ref"),
> +			    ACTION_TO_REF),
> +		OPT_END()
> +	};
> +
> +	argc = parse_options(argc, argv, prefix, options,
> +			     git_stash_export_usage,
> +			     PARSE_OPT_KEEP_DASHDASH);
> +
> +	if (action == ACTION_NONE) {
> +		return error(_("exactly one of --print and --to-ref is required"));
> +	} else if (action == ACTION_TO_REF) {
> +		if (!argc)
> +			return error(_("--to-ref requires an argument"));
> +		ref = argv[0];
> +		argc--;
> +		argv++;
> +	}
> +
> +

nit: Too much whitespace

> +	ret = do_export_stash(ref, argc, argv);
> +	return ret;

Aside from the "ret" case above, maybe this would be better if the
"action" check became a swith, then the compiler would help check it
against the enum, and this wouldn't implicitly be both ACTION_PRINT and
ACTION_TO_REF, but could be done via a fall-through.