Hi Johannes, Johannes Schindelin <johannes.schindelin@xxxxxx> writes: > This operation has quadratic complexity, which is especially painful > on Windows, where shell scripts are *already* slow (mainly due to the > overhead of the POSIX emulation layer). > > Let's reimplement this with linear complexity (using a hash map to > match the commits' subject lines) for the common case; Sadly, the > fixup/squash feature's design neglected performance considerations, > allowing arbitrary prefixes (read: `fixup! hell` will match the > commit subject `hello world`), which means that we are stuck with > quadratic performance in the worst case. > > The reimplemented logic also happens to fix a bug where commented-out > lines (representing empty patches) were dropped by the previous code. > > While at it, clarify how the fixup/squash feature works in `git rebase > -i`'s man page. > > Signed-off-by: Johannes Schindelin <johannes.schindelin@xxxxxx> > --- > Documentation/git-rebase.txt | 16 ++-- > builtin/rebase--helper.c | 6 +- > git-rebase--interactive.sh | 90 +------------------- > sequencer.c | 195 +++++++++++++++++++++++++++++++++++++++++++ > sequencer.h | 1 + > t/t3415-rebase-autosquash.sh | 2 +- > 6 files changed, 212 insertions(+), 98 deletions(-) > > diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt > index 53f4e144444..c5464aa5365 100644 > --- a/Documentation/git-rebase.txt > +++ b/Documentation/git-rebase.txt > @@ -430,13 +430,15 @@ without an explicit `--interactive`. > --autosquash:: > --no-autosquash:: > When the commit log message begins with "squash! ..." (or > - "fixup! ..."), and there is a commit whose title begins with > - the same ..., automatically modify the todo list of rebase -i > - so that the commit marked for squashing comes right after the > - commit to be modified, and change the action of the moved > - commit from `pick` to `squash` (or `fixup`). Ignores subsequent > - "fixup! " or "squash! " after the first, in case you referred to an > - earlier fixup/squash with `git commit --fixup/--squash`. > + "fixup! ..."), and there is already a commit in the todo list that > + matches the same `...`, automatically modify the todo list of rebase > + -i so that the commit marked for squashing comes right after the > + commit to be modified, and change the action of the moved commit > + from `pick` to `squash` (or `fixup`). A commit matches the `...` if > + the commit subject matches, or if the `...` refers to the commit's > + hash. As a fall-back, partial matches of the commit subject work, > + too. The recommended way to create fixup/squash commits is by using > + the `--fixup`/`--squash` options of linkgit:git-commit[1]. > + > This option is only valid when the `--interactive` option is used. > + > diff --git a/builtin/rebase--helper.c b/builtin/rebase--helper.c > index de3ccd9bfbc..e6591f01112 100644 > --- a/builtin/rebase--helper.c > +++ b/builtin/rebase--helper.c > @@ -14,7 +14,7 @@ int cmd_rebase__helper(int argc, const char **argv, const char *prefix) > int keep_empty = 0; > enum { > CONTINUE = 1, ABORT, MAKE_SCRIPT, SHORTEN_SHA1S, EXPAND_SHA1S, > - CHECK_TODO_LIST, SKIP_UNNECESSARY_PICKS > + CHECK_TODO_LIST, SKIP_UNNECESSARY_PICKS, REARRANGE_SQUASH > } command = 0; > struct option options[] = { > OPT_BOOL(0, "ff", &opts.allow_ff, N_("allow fast-forward")), > @@ -33,6 +33,8 @@ int cmd_rebase__helper(int argc, const char **argv, const char *prefix) > N_("check the todo list"), CHECK_TODO_LIST), > OPT_CMDMODE(0, "skip-unnecessary-picks", &command, > N_("skip unnecessary picks"), SKIP_UNNECESSARY_PICKS), > + OPT_CMDMODE(0, "rearrange-squash", &command, > + N_("rearrange fixup/squash lines"), REARRANGE_SQUASH), > OPT_END() > }; > > @@ -59,5 +61,7 @@ int cmd_rebase__helper(int argc, const char **argv, const char *prefix) > return !!check_todo_list(); > if (command == SKIP_UNNECESSARY_PICKS && argc == 1) > return !!skip_unnecessary_picks(); > + if (command == REARRANGE_SQUASH && argc == 1) > + return !!rearrange_squash(); > usage_with_options(builtin_rebase_helper_usage, options); > } > diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh > index 92e3ca1de3b..84c6e62518f 100644 > --- a/git-rebase--interactive.sh > +++ b/git-rebase--interactive.sh > @@ -721,94 +721,6 @@ collapse_todo_ids() { > git rebase--helper --shorten-sha1s > } > > -# Rearrange the todo list that has both "pick sha1 msg" and > -# "pick sha1 fixup!/squash! msg" appears in it so that the latter > -# comes immediately after the former, and change "pick" to > -# "fixup"/"squash". > -# > -# Note that if the config has specified a custom instruction format > -# each log message will be re-retrieved in order to normalize the > -# autosquash arrangement > -rearrange_squash () { > - format=$(git config --get rebase.instructionFormat) > - # extract fixup!/squash! lines and resolve any referenced sha1's > - while read -r pick sha1 message > - do > - test -z "${format}" || message=$(git log -n 1 --format="%s" ${sha1}) > - case "$message" in > - "squash! "*|"fixup! "*) > - action="${message%%!*}" > - rest=$message > - prefix= > - # skip all squash! or fixup! (but save for later) > - while : > - do > - case "$rest" in > - "squash! "*|"fixup! "*) > - prefix="$prefix${rest%%!*}," > - rest="${rest#*! }" > - ;; > - *) > - break > - ;; > - esac > - done > - printf '%s %s %s %s\n' "$sha1" "$action" "$prefix" "$rest" > - # if it's a single word, try to resolve to a full sha1 and > - # emit a second copy. This allows us to match on both message > - # and on sha1 prefix > - if test "${rest#* }" = "$rest"; then > - fullsha="$(git rev-parse -q --verify "$rest" 2>/dev/null)" > - if test -n "$fullsha"; then > - # prefix the action to uniquely identify this line as > - # intended for full sha1 match > - echo "$sha1 +$action $prefix $fullsha" > - fi > - fi > - esac > - done >"$1.sq" <"$1" > - test -s "$1.sq" || return > - > - used= > - while read -r pick sha1 message > - do > - case " $used" in > - *" $sha1 "*) continue ;; > - esac > - printf '%s\n' "$pick $sha1 $message" > - test -z "${format}" || message=$(git log -n 1 --format="%s" ${sha1}) > - used="$used$sha1 " > - while read -r squash action msg_prefix msg_content > - do > - case " $used" in > - *" $squash "*) continue ;; > - esac > - emit=0 > - case "$action" in > - +*) > - action="${action#+}" > - # full sha1 prefix test > - case "$msg_content" in "$sha1"*) emit=1;; esac ;; > - *) > - # message prefix test > - case "$message" in "$msg_content"*) emit=1;; esac ;; > - esac > - if test $emit = 1; then > - if test -n "${format}" > - then > - msg_content=$(git log -n 1 --format="${format}" ${squash}) > - else > - msg_content="$(echo "$msg_prefix" | sed "s/,/! /g")$msg_content" > - fi > - printf '%s\n' "$action $squash $msg_content" > - used="$used$squash " > - fi > - done <"$1.sq" > - done >"$1.rearranged" <"$1" > - cat "$1.rearranged" >"$1" > - rm -f "$1.sq" "$1.rearranged" > -} > - > # Add commands after a pick or after a squash/fixup serie > # in the todo list. > add_exec_commands () { > @@ -1068,7 +980,7 @@ then > fi > > test -s "$todo" || echo noop >> "$todo" > -test -n "$autosquash" && rearrange_squash "$todo" > +test -z "$autosquash" || git rebase--helper --rearrange-squash || exit > test -n "$cmd" && add_exec_commands "$todo" > > todocount=$(git stripspace --strip-comments <"$todo" | wc -l) > diff --git a/sequencer.c b/sequencer.c > index 72e3ad8d145..63a588f0916 100644 > --- a/sequencer.c > +++ b/sequencer.c > @@ -19,6 +19,7 @@ > #include "trailer.h" > #include "log-tree.h" > #include "wt-status.h" > +#include "hashmap.h" > > #define GIT_REFLOG_ACTION "GIT_REFLOG_ACTION" > > @@ -2723,3 +2724,197 @@ int skip_unnecessary_picks(void) > > return 0; > } > + > +struct subject2item_entry { > + struct hashmap_entry entry; > + int i; > + char subject[FLEX_ARRAY]; > +}; > + > +static int subject2item_cmp(const struct subject2item_entry *a, > + const struct subject2item_entry *b, const void *key) > +{ > + return key ? strcmp(a->subject, key) : strcmp(a->subject, b->subject); > +} > + > +/* > + * Rearrange the todo list that has both "pick sha1 msg" and "pick sha1 > + * fixup!/squash! msg" in it so that the latter is put immediately after the > + * former, and change "pick" to "fixup"/"squash". > + * > + * Note that if the config has specified a custom instruction format, each log > + * message will have to be retrieved from the commit (as the oneline in the > + * script cannot be trusted) in order to normalize the autosquash arrangement. > + */ > +int rearrange_squash(void) > +{ > + const char *todo_file = rebase_path_todo(); > + struct todo_list todo_list = TODO_LIST_INIT; > + struct hashmap subject2item; > + int res = 0, rearranged = 0, *next, *tail, fd, i; > + char **subjects; > + > + fd = open(todo_file, O_RDONLY); > + if (fd < 0) > + return error_errno(_("could not open '%s'"), todo_file); > + if (strbuf_read(&todo_list.buf, fd, 0) < 0) { > + close(fd); > + return error(_("could not read '%s'."), todo_file); > + } > + close(fd); > + if (parse_insn_buffer(todo_list.buf.buf, &todo_list) < 0) { > + todo_list_release(&todo_list); > + return -1; > + } > + > + /* > + * The hashmap maps onelines to the respective todo list index. > + * > + * If any items need to be rearranged, the next[i] value will indicate > + * which item was moved directly after the i'th. > + * > + * In that case, last[i] will indicate the index of the latest item to > + * be moved to appear after the i'th. > + */ > + hashmap_init(&subject2item, (hashmap_cmp_fn) subject2item_cmp, > + todo_list.nr); > + ALLOC_ARRAY(next, todo_list.nr); > + ALLOC_ARRAY(tail, todo_list.nr); > + ALLOC_ARRAY(subjects, todo_list.nr); > + for (i = 0; i < todo_list.nr; i++) { > + struct strbuf buf = STRBUF_INIT; > + struct todo_item *item = todo_list.items + i; > + const char *commit_buffer, *subject, *p; > + size_t subject_len; > + int i2 = -1; > + struct subject2item_entry *entry; > + > + next[i] = tail[i] = -1; > + if (item->command >= TODO_EXEC) { > + subjects[i] = NULL; > + continue; > + } > + > + if (is_fixup(item->command)) { > + todo_list_release(&todo_list); > + return error(_("the script was already rearranged.")); > + } > + > + item->commit->util = item; > + > + parse_commit(item->commit); > + commit_buffer = get_commit_buffer(item->commit, NULL); > + find_commit_subject(commit_buffer, &subject); > + format_subject(&buf, subject, " "); > + subject = subjects[i] = strbuf_detach(&buf, &subject_len); > + unuse_commit_buffer(item->commit, commit_buffer); > + if ((skip_prefix(subject, "fixup! ", &p) || > + skip_prefix(subject, "squash! ", &p))) { > + struct commit *commit2; > + > + for (;;) { > + while (isspace(*p)) > + p++; > + if (!skip_prefix(p, "fixup! ", &p) && > + !skip_prefix(p, "squash! ", &p)) > + break; > + } > + > + if ((entry = hashmap_get_from_hash(&subject2item, > + strhash(p), p))) > + /* found by title */ > + i2 = entry->i; > + else if (!strchr(p, ' ') && > + (commit2 = > + lookup_commit_reference_by_name(p)) && > + commit2->util) > + /* found by commit name */ > + i2 = (struct todo_item *)commit2->util > + - todo_list.items; > + else { > + /* copy can be a prefix of the commit subject */ > + for (i2 = 0; i2 < i; i2++) > + if (subjects[i2] && > + starts_with(subjects[i2], p)) > + break; > + if (i2 == i) > + i2 = -1; > + } > + } > + if (i2 >= 0) { > + rearranged = 1; > + todo_list.items[i].command = > + starts_with(subject, "fixup!") ? > + TODO_FIXUP : TODO_SQUASH; > + if (next[i2] < 0) > + next[i2] = i; > + else > + next[tail[i2]] = i; > + tail[i2] = i; > + } else if (!hashmap_get_from_hash(&subject2item, > + strhash(subject), subject)) { > + FLEX_ALLOC_MEM(entry, subject, subject, subject_len); > + entry->i = i; > + hashmap_entry_init(entry, strhash(entry->subject)); > + hashmap_put(&subject2item, entry); > + } > + } > + > + if (rearranged) { > + struct strbuf buf = STRBUF_INIT; > + > + for (i = 0; i < todo_list.nr; i++) { > + enum todo_command command = todo_list.items[i].command; > + int cur = i; > + > + /* > + * Initially, all commands are 'pick's. If it is a > + * fixup or a squash now, we have rearranged it. > + */ > + if (is_fixup(command)) > + continue; > + > + while (cur >= 0) { > + int offset = todo_list.items[cur].offset_in_buf; > + int end_offset = cur + 1 < todo_list.nr ? > + todo_list.items[cur + 1].offset_in_buf : > + todo_list.buf.len; > + char *bol = todo_list.buf.buf + offset; > + char *eol = todo_list.buf.buf + end_offset; I got a little confused with these offsets. I know it was part of a previous series, but maybe we could add a description to the fields of `struct todo_list` and `struct todo_item`. Other than that, I don't have any particular comments. > + > + /* replace 'pick', by 'fixup' or 'squash' */ > + command = todo_list.items[cur].command; > + if (is_fixup(command)) { > + strbuf_addstr(&buf, > + todo_command_info[command].str); > + bol += strcspn(bol, " \t"); > + } > + > + strbuf_add(&buf, bol, eol - bol); > + > + cur = next[cur]; > + } > + } > + > + fd = open(todo_file, O_WRONLY); > + if (fd < 0) > + res = error_errno(_("could not open '%s'"), todo_file); > + else if (write(fd, buf.buf, buf.len) < 0) > + res = error_errno(_("could not read '%s'."), todo_file); > + else if (ftruncate(fd, buf.len) < 0) > + res = error_errno(_("could not finish '%s'"), > + todo_file); > + close(fd); > + strbuf_release(&buf); > + } > + > + free(next); > + free(tail); > + for (i = 0; i < todo_list.nr; i++) > + free(subjects[i]); > + free(subjects); > + hashmap_free(&subject2item, 1); > + todo_list_release(&todo_list); > + > + return res; > +} > diff --git a/sequencer.h b/sequencer.h > index 28e1fc1e9bb..1c94bec7622 100644 > --- a/sequencer.h > +++ b/sequencer.h > @@ -51,6 +51,7 @@ int sequencer_make_script(int keep_empty, FILE *out, > int transform_todo_ids(int shorten_sha1s); > int check_todo_list(void); > int skip_unnecessary_picks(void); > +int rearrange_squash(void); > > extern const char sign_off_header[]; > > diff --git a/t/t3415-rebase-autosquash.sh b/t/t3415-rebase-autosquash.sh > index 926bb3da788..2f88f50c057 100755 > --- a/t/t3415-rebase-autosquash.sh > +++ b/t/t3415-rebase-autosquash.sh > @@ -290,7 +290,7 @@ set_backup_editor () { > test_set_editor "$PWD/backup-editor.sh" > } > > -test_expect_failure 'autosquash with multiple empty patches' ' > +test_expect_success 'autosquash with multiple empty patches' ' > test_tick && > git commit --allow-empty -m "empty" && > test_tick && > -- > 2.12.2.windows.2.800.gede8f145e06 Liam