Hi Miriam, this patch looks pretty good to me. I just have a couple comments/suggestions: On Mon, 21 Dec 2020, Miriam Rubio wrote: > diff --git a/builtin/bisect--helper.c b/builtin/bisect--helper.c > index 1854377fa6..92c783237d 100644 > --- a/builtin/bisect--helper.c > +++ b/builtin/bisect--helper.c > @@ -916,6 +917,121 @@ static enum bisect_error bisect_log(void) > return status ? BISECT_FAILED : BISECT_OK; > } > > +static int get_next_word(const char *line, int pos, struct strbuf *word) > +{ > + int i, len = strlen(line), begin = 0; > + > + strbuf_reset(word); > + for (i = pos; i < len; i++) { > + if (line[i] == ' ' && begin) > + return i + 1; > + > + if (!begin) > + begin = 1; > + strbuf_addch(word, line[i]); > + } > + > + return i; > +} While I have different objections than Rafael (the function seems to want to left-trim, but does an inadequate job at it), I do not even think that we need this function. See below. > + > +static int process_line(struct bisect_terms *terms, struct strbuf *line, struct strbuf *word) > +{ > + int res = 0; > + int pos = 0; > + > + while (pos < line->len) { > + pos = get_next_word(line->buf, pos, word); > + > + if (!strcmp(word->buf, "git")) > + continue; > + else if (!strcmp(word->buf, "git-bisect")) > + continue; > + else if (!strcmp(word->buf, "bisect")) > + continue; This is not quite correct, as it would skip arbitrary amounts of "git" and "git-bisect" and "bisect", even in the middle of the line. Besides, this `while()` loop iterates over the characters of the current line, while the original loop iterated over the _lines_: while read git bisect command rev do test "$git $bisect" = "git bisect" || test "$git" = "git-bisect" || continue [...] As you can see, lines that do not start with "git bisect" or "git-bisect" are simply ignored by `continue`ing to the next line. In the C code, `continue` would continue to the next _word_. I'd like to strongly suggest a very different approach, where no `while()` loop is used in `process_line()` (BTW can we please rename this to `process_replay_line()` to 1) make it less generic a name and 2) convey via the name what this function is about?): const char *p; if (!skip_prefix(line->buf, "git bisect", &p) && !skip_prefix(line->buf, "git bisect", p)) return 0; As the original code used `read git bisect command rev` to parse the line, which automatically trims white-space, we might want to do something similar to that: const char *p = line->buf + strspn(line->buf, " \t"); if ((!skip_prefix(p, "git bisect", &p) && !skip_prefix(p, "git-bisect", &p)) || !isspace(*p)) return 0; p += strspn(p, " \t"); > + else if (starts_with(word->buf, "#")) > + break; Please note that the original shell code is _a lot_ more forgiving: it ignores _anything_ but "git bisect" and "git-bisect" at the beginning of the (white-space-trimmed) line. Not just comments starting with `#`. I'd like to return to the shell script's behavior. > + > + get_terms(terms); Do we really have to read the terms for every line we replay? I guess we do, as every time we use new/old after good/bad (or vice versa), the terms file gets rewritten. We might want to address this awkwardness in the future: in C, we do not have to read and write the terms file _all_ the time. > + if (check_and_set_terms(terms, word->buf)) > + return -1; > + > + if (!strcmp(word->buf, "start")) { Let's use `skip_prefix()` here, too: char *word_end = p + strcspn(p, " \t"); const char *rev = word_end + strspn(word_end, " \t"); *word_end = '\0'; /* NUL-terminate the word */ if (!strcmp("start", p)) { struct strvec argv = STRVEC_INIT; int res; sq_dequote_to_strvec(rev, &argv); res = bisect_start(terms, argv.v, argv.nr); strvec_clear(&argv); return res; } > + struct strvec argv = STRVEC_INIT; > + int res; > + sq_dequote_to_strvec(line->buf+pos, &argv); > + res = bisect_start(terms, argv.v, argv.nr); > + strvec_clear(&argv); > + if (res) > + return -1; > + break; > + } > + > + if (one_of(word->buf, terms->term_good, > + terms->term_bad, "skip", NULL)) { > + if (bisect_write(word->buf, line->buf+pos, terms, 0)) > + return -1; Apart from using `p` as above instead of `word->buf`, and `rev` instead of `line->buf+pos`, why not returning directly what `bisect_write()` returned? if (one_of(p, terms->term_good, terms->term_bad, "skip", NULL)) return bisect_write(p, rev, terms, 0); > + break; > + } > + > + if (!strcmp(word->buf, "terms")) { > + struct strvec argv = STRVEC_INIT; > + int res; > + sq_dequote_to_strvec(line->buf+pos, &argv); > + res = bisect_terms(terms, argv.nr == 1 ? argv.v[0] : NULL); We should probably error out if `argv.nr > 1`. > + strvec_clear(&argv); > + if (res) > + return -1; > + break; Let's `return res` directly. > + } > + > + error(_("Replay file contains rubbish (\"%s\")"), > + word->buf); The shell script version does this instead: die "$(gettext "?? what are you talking about?")" ;; We should do the same, if only to make life easier on the translators. > + res = -1; > + } > + return res; > +} > + > +static int process_replay_file(FILE *fp, struct bisect_terms *terms) > +{ > + struct strbuf line = STRBUF_INIT; > + struct strbuf word = STRBUF_INIT; > + int res = 0; > + > + while (strbuf_getline(&line, fp) != EOF) { > + res = process_line(terms, &line, &word); > + if (res) > + break; > + } > + > + strbuf_release(&line); > + strbuf_release(&word); > + return res; > +} A lot of this function is just boiler plate. I'd prefer to merge the code into `bisect_replay()` instead. > + > +static enum bisect_error bisect_replay(struct bisect_terms *terms, const char *filename) > +{ > + FILE *fp = NULL; > + enum bisect_error res = BISECT_OK; > + > + if (is_empty_or_missing_file(filename)) > + return error(_("cannot read file '%s' for replaying"), filename); > + > + if (bisect_reset(NULL)) > + return BISECT_FAILED; > + > + fp = fopen(filename, "r"); > + if (!fp) > + return BISECT_FAILED; > + > + res = process_replay_file(fp, terms); > + fclose(fp); > + > + if (res) > + return BISECT_FAILED; > + > + return bisect_auto_next(terms, NULL); > +} The rest looks good to me! Ciao, Dscho