Hi Thomas,

On Fri, 5 Jul 2019, Thomas Gummerer wrote:

> Currently range-diff uses the 'strbuf_getline()' function for doing
> its line by line processing. In a future patch we want to do parts of
> that parsing using the 'parse_git_header()' function, which does

If you like my suggestion in patch 7/14, this commit message needs to talk
about the new name, too.

> requires reading parts of the input from that function, which doesn't

s/requires/require/

> use strbufs.
>
> Switch range-diff to do our own line by line parsing, so we can re-use
> the parse_git_header function later.
>
> Signed-off-by: Thomas Gummerer <t.gummerer@xxxxxxxxx>
> ---
>
> Longer term it might be better to have both range-diff and apply code
> use strbufs. However I didn't feel it's worth making that change for
> this patch series.

Makes sense.

> range-diff.c | 69 +++++++++++++++++++++++++++++----------------------
> 1 file changed, 39 insertions(+), 30 deletions(-)
>
> diff --git a/range-diff.c b/range-diff.c
> index 9242b8975f..916afa44c0 100644
> --- a/range-diff.c
> +++ b/range-diff.c
> @@ -24,6 +24,17 @@ struct patch_util {
> struct object_id oid;
> };
>
> +static unsigned long linelen(const char *buffer, unsigned long size)

Shouldn't this be `size_t`?

> +{
> + unsigned long len = 0;

Likewise.

> + while (size--) {
> + len++;
> + if (*buffer++ == '\n')
> + break;
> + }
> + return len;

How about

	const char *eol = memchr(buffer, '\n', size);
	return !eol ? size : eol + 1 - buffer;

instead?

For an extra brownie point, you could even rename this function to
`find_end_of_line()` and replace the LF by a NUL:

	if (!eol)
		return size;
	*eol = '\0';
	return eol + 1 - buffer;

> +}
> +
> /*
> * Reads the patches into a string list, with the `util` field being populated
> * as struct object_id (will need to be free()d).
> @@ -31,10 +42,12 @@ struct patch_util {
> static int read_patches(const char *range, struct string_list *list)
> {
> struct child_process cp = CHILD_PROCESS_INIT;
> - FILE *in;
> - struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT;
> + struct strbuf buf = STRBUF_INIT, file = STRBUF_INIT;

This puzzled me. I'd like to suggest s/file/contents/

> struct patch_util *util = NULL;
> int in_header = 1;
> + char *line;
> + int offset, len;
> + size_t size;
>
> argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges",
> "--reverse", "--date-order", "--decorate=no",
> @@ -54,17 +67,15 @@ static int read_patches(const char *range, struct string_list *list)
>
> if (start_command(&cp))
> return error_errno(_("could not start `log`"));
> - in = fdopen(cp.out, "r");
> - if (!in) {
> - error_errno(_("could not read `log` output"));
> - finish_command(&cp);
> - return -1;
> - }
> + strbuf_read(&file, cp.out, 0);

Shouldn't we handle a negative return value here, erroring out with
"could not read `log` output" as before?

>
> - while (strbuf_getline(&line, in) != EOF) {
> + line = strbuf_detach(&file, &size);

I strongly suspect this to leak, given that `line` is subsequently advanced,
and there is no backup copy. Maybe

	line = file.buf;
	size = file.len;

would make more sense here?

> + for (offset = 0; size > 0; offset += len, size -= len, line += len) {
> const char *p;
>
> - if (skip_prefix(line.buf, "commit ", &p)) {
> + len = linelen(line, size);
> + line[len - 1] = '\0';
> + if (skip_prefix(line, "commit ", &p)) {
> if (util) {
> string_list_append(list, buf.buf)->util = util;
> strbuf_reset(&buf);
> @@ -75,8 +86,6 @@ static int read_patches(const char *range, struct string_list *list)
> free(util);
> string_list_clear(list, 1);
> strbuf_release(&buf);
> - strbuf_release(&line);
> - fclose(in);

We should release the file contents in `file` (or `contents`, if you like my
suggestions) here.

> finish_command(&cp);
> return -1;
> }
> @@ -85,26 +94,28 @@ static int read_patches(const char *range, struct string_list *list)
> continue;
> }
>
> - if (starts_with(line.buf, "diff --git")) {
> + if (starts_with(line, "diff --git")) {
> in_header = 0;
> strbuf_addch(&buf, '\n');
> if (!util->diff_offset)
> util->diff_offset = buf.len;
> strbuf_addch(&buf, ' ');
> - strbuf_addbuf(&buf, &line);
> + strbuf_addstr(&buf, line);
> } else if (in_header) {
> - if (starts_with(line.buf, "Author: ")) {
> - strbuf_addbuf(&buf, &line);
> + if (starts_with(line, "Author: ")) {
> + strbuf_addstr(&buf, line);
> strbuf_addstr(&buf, "\n\n");
> - } else if (starts_with(line.buf, " ")) {
> - strbuf_rtrim(&line);
> - strbuf_addbuf(&buf, &line);
> + } else if (starts_with(line, " ")) {
> + p = line + len - 2;
> + while (isspace(*p) && p >= line)
> + p--;
> + strbuf_add(&buf, line, p - line + 1);
> strbuf_addch(&buf, '\n');
> }
> continue;
> - } else if (starts_with(line.buf, "@@ "))
> + } else if (starts_with(line, "@@ "))
> strbuf_addstr(&buf, "@@");
> - else if (!line.buf[0] || starts_with(line.buf, "index "))
> + else if (!line[0] || starts_with(line, "index "))
> /*
> * A completely blank (not ' \n', which is context)
> * line is not valid in a diff. We skip it
> @@ -117,25 +128,23 @@ static int read_patches(const char *range, struct string_list *list)
> * we are not interested.
> */
> continue;
> - else if (line.buf[0] == '>') {
> + else if (line[0] == '>') {
> strbuf_addch(&buf, '+');
> - strbuf_add(&buf, line.buf + 1, line.len - 1);
> - } else if (line.buf[0] == '<') {
> + strbuf_addstr(&buf, line + 1);
> + } else if (line[0] == '<') {
> strbuf_addch(&buf, '-');
> - strbuf_add(&buf, line.buf + 1, line.len - 1);
> - } else if (line.buf[0] == '#') {
> + strbuf_addstr(&buf, line + 1);
> + } else if (line[0] == '#') {
> strbuf_addch(&buf, ' ');
> - strbuf_add(&buf, line.buf + 1, line.len - 1);
> + strbuf_addstr(&buf, line + 1);
> } else {
> strbuf_addch(&buf, ' ');
> - strbuf_addbuf(&buf, &line);
> + strbuf_addstr(&buf, line);
> }
>
> strbuf_addch(&buf, '\n');
> util->diffsize++;
> }
> - fclose(in);
> - strbuf_release(&line);

We should release the file contents we previously read via `strbuf_read()`
here.

Ciao,
Dscho

>
> if (util)
> string_list_append(list, buf.buf)->util = util;
> --
> 2.22.0.510.g264f2c817a
>
>