On 07/05, Johannes Schindelin wrote: > Hi Thomas, > > > On Fri, 5 Jul 2019, Thomas Gummerer wrote: > > > Currently range-diff uses the 'strbuf_getline()' function for doing > > its line by line processing. In a future patch we want to do parts of > > that parsing using the 'parse_git_header()' function, which does > > If you like my suggestion in patch 7/14, this commit message needs to talk > about the new name, too. Thanks for the reminder here! I do indeed like the new name, but would probably have forgotten to change it in the commit message here. > > requires reading parts of the input from that function, which doesn't > > s/requires/require/ > > > use strbufs. > > > > Switch range-diff to do our own line by line parsing, so we can re-use > > the parse_git_header function later. > > > > Signed-off-by: Thomas Gummerer <t.gummerer@xxxxxxxxx> > > --- > > > > Longer term it might be better to have both range-diff and apply code > > use strbufs. However I didn't feel it's worth making that change for > > this patch series. > > Makes sense. > > > range-diff.c | 69 +++++++++++++++++++++++++++++----------------------- > > 1 file changed, 39 insertions(+), 30 deletions(-) > > > > diff --git a/range-diff.c b/range-diff.c > > index 9242b8975f..916afa44c0 100644 > > --- a/range-diff.c > > +++ b/range-diff.c > > @@ -24,6 +24,17 @@ struct patch_util { > > struct object_id oid; > > }; > > > > +static unsigned long linelen(const char *buffer, unsigned long size) > > Shouldn't this be `size_t`? > > > +{ > > + unsigned long len = 0; > > Likewise. > > > + while (size--) { > > + len++; > > + if (*buffer++ == '\n') > > + break; > > + } > > + return len; > > How about > > const char *eol = memchr(buffer, '\n', size); > > return !eol ? size : eol + 1 - buffer; > > instead? > > For an extra brownie point, you could even rename this function to > `find_end_of_line()` and replace the LF by a NUL: > > if (!eol) > return size; > > *eol = '\0'; > return eol + 1 - buffer; I like this, thank you! > > +} > > + > > /* > > * Reads the patches into a string list, with the `util` field being populated > > * as struct object_id (will need to be free()d). > > @@ -31,10 +42,12 @@ struct patch_util { > > static int read_patches(const char *range, struct string_list *list) > > { > > struct child_process cp = CHILD_PROCESS_INIT; > > - FILE *in; > > - struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT; > > + struct strbuf buf = STRBUF_INIT, file = STRBUF_INIT; > > This puzzled me. I'd like to suggest s/file/contents/ Thanks, will change. > > struct patch_util *util = NULL; > > int in_header = 1; > > + char *line; > > + int offset, len; > > + size_t size; > > > > argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", > > "--reverse", "--date-order", "--decorate=no", > > @@ -54,17 +67,15 @@ static int read_patches(const char *range, struct string_list *list) > > > > if (start_command(&cp)) > > return error_errno(_("could not start `log`")); > > - in = fdopen(cp.out, "r"); > > - if (!in) { > > - error_errno(_("could not read `log` output")); > > - finish_command(&cp); > > - return -1; > > - } > > + strbuf_read(&file, cp.out, 0); > > Shouldn't we handle a negative return value here, erroring out with "could > not read `log` output" as before? Yeah, that was an oversight, we should definitely still handle errors here. > > > > - while (strbuf_getline(&line, in) != EOF) { > > + line = strbuf_detach(&file, &size); > > I strongly suspect this to leak, given that `line` is subsequently > advanced, and there is no backup copy. > > Maybe > > line = file.buf; > size = file.len; > > would make more sense here? Hmm good point, that makes more sense indeed. > > + for (offset = 0; size > 0; offset += len, size -= len, line += len) { > > const char *p; > > > > - if (skip_prefix(line.buf, "commit ", &p)) { > > + len = linelen(line, size); > > + line[len - 1] = '\0'; > > + if (skip_prefix(line, "commit ", &p)) { > > if (util) { > > string_list_append(list, buf.buf)->util = util; > > strbuf_reset(&buf); > > @@ -75,8 +86,6 @@ static int read_patches(const char *range, struct string_list *list) > > free(util); > > string_list_clear(list, 1); > > strbuf_release(&buf); > > - strbuf_release(&line); > > - fclose(in); > > We should release the file contents in `file` (or `contents`, if you like > my suggestions) here. Yeah, I thought it was no longer necessary because of the 'strbuf_detach()' earlier, but that obviously leaks in a different way as you pointed out. Will release 'contents' here and below. > > finish_command(&cp); > > return -1; > > } > > @@ -85,26 +94,28 @@ static int read_patches(const char *range, struct string_list *list) > > continue; > > } > > > > - if (starts_with(line.buf, "diff --git")) { > > + if (starts_with(line, "diff --git")) { > > in_header = 0; > > strbuf_addch(&buf, '\n'); > > if (!util->diff_offset) > > util->diff_offset = buf.len; > > strbuf_addch(&buf, ' '); > > - strbuf_addbuf(&buf, &line); > > + strbuf_addstr(&buf, line); > > } else if (in_header) { > > - if (starts_with(line.buf, "Author: ")) { > > - strbuf_addbuf(&buf, &line); > > + if (starts_with(line, "Author: ")) { > > + strbuf_addstr(&buf, line); > > strbuf_addstr(&buf, "\n\n"); > > - } else if (starts_with(line.buf, " ")) { > > - strbuf_rtrim(&line); > > - strbuf_addbuf(&buf, &line); > > + } else if (starts_with(line, " ")) { > > + p = line + len - 2; > > + while (isspace(*p) && p >= line) > > + p--; > > + strbuf_add(&buf, line, p - line + 1); > > strbuf_addch(&buf, '\n'); > > } > > continue; > > - } else if (starts_with(line.buf, "@@ ")) > > + } else if (starts_with(line, "@@ ")) > > strbuf_addstr(&buf, "@@"); > > - else if (!line.buf[0] || starts_with(line.buf, "index ")) > > + else if (!line[0] || starts_with(line, "index ")) > > /* > > * A completely blank (not ' \n', which is context) > > * line is not valid in a diff. We skip it > > @@ -117,25 +128,23 @@ static int read_patches(const char *range, struct string_list *list) > > * we are not interested. > > */ > > continue; > > - else if (line.buf[0] == '>') { > > + else if (line[0] == '>') { > > strbuf_addch(&buf, '+'); > > - strbuf_add(&buf, line.buf + 1, line.len - 1); > > - } else if (line.buf[0] == '<') { > > + strbuf_addstr(&buf, line + 1); > > + } else if (line[0] == '<') { > > strbuf_addch(&buf, '-'); > > - strbuf_add(&buf, line.buf + 1, line.len - 1); > > - } else if (line.buf[0] == '#') { > > + strbuf_addstr(&buf, line + 1); > > + } else if (line[0] == '#') { > > strbuf_addch(&buf, ' '); > > - strbuf_add(&buf, line.buf + 1, line.len - 1); > > + strbuf_addstr(&buf, line + 1); > > } else { > > strbuf_addch(&buf, ' '); > > - strbuf_addbuf(&buf, &line); > > + strbuf_addstr(&buf, line); > > } > > > > strbuf_addch(&buf, '\n'); > > util->diffsize++; > > } > > - fclose(in); > > - strbuf_release(&line); > > We should release the file contents we previously read via `strbuf_read()` here. > > Ciao, > Dscho > > > > > if (util) > > string_list_append(list, buf.buf)->util = util; > > -- > > 2.22.0.510.g264f2c817a > > > >