Hi Junio, On Fri, 2 Sep 2022, Junio C Hamano wrote: > "git diff --no-index" codepath starts with the two elements in > argv[] and munges them into two paths to be compared, stored in a > separate path[] arrays. The munging is implemented in a rather > haphazard way, sometimes overwriting old version with a new copy, > and sometimes a constant string assigned to path[], making it > impossible to release the resources properly: > > * A single dash "-" from the command line is a special signal that > the standard input is used for the side to be compared, and is > internally replaced with a copy of string "-" at a known address. > > * When run in a subdirectory, full paths to the two paths are > allocated and placed in path[]. > > * After the above happens, when comparing a file with a directory, > the directory side is replaced with the path to a file in the > directory with the same name as the file. > > This was perfectly fine for just two strings that are pathnames used > during the lifetime of the program and cleaned up upon program exit, > but it gets in the way when leak sanitizer is in effect. The third > step can be losing the full path that was allocated in the second > step, but it is not easy to tell if its input is an allocated piece > of memory to begin with. > > Loosen the earlier two steps a bit so that elements of the path[] > array that come to the directory/file comparison code are either the > singleton "-" or an allocated piece of memory. Use that knowledge > in the third step to release an allocated piece of memory when it > replaces the path to a directory with the path to a file in that > directory, and also at the end to release the two elements of the > path[] array as needed. > > Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx> > --- > > * The previous one allowed strbuf_release() to free replacement.buf > which may be used in path[0] or path[1] potentially leading to > double freeing. The kosher way may be to use strbuf_detach() in > fixup_paths(), but this is a simpler fix, it is getting late in > the day, and I am getting sick of fighting the leak-checker, so... I wonder whether a much better way would be to first fix the code to always release `replacement`, like so: -- snip -- diff --git a/diff-no-index.c b/diff-no-index.c index 9a8b09346bd..87047605385 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -242,7 +242,7 @@ int diff_no_index(struct rev_info *revs, int implicit_no_index, int argc, const char **argv) { - int i, no_index; + int i, no_index, ret; const char *paths[2]; struct strbuf replacement = STRBUF_INIT; const char *prefix = revs->prefix; @@ -294,17 +294,23 @@ int diff_no_index(struct rev_info *revs, setup_diff_pager(&revs->diffopt); revs->diffopt.flags.exit_with_status = 1; - if (queue_diff(&revs->diffopt, paths[0], paths[1])) - return 1; + if (queue_diff(&revs->diffopt, paths[0], paths[1])) { + ret = 1; + goto out; + } + diff_set_mnemonic_prefix(&revs->diffopt, "1/", "2/"); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); - strbuf_release(&replacement); - /* * The return code for --no-index imitates diff(1): * 0 = no changes, 1 = changes, else error */ - return diff_result_code(&revs->diffopt, 0); + ret = diff_result_code(&revs->diffopt, 0); + +out: + strbuf_release(&replacement); + + return ret; } -- snap -- After that, the proposed diff could be replaced by this diff: -- snip -- diff --git a/diff-no-index.c b/diff-no-index.c index 87047605385..d350e4381bc 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -244,6 +244,7 @@ int diff_no_index(struct rev_info *revs, { int i, no_index, ret; const char *paths[2]; + struct string_list to_release = STRING_LIST_INIT_DUP; struct strbuf replacement = STRBUF_INIT; const char *prefix = revs->prefix; struct option no_index_options[] = { @@ -274,6 +275,12 @@ int diff_no_index(struct rev_info *revs, p = file_from_standard_input; else if (prefix) p = prefix_filename(prefix, p); + else { + char *dup = xstrdup(p); + + p = dup; + string_list_append_nodup(&to_release, dup); + } paths[i] = p; } @@ -310,6 +317,7 @@ int diff_no_index(struct rev_info *revs, ret = diff_result_code(&revs->diffopt, 0); out: + string_list_clear(&to_release, 1); strbuf_release(&replacement); return ret; -- snap -- That approach has the distinct advantage of making it very easy to reason about the code. What do you think? Ciao, Dscho > > diff-no-index.c | 24 ++++++++++++++++++++++-- > 1 file changed, 22 insertions(+), 2 deletions(-) > > > diff --git a/diff-no-index.c b/diff-no-index.c > index 9a8b09346b..77a126469b 100644 > --- a/diff-no-index.c > +++ b/diff-no-index.c > @@ -208,6 +208,14 @@ static void append_basename(struct strbuf *path, const char *dir, const char *fi > strbuf_addstr(path, tail ? tail + 1 : file); > } > > +static void free_allocated_path(const char *path) > +{ > + if (!path || > + (path == file_from_standard_input)) > + return; > + free((char *)path); > +} > + > /* > * DWIM "diff D F" into "diff D/F F" and "diff F D" into "diff F D/F" > * Note that we append the basename of F to D/, so "diff a/b/file D" > @@ -226,9 +234,11 @@ static void fixup_paths(const char **path, struct strbuf *replacement) > return; > if (isdir0) { > append_basename(replacement, path[0], path[1]); > + free_allocated_path(path[0]); > path[0] = replacement->buf; > } else { > append_basename(replacement, path[1], path[0]); > + free_allocated_path(path[1]); > path[1] = replacement->buf; > } > } > @@ -274,6 +284,8 @@ int diff_no_index(struct rev_info *revs, > p = file_from_standard_input; > else if (prefix) > p = prefix_filename(prefix, p); > + else > + p = xstrdup(p); > paths[i] = p; > } > > @@ -294,13 +306,21 @@ int diff_no_index(struct rev_info *revs, > setup_diff_pager(&revs->diffopt); > revs->diffopt.flags.exit_with_status = 1; > > - if (queue_diff(&revs->diffopt, paths[0], paths[1])) > + if (queue_diff(&revs->diffopt, paths[0], paths[1])) { > + free_allocated_path(paths[0]); > + free_allocated_path(paths[1]); > return 1; > + } > diff_set_mnemonic_prefix(&revs->diffopt, "1/", "2/"); > diffcore_std(&revs->diffopt); > diff_flush(&revs->diffopt); > > - strbuf_release(&replacement); > + /* > + * do not strbuf_release(&replacement), as it is in paths[] > + * when replacement was actually used. > + */ > + free_allocated_path(paths[0]); > + free_allocated_path(paths[1]); > > /* > * The return code for --no-index imitates diff(1): > -- > 2.37.3-661-g73a641a77a > >