On Wed, Oct 02, 2024 at 04:07:04PM +0000, Derrick Stolee via GitGitGadget wrote: > The output_prefix() method in line-log.c may call a function pointer via > the diff_options struct. This function pointer returns a strbuf struct > and then its buffer is passed back. However, that implies that the > consumer is responsible to free the string. This is especially true > because the default behavior is to duplicate the empty string. > > The existing functions used in the output_prefix pointer include: > > 1. idiff_prefix_cb() in diff-lib.c. This returns the data pointer, so > the value exists across multiple calls. > > 2. diff_output_prefix_callback() in graph.c. This uses a static strbuf > struct, so it reuses buffers across calls. These should not be > freed. > > 3. output_prefix_cb() in range-diff.c. This is similar to the > diff-lib.c case. > > In each case, we should not be freeing this buffer. We can convert the > output_prefix() function to return a const char pointer and stop freeing > the result. > > This choice is essentially the opposite of what was done in 394affd46d > (line-log: always allocate the output prefix, 2024-06-07). > > This was discovered via 'valgrind' while investigating a public report > of a bug in 'git log --graph -L' [1]. Good catch. Your analysis looks correct, though I had two questions I managed to answer myself: 1. This seems like an obvious use-after-free problem. Why didn't we catch it sooner? I think the answer is that most uses of the output_prefix callback happen via diff_line_prefix(), which was not broken by 394affd46d. It's only line-log that was affected. Building with ASan and running: ./git log --graph -L :diff_line_prefix:diff.c shows the problem immediately (and that your patch fixes it). Should we include a new test in this patch? 2. If the caller isn't freeing it, then who does? Should diffopt cleanup do so? The answer is "no", the pointer is not owned by that struct. In your cases (1) and (3), the caller does so after finishing with the diffopt struct. In case (2) it is effectively "leaked", but still reachable by the static strbuf. That's not great, and is a problem for eventual libification. But for now, we can ignore it as it won't trigger the leak-checker. It does make me wonder what leak Patrick saw that caused him to write 394affd46d, and whether we're now leaking in some case that I'm missing. I do think this would have been a harder mistake to make if the callback simply returned a "const char *" pointer. We'd lose the ability to show prefixes with embedded NULs, but I'm not sure that's a big deal. It would also help for line-log to use the existing helper rather than inventing its own. So together on top something like this (which I'd probably turn into two patches if this seems to others like it's valuable and not just churn): diff-lib.c | 4 ++-- diff.c | 9 +++------ diff.h | 2 +- graph.c | 4 ++-- line-log.c | 14 ++------------ log-tree.c | 7 +++---- range-diff.c | 4 ++-- 7 files changed, 15 insertions(+), 29 deletions(-) diff --git a/diff-lib.c b/diff-lib.c index a680768ee7..6b14b95962 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -701,7 +701,7 @@ int index_differs_from(struct repository *r, return (has_changes != 0); } -static struct strbuf *idiff_prefix_cb(struct diff_options *opt UNUSED, void *data) +static const char *idiff_prefix_cb(struct diff_options *opt UNUSED, void *data) { return data; } @@ -716,7 +716,7 @@ void show_interdiff(const struct object_id *oid1, const struct object_id *oid2, opts.output_format = DIFF_FORMAT_PATCH; opts.output_prefix = idiff_prefix_cb; strbuf_addchars(&prefix, ' ', indent); - opts.output_prefix_data = &prefix; + opts.output_prefix_data = prefix.buf; diff_setup_done(&opts); diff_tree_oid(oid1, oid2, "", &opts); diff --git a/diff.c b/diff.c index 173cbe2bed..efde65feef 100644 --- a/diff.c +++ b/diff.c @@ -2317,12 +2317,9 @@ const char *diff_get_color(int diff_use_color, enum color_diff ix) const char *diff_line_prefix(struct diff_options *opt) { - struct strbuf *msgbuf; - if (!opt->output_prefix) - return ""; - - msgbuf = opt->output_prefix(opt, opt->output_prefix_data); - return msgbuf->buf; + return opt->output_prefix ? + opt->output_prefix(opt, opt->output_prefix_data) : + ""; } static unsigned long sane_truncate_line(char *line, unsigned long len) diff --git a/diff.h b/diff.h index 0cde3b34e2..2a9c9191c1 100644 --- a/diff.h +++ b/diff.h @@ -94,7 +94,7 @@ typedef void (*add_remove_fn_t)(struct diff_options *options, typedef void (*diff_format_fn_t)(struct diff_queue_struct *q, struct diff_options *options, void *data); -typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data); +typedef const char *(*diff_prefix_fn_t)(struct diff_options *opt, void *data); #define DIFF_FORMAT_RAW 0x0001 #define DIFF_FORMAT_DIFFSTAT 0x0002 diff --git a/graph.c b/graph.c index 091c14cf4f..ebb7d1e66f 100644 --- a/graph.c +++ b/graph.c @@ -314,7 +314,7 @@ struct git_graph { unsigned short default_column_color; }; -static struct strbuf *diff_output_prefix_callback(struct diff_options *opt, void *data) +static const char *diff_output_prefix_callback(struct diff_options *opt, void *data) { struct git_graph *graph = data; static struct strbuf msgbuf = STRBUF_INIT; @@ -327,7 +327,7 @@ static struct strbuf *diff_output_prefix_callback(struct diff_options *opt, void opt->line_prefix_length); if (graph) graph_padding_line(graph, &msgbuf); - return &msgbuf; + return msgbuf.buf; } static const struct diff_options *default_diffopt; diff --git a/line-log.c b/line-log.c index 29cf66bdd1..63945c4729 100644 --- a/line-log.c +++ b/line-log.c @@ -897,16 +897,6 @@ static void print_line(const char *prefix, char first, fputs("\\ No newline at end of file\n", file); } -static const char *output_prefix(struct diff_options *opt) -{ - if (opt->output_prefix) { - struct strbuf *sb = opt->output_prefix(opt, opt->output_prefix_data); - return sb->buf; - } else { - return ""; - } -} - static void dump_diff_hacky_one(struct rev_info *rev, struct line_log_data *range) { unsigned int i, j = 0; @@ -916,7 +906,7 @@ static void dump_diff_hacky_one(struct rev_info *rev, struct line_log_data *rang struct diff_ranges *diff = &range->diff; struct diff_options *opt = &rev->diffopt; - const char *prefix = output_prefix(opt); + const char *prefix = diff_line_prefix(opt); const char *c_reset = diff_get_color(opt->use_color, DIFF_RESET); const char *c_frag = diff_get_color(opt->use_color, DIFF_FRAGINFO); const char *c_meta = diff_get_color(opt->use_color, DIFF_METAINFO); @@ -1011,7 +1001,7 @@ static void dump_diff_hacky_one(struct rev_info *rev, struct line_log_data *rang */ static void dump_diff_hacky(struct rev_info *rev, struct line_log_data *range) { - const char *prefix = output_prefix(&rev->diffopt); + const char *prefix = diff_line_prefix(&rev->diffopt); fprintf(rev->diffopt.file, "%s\n", prefix); diff --git a/log-tree.c b/log-tree.c index 3758e0d3b8..1b80abe7d5 100644 --- a/log-tree.c +++ b/log-tree.c @@ -923,10 +923,9 @@ int log_tree_diff_flush(struct rev_info *opt) */ int pch = DIFF_FORMAT_DIFFSTAT | DIFF_FORMAT_PATCH; if (opt->diffopt.output_prefix) { - struct strbuf *msg = NULL; - msg = opt->diffopt.output_prefix(&opt->diffopt, - opt->diffopt.output_prefix_data); - fwrite(msg->buf, msg->len, 1, opt->diffopt.file); + fputs(opt->diffopt.output_prefix(&opt->diffopt, + opt->diffopt.output_prefix_data), + opt->diffopt.file); } /* diff --git a/range-diff.c b/range-diff.c index bbb0952264..10885ba301 100644 --- a/range-diff.c +++ b/range-diff.c @@ -480,7 +480,7 @@ static void patch_diff(const char *a, const char *b, diff_flush(diffopt); } -static struct strbuf *output_prefix_cb(struct diff_options *opt UNUSED, void *data) +static const char *output_prefix_cb(struct diff_options *opt UNUSED, void *data) { return data; } @@ -508,7 +508,7 @@ static void output(struct string_list *a, struct string_list *b, opts.flags.suppress_hunk_header_line_count = 1; opts.output_prefix = output_prefix_cb; strbuf_addstr(&indent, " "); - opts.output_prefix_data = &indent; + opts.output_prefix_data = indent.buf; diff_setup_done(&opts); /*