The `struct image` uses a character array to track the pre- or postimage of a patch operation. This has multiple downsides: - It is somewhat hard to track memory ownership. In fact, we have several memory leaks in git-apply(1) because we do not (and cannot easily) free the buffer in all situations. - We have to reinvent the wheel and manually implement a lot of functionality that would already be provided by `struct strbuf`. - We have to carefully track whether `update_pre_post_images()` can do an in-place update of the postimage or whether it has to allocate a new buffer for it. This is all rather cumbersome, and especially `update_pre_post_images()` is really hard to understand as a consequence even though what it is doing is rather trivial. Refactor the code to use a `struct strbuf` instead, addressing all of the above. Like this we can easily perform in-place updates in all situations, the logic to perform those updates becomes way simpler and the lifetime of the buffer becomes a ton easier to track. This refactoring also plugs some leaking buffers as a side effect. Signed-off-by: Patrick Steinhardt <ps@xxxxxx> --- apply.c | 194 +++++++++++------------------ t/t3436-rebase-more-options.sh | 1 + t/t4107-apply-ignore-whitespace.sh | 4 +- t/t4124-apply-ws-rule.sh | 1 + t/t4125-apply-ws-fuzz.sh | 1 + t/t4138-apply-ws-expansion.sh | 1 + 6 files changed, 79 insertions(+), 123 deletions(-) diff --git a/apply.c b/apply.c index 3340bb1fc0..06a75ea034 100644 --- a/apply.c +++ b/apply.c @@ -277,12 +277,13 @@ struct line { * This represents a "file", which is an array of "lines". */ struct image { - char *buf; - size_t len; + struct strbuf buf; struct line *line; size_t line_nr, line_alloc; }; -#define IMAGE_INIT { 0 } +#define IMAGE_INIT { \ + .buf = STRBUF_INIT, \ +} static void image_init(struct image *image) { @@ -292,7 +293,7 @@ static void image_init(struct image *image) static void image_clear(struct image *image) { - free(image->buf); + strbuf_release(&image->buf); free(image->line); image_init(image); } @@ -329,14 +330,13 @@ static void image_prepare(struct image *image, char *buf, size_t len, const char *cp, *ep; image_clear(image); - image->buf = buf; - image->len = len; + strbuf_attach(&image->buf, buf, len, len + 1); if (!prepare_linetable) return; - ep = image->buf + image->len; - cp = image->buf; + ep = image->buf.buf + image->buf.len; + cp = image->buf.buf; while (cp < ep) { const char *next; for (next = cp; next < ep && *next != '\n'; next++) @@ -350,8 +350,7 @@ static void image_prepare(struct image *image, char *buf, size_t len, static void image_remove_first_line(struct image *img) { - img->buf += img->line[0].len; - img->len -= img->line[0].len; + strbuf_remove(&img->buf, 0, img->line[0].len); img->line_nr--; if (img->line_nr) MOVE_ARRAY(img->line, img->line + 1, img->line_nr); @@ -359,7 +358,9 @@ static void image_remove_first_line(struct image *img) static void image_remove_last_line(struct image *img) { - img->len -= img->line[--img->line_nr].len; + size_t last_line_len = img->line[img->line_nr - 1].len; + strbuf_setlen(&img->buf, img->buf.len - last_line_len); + img->line_nr--; } /* fmt must contain _one_ %s and no other substitution */ @@ -2308,19 +2309,16 @@ static int read_old_data(struct stat *st, struct patch *patch, /* * Update the preimage, and the common lines in postimage, - * from buffer buf of length len. If postlen is 0 the postimage - * is updated in place, otherwise it's updated on a new buffer - * of length postlen + * from buffer buf of length len. */ - static void update_pre_post_images(struct image *preimage, struct image *postimage, - char *buf, - size_t len, size_t postlen) + char *buf, size_t len) { - int i, ctx, reduced; - char *new_buf, *old_buf, *fixed; struct image fixed_preimage = IMAGE_INIT; + size_t insert_pos = 0; + int i, ctx, reduced; + const char *fixed; /* * Update the preimage with whitespace fixes. Note that we @@ -2328,43 +2326,24 @@ static void update_pre_post_images(struct image *preimage, * free "oldlines". */ image_prepare(&fixed_preimage, buf, len, 1); - assert(postlen - ? fixed_preimage.line_nr == preimage->line_nr - : fixed_preimage.line_nr <= preimage->line_nr); for (i = 0; i < fixed_preimage.line_nr; i++) fixed_preimage.line[i].flag = preimage->line[i].flag; - free(preimage->line); + image_clear(preimage); *preimage = fixed_preimage; + fixed = preimage->buf.buf; /* - * Adjust the common context lines in postimage. This can be - * done in-place when we are shrinking it with whitespace - * fixing, but needs a new buffer when ignoring whitespace or - * expanding leading tabs to spaces. - * - * We trust the caller to tell us if the update can be done - * in place (postlen==0) or not. + * Adjust the common context lines in postimage. */ - old_buf = postimage->buf; - if (postlen) - new_buf = postimage->buf = xmalloc(postlen); - else - new_buf = old_buf; - fixed = preimage->buf; - for (i = reduced = ctx = 0; i < postimage->line_nr; i++) { size_t l_len = postimage->line[i].len; + if (!(postimage->line[i].flag & LINE_COMMON)) { /* an added line -- no counterparts in preimage */ - memmove(new_buf, old_buf, l_len); - old_buf += l_len; - new_buf += l_len; + insert_pos += l_len; continue; } - /* a common context -- skip it in the original postimage */ - old_buf += l_len; - /* and find the corresponding one in the fixed preimage */ while (ctx < preimage->line_nr && !(preimage->line[ctx].flag & LINE_COMMON)) { @@ -2383,21 +2362,15 @@ static void update_pre_post_images(struct image *preimage, /* and copy it in, while fixing the line length */ l_len = preimage->line[ctx].len; - memcpy(new_buf, fixed, l_len); - new_buf += l_len; + strbuf_splice(&postimage->buf, insert_pos, postimage->line[i].len, + fixed, l_len); + insert_pos += l_len; fixed += l_len; postimage->line[i].len = l_len; ctx++; } - if (postlen - ? postlen < new_buf - postimage->buf - : postimage->len < new_buf - postimage->buf) - BUG("caller miscounted postlen: asked %d, orig = %d, used = %d", - (int)postlen, (int) postimage->len, (int)(new_buf - postimage->buf)); - /* Fix the length of the whole thing */ - postimage->len = new_buf - postimage->buf; postimage->line_nr -= reduced; } @@ -2447,7 +2420,6 @@ static int line_by_line_fuzzy_match(struct image *img, int i; size_t imgoff = 0; size_t preoff = 0; - size_t postlen = postimage->len; size_t extra_chars; char *buf; char *preimage_eof; @@ -2460,11 +2432,9 @@ static int line_by_line_fuzzy_match(struct image *img, size_t prelen = preimage->line[i].len; size_t imglen = img->line[current_lno+i].len; - if (!fuzzy_matchlines(img->buf + current + imgoff, imglen, - preimage->buf + preoff, prelen)) + if (!fuzzy_matchlines(img->buf.buf + current + imgoff, imglen, + preimage->buf.buf + preoff, prelen)) return 0; - if (preimage->line[i].flag & LINE_COMMON) - postlen += imglen - prelen; imgoff += imglen; preoff += prelen; } @@ -2480,10 +2450,10 @@ static int line_by_line_fuzzy_match(struct image *img, * are whitespace characters. (This can only happen if * we are removing blank lines at the end of the file.) */ - buf = preimage_eof = preimage->buf + preoff; + buf = preimage_eof = preimage->buf.buf + preoff; for ( ; i < preimage->line_nr; i++) preoff += preimage->line[i].len; - preimage_end = preimage->buf + preoff; + preimage_end = preimage->buf.buf + preoff; for ( ; buf < preimage_end; buf++) if (!isspace(*buf)) return 0; @@ -2497,11 +2467,11 @@ static int line_by_line_fuzzy_match(struct image *img, */ extra_chars = preimage_end - preimage_eof; strbuf_init(&fixed, imgoff + extra_chars); - strbuf_add(&fixed, img->buf + current, imgoff); + strbuf_add(&fixed, img->buf.buf + current, imgoff); strbuf_add(&fixed, preimage_eof, extra_chars); fixed_buf = strbuf_detach(&fixed, &fixed_len); update_pre_post_images(preimage, postimage, - fixed_buf, fixed_len, postlen); + fixed_buf, fixed_len); return 1; } @@ -2517,7 +2487,8 @@ static int match_fragment(struct apply_state *state, int i; const char *orig, *target; struct strbuf fixed = STRBUF_INIT; - size_t postlen; + char *fixed_buf; + size_t fixed_len; int preimage_limit; int ret; @@ -2573,9 +2544,9 @@ static int match_fragment(struct apply_state *state, * exactly. */ if ((match_end - ? (current + preimage->len == img->len) - : (current + preimage->len <= img->len)) && - !memcmp(img->buf + current, preimage->buf, preimage->len)) { + ? (current + preimage->buf.len == img->buf.len) + : (current + preimage->buf.len <= img->buf.len)) && + !memcmp(img->buf.buf + current, preimage->buf.buf, preimage->buf.len)) { ret = 1; goto out; } @@ -2589,7 +2560,7 @@ static int match_fragment(struct apply_state *state, */ const char *buf, *buf_end; - buf = preimage->buf; + buf = preimage->buf.buf; buf_end = buf; for (i = 0; i < preimage_limit; i++) buf_end += preimage->line[i].len; @@ -2634,21 +2605,14 @@ static int match_fragment(struct apply_state *state, * fixed. */ - /* First count added lines in postimage */ - postlen = 0; - for (i = 0; i < postimage->line_nr; i++) { - if (!(postimage->line[i].flag & LINE_COMMON)) - postlen += postimage->line[i].len; - } - /* * The preimage may extend beyond the end of the file, * but in this loop we will only handle the part of the * preimage that falls within the file. */ - strbuf_grow(&fixed, preimage->len + 1); - orig = preimage->buf; - target = img->buf + current; + strbuf_grow(&fixed, preimage->buf.len + 1); + orig = preimage->buf.buf; + target = img->buf.buf + current; for (i = 0; i < preimage_limit; i++) { size_t oldlen = preimage->line[i].len; size_t tgtlen = img->line[current_lno + i].len; @@ -2677,10 +2641,6 @@ static int match_fragment(struct apply_state *state, !memcmp(tgtfix.buf, fixed.buf + fixstart, fixed.len - fixstart)); - /* Add the length if this is common with the postimage */ - if (preimage->line[i].flag & LINE_COMMON) - postlen += tgtfix.len; - strbuf_release(&tgtfix); if (!match) { ret = 0; @@ -2722,10 +2682,9 @@ static int match_fragment(struct apply_state *state, * has whitespace breakages unfixed, and fixing them makes the * hunk match. Update the context lines in the postimage. */ - if (postlen < postimage->len) - postlen = 0; + fixed_buf = strbuf_detach(&fixed, &fixed_len); update_pre_post_images(preimage, postimage, - fixed.buf, fixed.len, postlen); + fixed_buf, fixed_len); ret = 1; @@ -2839,6 +2798,7 @@ static void update_image(struct apply_state *state, */ int i, nr; size_t remove_count, insert_count, applied_at = 0; + size_t result_alloc; char *result; int preimage_limit; @@ -2861,19 +2821,18 @@ static void update_image(struct apply_state *state, remove_count = 0; for (i = 0; i < preimage_limit; i++) remove_count += img->line[applied_pos + i].len; - insert_count = postimage->len; + insert_count = postimage->buf.len; /* Adjust the contents */ - result = xmalloc(st_add3(st_sub(img->len, remove_count), insert_count, 1)); - memcpy(result, img->buf, applied_at); - memcpy(result + applied_at, postimage->buf, postimage->len); - memcpy(result + applied_at + postimage->len, - img->buf + (applied_at + remove_count), - img->len - (applied_at + remove_count)); - free(img->buf); - img->buf = result; - img->len += insert_count - remove_count; - result[img->len] = '\0'; + result_alloc = st_add3(st_sub(img->buf.len, remove_count), insert_count, 1); + result = xmalloc(result_alloc); + memcpy(result, img->buf.buf, applied_at); + memcpy(result + applied_at, postimage->buf.buf, postimage->buf.len); + memcpy(result + applied_at + postimage->buf.len, + img->buf.buf + (applied_at + remove_count), + img->buf.len - (applied_at + remove_count)); + strbuf_attach(&img->buf, result, postimage->buf.len + img->buf.len - remove_count, + result_alloc); /* Adjust the line table */ nr = img->line_nr + postimage->line_nr - preimage_limit; @@ -3054,10 +3013,8 @@ static int apply_one_fragment(struct apply_state *state, match_end = !state->unidiff_zero && !trailing; pos = frag->newpos ? (frag->newpos - 1) : 0; - preimage.buf = oldlines; - preimage.len = old - oldlines; - postimage.buf = newlines.buf; - postimage.len = newlines.len; + strbuf_add(&preimage.buf, oldlines, old - oldlines); + strbuf_swap(&postimage.buf, &newlines); for (;;) { @@ -3145,8 +3102,8 @@ static int apply_one_fragment(struct apply_state *state, out: free(oldlines); strbuf_release(&newlines); - free(preimage.line); - free(postimage.line); + image_clear(&preimage); + image_clear(&postimage); return (applied_pos < 0); } @@ -3176,18 +3133,16 @@ static int apply_binary_fragment(struct apply_state *state, } switch (fragment->binary_patch_method) { case BINARY_DELTA_DEFLATED: - dst = patch_delta(img->buf, img->len, fragment->patch, + dst = patch_delta(img->buf.buf, img->buf.len, fragment->patch, fragment->size, &len); if (!dst) return -1; image_clear(img); - img->buf = dst; - img->len = len; + strbuf_attach(&img->buf, dst, len, len + 1); return 0; case BINARY_LITERAL_DEFLATED: image_clear(img); - img->len = fragment->size; - img->buf = xmemdupz(fragment->patch, img->len); + strbuf_add(&img->buf, fragment->patch, fragment->size); return 0; } return -1; @@ -3223,8 +3178,8 @@ static int apply_binary(struct apply_state *state, * See if the old one matches what the patch * applies to. */ - hash_object_file(the_hash_algo, img->buf, img->len, OBJ_BLOB, - &oid); + hash_object_file(the_hash_algo, img->buf.buf, img->buf.len, + OBJ_BLOB, &oid); if (strcmp(oid_to_hex(&oid), patch->old_oid_prefix)) return error(_("the patch applies to '%s' (%s), " "which does not match the " @@ -3233,7 +3188,7 @@ static int apply_binary(struct apply_state *state, } else { /* Otherwise, the old one must be empty. */ - if (img->len) + if (img->buf.len) return error(_("the patch applies to an empty " "'%s' but it is not empty"), name); } @@ -3257,8 +3212,7 @@ static int apply_binary(struct apply_state *state, "'%s' cannot be read"), patch->new_oid_prefix, name); image_clear(img); - img->buf = result; - img->len = size; + strbuf_attach(&img->buf, result, size, size + 1); } else { /* * We have verified buf matches the preimage; @@ -3270,7 +3224,7 @@ static int apply_binary(struct apply_state *state, name); /* verify that the result matches */ - hash_object_file(the_hash_algo, img->buf, img->len, OBJ_BLOB, + hash_object_file(the_hash_algo, img->buf.buf, img->buf.len, OBJ_BLOB, &oid); if (strcmp(oid_to_hex(&oid), patch->new_oid_prefix)) return error(_("binary patch to '%s' creates incorrect result (expecting %s, got %s)"), @@ -3540,14 +3494,14 @@ static int resolve_to(struct image *image, const struct object_id *result_id) { unsigned long size; enum object_type type; + char *data; image_clear(image); - image->buf = repo_read_object_file(the_repository, result_id, &type, - &size); - if (!image->buf || type != OBJ_BLOB) + data = repo_read_object_file(the_repository, result_id, &type, &size); + if (!data || type != OBJ_BLOB) die("unable to read blob object %s", oid_to_hex(result_id)); - image->len = size; + strbuf_attach(&image->buf, data, size, size + 1); return 0; } @@ -3589,8 +3543,7 @@ static int three_way_merge(struct apply_state *state, return -1; } image_clear(image); - image->buf = result.ptr; - image->len = result.size; + strbuf_attach(&image->buf, result.ptr, result.size, result.size); return status; } @@ -3677,7 +3630,7 @@ static int try_threeway(struct apply_state *state, return -1; } /* post_oid is theirs */ - write_object_file(tmp_image.buf, tmp_image.len, OBJ_BLOB, &post_oid); + write_object_file(tmp_image.buf.buf, tmp_image.buf.len, OBJ_BLOB, &post_oid); image_clear(&tmp_image); /* our_oid is ours */ @@ -3690,7 +3643,7 @@ static int try_threeway(struct apply_state *state, return error(_("cannot read the current contents of '%s'"), patch->old_name); } - write_object_file(tmp_image.buf, tmp_image.len, OBJ_BLOB, &our_oid); + write_object_file(tmp_image.buf.buf, tmp_image.buf.len, OBJ_BLOB, &our_oid); image_clear(&tmp_image); /* in-core three-way merge between post and our using pre as base */ @@ -3743,8 +3696,7 @@ static int apply_data(struct apply_state *state, struct patch *patch, return -1; } } - patch->result = image.buf; - patch->resultsize = image.len; + patch->result = strbuf_detach(&image.buf, &patch->resultsize); add_to_fn_table(state, patch); free(image.line); diff --git a/t/t3436-rebase-more-options.sh b/t/t3436-rebase-more-options.sh index 94671d3c46..4d9744e5fc 100755 --- a/t/t3436-rebase-more-options.sh +++ b/t/t3436-rebase-more-options.sh @@ -5,6 +5,7 @@ test_description='tests to ensure compatibility between am and interactive backends' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh . "$TEST_DIRECTORY"/lib-rebase.sh diff --git a/t/t4107-apply-ignore-whitespace.sh b/t/t4107-apply-ignore-whitespace.sh index ac72eeaf27..5e6e203aa5 100755 --- a/t/t4107-apply-ignore-whitespace.sh +++ b/t/t4107-apply-ignore-whitespace.sh @@ -3,9 +3,9 @@ # Copyright (c) 2009 Giuseppe Bilotta # -test_description='git-apply --ignore-whitespace. +test_description='git-apply --ignore-whitespace.' -' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh # This primes main.c file that indents without using HT at all. diff --git a/t/t4124-apply-ws-rule.sh b/t/t4124-apply-ws-rule.sh index 485c7d2d12..cdffee0273 100755 --- a/t/t4124-apply-ws-rule.sh +++ b/t/t4124-apply-ws-rule.sh @@ -2,6 +2,7 @@ test_description='core.whitespace rules and git apply' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh prepare_test_file () { diff --git a/t/t4125-apply-ws-fuzz.sh b/t/t4125-apply-ws-fuzz.sh index 090987c89b..f248cc2a00 100755 --- a/t/t4125-apply-ws-fuzz.sh +++ b/t/t4125-apply-ws-fuzz.sh @@ -2,6 +2,7 @@ test_description='applying patch that has broken whitespaces in context' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh test_expect_success setup ' diff --git a/t/t4138-apply-ws-expansion.sh b/t/t4138-apply-ws-expansion.sh index 8bbf8260fa..7981931b4e 100755 --- a/t/t4138-apply-ws-expansion.sh +++ b/t/t4138-apply-ws-expansion.sh @@ -5,6 +5,7 @@ test_description='git apply test patches with whitespace expansion.' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh test_expect_success setup ' -- 2.46.0.551.gc5ee8f2d1c.dirty