I came up with this on top of Taylor's series which fixes the order in which we write files associated with pack files[1]. His series fixes a race where we write *.idx before *.rev, but left the issue of writing *.bitmap after *.idx, this series fixes that. Now we'll really write the *.idx last. This v2 attempts to fix the comments Taylor had on v1, in particular I think the borderline readability improvements in v1 are now pretty unambiguously more readable in 2/4 of this series. I also converted the bulk-checkin.c caller, split up the wrapping change in pack.h into its own commit etc. 1. https://lore.kernel.org/git/cover.1630461918.git.me@xxxxxxxxxxxx/ [1] Ævar Arnfjörð Bjarmason (4): pack.h: line-wrap the definition of finish_tmp_packfile() pack-write: refactor renaming in finish_tmp_packfile() pack-write: split up finish_tmp_packfile() function pack-write: rename *.idx file into place last (really!) builtin/pack-objects.c | 16 +++++++++--- bulk-checkin.c | 16 ++++++++++++ pack-write.c | 59 +++++++++++++++++++++--------------------- pack.h | 10 ++++++- 4 files changed, 67 insertions(+), 34 deletions(-) Range-diff against v1: -: ---------- > 1: 29f5787651 pack.h: line-wrap the definition of finish_tmp_packfile() 1: 0e6ef07ce0 ! 2: 7b39f4599b pack-write: use more idiomatic strbuf usage for packname construction @@ Metadata Author: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> ## Commit message ## - pack-write: use more idiomatic strbuf usage for packname construction + pack-write: refactor renaming in finish_tmp_packfile() - Change code added in 5889271114a (finish_tmp_packfile():use strbuf for - pathname construction, 2014-03-03) to do strbuf_reset() instead of - noting the length of the base template, and doing a strbuf_setlen() to - reset it, also change the spacing in the finish_tmp_packfile() so that - each setup of the template, rename, and strbuf_reset() is grouped - together. + Refactor the renaming in finish_tmp_packfile() so that it takes a + "const struct strbuf *" instead of a non-const, and refactor the + repetitive renaming pattern in finish_tmp_packfile() to use a new + static rename_tmp_packfile() helper function. - Since the prototype of the previous "name_buffer" now has a "const" - use this chance to wrap the overly long definition of the - finish_tmp_packfile() function. + The previous strbuf_reset() idiom originated with + 5889271114a (finish_tmp_packfile():use strbuf for pathname + construction, 2014-03-03), which in turn was a minimal adjustment of + pre-strbuf code added in 0e990530ae (finish_tmp_packfile(): a helper + function, 2011-10-28). - This doesn't really matter for now, but as we'll see makes the - subsequent change much easier, as we won't need to juggle the basename - template v.s. its current contents anymore when writing bitmaps. + Since the memory allocation is not a bottleneck here we can afford a + bit more readability at the cost of re-allocating this new "struct + strbuf sb". Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> ## builtin/pack-objects.c ## @@ builtin/pack-objects.c: static void write_pack_file(void) - - if (!pack_to_stdout) { - struct stat st; -- struct strbuf tmpname = STRBUF_INIT; -+ struct strbuf tmp_basename = STRBUF_INIT; - - /* - * Packs are runtime accessed in their mtime -@@ builtin/pack-objects.c: static void write_pack_file(void) - warning_errno(_("failed utime() on %s"), pack_tmp_name); - } - -- strbuf_addf(&tmpname, "%s-", base_name); -+ strbuf_addf(&tmp_basename, "%s-", base_name); - - if (write_bitmap_index) { - bitmap_writer_set_checksum(hash); -@@ builtin/pack-objects.c: static void write_pack_file(void) - &to_pack, written_list, nr_written); - } - -- finish_tmp_packfile(&tmpname, pack_tmp_name, -+ finish_tmp_packfile(&tmp_basename, pack_tmp_name, - written_list, nr_written, &pack_idx_opts, hash); if (write_bitmap_index) { - strbuf_addf(&tmpname, "%s.bitmap", hash_to_hex(hash)); + struct strbuf sb = STRBUF_INIT; + -+ strbuf_addf(&sb, "%s%s.bitmap", -+ tmp_basename.buf, ++ strbuf_addf(&sb, "%s%s.bitmap", tmpname.buf, + hash_to_hex(hash)); stop_progress(&progress_state); @@ builtin/pack-objects.c: static void write_pack_file(void) + strbuf_release(&sb); } -- strbuf_release(&tmpname); -+ strbuf_release(&tmp_basename); - free(pack_tmp_name); - puts(hash_to_hex(hash)); - } + strbuf_release(&tmpname); ## pack-write.c ## @@ pack-write.c: struct hashfile *create_tmp_packfile(char **pack_tmp_name) @@ pack-write.c: struct hashfile *create_tmp_packfile(char **pack_tmp_name) } -void finish_tmp_packfile(struct strbuf *name_buffer, -+void finish_tmp_packfile(const struct strbuf *tmp_basename, ++static void rename_tmp_packfile(const char *source, ++ const struct strbuf *basename, ++ const unsigned char hash[], ++ const char *ext) ++{ ++ struct strbuf sb = STRBUF_INIT; ++ ++ strbuf_addf(&sb, "%s%s.%s", basename->buf, hash_to_hex(hash), ext); ++ if (rename(source, sb.buf)) ++ die_errno("unable to rename temporary '*.%s' file to '%s'", ++ ext, sb.buf); ++ strbuf_release(&sb); ++} ++ ++void finish_tmp_packfile(const struct strbuf *basename, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, - struct pack_idx_option *pack_idx_opts, +@@ pack-write.c: void finish_tmp_packfile(struct strbuf *name_buffer, unsigned char hash[]) { -+ struct strbuf sb = STRBUF_INIT; const char *idx_tmp_name, *rev_tmp_name = NULL; - int basename_len = name_buffer->len; @@ pack-write.c: void finish_tmp_packfile(struct strbuf *name_buffer, - strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash)); - - if (rename(pack_tmp_name, name_buffer->buf)) -+ strbuf_addf(&sb, "%s%s.pack", tmp_basename->buf, hash_to_hex(hash)); -+ if (rename(pack_tmp_name, sb.buf)) - die_errno("unable to rename temporary pack file"); +- die_errno("unable to rename temporary pack file"); - - strbuf_setlen(name_buffer, basename_len); -+ strbuf_reset(&sb); - - if (rev_tmp_name) { +- +- if (rev_tmp_name) { - strbuf_addf(name_buffer, "%s.rev", hash_to_hex(hash)); - if (rename(rev_tmp_name, name_buffer->buf)) -+ strbuf_addf(&sb, "%s%s.rev", tmp_basename->buf, -+ hash_to_hex(hash)); -+ if (rename(rev_tmp_name, sb.buf)) - die_errno("unable to rename temporary reverse-index file"); +- die_errno("unable to rename temporary reverse-index file"); - - strbuf_setlen(name_buffer, basename_len); -+ strbuf_reset(&sb); - } - +- } +- - strbuf_addf(name_buffer, "%s.idx", hash_to_hex(hash)); - if (rename(idx_tmp_name, name_buffer->buf)) -+ strbuf_addf(&sb, "%s%s.idx", tmp_basename->buf, hash_to_hex(hash)); -+ if (rename(idx_tmp_name, sb.buf)) - die_errno("unable to rename temporary index file"); +- die_errno("unable to rename temporary index file"); - - strbuf_setlen(name_buffer, basename_len); -+ strbuf_reset(&sb); ++ rename_tmp_packfile(pack_tmp_name, basename, hash, "pack"); ++ if (rev_tmp_name) ++ rename_tmp_packfile(rev_tmp_name, basename, hash, "rev"); ++ rename_tmp_packfile(idx_tmp_name, basename, hash, "idx"); free((void *)idx_tmp_name); } @@ pack.h: int encode_in_pack_object_header(unsigned char *hdr, int hdr_len, int read_pack_header(int fd, struct pack_header *); struct hashfile *create_tmp_packfile(char **pack_tmp_name); --void finish_tmp_packfile(struct strbuf *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]); -+void finish_tmp_packfile(const struct strbuf *name_buffer, -+ const char *pack_tmp_name, -+ struct pack_idx_entry **written_list, -+ uint32_t nr_written, -+ struct pack_idx_option *pack_idx_opts, -+ unsigned char sha1[]); - - #endif +-void finish_tmp_packfile(struct strbuf *name_buffer, ++void finish_tmp_packfile(const struct strbuf *basename, + const char *pack_tmp_name, + struct pack_idx_entry **written_list, + uint32_t nr_written, 2: 42f83774fe ! 3: 1205f9d0c2 pack-write: split up finish_tmp_packfile() function @@ Commit message pack-write: split up finish_tmp_packfile() function Split up the finish_tmp_packfile() function and use the split-up - version in pack-objects.c. This change should not change any - functionality, but sets up code flow for a bug fix where we'll be able - to move the *.idx in-place after the *.bitmap is written. + version in pack-objects.c in preparation for moving the step of + renaming the *.idx file later as part of a function change. + + Since the only other caller of finish_tmp_packfile() was in + bulk-checkin.c, and it won't be needing a change to its *.idx + renaming, provide a thin wrapper for the old function as a static + function in that file. If other callers end up needing the simpler + version it could be moved back to "pack-write.c" and "pack.h". Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> @@ builtin/pack-objects.c @@ builtin/pack-objects.c: static void write_pack_file(void) if (!pack_to_stdout) { struct stat st; - struct strbuf tmp_basename = STRBUF_INIT; + struct strbuf tmpname = STRBUF_INIT; + char *idx_tmp_name = NULL; /* @@ builtin/pack-objects.c: static void write_pack_file(void) &to_pack, written_list, nr_written); } -- finish_tmp_packfile(&tmp_basename, pack_tmp_name, -- written_list, nr_written, +- finish_tmp_packfile(&tmpname, pack_tmp_name, ++ stage_tmp_packfiles(&tmpname, pack_tmp_name, + written_list, nr_written, - &pack_idx_opts, hash); -+ stage_tmp_packfile(&tmp_basename, pack_tmp_name, -+ written_list, nr_written, -+ &pack_idx_opts, hash, &idx_tmp_name); -+ rename_tmp_packfile_idx(&tmp_basename, hash, &idx_tmp_name); ++ &pack_idx_opts, hash, &idx_tmp_name); ++ rename_tmp_packfile_idx(&tmpname, hash, &idx_tmp_name); if (write_bitmap_index) { struct strbuf sb = STRBUF_INIT; @@ builtin/pack-objects.c: static void write_pack_file(void) } + free(idx_tmp_name); - strbuf_release(&tmp_basename); + strbuf_release(&tmpname); free(pack_tmp_name); puts(hash_to_hex(hash)); - ## pack-write.c ## -@@ pack-write.c: void finish_tmp_packfile(const struct strbuf *tmp_basename, - uint32_t nr_written, - struct pack_idx_option *pack_idx_opts, - unsigned char hash[]) + ## bulk-checkin.c ## +@@ bulk-checkin.c: static struct bulk_checkin_state { + uint32_t nr_written; + } state; + ++static void finish_tmp_packfile(const struct strbuf *basename, ++ const char *pack_tmp_name, ++ struct pack_idx_entry **written_list, ++ uint32_t nr_written, ++ struct pack_idx_option *pack_idx_opts, ++ unsigned char hash[]) +{ + char *idx_tmp_name = NULL; + -+ stage_tmp_packfile(tmp_basename, pack_tmp_name, written_list, -+ nr_written, pack_idx_opts, hash, &idx_tmp_name); -+ rename_tmp_packfile_idx(tmp_basename, hash, &idx_tmp_name); ++ stage_tmp_packfiles(basename, pack_tmp_name, written_list, nr_written, ++ pack_idx_opts, hash, &idx_tmp_name); ++ rename_tmp_packfile_idx(basename, hash, &idx_tmp_name); + + free(idx_tmp_name); +} + -+void stage_tmp_packfile(const struct strbuf *tmp_basename, -+ const char *pack_tmp_name, -+ struct pack_idx_entry **written_list, -+ uint32_t nr_written, -+ struct pack_idx_option *pack_idx_opts, -+ unsigned char hash[], -+ char **idx_tmp_name) + static void finish_bulk_checkin(struct bulk_checkin_state *state) + { + struct object_id oid; + + ## pack-write.c ## +@@ pack-write.c: static void rename_tmp_packfile(const char *source, + strbuf_release(&sb); + } + +-void finish_tmp_packfile(const struct strbuf *basename, ++void rename_tmp_packfile_idx(const struct strbuf *basename, ++ unsigned char hash[], char **idx_tmp_name) ++{ ++ rename_tmp_packfile(*idx_tmp_name, basename, hash, "idx"); ++} ++ ++void stage_tmp_packfiles(const struct strbuf *basename, + const char *pack_tmp_name, + struct pack_idx_entry **written_list, + uint32_t nr_written, + struct pack_idx_option *pack_idx_opts, +- unsigned char hash[]) ++ unsigned char hash[], ++ char **idx_tmp_name) { - struct strbuf sb = STRBUF_INIT; - const char *idx_tmp_name, *rev_tmp_name = NULL; + const char *rev_tmp_name = NULL; @@ pack-write.c: void finish_tmp_packfile(const struct strbuf *tmp_basename, die_errno("unable to make temporary index file readable"); rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash, -@@ pack-write.c: void finish_tmp_packfile(const struct strbuf *tmp_basename, - strbuf_reset(&sb); - } - -+ strbuf_release(&sb); -+} -+ -+void rename_tmp_packfile_idx(const struct strbuf *tmp_basename, -+ unsigned char hash[], char **idx_tmp_name) -+{ -+ struct strbuf sb = STRBUF_INIT; -+ - strbuf_addf(&sb, "%s%s.idx", tmp_basename->buf, hash_to_hex(hash)); -- if (rename(idx_tmp_name, sb.buf)) -+ if (rename(*idx_tmp_name, sb.buf)) - die_errno("unable to rename temporary index file"); -- strbuf_reset(&sb); +@@ pack-write.c: void finish_tmp_packfile(const struct strbuf *basename, + rename_tmp_packfile(pack_tmp_name, basename, hash, "pack"); + if (rev_tmp_name) + rename_tmp_packfile(rev_tmp_name, basename, hash, "rev"); +- rename_tmp_packfile(idx_tmp_name, basename, hash, "idx"); - - free((void *)idx_tmp_name); -+ strbuf_release(&sb); } void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought) @@ pack.h: int encode_in_pack_object_header(unsigned char *hdr, int hdr_len, int read_pack_header(int fd, struct pack_header *); struct hashfile *create_tmp_packfile(char **pack_tmp_name); -+void stage_tmp_packfile(const struct strbuf *tmp_basename, -+ const char *pack_tmp_name, -+ struct pack_idx_entry **written_list, -+ uint32_t nr_written, -+ struct pack_idx_option *pack_idx_opts, -+ unsigned char hash[], -+ char **idx_tmp_name); -+void rename_tmp_packfile_idx(const struct strbuf *tmp_basename, -+ unsigned char hash[], char **idx_tmp_name); - void finish_tmp_packfile(const struct strbuf *name_buffer, +-void finish_tmp_packfile(const struct strbuf *basename, ++void stage_tmp_packfiles(const struct strbuf *basename, const char *pack_tmp_name, struct pack_idx_entry **written_list, + uint32_t nr_written, + struct pack_idx_option *pack_idx_opts, +- unsigned char sha1[]); ++ unsigned char hash[], ++ char **idx_tmp_name); ++void rename_tmp_packfile_idx(const struct strbuf *tmp_basename, ++ unsigned char hash[], char **idx_tmp_name); + + #endif 3: 78976fcb7b ! 4: 70f4a9767d pack-write: rename *.idx file into place last (really!) @@ Commit message those files are written out, nothing in that commit contradicts what's being done here. - While we're at it let's add cross-commentary to both builtin/repack.c - and builtin/pack-objects.c to point out the two places where we write - out these sets of files in sequence. + Note that the referenced earlier commit[1] is overly optimistic about + "clos[ing the] race", i.e. yes we'll now write the files in the right + order, but we might still race due to our sloppy use of fsync(). See + the thread at [2] for a rabbit hole of various discussions about + filesystem races in the face of doing and not doing fsync() (and if + doing fsync(), not doing it properly). 1. https://lore.kernel.org/git/a6a4d2154e83d41c10986c5f455279ab249a910c.1630461918.git.me@xxxxxxxxxxxx/ + 2. https://lore.kernel.org/git/8735qgkvv1.fsf@xxxxxxxxxxxxxxxxxxx/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> ## builtin/pack-objects.c ## @@ builtin/pack-objects.c: static void write_pack_file(void) - stage_tmp_packfile(&tmp_basename, pack_tmp_name, - written_list, nr_written, - &pack_idx_opts, hash, &idx_tmp_name); -- rename_tmp_packfile_idx(&tmp_basename, hash, &idx_tmp_name); + stage_tmp_packfiles(&tmpname, pack_tmp_name, + written_list, nr_written, + &pack_idx_opts, hash, &idx_tmp_name); +- rename_tmp_packfile_idx(&tmpname, hash, &idx_tmp_name); if (write_bitmap_index) { struct strbuf sb = STRBUF_INIT; @@ builtin/pack-objects.c: static void write_pack_file(void) strbuf_release(&sb); } -+ /* -+ * We must write the *.idx last, so that anything that expects -+ * an accompanying *.rev, *.bitmap etc. can count on it being -+ * present. -+ * -+ * See also corresponding logic in the "exts" -+ * struct in builtin/repack.c -+ */ -+ rename_tmp_packfile_idx(&tmp_basename, hash, &idx_tmp_name); ++ rename_tmp_packfile_idx(&tmpname, hash, &idx_tmp_name); + free(idx_tmp_name); - strbuf_release(&tmp_basename); + strbuf_release(&tmpname); free(pack_tmp_name); - - ## builtin/repack.c ## -@@ builtin/repack.c: static struct { - {".rev", 1}, - {".bitmap", 1}, - {".promisor", 1}, -+ /* -+ * We must write the *.idx last, so that anything that expects -+ * an accompanying *.rev, *.bitmap etc. can count on it being -+ * present. -+ * -+ * See also corresponding logic in write_pack_file() in -+ * builtin/pack-objects.c -+ */ - {".idx"}, - }; - -- 2.33.0.819.gea1b153a43c