Re: [PATCH v5 5/5] builtin/merge-tree.c: implement support for `--write-pack`

Jeff King <peff@xxxxxxxx> · Mon, 13 Nov 2023 17:02:54 -0500

On Fri, Nov 10, 2023 at 03:51:18PM -0800, Elijah Newren wrote:

> This is unsafe; the object may need to be read later within the same
> merge. [...]
>
> I think (untested) that you could fix this by creating two packs
> instead of just one.  In particular, add a call to
> flush_odb_transcation() after the "redo_after_renames" block in
> merge_ort_nonrecursive_internal().

It might not be too hard to just let in-process callers access the
objects we've written. A quick and dirty patch is below, which works
with the test you suggested (the test still fails because it finds a
conflict, but it gets past the "woah, I can't find that sha1" part).

I don't know if that is sufficient, though. Would other spawned
processes (hooks, external merge drivers, and so on) need to be able to
see these objects, too?

The patch teaches the packfile code about the special bulk checkin pack.
It might be cleaner to invert it, and just have the bulk checkin code
register a magic packed_git (it would need to fake the .idx somehow).

diff --git a/bulk-checkin.c b/bulk-checkin.c
index bd6151ba3c..566fc36e68 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -30,6 +30,8 @@ static struct bulk_checkin_packfile {
 	struct pack_idx_entry **written;
 	uint32_t alloc_written;
 	uint32_t nr_written;
+
+	struct packed_git *fake_packed_git;
 } bulk_checkin_packfile;
 
 static void finish_tmp_packfile(struct strbuf *basename,
@@ -82,6 +84,7 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 
 clear_exit:
 	free(state->written);
+	free(state->fake_packed_git);
 	memset(state, 0, sizeof(*state));
 
 	strbuf_release(&packname);
@@ -530,3 +533,37 @@ void end_odb_transaction(void)
 
 	flush_odb_transaction();
 }
+
+static struct packed_git *fake_packed_git(struct bulk_checkin_packfile *state)
+{
+	struct packed_git *p = state->fake_packed_git;
+	if (!p) {
+		FLEX_ALLOC_STR(p, pack_name, "/fake/in-progress.pack");
+		state->fake_packed_git = p;
+		p->pack_fd = state->f->fd;
+		p->do_not_close = 1;
+	}
+
+	hashflush(state->f);
+	p->pack_size = state->f->total; /* maybe add 20 to simulate trailer? */
+
+	return p;
+}
+
+int bulk_checkin_pack_entry(const struct object_id *oid, struct pack_entry *e)
+{
+	size_t i;
+	/*
+	 * this really ought to have some more efficient data structure for
+	 * lookup; ditto for the existing already_written()
+	 */
+	for (i = 0; i < bulk_checkin_packfile.nr_written; i++) {
+		struct pack_idx_entry *p = bulk_checkin_packfile.written[i];
+		if (oideq(&p->oid, oid)) {
+			e->p = fake_packed_git(&bulk_checkin_packfile);
+			e->offset = p->offset;
+			return 0;
+		}
+	}
+	return -1;
+}
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 89786b3954..153fe87c06 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -44,4 +44,7 @@ void flush_odb_transaction(void);
  */
 void end_odb_transaction(void);
 
+struct pack_entry;
+int bulk_checkin_pack_entry(const struct object_id *oid, struct pack_entry *e);
+
 #endif
diff --git a/packfile.c b/packfile.c
index 9cc0a2e37a..05194b1d9b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -23,6 +23,7 @@
 #include "commit-graph.h"
 #include "pack-revindex.h"
 #include "promisor-remote.h"
+#include "bulk-checkin.h"
 
 char *odb_pack_name(struct strbuf *buf,
 		    const unsigned char *hash,
@@ -2045,6 +2046,9 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
 	struct list_head *pos;
 	struct multi_pack_index *m;
 
+	if (!bulk_checkin_pack_entry(oid, e))
+		return 1;
+
 	prepare_packed_git(r);
 	if (!r->objects->packed_git && !r->objects->multi_pack_index)
 		return 0;