Introduce `index_blob_bulk_checkin_incore()` which allows streaming arbitrary blob contents from memory into the bulk-checkin pack. In order to support streaming from a location in memory, we must implement a new kind of bulk_checkin_source that does just that. This implementation is spread out across: - init_bulk_checkin_source_incore() - bulk_checkin_source_read_incore() - bulk_checkin_source_seek_incore() Note that, unlike file descriptors, which manage their own offset internally, we have to keep track of how many bytes we've read out of the buffer, and make sure we don't read past the end of the buffer. This will be useful in a couple more commits in order to provide the `merge-tree` builtin with a mechanism to create a new pack containing any objects it created during the merge, instead of storing those objects individually as loose objects. Similar to the existing `index_blob_bulk_checkin()` function, the entrypoint delegates to `deflate_obj_to_pack_incore()`. That function in turn delegates to `deflate_obj_to_pack()`, which is responsible for formatting the pack header and then deflating the contents into the pack. Consistent with the rest of the bulk-checkin mechanism, there are no direct tests here. In future commits when we expose this new functionality via the `merge-tree` builtin, we will test it indirectly there. 
Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> --- bulk-checkin.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++ bulk-checkin.h | 4 +++ 2 files changed, 79 insertions(+) diff --git a/bulk-checkin.c b/bulk-checkin.c index 79776e679e..b728210bc7 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -148,6 +148,10 @@ struct bulk_checkin_source { struct { int fd; } from_fd; + struct { + const void *buf; + size_t nr_read; + } incore; } data; size_t size; @@ -166,6 +170,36 @@ static off_t bulk_checkin_source_seek_from_fd(struct bulk_checkin_source *source return lseek(source->data.from_fd.fd, offset, SEEK_SET); } +static off_t bulk_checkin_source_read_incore(struct bulk_checkin_source *source, + void *buf, size_t nr) +{ + const unsigned char *src = source->data.incore.buf; + + if (source->data.incore.nr_read > source->size) + BUG("read beyond bulk-checkin source buffer end " + "(%"PRIuMAX" > %"PRIuMAX")", + (uintmax_t)source->data.incore.nr_read, + (uintmax_t)source->size); + + if (nr > source->size - source->data.incore.nr_read) + nr = source->size - source->data.incore.nr_read; + + src += source->data.incore.nr_read; + + memcpy(buf, src, nr); + source->data.incore.nr_read += nr; + return nr; +} + +static off_t bulk_checkin_source_seek_incore(struct bulk_checkin_source *source, + off_t offset) +{ + if (!(0 <= offset && offset < source->size)) + return (off_t)-1; + source->data.incore.nr_read = offset; + return source->data.incore.nr_read; +} + static void init_bulk_checkin_source_from_fd(struct bulk_checkin_source *source, int fd, size_t size, const char *path) @@ -181,6 +215,22 @@ static void init_bulk_checkin_source_from_fd(struct bulk_checkin_source *source, source->path = path; } +static void init_bulk_checkin_source_incore(struct bulk_checkin_source *source, + const void *buf, size_t size, + const char *path) +{ + memset(source, 0, sizeof(struct bulk_checkin_source)); + + source->read = bulk_checkin_source_read_incore; + source->seek = 
bulk_checkin_source_seek_incore; + + source->data.incore.buf = buf; + source->data.incore.nr_read = 0; + + source->size = size; + source->path = path; +} + /* * Read the contents from 'source' for 'size' bytes, streaming it to the * packfile in state while updating the hash in ctx. Signal a failure @@ -359,6 +409,19 @@ static int deflate_obj_to_pack(struct bulk_checkin_packfile *state, return 0; } +static int deflate_obj_to_pack_incore(struct bulk_checkin_packfile *state, + struct object_id *result_oid, + const void *buf, size_t size, + const char *path, enum object_type type, + unsigned flags) +{ + struct bulk_checkin_source source; + + init_bulk_checkin_source_incore(&source, buf, size, path); + + return deflate_obj_to_pack(state, result_oid, &source, type, 0, flags); +} + static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, struct object_id *result_oid, int fd, size_t size, @@ -421,6 +484,18 @@ int index_blob_bulk_checkin(struct object_id *oid, return status; } +int index_blob_bulk_checkin_incore(struct object_id *oid, + const void *buf, size_t size, + const char *path, unsigned flags) +{ + int status = deflate_obj_to_pack_incore(&bulk_checkin_packfile, oid, + buf, size, path, OBJ_BLOB, + flags); + if (!odb_transaction_nesting) + flush_bulk_checkin_packfile(&bulk_checkin_packfile); + return status; +} + void begin_odb_transaction(void) { odb_transaction_nesting += 1; diff --git a/bulk-checkin.h b/bulk-checkin.h index aa7286a7b3..1b91daeaee 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -13,6 +13,10 @@ int index_blob_bulk_checkin(struct object_id *oid, int fd, size_t size, const char *path, unsigned flags); +int index_blob_bulk_checkin_incore(struct object_id *oid, + const void *buf, size_t size, + const char *path, unsigned flags); + /* * Tell the object database to optimize for adding * multiple objects. end_odb_transaction must be called -- 2.42.0.425.g963d08ddb3.dirty