On Mon, Oct 23, 2023 at 06:45:01PM -0400, Taylor Blau wrote: > Introduce `index_blob_bulk_checkin_incore()` which allows streaming > arbitrary blob contents from memory into the bulk-checkin pack. > > In order to support streaming from a location in memory, we must > implement a new kind of bulk_checkin_source that does just that. These > implementation in spread out across: Nit: the commit message is a bit off here ("These implementation in spread out across" presumably wants to be "This implementation is spread out across"). Probably not worth a reroll though. > - init_bulk_checkin_source_incore() > - bulk_checkin_source_read_incore() > - bulk_checkin_source_seek_incore() > > Note that, unlike file descriptors, which manage their own offset > internally, we have to keep track of how many bytes we've read out of > the buffer, and make sure we don't read past the end of the buffer. > > This will be useful in a couple of more commits in order to provide the > `merge-tree` builtin with a mechanism to create a new pack containing > any objects it created during the merge, instead of storing those > objects individually as loose. > > Similar to the existing `index_blob_bulk_checkin()` function, the > entrypoint delegates to `deflate_obj_to_pack_incore()`. That function in > turn delegates to deflate_obj_to_pack(), which is responsible for > formatting the pack header and then deflating the contents into the > pack. > > Consistent with the rest of the bulk-checkin mechanism, there are no > direct tests here. In future commits when we expose this new > functionality via the `merge-tree` builtin, we will test it indirectly > there. 
> > Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> > --- > bulk-checkin.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++ > bulk-checkin.h | 4 +++ > 2 files changed, 79 insertions(+) > > diff --git a/bulk-checkin.c b/bulk-checkin.c > index 79776e679e..b728210bc7 100644 > --- a/bulk-checkin.c > +++ b/bulk-checkin.c > @@ -148,6 +148,10 @@ struct bulk_checkin_source { > struct { > int fd; > } from_fd; > + struct { > + const void *buf; > + size_t nr_read; > + } incore; > } data; > > size_t size; > @@ -166,6 +170,36 @@ static off_t bulk_checkin_source_seek_from_fd(struct bulk_checkin_source *source > return lseek(source->data.from_fd.fd, offset, SEEK_SET); > } > > +static off_t bulk_checkin_source_read_incore(struct bulk_checkin_source *source, > + void *buf, size_t nr) > +{ > + const unsigned char *src = source->data.incore.buf; > + > + if (source->data.incore.nr_read > source->size) > + BUG("read beyond bulk-checkin source buffer end " > + "(%"PRIuMAX" > %"PRIuMAX")", > + (uintmax_t)source->data.incore.nr_read, > + (uintmax_t)source->size); > + > + if (nr > source->size - source->data.incore.nr_read) > + nr = source->size - source->data.incore.nr_read; > + > + src += source->data.incore.nr_read; > + > + memcpy(buf, src, nr); > + source->data.incore.nr_read += nr; > + return nr; > +} > + > +static off_t bulk_checkin_source_seek_incore(struct bulk_checkin_source *source, > + off_t offset) > +{ > + if (!(0 <= offset && offset < source->size)) > + return (off_t)-1; At the risk of showing my own ignorance: why is the cast here necessary? 
Patrick > + source->data.incore.nr_read = offset; > + return source->data.incore.nr_read; > +} > + > static void init_bulk_checkin_source_from_fd(struct bulk_checkin_source *source, > int fd, size_t size, > const char *path) > @@ -181,6 +215,22 @@ static void init_bulk_checkin_source_from_fd(struct bulk_checkin_source *source, > source->path = path; > } > > +static void init_bulk_checkin_source_incore(struct bulk_checkin_source *source, > + const void *buf, size_t size, > + const char *path) > +{ > + memset(source, 0, sizeof(struct bulk_checkin_source)); > + > + source->read = bulk_checkin_source_read_incore; > + source->seek = bulk_checkin_source_seek_incore; > + > + source->data.incore.buf = buf; > + source->data.incore.nr_read = 0; > + > + source->size = size; > + source->path = path; > +} > + > /* > * Read the contents from 'source' for 'size' bytes, streaming it to the > * packfile in state while updating the hash in ctx. Signal a failure > @@ -359,6 +409,19 @@ static int deflate_obj_to_pack(struct bulk_checkin_packfile *state, > return 0; > } > > +static int deflate_obj_to_pack_incore(struct bulk_checkin_packfile *state, > + struct object_id *result_oid, > + const void *buf, size_t size, > + const char *path, enum object_type type, > + unsigned flags) > +{ > + struct bulk_checkin_source source; > + > + init_bulk_checkin_source_incore(&source, buf, size, path); > + > + return deflate_obj_to_pack(state, result_oid, &source, type, 0, flags); > +} > + > static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, > struct object_id *result_oid, > int fd, size_t size, > @@ -421,6 +484,18 @@ int index_blob_bulk_checkin(struct object_id *oid, > return status; > } > > +int index_blob_bulk_checkin_incore(struct object_id *oid, > + const void *buf, size_t size, > + const char *path, unsigned flags) > +{ > + int status = deflate_obj_to_pack_incore(&bulk_checkin_packfile, oid, > + buf, size, path, OBJ_BLOB, > + flags); > + if (!odb_transaction_nesting) > + 
flush_bulk_checkin_packfile(&bulk_checkin_packfile); > + return status; > +} > + > void begin_odb_transaction(void) > { > odb_transaction_nesting += 1; > diff --git a/bulk-checkin.h b/bulk-checkin.h > index aa7286a7b3..1b91daeaee 100644 > --- a/bulk-checkin.h > +++ b/bulk-checkin.h > @@ -13,6 +13,10 @@ int index_blob_bulk_checkin(struct object_id *oid, > int fd, size_t size, > const char *path, unsigned flags); > > +int index_blob_bulk_checkin_incore(struct object_id *oid, > + const void *buf, size_t size, > + const char *path, unsigned flags); > + > /* > * Tell the object database to optimize for adding > * multiple objects. end_odb_transaction must be called > -- > 2.42.0.425.g963d08ddb3.dirty >
Attachment:
signature.asc
Description: PGP signature