Re: [PATCH v5 3/5] bulk-checkin: introduce `index_blob_bulk_checkin_incore()`

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Oct 23, 2023 at 06:45:01PM -0400, Taylor Blau wrote:
> Introduce `index_blob_bulk_checkin_incore()` which allows streaming
> arbitrary blob contents from memory into the bulk-checkin pack.
> 
> In order to support streaming from a location in memory, we must
> implement a new kind of bulk_checkin_source that does just that. These
> implementation in spread out across:

Nit: the commit message is a bit off here. Probably not worth a reroll
though.

>   - init_bulk_checkin_source_incore()
>   - bulk_checkin_source_read_incore()
>   - bulk_checkin_source_seek_incore()
> 
> Note that, unlike file descriptors, which manage their own offset
> internally, we have to keep track of how many bytes we've read out of
> the buffer, and make sure we don't read past the end of the buffer.
> 
> This will be useful in a couple of more commits in order to provide the
> `merge-tree` builtin with a mechanism to create a new pack containing
> any objects it created during the merge, instead of storing those
> objects individually as loose.
> 
> Similar to the existing `index_blob_bulk_checkin()` function, the
> entrypoint delegates to `deflate_obj_to_pack_incore()`. That function in
> turn delegates to deflate_obj_to_pack(), which is responsible for
> formatting the pack header and then deflating the contents into the
> pack.
> 
> Consistent with the rest of the bulk-checkin mechanism, there are no
> direct tests here. In future commits when we expose this new
> functionality via the `merge-tree` builtin, we will test it indirectly
> there.
> 
> Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx>
> ---
>  bulk-checkin.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  bulk-checkin.h |  4 +++
>  2 files changed, 79 insertions(+)
> 
> diff --git a/bulk-checkin.c b/bulk-checkin.c
> index 79776e679e..b728210bc7 100644
> --- a/bulk-checkin.c
> +++ b/bulk-checkin.c
> @@ -148,6 +148,10 @@ struct bulk_checkin_source {
>  		struct {
>  			int fd;
>  		} from_fd;
> +		struct {
> +			const void *buf;
> +			size_t nr_read;
> +		} incore;
>  	} data;
>  
>  	size_t size;
> @@ -166,6 +170,36 @@ static off_t bulk_checkin_source_seek_from_fd(struct bulk_checkin_source *source
>  	return lseek(source->data.from_fd.fd, offset, SEEK_SET);
>  }
>  
> +static off_t bulk_checkin_source_read_incore(struct bulk_checkin_source *source,
> +					     void *buf, size_t nr)
> +{
> +	const unsigned char *src = source->data.incore.buf;
> +
> +	if (source->data.incore.nr_read > source->size)
> +		BUG("read beyond bulk-checkin source buffer end "
> +		    "(%"PRIuMAX" > %"PRIuMAX")",
> +		    (uintmax_t)source->data.incore.nr_read,
> +		    (uintmax_t)source->size);
> +
> +	if (nr > source->size - source->data.incore.nr_read)
> +		nr = source->size - source->data.incore.nr_read;
> +
> +	src += source->data.incore.nr_read;
> +
> +	memcpy(buf, src, nr);
> +	source->data.incore.nr_read += nr;
> +	return nr;
> +}
> +
> +static off_t bulk_checkin_source_seek_incore(struct bulk_checkin_source *source,
> +					     off_t offset)
> +{
> +	if (!(0 <= offset && offset < source->size))
> +		return (off_t)-1;

At the risk of showing my own ignorance, but why is the cast here
necessary?

Patrick

> +	source->data.incore.nr_read = offset;
> +	return source->data.incore.nr_read;
> +}
> +
>  static void init_bulk_checkin_source_from_fd(struct bulk_checkin_source *source,
>  					     int fd, size_t size,
>  					     const char *path)
> @@ -181,6 +215,22 @@ static void init_bulk_checkin_source_from_fd(struct bulk_checkin_source *source,
>  	source->path = path;
>  }
>  
> +static void init_bulk_checkin_source_incore(struct bulk_checkin_source *source,
> +					    const void *buf, size_t size,
> +					    const char *path)
> +{
> +	memset(source, 0, sizeof(struct bulk_checkin_source));
> +
> +	source->read = bulk_checkin_source_read_incore;
> +	source->seek = bulk_checkin_source_seek_incore;
> +
> +	source->data.incore.buf = buf;
> +	source->data.incore.nr_read = 0;
> +
> +	source->size = size;
> +	source->path = path;
> +}
> +
>  /*
>   * Read the contents from 'source' for 'size' bytes, streaming it to the
>   * packfile in state while updating the hash in ctx. Signal a failure
> @@ -359,6 +409,19 @@ static int deflate_obj_to_pack(struct bulk_checkin_packfile *state,
>  	return 0;
>  }
>  
> +static int deflate_obj_to_pack_incore(struct bulk_checkin_packfile *state,
> +				       struct object_id *result_oid,
> +				       const void *buf, size_t size,
> +				       const char *path, enum object_type type,
> +				       unsigned flags)
> +{
> +	struct bulk_checkin_source source;
> +
> +	init_bulk_checkin_source_incore(&source, buf, size, path);
> +
> +	return deflate_obj_to_pack(state, result_oid, &source, type, 0, flags);
> +}
> +
>  static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
>  				struct object_id *result_oid,
>  				int fd, size_t size,
> @@ -421,6 +484,18 @@ int index_blob_bulk_checkin(struct object_id *oid,
>  	return status;
>  }
>  
> +int index_blob_bulk_checkin_incore(struct object_id *oid,
> +				   const void *buf, size_t size,
> +				   const char *path, unsigned flags)
> +{
> +	int status = deflate_obj_to_pack_incore(&bulk_checkin_packfile, oid,
> +						buf, size, path, OBJ_BLOB,
> +						flags);
> +	if (!odb_transaction_nesting)
> +		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
> +	return status;
> +}
> +
>  void begin_odb_transaction(void)
>  {
>  	odb_transaction_nesting += 1;
> diff --git a/bulk-checkin.h b/bulk-checkin.h
> index aa7286a7b3..1b91daeaee 100644
> --- a/bulk-checkin.h
> +++ b/bulk-checkin.h
> @@ -13,6 +13,10 @@ int index_blob_bulk_checkin(struct object_id *oid,
>  			    int fd, size_t size,
>  			    const char *path, unsigned flags);
>  
> +int index_blob_bulk_checkin_incore(struct object_id *oid,
> +				   const void *buf, size_t size,
> +				   const char *path, unsigned flags);
> +
>  /*
>   * Tell the object database to optimize for adding
>   * multiple objects. end_odb_transaction must be called
> -- 
> 2.42.0.425.g963d08ddb3.dirty
> 

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux