Re: [PATCH v11 3/8] object-file.c: refactor write_loose_object() to several steps

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 19.03.22 um 01:23 schrieb Ævar Arnfjörð Bjarmason:
> From: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx>
>
> When writing a large blob using "write_loose_object()", we have to pass
> a buffer with the whole content of the blob, and this behavior will
> consume lots of memory and may cause OOM. We will introduce a stream
> version function ("stream_loose_object()") in later commit to resolve
> this issue.
>
> Before introducing that streaming function, do some refactoring on
> "write_loose_object()" to reuse code for both versions.
>
> Rewrite "write_loose_object()" as follows:
>
>  1. Figure out a path for the (temp) object file. This step is only
>     used in "write_loose_object()".
>
>  2. Move common steps for starting to write loose objects into a new
>     function "start_loose_object_common()".
>
>  3. Compress data.
>
>  4. Move common steps for ending zlib stream into a new function
>     "end_loose_object_common()".
>
>  5. Close fd and finalize the object file.
>
> Helped-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>
> Helped-by: Jiang Xin <zhiyou.jx@xxxxxxxxxxxxxxx>
> Signed-off-by: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>
> ---
>  object-file.c | 129 ++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 89 insertions(+), 40 deletions(-)
>
> diff --git a/object-file.c b/object-file.c
> index 4c140eda6bf..4fcaf7a36ce 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -1943,6 +1943,87 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename)
>  	return fd;
>  }
>
> +/**
> + * Common steps for loose object writers to start writing loose
> + * objects:
> + *
> + * - Create tmpfile for the loose object.
> + * - Setup zlib stream for compression.
> + * - Start to feed header to zlib stream.
> + *
> + * Returns a "fd", which should later be provided to
> + * end_loose_object_common().
> + */
> +static int start_loose_object_common(struct strbuf *tmp_file,
> +				     const char *filename, unsigned flags,
> +				     git_zstream *stream,
> +				     unsigned char *buf, size_t buflen,
> +				     git_hash_ctx *c,
> +				     char *hdr, int hdrlen)
> +{
> +	int fd;
> +
> +	fd = create_tmpfile(tmp_file, filename);
> +	if (fd < 0) {
> +		if (flags & HASH_SILENT)
> +			return -1;
> +		else if (errno == EACCES)
> +			return error(_("insufficient permission for adding "
> +				       "an object to repository database %s"),
> +				     get_object_directory());
> +		else
> +			return error_errno(
> +				_("unable to create temporary file"));
> +	}
> +
> +	/*  Setup zlib stream for compression */
> +	git_deflate_init(stream, zlib_compression_level);
> +	stream->next_out = buf;
> +	stream->avail_out = buflen;
> +	the_hash_algo->init_fn(c);
> +
> +	/*  Start to feed header to zlib stream */
> +	stream->next_in = (unsigned char *)hdr;
> +	stream->avail_in = hdrlen;
> +	while (git_deflate(stream, 0) == Z_OK)
> +		; /* nothing */
> +	the_hash_algo->update_fn(c, hdr, hdrlen);
> +
> +	return fd;
> +}
> +
> +/**
> + * Common steps for loose object writers to end writing loose objects:
> + *
> + * - End the compression of zlib stream.
> + * - Get the calculated oid to "parano_oid".
> + * - fsync() and close() the "fd"
> + */
> +static void end_loose_object_common(int fd, int ret, git_hash_ctx *c,
> +				    git_zstream *stream,
> +				    struct object_id *parano_oid,
> +				    const struct object_id *expected_oid,
> +				    const char *die_msg1_fmt,
> +				    const char *die_msg2_fmt)
> +{
> +	if (ret != Z_STREAM_END)
> +		die(_(die_msg1_fmt), ret, expected_oid);
> +	ret = git_deflate_end_gently(stream);
> +	if (ret != Z_OK)
> +		die(_(die_msg2_fmt), ret, expected_oid);

stream_loose_object(), added in patch 5, passes NULL as expected_oid,
but these die() messages need a valid value.  end_loose_object_common()
has more parameters than lines of code in its body.  Inline it to allow
fully custom messages?

> +	the_hash_algo->final_oid_fn(parano_oid, c);
> +
> +	/*
> +	 * We already did a write_buffer() to the "fd", let's fsync()
> +	 * and close().
> +	 *
> +	 * We might still die() on a subsequent sanity check, but
> +	 * let's not add to that confusion by not flushing any
> +	 * outstanding writes to disk first.
> +	 */
> +	close_loose_object(fd);
> +}
> +
>  static int write_loose_object(const struct object_id *oid, char *hdr,
>  			      int hdrlen, const void *buf, unsigned long len,
>  			      time_t mtime, unsigned flags)
> @@ -1957,28 +2038,11 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
>
>  	loose_object_path(the_repository, &filename, oid);
>
> -	fd = create_tmpfile(&tmp_file, filename.buf);
> -	if (fd < 0) {
> -		if (flags & HASH_SILENT)
> -			return -1;
> -		else if (errno == EACCES)
> -			return error(_("insufficient permission for adding an object to repository database %s"), get_object_directory());
> -		else
> -			return error_errno(_("unable to create temporary file"));
> -	}
> -
> -	/* Set it up */
> -	git_deflate_init(&stream, zlib_compression_level);
> -	stream.next_out = compressed;
> -	stream.avail_out = sizeof(compressed);
> -	the_hash_algo->init_fn(&c);
> -
> -	/* First header.. */
> -	stream.next_in = (unsigned char *)hdr;
> -	stream.avail_in = hdrlen;
> -	while (git_deflate(&stream, 0) == Z_OK)
> -		; /* nothing */
> -	the_hash_algo->update_fn(&c, hdr, hdrlen);
> +	fd = start_loose_object_common(&tmp_file, filename.buf, flags,
> +				       &stream, compressed, sizeof(compressed),
> +				       &c, hdr, hdrlen);
> +	if (fd < 0)
> +		return -1;
>
>  	/* Then the data itself.. */
>  	stream.next_in = (void *)buf;
> @@ -1993,24 +2057,9 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
>  		stream.avail_out = sizeof(compressed);
>  	} while (ret == Z_OK);
>
> -	if (ret != Z_STREAM_END)
> -		die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid),
> -		    ret);
> -	ret = git_deflate_end_gently(&stream);
> -	if (ret != Z_OK)
> -		die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
> -		    ret);
> -	the_hash_algo->final_oid_fn(&parano_oid, &c);
> -
> -	/*
> -	 * We already did a write_buffer() to the "fd", let's fsync()
> -	 * and close().
> -	 *
> -	 * We might still die() on a subsequent sanity check, but
> -	 * let's not add to that confusion by not flushing any
> -	 * outstanding writes to disk first.
> -	 */
> -	close_loose_object(fd);
> +	end_loose_object_common(fd, ret, &c, &stream, &parano_oid, oid,
> +				N_("unable to deflate new object %s (%d)"),
> +				N_("deflateEnd on object %s failed (%d)"));
>
>  	if (!oideq(oid, &parano_oid))
>  		die(_("confused by unstable object source data for %s"),




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux