Re: [PATCH v7 4/5] object-file.c: add "write_stream_object_file()" to support read in stream

Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> · Tue, 21 Dec 2021 15:20:22 +0100

On Tue, Dec 21 2021, Han Xin wrote:

> From: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx>
> [...]
> +int write_stream_object_file(struct input_stream *in_stream, size_t len,
> +			     enum object_type type, time_t mtime,
> +			     unsigned flags, struct object_id *oid)
> +{
> +	int fd, ret, flush = 0;
> +	unsigned char compressed[4096];
> +	git_zstream stream;
> +	git_hash_ctx c;
> +	struct object_id parano_oid;
> +	static struct strbuf tmp_file = STRBUF_INIT;
> +	static struct strbuf filename = STRBUF_INIT;
> +	int dirlen;
> +	char hdr[MAX_HEADER_LEN];
> +	int hdrlen = sizeof(hdr);
> +
> +	/* Since "filename" is defined as static, it will be reused. So reset it
> +	 * first before using it. */
> +	strbuf_reset(&filename);
> +	/* When oid is not determined, save tmp file to odb path. */
> +	strbuf_addf(&filename, "%s/", get_object_directory());

I realize this is somewhat following the pattern of code you moved
around earlier, but FWIW I think these sorts of comments are really
over-doing it. I.e. we try not to comment on things that are obvious
from the code itself.

Also René's comment on v6 still applies here:

    Given that this function is only used for huge objects I think making
    the strbufs non-static and releasing them is the best choice here.

I think just making them non-static and doing a strbuf_release() as he
suggested is best here.

> +
> +	fd = create_tmpfile(&tmp_file, filename.buf, flags);
> +	if (fd < 0)
> +		return -1;
> +
> +	hdrlen = format_object_header(hdr, hdrlen, type, len);
> +
> +	/* Set it up and write header */
> +	setup_stream_and_header(&stream, compressed, sizeof(compressed),
> +				&c, hdr, hdrlen);
> +
> +	/* Then the data itself.. */
> +	do {
> +		unsigned char *in0 = stream.next_in;
> +		if (!stream.avail_in) {
> +			const void *in = in_stream->read(in_stream, &stream.avail_in);
> +			stream.next_in = (void *)in;
> +			in0 = (unsigned char *)in;
> +			/* All data has been read. */
> +			if (len + hdrlen == stream.total_in + stream.avail_in)
> +				flush = Z_FINISH;
> +		}
> +		ret = git_deflate(&stream, flush);
> +		the_hash_algo->update_fn(&c, in0, stream.next_in - in0);
> +		if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
> +			die(_("unable to write loose object file"));
> +		stream.next_out = compressed;
> +		stream.avail_out = sizeof(compressed);
> +	} while (ret == Z_OK || ret == Z_BUF_ERROR);
> +
> +	if (ret != Z_STREAM_END)
> +		die(_("unable to deflate new object streamingly (%d)"), ret);
> +	ret = git_deflate_end_gently(&stream);
> +	if (ret != Z_OK)
> +		die(_("deflateEnd on object streamingly failed (%d)"), ret);

nit: let's say "unable to stream deflate new object" or something, and
not use the confusing (invented?) word "streamingly".

> +	the_hash_algo->final_oid_fn(&parano_oid, &c);
> +
> +	close_loose_object(fd);
> +
> +	oidcpy(oid, &parano_oid);

I see there's still quite a bit of duplication between this and
write_loose_object(), but maybe it's not easy to factor out.

> +	if (freshen_packed_object(oid) || freshen_loose_object(oid)) {
> +		unlink_or_warn(tmp_file.buf);
> +		return 0;
> +	}
> +
> +	loose_object_path(the_repository, &filename, oid);
> +
> +	/* We finally know the object path, and create the missing dir. */
> +	dirlen = directory_size(filename.buf);
> +	if (dirlen) {
> +		struct strbuf dir = STRBUF_INIT;
> +		strbuf_add(&dir, filename.buf, dirlen - 1);

Just a minor nit, but I noticed we could have this on top, i.e. this
"remove the slash" is now what 1/3 users of it want:

	 object-file.c | 10 +++++-----
	 1 file changed, 5 insertions(+), 5 deletions(-)

	diff --git a/object-file.c b/object-file.c
	index 77a3217fd0e..b0dea96906e 100644
	--- a/object-file.c
	+++ b/object-file.c
	@@ -1878,13 +1878,13 @@ static void close_loose_object(int fd)
	 		die_errno(_("error when closing loose object file"));
	 }

	-/* Size of directory component, including the ending '/' */
	+/* Size of directory component, excluding the ending '/' */
	 static inline int directory_size(const char *filename)
	 {
	 	const char *s = strrchr(filename, '/');
	 	if (!s)
	 		return 0;
	-	return s - filename + 1;
	+	return s - filename;
	 }

	 /*
	@@ -1901,7 +1901,7 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename,

	 	strbuf_reset(tmp);
	 	strbuf_add(tmp, filename, dirlen);
	-	strbuf_addstr(tmp, "tmp_obj_XXXXXX");
	+	strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
	 	fd = git_mkstemp_mode(tmp->buf, 0444);
	 	do {
	 		if (fd >= 0 || !dirlen || errno != ENOENT)
	@@ -1913,7 +1913,7 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename,
	 		 * scratch.
	 		 */
	 		strbuf_reset(tmp);
	-		strbuf_add(tmp, filename, dirlen - 1);
	+		strbuf_add(tmp, filename, dirlen);
	 		if (mkdir(tmp->buf, 0777) && errno != EEXIST)
	 			break;
	 		if (adjust_shared_perm(tmp->buf))
	@@ -2100,7 +2100,7 @@ int write_stream_object_file(struct input_stream *in_stream, size_t len,
	 	dirlen = directory_size(filename.buf);
	 	if (dirlen) {
	 		struct strbuf dir = STRBUF_INIT;
	-		strbuf_add(&dir, filename.buf, dirlen - 1);
	+		strbuf_add(&dir, filename.buf, dirlen);

	 		if (mkdir_in_gitdir(dir.buf) && errno != EEXIST) {
	 			ret = error_errno(_("unable to create directory %s"), dir.buf);

On my platform (Linux) it's not needed either way: a "mkdir foo" works
as well as "mkdir foo/", but maybe some OSes have trouble with it.