Am 19.03.22 um 01:23 schrieb Ævar Arnfjörð Bjarmason: > From: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx> > > When writing a large blob using "write_loose_object()", we have to pass > a buffer with the whole content of the blob, and this behavior will > consume lots of memory and may cause OOM. We will introduce a stream > version function ("stream_loose_object()") in later commit to resolve > this issue. > > Before introducing that streaming function, do some refactoring on > "write_loose_object()" to reuse code for both versions. > > Rewrite "write_loose_object()" as follows: > > 1. Figure out a path for the (temp) object file. This step is only > used in "write_loose_object()". > > 2. Move common steps for starting to write loose objects into a new > function "start_loose_object_common()". > > 3. Compress data. > > 4. Move common steps for ending zlib stream into a new function > "end_loose_object_common()". > > 5. Close fd and finalize the object file. > > Helped-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> > Helped-by: Jiang Xin <zhiyou.jx@xxxxxxxxxxxxxxx> > Signed-off-by: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx> > Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> > --- > object-file.c | 129 ++++++++++++++++++++++++++++++++++---------------- > 1 file changed, 89 insertions(+), 40 deletions(-) > > diff --git a/object-file.c b/object-file.c > index 4c140eda6bf..4fcaf7a36ce 100644 > --- a/object-file.c > +++ b/object-file.c > @@ -1943,6 +1943,87 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename) > return fd; > } > > +/** > + * Common steps for loose object writers to start writing loose > + * objects: > + * > + * - Create tmpfile for the loose object. > + * - Setup zlib stream for compression. > + * - Start to feed header to zlib stream. > + * > + * Returns a "fd", which should later be provided to > + * end_loose_object_common(). > + */ > +static int start_loose_object_common(struct strbuf *tmp_file, > + const char *filename, unsigned flags, > + git_zstream *stream, > + unsigned char *buf, size_t buflen, > + git_hash_ctx *c, > + char *hdr, int hdrlen) > +{ > + int fd; > + > + fd = create_tmpfile(tmp_file, filename); > + if (fd < 0) { > + if (flags & HASH_SILENT) > + return -1; > + else if (errno == EACCES) > + return error(_("insufficient permission for adding " > + "an object to repository database %s"), > + get_object_directory()); > + else > + return error_errno( > + _("unable to create temporary file")); > + } > + > + /* Setup zlib stream for compression */ > + git_deflate_init(stream, zlib_compression_level); > + stream->next_out = buf; > + stream->avail_out = buflen; > + the_hash_algo->init_fn(c); > + > + /* Start to feed header to zlib stream */ > + stream->next_in = (unsigned char *)hdr; > + stream->avail_in = hdrlen; > + while (git_deflate(stream, 0) == Z_OK) > + ; /* nothing */ > + the_hash_algo->update_fn(c, hdr, hdrlen); > + > + return fd; > +} > + > +/** > + * Common steps for loose object writers to end writing loose objects: > + * > + * - End the compression of zlib stream. > + * - Get the calculated oid to "parano_oid". > + * - fsync() and close() the "fd" > + */ > +static void end_loose_object_common(int fd, int ret, git_hash_ctx *c, > + git_zstream *stream, > + struct object_id *parano_oid, > + const struct object_id *expected_oid, > + const char *die_msg1_fmt, > + const char *die_msg2_fmt) > +{ > + if (ret != Z_STREAM_END) > + die(_(die_msg1_fmt), ret, expected_oid); > + ret = git_deflate_end_gently(stream); > + if (ret != Z_OK) > + die(_(die_msg2_fmt), ret, expected_oid); stream_loose_object(), added in patch 5, passes NULL as expected_oid, but these die() messages need a valid value. end_loose_object_common() has more parameters than lines of code in its body. Inline it to allow fully custom messages? > + the_hash_algo->final_oid_fn(parano_oid, c); > + > + /* > + * We already did a write_buffer() to the "fd", let's fsync() > + * and close(). > + * > + * We might still die() on a subsequent sanity check, but > + * let's not add to that confusion by not flushing any > + * outstanding writes to disk first. > + */ > + close_loose_object(fd); > +} > + > static int write_loose_object(const struct object_id *oid, char *hdr, > int hdrlen, const void *buf, unsigned long len, > time_t mtime, unsigned flags) > @@ -1957,28 +2038,11 @@ static int write_loose_object(const struct object_id *oid, char *hdr, > > loose_object_path(the_repository, &filename, oid); > > - fd = create_tmpfile(&tmp_file, filename.buf); > - if (fd < 0) { > - if (flags & HASH_SILENT) > - return -1; > - else if (errno == EACCES) > - return error(_("insufficient permission for adding an object to repository database %s"), get_object_directory()); > - else > - return error_errno(_("unable to create temporary file")); > - } > - > - /* Set it up */ > - git_deflate_init(&stream, zlib_compression_level); > - stream.next_out = compressed; > - stream.avail_out = sizeof(compressed); > - the_hash_algo->init_fn(&c); > - > - /* First header.. */ > - stream.next_in = (unsigned char *)hdr; > - stream.avail_in = hdrlen; > - while (git_deflate(&stream, 0) == Z_OK) > - ; /* nothing */ > - the_hash_algo->update_fn(&c, hdr, hdrlen); > + fd = start_loose_object_common(&tmp_file, filename.buf, flags, > + &stream, compressed, sizeof(compressed), > + &c, hdr, hdrlen); > + if (fd < 0) > + return -1; > > /* Then the data itself.. */ > stream.next_in = (void *)buf; > @@ -1993,24 +2057,9 @@ static int write_loose_object(const struct object_id *oid, char *hdr, > stream.avail_out = sizeof(compressed); > } while (ret == Z_OK); > > - if (ret != Z_STREAM_END) > - die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid), > - ret); > - ret = git_deflate_end_gently(&stream); > - if (ret != Z_OK) > - die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid), > - ret); > - the_hash_algo->final_oid_fn(¶no_oid, &c); > - > - /* > - * We already did a write_buffer() to the "fd", let's fsync() > - * and close(). > - * > - * We might still die() on a subsequent sanity check, but > - * let's not add to that confusion by not flushing any > - * outstanding writes to disk first. > - */ > - close_loose_object(fd); > + end_loose_object_common(fd, ret, &c, &stream, ¶no_oid, oid, > + N_("unable to deflate new object %s (%d)"), > + N_("deflateEnd on object %s failed (%d)")); > > if (!oideq(oid, ¶no_oid)) > die(_("confused by unstable object source data for %s"),