From: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx> When writing a large blob using "write_loose_object()", we have to pass a buffer with the whole content of the blob, and this behavior will consume lots of memory and may cause OOM. We will introduce a stream version function ("stream_loose_object()") in a later commit to resolve this issue. Before introducing a stream version function for writing loose objects, do some refactoring on "write_loose_object()" to reuse code for both versions. Rewrite "write_loose_object()" as follows: 1. Figure out a path for the (temp) object file. This step is only used in "write_loose_object()". 2. Move common steps for starting to write loose objects into a new function "start_loose_object_common()". 3. Compress data. 4. Move common steps for ending zlib stream into a new function "end_loose_object_common()". 5. Close fd and finalize the object file. Helped-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> Helped-by: Jiang Xin <zhiyou.jx@xxxxxxxxxxxxxxx> Signed-off-by: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx> --- object-file.c | 149 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 44 deletions(-) diff --git a/object-file.c b/object-file.c index eb1426f98c..5d163081b1 100644 --- a/object-file.c +++ b/object-file.c @@ -1743,6 +1743,25 @@ static void write_object_file_prepare(const struct git_hash_algo *algo, algo->final_oid_fn(oid, &c); } +/* + * Move the just written object with proper mtime into its final resting place. + */ +static int finalize_object_file_with_mtime(const char *tmpfile, + const char *filename, + time_t mtime, + unsigned flags) +{ + struct utimbuf utb; + + if (mtime) { + utb.actime = mtime; + utb.modtime = mtime; + if (utime(tmpfile, &utb) < 0 && !(flags & HASH_SILENT)) + warning_errno(_("failed utime() on %s"), tmpfile); + } + return finalize_object_file(tmpfile, filename); +} + /* * Move the just written object into its final resting place. 
*/ @@ -1828,7 +1847,8 @@ static inline int directory_size(const char *filename) * We want to avoid cross-directory filename renames, because those * can have problems on various filesystems (FAT, NFS, Coda). */ -static int create_tmpfile(struct strbuf *tmp, const char *filename) +static int create_tmpfile(struct strbuf *tmp, const char *filename, + unsigned flags) { int fd, dirlen = directory_size(filename); @@ -1836,7 +1856,9 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename) strbuf_add(tmp, filename, dirlen); strbuf_addstr(tmp, "tmp_obj_XXXXXX"); fd = git_mkstemp_mode(tmp->buf, 0444); - if (fd < 0 && dirlen && errno == ENOENT) { + do { + if (fd >= 0 || !dirlen || errno != ENOENT) + break; /* * Make sure the directory exists; note that the contents * of the buffer are undefined after mkstemp returns an @@ -1846,17 +1868,72 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename) strbuf_reset(tmp); strbuf_add(tmp, filename, dirlen - 1); if (mkdir(tmp->buf, 0777) && errno != EEXIST) - return -1; + break; if (adjust_shared_perm(tmp->buf)) - return -1; + break; /* Try again */ strbuf_addstr(tmp, "/tmp_obj_XXXXXX"); fd = git_mkstemp_mode(tmp->buf, 0444); + } while (0); + + if (fd < 0 && !(flags & HASH_SILENT)) { + if (errno == EACCES) + return error(_("insufficient permission for adding an " + "object to repository database %s"), + get_object_directory()); + else + return error_errno(_("unable to create temporary file")); } + return fd; } +static int start_loose_object_common(struct strbuf *tmp_file, + const char *filename, unsigned flags, + git_zstream *stream, + unsigned char *buf, size_t buflen, + git_hash_ctx *c, + enum object_type type, size_t len, + char *hdr, int hdrlen) +{ + int fd; + + fd = create_tmpfile(tmp_file, filename, flags); + if (fd < 0) + return -1; + + /* Setup zlib stream for compression */ + git_deflate_init(stream, zlib_compression_level); + stream->next_out = buf; + stream->avail_out = buflen; + 
the_hash_algo->init_fn(c); + + /* Start to feed header to zlib stream */ + stream->next_in = (unsigned char *)hdr; + stream->avail_in = hdrlen; + while (git_deflate(stream, 0) == Z_OK) + ; /* nothing */ + the_hash_algo->update_fn(c, hdr, hdrlen); + + return fd; +} + +static void end_loose_object_common(int ret, git_hash_ctx *c, + git_zstream *stream, + struct object_id *parano_oid, + const struct object_id *expected_oid, + const char *die_msg1_fmt, + const char *die_msg2_fmt) +{ + if (ret != Z_STREAM_END) + die(_(die_msg1_fmt), oid_to_hex(expected_oid), ret); + ret = git_deflate_end_gently(stream); + if (ret != Z_OK) + die(_(die_msg2_fmt), oid_to_hex(expected_oid), ret); + the_hash_algo->final_oid_fn(parano_oid, c); +} + static int write_loose_object(const struct object_id *oid, char *hdr, int hdrlen, const void *buf, unsigned long len, time_t mtime, unsigned flags) @@ -1871,28 +1948,18 @@ static int write_loose_object(const struct object_id *oid, char *hdr, loose_object_path(the_repository, &filename, oid); - fd = create_tmpfile(&tmp_file, filename.buf); - if (fd < 0) { - if (flags & HASH_SILENT) - return -1; - else if (errno == EACCES) - return error(_("insufficient permission for adding an object to repository database %s"), get_object_directory()); - else - return error_errno(_("unable to create temporary file")); - } - - /* Set it up */ - git_deflate_init(&stream, zlib_compression_level); - stream.next_out = compressed; - stream.avail_out = sizeof(compressed); - the_hash_algo->init_fn(&c); - - /* First header.. */ - stream.next_in = (unsigned char *)hdr; - stream.avail_in = hdrlen; - while (git_deflate(&stream, 0) == Z_OK) - ; /* nothing */ - the_hash_algo->update_fn(&c, hdr, hdrlen); + /* Common steps for write_loose_object and stream_loose_object to + * start writing loose object: + * + * - Create tmpfile for the loose object. + * - Setup zlib stream for compression. + * - Start to feed header to zlib stream. 
+ */ + fd = start_loose_object_common(&tmp_file, filename.buf, flags, + &stream, compressed, sizeof(compressed), + &c, OBJ_NONE, 0, hdr, hdrlen); + if (fd < 0) + return -1; /* Then the data itself.. */ stream.next_in = (void *)buf; @@ -1907,30 +1974,24 @@ static int write_loose_object(const struct object_id *oid, char *hdr, stream.avail_out = sizeof(compressed); } while (ret == Z_OK); - if (ret != Z_STREAM_END) - die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid), - ret); - ret = git_deflate_end_gently(&stream); - if (ret != Z_OK) - die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid), - ret); - the_hash_algo->final_oid_fn(&parano_oid, &c); + /* Common steps for write_loose_object and stream_loose_object to + * end writing loose object: + * + * - End the compression of zlib stream. + * - Get the calculated oid to "parano_oid". + */ + end_loose_object_common(ret, &c, &stream, &parano_oid, oid, + N_("unable to deflate new object %s (%d)"), + N_("deflateEnd on object %s failed (%d)")); + if (!oideq(oid, &parano_oid)) die(_("confused by unstable object source data for %s"), oid_to_hex(oid)); close_loose_object(fd); - if (mtime) { - struct utimbuf utb; - utb.actime = mtime; - utb.modtime = mtime; - if (utime(tmp_file.buf, &utb) < 0 && - !(flags & HASH_SILENT)) - warning_errno(_("failed utime() on %s"), tmp_file.buf); - } - - return finalize_object_file(tmp_file.buf, filename.buf); + return finalize_object_file_with_mtime(tmp_file.buf, filename.buf, + mtime, flags); } static int freshen_loose_object(const struct object_id *oid) -- 2.34.1.52.gc288e771b4.agit.6.5.6