From: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx> Changes since v8: * Rename "assert_no_loose ()" to "test_no_loose ()" in "t5329-unpack-large-objects.sh". Remove "assert_no_pack ()" and use "test_dir_is_empty" instead. * Revert the changes to "create_tmpfile()"; error handling is now done in "start_loose_object_common()". * Remove "finalize_object_file_with_mtime()", which now seems to be overkill for "write_loose_object()". * Remove the commit "object-file.c: remove the slash for directory_size()"; it can be sent as a separate patch if necessary. Han Xin (4): unpack-objects: low memory footprint for get_data() in dry_run mode object-file.c: refactor write_loose_object() to several steps object-file.c: add "stream_loose_object()" to handle large object unpack-objects: unpack_non_delta_entry() read data in a stream Ævar Arnfjörð Bjarmason (1): object-file API: add a format_object_header() function builtin/index-pack.c | 3 +- builtin/unpack-objects.c | 110 ++++++++++++++-- bulk-checkin.c | 4 +- cache.h | 21 +++ http-push.c | 2 +- object-file.c | 220 +++++++++++++++++++++++++++----- object-store.h | 9 ++ t/t5328-unpack-large-objects.sh | 65 ++++++++++ 8 files changed, 384 insertions(+), 50 deletions(-) create mode 100755 t/t5328-unpack-large-objects.sh Range-diff against v8: 1: bd34da5816 ! 1: 6a6c11ba93 unpack-objects: low memory footprint for get_data() in dry_run mode @@ builtin/unpack-objects.c: static void unpack_delta_entry(enum object_type type, hi = nr; while (lo < hi) { - ## t/t5329-unpack-large-objects.sh (new) ## + ## t/t5328-unpack-large-objects.sh (new) ## @@ +#!/bin/sh +# -+# Copyright (c) 2021 Han Xin ++# Copyright (c) 2022 Han Xin +# + +test_description='git unpack-objects with large objects' @@ t/t5329-unpack-large-objects.sh (new) + git init --bare dest.git +} + -+assert_no_loose () { ++test_no_loose () { + glob=dest.git/objects/?? 
&& + echo "$glob" >expect && + eval "echo $glob" >actual && + test_cmp expect actual +} + -+assert_no_pack () { -+ rmdir dest.git/objects/pack -+} -+ +test_expect_success "create large objects (1.5 MB) and PACK" ' + test-tool genrandom foo 1500000 >big-blob && + test_commit --append foo big-blob && @@ t/t5329-unpack-large-objects.sh (new) +test_expect_success 'unpack-objects works with memory limitation in dry-run mode' ' + prepare_dest && + git -C dest.git unpack-objects -n <test-$PACK.pack && -+ assert_no_loose && -+ assert_no_pack ++ test_no_loose && ++ test_dir_is_empty dest.git/objects/pack +' + +test_done 2: f9a4365a7d ! 2: bab9e0402f object-file.c: refactor write_loose_object() to several steps @@ Commit message Signed-off-by: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx> ## object-file.c ## -@@ object-file.c: static void write_object_file_prepare(const struct git_hash_algo *algo, - algo->final_oid_fn(oid, &c); - } - -+/* -+ * Move the just written object with proper mtime into its final resting place. -+ */ -+static int finalize_object_file_with_mtime(const char *tmpfile, -+ const char *filename, -+ time_t mtime, -+ unsigned flags) -+{ -+ struct utimbuf utb; -+ -+ if (mtime) { -+ utb.actime = mtime; -+ utb.modtime = mtime; -+ if (utime(tmpfile, &utb) < 0 && !(flags & HASH_SILENT)) -+ warning_errno(_("failed utime() on %s"), tmpfile); -+ } -+ return finalize_object_file(tmpfile, filename); -+} -+ - /* - * Move the just written object into its final resting place. - */ -@@ object-file.c: static inline int directory_size(const char *filename) - * We want to avoid cross-directory filename renames, because those - * can have problems on various filesystems (FAT, NFS, Coda). 
- */ --static int create_tmpfile(struct strbuf *tmp, const char *filename) -+static int create_tmpfile(struct strbuf *tmp, const char *filename, -+ unsigned flags) - { - int fd, dirlen = directory_size(filename); - -@@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filename) - strbuf_add(tmp, filename, dirlen); - strbuf_addstr(tmp, "tmp_obj_XXXXXX"); - fd = git_mkstemp_mode(tmp->buf, 0444); -- if (fd < 0 && dirlen && errno == ENOENT) { -+ do { -+ if (fd >= 0 || !dirlen || errno != ENOENT) -+ break; - /* - * Make sure the directory exists; note that the contents - * of the buffer are undefined after mkstemp returns an @@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filename) - strbuf_reset(tmp); - strbuf_add(tmp, filename, dirlen - 1); - if (mkdir(tmp->buf, 0777) && errno != EEXIST) -- return -1; -+ break; - if (adjust_shared_perm(tmp->buf)) -- return -1; -+ break; - - /* Try again */ - strbuf_addstr(tmp, "/tmp_obj_XXXXXX"); - fd = git_mkstemp_mode(tmp->buf, 0444); -+ } while (0); -+ -+ if (fd < 0 && !(flags & HASH_SILENT)) { -+ if (errno == EACCES) -+ return error(_("insufficient permission for adding an " -+ "object to repository database %s"), -+ get_object_directory()); -+ else -+ return error_errno(_("unable to create temporary file")); - } -+ return fd; } @@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filenam + git_zstream *stream, + unsigned char *buf, size_t buflen, + git_hash_ctx *c, -+ enum object_type type, size_t len, + char *hdr, int hdrlen) +{ + int fd; + -+ fd = create_tmpfile(tmp_file, filename, flags); -+ if (fd < 0) -+ return -1; ++ fd = create_tmpfile(tmp_file, filename); ++ if (fd < 0) { ++ if (flags & HASH_SILENT) ++ return -1; ++ else if (errno == EACCES) ++ return error(_("insufficient permission for adding " ++ "an object to repository database %s"), ++ get_object_directory()); ++ else ++ return error_errno( ++ _("unable to create temporary file")); ++ } + + /* 
Setup zlib stream for compression */ + git_deflate_init(stream, zlib_compression_level); @@ object-file.c: static int write_loose_object(const struct object_id *oid, char * + */ + fd = start_loose_object_common(&tmp_file, filename.buf, flags, + &stream, compressed, sizeof(compressed), -+ &c, OBJ_NONE, 0, hdr, hdrlen); ++ &c, hdr, hdrlen); + if (fd < 0) + return -1; @@ object-file.c: static int write_loose_object(const struct object_id *oid, char * if (!oideq(oid, &parano_oid)) die(_("confused by unstable object source data for %s"), oid_to_hex(oid)); - - close_loose_object(fd); - -- if (mtime) { -- struct utimbuf utb; -- utb.actime = mtime; -- utb.modtime = mtime; -- if (utime(tmp_file.buf, &utb) < 0 && -- !(flags & HASH_SILENT)) -- warning_errno(_("failed utime() on %s"), tmp_file.buf); -- } -- -- return finalize_object_file(tmp_file.buf, filename.buf); -+ return finalize_object_file_with_mtime(tmp_file.buf, filename.buf, -+ mtime, flags); - } - - static int freshen_loose_object(const struct object_id *oid) 3: 18dd21122d < -: ---------- object-file.c: remove the slash for directory_size() 4: 964715451b ! 3: dd13614985 object-file.c: add "stream_loose_object()" to handle large object @@ object-file.c: static int freshen_packed_object(const struct object_id *oid) + */ + fd = start_loose_object_common(&tmp_file, filename.buf, 0, + &stream, compressed, sizeof(compressed), -+ &c, OBJ_BLOB, len, hdr, hdrlen); ++ &c, hdr, hdrlen); + if (fd < 0) { + err = -1; + goto cleanup; 5: 3f620466fe ! 
4: cd84e27b08 unpack-objects: unpack_non_delta_entry() read data in a stream @@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type write_object(nr, type, buf, size); } - ## t/t5329-unpack-large-objects.sh ## -@@ t/t5329-unpack-large-objects.sh: test_description='git unpack-objects with large objects' + ## t/t5328-unpack-large-objects.sh ## +@@ t/t5328-unpack-large-objects.sh: test_description='git unpack-objects with large objects' prepare_dest () { test_when_finished "rm -rf dest.git" && @@ t/t5329-unpack-large-objects.sh: test_description='git unpack-objects with large + fi } - assert_no_loose () { -@@ t/t5329-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1MB' ' + test_no_loose () { +@@ t/t5328-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1MB' ' ' test_expect_success 'unpack-objects failed under memory limitation' ' @@ t/t5329-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1 - prepare_dest && + prepare_dest 2m && git -C dest.git unpack-objects -n <test-$PACK.pack && - assert_no_loose && - assert_no_pack + test_no_loose && + test_dir_is_empty dest.git/objects/pack ' +test_expect_success 'unpack big object in stream' ' + prepare_dest 1m && + git -C dest.git unpack-objects <test-$PACK.pack && -+ assert_no_pack ++ test_dir_is_empty dest.git/objects/pack +' + +test_expect_success 'do not unpack existing large objects' ' + prepare_dest 1m && + git -C dest.git index-pack --stdin <test-$PACK.pack && + git -C dest.git unpack-objects <test-$PACK.pack && -+ assert_no_loose ++ test_no_loose +' + test_done 6: 8073a3888d = 5: 59f0ad95c7 object-file API: add a format_object_header() function -- 2.34.1.52.gc288e771b4.agit.6.5.6