[PATCH v9 0/5] unpack large blobs in stream

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx>

Changes since v8:
* Rename "assert_no_loose ()" to "test_no_loose ()" in
  "t5328-unpack-large-objects.sh" (the test script was renumbered from
  t5329). Remove "assert_no_pack ()" and use "test_dir_is_empty" instead.

* Revert the changes to "create_tmpfile()"; error handling is now done in
  "start_loose_object_common()".

* Remove "finalize_object_file_with_mtime()", which seems to be overkill
  for "write_loose_object()" now.

* Drop the commit "object-file.c: remove the slash for directory_size()";
  it can be sent as a separate patch if necessary.

Han Xin (4):
  unpack-objects: low memory footprint for get_data() in dry_run mode
  object-file.c: refactor write_loose_object() to several steps
  object-file.c: add "stream_loose_object()" to handle large object
  unpack-objects: unpack_non_delta_entry() read data in a stream

Ævar Arnfjörð Bjarmason (1):
  object-file API: add a format_object_header() function

 builtin/index-pack.c            |   3 +-
 builtin/unpack-objects.c        | 110 ++++++++++++++--
 bulk-checkin.c                  |   4 +-
 cache.h                         |  21 +++
 http-push.c                     |   2 +-
 object-file.c                   | 220 +++++++++++++++++++++++++++-----
 object-store.h                  |   9 ++
 t/t5328-unpack-large-objects.sh |  65 ++++++++++
 8 files changed, 384 insertions(+), 50 deletions(-)
 create mode 100755 t/t5328-unpack-large-objects.sh

Range-diff against v8:
1:  bd34da5816 ! 1:  6a6c11ba93 unpack-objects: low memory footprint for get_data() in dry_run mode
    @@ builtin/unpack-objects.c: static void unpack_delta_entry(enum object_type type,
      		hi = nr;
      		while (lo < hi) {
     
    - ## t/t5329-unpack-large-objects.sh (new) ##
    + ## t/t5328-unpack-large-objects.sh (new) ##
     @@
     +#!/bin/sh
     +#
    -+# Copyright (c) 2021 Han Xin
    ++# Copyright (c) 2022 Han Xin
     +#
     +
     +test_description='git unpack-objects with large objects'
    @@ t/t5329-unpack-large-objects.sh (new)
     +	git init --bare dest.git
     +}
     +
    -+assert_no_loose () {
    ++test_no_loose () {
     +	glob=dest.git/objects/?? &&
     +	echo "$glob" >expect &&
     +	eval "echo $glob" >actual &&
     +	test_cmp expect actual
     +}
     +
    -+assert_no_pack () {
    -+	rmdir dest.git/objects/pack
    -+}
    -+
     +test_expect_success "create large objects (1.5 MB) and PACK" '
     +	test-tool genrandom foo 1500000 >big-blob &&
     +	test_commit --append foo big-blob &&
    @@ t/t5329-unpack-large-objects.sh (new)
     +test_expect_success 'unpack-objects works with memory limitation in dry-run mode' '
     +	prepare_dest &&
     +	git -C dest.git unpack-objects -n <test-$PACK.pack &&
    -+	assert_no_loose &&
    -+	assert_no_pack
    ++	test_no_loose &&
    ++	test_dir_is_empty dest.git/objects/pack
     +'
     +
     +test_done
2:  f9a4365a7d ! 2:  bab9e0402f object-file.c: refactor write_loose_object() to several steps
    @@ Commit message
         Signed-off-by: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx>
     
      ## object-file.c ##
    -@@ object-file.c: static void write_object_file_prepare(const struct git_hash_algo *algo,
    - 	algo->final_oid_fn(oid, &c);
    - }
    - 
    -+/*
    -+ * Move the just written object with proper mtime into its final resting place.
    -+ */
    -+static int finalize_object_file_with_mtime(const char *tmpfile,
    -+					   const char *filename,
    -+					   time_t mtime,
    -+					   unsigned flags)
    -+{
    -+	struct utimbuf utb;
    -+
    -+	if (mtime) {
    -+		utb.actime = mtime;
    -+		utb.modtime = mtime;
    -+		if (utime(tmpfile, &utb) < 0 && !(flags & HASH_SILENT))
    -+			warning_errno(_("failed utime() on %s"), tmpfile);
    -+	}
    -+	return finalize_object_file(tmpfile, filename);
    -+}
    -+
    - /*
    -  * Move the just written object into its final resting place.
    -  */
    -@@ object-file.c: static inline int directory_size(const char *filename)
    -  * We want to avoid cross-directory filename renames, because those
    -  * can have problems on various filesystems (FAT, NFS, Coda).
    -  */
    --static int create_tmpfile(struct strbuf *tmp, const char *filename)
    -+static int create_tmpfile(struct strbuf *tmp, const char *filename,
    -+			  unsigned flags)
    - {
    - 	int fd, dirlen = directory_size(filename);
    - 
    -@@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filename)
    - 	strbuf_add(tmp, filename, dirlen);
    - 	strbuf_addstr(tmp, "tmp_obj_XXXXXX");
    - 	fd = git_mkstemp_mode(tmp->buf, 0444);
    --	if (fd < 0 && dirlen && errno == ENOENT) {
    -+	do {
    -+		if (fd >= 0 || !dirlen || errno != ENOENT)
    -+			break;
    - 		/*
    - 		 * Make sure the directory exists; note that the contents
    - 		 * of the buffer are undefined after mkstemp returns an
     @@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filename)
    - 		strbuf_reset(tmp);
    - 		strbuf_add(tmp, filename, dirlen - 1);
    - 		if (mkdir(tmp->buf, 0777) && errno != EEXIST)
    --			return -1;
    -+			break;
    - 		if (adjust_shared_perm(tmp->buf))
    --			return -1;
    -+			break;
    - 
    - 		/* Try again */
    - 		strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
    - 		fd = git_mkstemp_mode(tmp->buf, 0444);
    -+	} while (0);
    -+
    -+	if (fd < 0 && !(flags & HASH_SILENT)) {
    -+		if (errno == EACCES)
    -+			return error(_("insufficient permission for adding an "
    -+				       "object to repository database %s"),
    -+				     get_object_directory());
    -+		else
    -+			return error_errno(_("unable to create temporary file"));
    - 	}
    -+
      	return fd;
      }
      
    @@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filenam
     +				     git_zstream *stream,
     +				     unsigned char *buf, size_t buflen,
     +				     git_hash_ctx *c,
    -+				     enum object_type type, size_t len,
     +				     char *hdr, int hdrlen)
     +{
     +	int fd;
     +
    -+	fd = create_tmpfile(tmp_file, filename, flags);
    -+	if (fd < 0)
    -+		return -1;
    ++	fd = create_tmpfile(tmp_file, filename);
    ++	if (fd < 0) {
    ++		if (flags & HASH_SILENT)
    ++			return -1;
    ++		else if (errno == EACCES)
    ++			return error(_("insufficient permission for adding "
    ++				       "an object to repository database %s"),
    ++				     get_object_directory());
    ++		else
    ++			return error_errno(
    ++				_("unable to create temporary file"));
    ++	}
     +
     +	/*  Setup zlib stream for compression */
     +	git_deflate_init(stream, zlib_compression_level);
    @@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
     +	 */
     +	fd = start_loose_object_common(&tmp_file, filename.buf, flags,
     +				       &stream, compressed, sizeof(compressed),
    -+				       &c, OBJ_NONE, 0, hdr, hdrlen);
    ++				       &c, hdr, hdrlen);
     +	if (fd < 0)
     +		return -1;
      
    @@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
      	if (!oideq(oid, &parano_oid))
      		die(_("confused by unstable object source data for %s"),
      		    oid_to_hex(oid));
    - 
    - 	close_loose_object(fd);
    - 
    --	if (mtime) {
    --		struct utimbuf utb;
    --		utb.actime = mtime;
    --		utb.modtime = mtime;
    --		if (utime(tmp_file.buf, &utb) < 0 &&
    --		    !(flags & HASH_SILENT))
    --			warning_errno(_("failed utime() on %s"), tmp_file.buf);
    --	}
    --
    --	return finalize_object_file(tmp_file.buf, filename.buf);
    -+	return finalize_object_file_with_mtime(tmp_file.buf, filename.buf,
    -+					       mtime, flags);
    - }
    - 
    - static int freshen_loose_object(const struct object_id *oid)
3:  18dd21122d < -:  ---------- object-file.c: remove the slash for directory_size()
4:  964715451b ! 3:  dd13614985 object-file.c: add "stream_loose_object()" to handle large object
    @@ object-file.c: static int freshen_packed_object(const struct object_id *oid)
     +	 */
     +	fd = start_loose_object_common(&tmp_file, filename.buf, 0,
     +				       &stream, compressed, sizeof(compressed),
    -+				       &c, OBJ_BLOB, len, hdr, hdrlen);
    ++				       &c, hdr, hdrlen);
     +	if (fd < 0) {
     +		err = -1;
     +		goto cleanup;
5:  3f620466fe ! 4:  cd84e27b08 unpack-objects: unpack_non_delta_entry() read data in a stream
    @@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type
      		write_object(nr, type, buf, size);
      }
     
    - ## t/t5329-unpack-large-objects.sh ##
    -@@ t/t5329-unpack-large-objects.sh: test_description='git unpack-objects with large objects'
    + ## t/t5328-unpack-large-objects.sh ##
    +@@ t/t5328-unpack-large-objects.sh: test_description='git unpack-objects with large objects'
      
      prepare_dest () {
      	test_when_finished "rm -rf dest.git" &&
    @@ t/t5329-unpack-large-objects.sh: test_description='git unpack-objects with large
     +	fi
      }
      
    - assert_no_loose () {
    -@@ t/t5329-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1MB' '
    + test_no_loose () {
    +@@ t/t5328-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1MB' '
      '
      
      test_expect_success 'unpack-objects failed under memory limitation' '
    @@ t/t5329-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1
     -	prepare_dest &&
     +	prepare_dest 2m &&
      	git -C dest.git unpack-objects -n <test-$PACK.pack &&
    - 	assert_no_loose &&
    - 	assert_no_pack
    + 	test_no_loose &&
    + 	test_dir_is_empty dest.git/objects/pack
      '
      
     +test_expect_success 'unpack big object in stream' '
     +	prepare_dest 1m &&
     +	git -C dest.git unpack-objects <test-$PACK.pack &&
    -+	assert_no_pack
    ++	test_dir_is_empty dest.git/objects/pack
     +'
     +
     +test_expect_success 'do not unpack existing large objects' '
     +	prepare_dest 1m &&
     +	git -C dest.git index-pack --stdin <test-$PACK.pack &&
     +	git -C dest.git unpack-objects <test-$PACK.pack &&
    -+	assert_no_loose
    ++	test_no_loose
     +'
     +
      test_done
6:  8073a3888d = 5:  59f0ad95c7 object-file API: add a format_object_header() function
-- 
2.34.1.52.gc288e771b4.agit.6.5.6




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux