From: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx> Changes since v5: * Refactor write_loose_object() to reuse in stream version, as suggested by Ævar Arnfjörð Bjarmason [1]. * Add a new test case to t5590-unpack-non-delta-objects to cover the case of unpacking existing objects. * Fix code formatting in unpack-objects.c, as suggested by Ævar Arnfjörð Bjarmason [2]. 1. https://lore.kernel.org/git/211213.86bl1l9bfz.gmgdl@xxxxxxxxxxxxxxxxxxx/ 2. https://lore.kernel.org/git/211213.867dc8ansq.gmgdl@xxxxxxxxxxxxxxxxxxx/ Han Xin (6): object-file.c: release strbuf in write_loose_object() object-file.c: refactor object header generation into a function object-file.c: refactor write_loose_object() to reuse in stream version object-file.c: make "write_object_file_flags()" to support read in stream unpack-objects.c: add dry_run mode for get_data() unpack-objects: unpack_non_delta_entry() read data in a stream Documentation/config/core.txt | 11 ++ builtin/unpack-objects.c | 94 ++++++++++++- cache.h | 2 + config.c | 5 + environment.c | 1 + object-file.c | 207 +++++++++++++++++++++++----- object-store.h | 5 + t/t5590-unpack-non-delta-objects.sh | 87 ++++++++++++ 8 files changed, 370 insertions(+), 42 deletions(-) create mode 100755 t/t5590-unpack-non-delta-objects.sh Range-diff against v5: 1: f3595e68cc < -: ---------- object-file: refactor write_loose_object() to support read from stream 2: c25fdd1fe5 < -: ---------- object-file.c: handle undetermined oid in write_loose_object() 3: ed226f2f9f < -: ---------- object-file.c: read stream in a loop in write_loose_object() -: ---------- > 1: 59d35dac5f object-file.c: release strbuf in write_loose_object() -: ---------- > 2: 2174a6cbad object-file.c: refactor object header generation into a function -: ---------- > 3: 8a704ecc59 object-file.c: refactor write_loose_object() to reuse in stream version -: ---------- > 4: 96f05632a2 object-file.c: make "write_object_file_flags()" to support read in stream 4: 2f91e540f6 ! 
5: 1acbb6e849 unpack-objects.c: add dry_run mode for get_data() @@ builtin/unpack-objects.c: static void use(int bytes) { git_zstream stream; - void *buf = xmallocz(size); -+ unsigned long bufsize = dry_run ? 8192 : size; -+ void *buf = xmallocz(bufsize); ++ unsigned long bufsize; ++ void *buf; memset(&stream, 0, sizeof(stream)); ++ if (dry_run && size > 8192) ++ bufsize = 8192; ++ else ++ bufsize = size; ++ buf = xmallocz(bufsize); stream.next_out = buf; - stream.avail_out = size; 5: 7698938eac < -: ---------- object-file.c: make "write_object_file_flags()" to support "HASH_STREAM" 6: 92d69cb84a ! 6: 476aaba527 unpack-objects: unpack_non_delta_entry() read data in a stream @@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type + int status; +}; + -+static const void *feed_input_zstream(struct input_stream *in_stream, unsigned long *readlen) ++static const void *feed_input_zstream(const struct input_stream *in_stream, ++ unsigned long *readlen) +{ + struct input_zstream_data *data = in_stream->data; + git_zstream *zstream = data->zstream; @@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type + .read = feed_input_zstream, + .data = &data, + }; -+ int ret; + + memset(&zstream, 0, sizeof(zstream)); + memset(&data, 0, sizeof(data)); + data.zstream = &zstream; + git_inflate_init(&zstream); + -+ if ((ret = write_object_file_flags(&in_stream, size, type_name(OBJ_BLOB) ,&obj_list[nr].oid, HASH_STREAM))) -+ die(_("failed to write object in stream %d"), ret); ++ if (write_object_file_flags(&in_stream, size, ++ type_name(OBJ_BLOB), ++ &obj_list[nr].oid, ++ HASH_STREAM)) ++ die(_("failed to write object in stream")); + + if (zstream.total_out != size || data.status != Z_STREAM_END) + die(_("inflate returned %d"), data.status); + git_inflate_end(&zstream); + -+ if (strict && !dry_run) { ++ if (strict) { + struct blob *blob = lookup_blob(the_repository, &obj_list[nr].oid); + if (blob) + blob->object.flags |= 
FLAG_WRITTEN; + else -+ die("invalid blob object from stream"); ++ die(_("invalid blob object from stream")); + } + obj_list[nr].obj = NULL; +} @@ t/t5590-unpack-non-delta-objects.sh (new) +prepare_dest () { + test_when_finished "rm -rf dest.git" && + git init --bare dest.git && -+ git -C dest.git config core.bigFileStreamingThreshold $1 ++ git -C dest.git config core.bigFileStreamingThreshold $1 && + git -C dest.git config core.bigFileThreshold $1 +} + @@ t/t5590-unpack-non-delta-objects.sh (new) + test_cmp expect actual +' + ++test_expect_success 'unpack big object in stream with existing oids' ' ++ prepare_dest 1m && ++ git -C dest.git index-pack --stdin <test-$PACK.pack && ++ ( ++ cd dest.git && ++ find objects/?? -type f | sort ++ ) >actual && ++ test_must_be_empty actual && ++ git -C dest.git unpack-objects <test-$PACK.pack && ++ git -C dest.git fsck && ++ ( ++ cd dest.git && ++ find objects/?? -type f | sort ++ ) >actual && ++ test_must_be_empty actual ++' ++ +test_expect_success 'unpack-objects dry-run' ' + prepare_dest 1m && + git -C dest.git unpack-objects -n <test-$PACK.pack && -- 2.34.1.52.gfcc2252aea.agit.6.5.6