With many incremental imports, small packs become highly inefficient due to the need to readdir scan and load many indices to locate even a single object. Frequent repacking and consolidation may be prohibitively expensive in terms of disk I/O, especially in large repositories where the initial packs were aggressively optimized and marked with .keep files. In those cases, users may be better served by loose objects and by relying on "git gc --auto". Signed-off-by: Eric Wong <normalperson@xxxxxxxx> --- There should be a matching config file directive, but I'm not sure how/if it should affect other commands. So I'm not sure if it should be "pack.packSizeMin" or "fastimport.packSizeMin" or something else. To further reduce disk I/O, the fsync_or_die call in fixup_pack_header_footer could probably be moved out of that function and become the fphf caller's responsibility. Documentation/git-fast-import.txt | 9 ++++++++ fast-import.c | 30 ++++++++++++++++++++++++++ t/t9302-fast-import-min-packsize.sh | 42 +++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100755 t/t9302-fast-import-min-packsize.sh diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 66910aa..8c0ac94 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -136,6 +136,15 @@ Performance and Compression Tuning Maximum size of each output packfile. The default is unlimited. +--min-pack-size=<n>:: + Minimum size of an output packfile; packfiles smaller + than this threshold are unpacked into loose objects and + the pack is discarded. This is useful when performing + small, incremental imports as loose objects and relying + on `git gc --auto` may be more efficient than generating + many tiny packs. + The default is to always preserve the pack and never + generate loose objects. 
Performance ----------- diff --git a/fast-import.c b/fast-import.c index 9fc7093..a00bee5 100644 --- a/fast-import.c +++ b/fast-import.c @@ -166,6 +166,7 @@ Format of STDIN stream: #include "quote.h" #include "exec_cmd.h" #include "dir.h" +#include "run-command.h" #define PACK_ID_BITS 16 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1) @@ -282,6 +283,7 @@ struct recent_command { /* Configured limits on output */ static unsigned long max_depth = 10; static off_t max_packsize; +static off_t min_packsize; static int force_update; static int pack_compression_level = Z_DEFAULT_COMPRESSION; static int pack_compression_seen; @@ -950,6 +952,22 @@ static void unkeep_all_packs(void) } } +static int loosen_small_pack(const struct packed_git *p) +{ + struct child_process unpack = CHILD_PROCESS_INIT; + + if (lseek(p->pack_fd, 0, SEEK_SET) < 0) + die_errno("Failed seeking to start of '%s'", p->pack_name); + + unpack.in = p->pack_fd; + unpack.git_cmd = 1; + unpack.stdout_to_stderr = 1; + argv_array_push(&unpack.args, "unpack-objects"); + argv_array_push(&unpack.args, "-q"); + + return run_command(&unpack); +} + static void end_packfile(void) { static int running; @@ -972,6 +990,12 @@ static void end_packfile(void) fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1, pack_data->pack_name, object_count, cur_pack_sha1, pack_size); + + if (pack_size < min_packsize) { + if (loosen_small_pack(pack_data) == 0) + goto discard_pack; + } + close(pack_data->pack_fd); idx_name = keep_pack(create_index()); @@ -1002,6 +1026,7 @@ static void end_packfile(void) pack_id++; } else { +discard_pack: close(pack_data->pack_fd); unlink_or_warn(pack_data->pack_name); } @@ -3237,6 +3262,11 @@ static int parse_one_option(const char *option) v = 1024 * 1024; } max_packsize = v; + } else if (skip_prefix(option, "min-pack-size=", &option)) { + unsigned long v; + if (!git_parse_ulong(option, &v)) + return 0; + min_packsize = v; } else if (skip_prefix(option, "big-file-threshold=", &option)) { unsigned long 
v; if (!git_parse_ulong(option, &v)) diff --git a/t/t9302-fast-import-min-packsize.sh b/t/t9302-fast-import-min-packsize.sh new file mode 100755 index 0000000..7dcdccc --- /dev/null +++ b/t/t9302-fast-import-min-packsize.sh @@ -0,0 +1,42 @@ +#!/bin/sh +test_description='test git fast-import min-packsize' +. ./test-lib.sh + +test_expect_success 'create loose objects on import' ' + test_tick && + cat >input <<-INPUT_END && + commit refs/heads/master + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + initial + COMMIT + + done + INPUT_END + + git fast-import --done --min-pack-size=1g <input && + git fsck --no-progress && + test $(find .git/objects/?? -type f | wc -l) -eq 2 && + test $(find .git/objects/pack -type f | wc -l) -eq 0 +' + +test_expect_success 'bigger packs are preserved' ' + test_tick && + cat >input <<-INPUT_END && + commit refs/heads/master + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + incremental should create a pack + COMMIT + from refs/heads/master^0 + + done + INPUT_END + + git fast-import --done --min-pack-size=10 <input && + git fsck --no-progress && + test $(find .git/objects/?? -type f | wc -l) -eq 2 && + test $(find .git/objects/pack -type f | wc -l) -eq 2 +' + +test_done -- EW -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html