The bulk-checkin interface is designed to throw multiple blobs into a single output packfile during the lifetime of a single process by "plugging" the output. The direct streaming of the data to a packfile, however, is primarily meant as a way to deal with large blobs better, and it is possible that we end up with a single humongous packfile that is awkward to handle. Pay attention to the pack.packsizelimit configuration the same way as pack-objects does, and make sure we close a packfile and switch to a new one before busting the size limit. We allow the limit to be busted if a single object is too large to be contained in a pack that is smaller than the limit on its own, as there is no way to store such an object otherwise; the same is already done in pack-objects. Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx> --- builtin/pack-objects.c | 6 +----- bulk-checkin.c | 35 ++++++++++++++++++++++++++++++----- cache.h | 1 + config.c | 4 ++++ environment.c | 1 + 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b458b6d..dde913e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -76,7 +76,7 @@ static struct pack_idx_option pack_idx_opts; static const char *base_name; static int progress = 1; static int window = 10; -static unsigned long pack_size_limit, pack_size_limit_cfg; +static unsigned long pack_size_limit; static int depth = 50; static int delta_search_threads; static int pack_to_stdout; @@ -2009,10 +2009,6 @@ static int git_pack_config(const char *k, const char *v, void *cb) pack_idx_opts.version); return 0; } - if (!strcmp(k, "pack.packsizelimit")) { - pack_size_limit_cfg = git_config_ulong(k, v); - return 0; - } return git_default_config(k, v, cb); } diff --git a/bulk-checkin.c b/bulk-checkin.c index 60178ef..2adc67b 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -73,10 +73,13 @@ static int already_written(struct bulk_checkin_state *state, unsigned char sha1[ return 0; } 
-static void deflate_to_pack(struct bulk_checkin_state *state, - unsigned char sha1[], - int fd, size_t size, enum object_type type, - const char *path, unsigned flags) +#define DEFLATE_TO_PACK_OK 0 +#define DEFLATE_TO_PACK_TOOBIG 1 + +static int deflate_to_pack(struct bulk_checkin_state *state, + unsigned char sha1[], + int fd, size_t size, enum object_type type, + const char *path, unsigned flags) { unsigned char obuf[16384]; unsigned hdrlen; @@ -149,6 +152,13 @@ static void deflate_to_pack(struct bulk_checkin_state *state, sha1file_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; free(idx); + } else if (state->nr_written && + pack_size_limit_cfg && + pack_size_limit_cfg < state->offset) { + sha1file_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + free(idx); + return DEFLATE_TO_PACK_TOOBIG; } else { hashcpy(idx->sha1, sha1); ALLOC_GROW(state->written, @@ -156,12 +166,17 @@ static void deflate_to_pack(struct bulk_checkin_state *state, state->written[state->nr_written++] = idx; } } + return DEFLATE_TO_PACK_OK; } int index_bulk_checkin(unsigned char *sha1, int fd, size_t size, enum object_type type, const char *path, unsigned flags) { + off_t seekback; + int status; + +again: if (!state.f && (flags & HASH_WRITE_OBJECT)) { state.f = create_tmp_packfile(&state.pack_tmp_name); reset_pack_idx_option(&state.pack_idx_opts); @@ -171,7 +186,17 @@ int index_bulk_checkin(unsigned char *sha1, die_errno("unable to write pack header"); } - deflate_to_pack(&state, sha1, fd, size, type, path, flags); + seekback = lseek(fd, 0, SEEK_CUR); + if (seekback == (off_t) -1) + return error("cannot seek"); + status = deflate_to_pack(&state, sha1, fd, size, type, path, flags); + if (status == DEFLATE_TO_PACK_TOOBIG) { + finish_bulk_checkin(&state); + if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) + return error("cannot seek back"); + goto again; + } + if (!state.plugged) finish_bulk_checkin(&state); return 0; diff --git a/cache.h b/cache.h index 
2e6ad36..b158d3e 100644 --- a/cache.h +++ b/cache.h @@ -598,6 +598,7 @@ extern size_t packed_git_window_size; extern size_t packed_git_limit; extern size_t delta_base_cache_limit; extern unsigned long big_file_threshold; +extern unsigned long pack_size_limit_cfg; extern int read_replace_refs; extern int fsync_object_files; extern int core_preload_index; diff --git a/config.c b/config.c index edf9914..c736802 100644 --- a/config.c +++ b/config.c @@ -797,6 +797,10 @@ int git_default_config(const char *var, const char *value, void *dummy) return 0; } + if (!strcmp(var, "pack.packsizelimit")) { + pack_size_limit_cfg = git_config_ulong(var, value); + return 0; + } /* Add other config variables here and to Documentation/config.txt. */ return 0; } diff --git a/environment.c b/environment.c index 0bee6a7..31e4284 100644 --- a/environment.c +++ b/environment.c @@ -60,6 +60,7 @@ char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; struct startup_info *startup_info; +unsigned long pack_size_limit_cfg; /* Parallel index stat data preload? */ int core_preload_index = 0; -- 1.7.8.rc3.208.g1edbd -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html