[PATCH] bulk-checkin: honor pack.packsizelimit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The bulk-checkin interface is designed to throw multiple blobs into a
single output packfile during the lifetime of a single process by
"plugging" the output. Streaming the data directly to a packfile,
however, is primarily meant as a way to deal with large blobs better, and
it is possible that we end up with a single humongous packfile that is
awkward to handle.

Pay attention to the pack.packsizelimit configuration the same way as
pack-objects does, and make sure we close a packfile and switch to a
new one before busting the size limit.

We allow the limit to be busted when a single object is, on its own, too
large to fit in a pack that stays within the limit, as there is no way to
store such an object otherwise; pack-objects already does the same.

Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
---
 builtin/pack-objects.c |    6 +-----
 bulk-checkin.c         |   35 ++++++++++++++++++++++++++++++-----
 cache.h                |    1 +
 config.c               |    4 ++++
 environment.c          |    1 +
 5 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index b458b6d..dde913e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -76,7 +76,7 @@ static struct pack_idx_option pack_idx_opts;
 static const char *base_name;
 static int progress = 1;
 static int window = 10;
-static unsigned long pack_size_limit, pack_size_limit_cfg;
+static unsigned long pack_size_limit;
 static int depth = 50;
 static int delta_search_threads;
 static int pack_to_stdout;
@@ -2009,10 +2009,6 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			    pack_idx_opts.version);
 		return 0;
 	}
-	if (!strcmp(k, "pack.packsizelimit")) {
-		pack_size_limit_cfg = git_config_ulong(k, v);
-		return 0;
-	}
 	return git_default_config(k, v, cb);
 }
 
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 60178ef..2adc67b 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -73,10 +73,13 @@ static int already_written(struct bulk_checkin_state *state, unsigned char sha1[
 	return 0;
 }
 
-static void deflate_to_pack(struct bulk_checkin_state *state,
-			    unsigned char sha1[],
-			    int fd, size_t size, enum object_type type,
-			    const char *path, unsigned flags)
+#define DEFLATE_TO_PACK_OK 0
+#define DEFLATE_TO_PACK_TOOBIG 1
+
+static int deflate_to_pack(struct bulk_checkin_state *state,
+			   unsigned char sha1[],
+			   int fd, size_t size, enum object_type type,
+			   const char *path, unsigned flags)
 {
 	unsigned char obuf[16384];
 	unsigned hdrlen;
@@ -149,6 +152,13 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 			sha1file_truncate(state->f, &checkpoint);
 			state->offset = checkpoint.offset;
 			free(idx);
+		} else if (state->nr_written &&
+			   pack_size_limit_cfg &&
+			   pack_size_limit_cfg < state->offset) {
+			sha1file_truncate(state->f, &checkpoint);
+			state->offset = checkpoint.offset;
+			free(idx);
+			return DEFLATE_TO_PACK_TOOBIG;
 		} else {
 			hashcpy(idx->sha1, sha1);
 			ALLOC_GROW(state->written,
@@ -156,12 +166,17 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 			state->written[state->nr_written++] = idx;
 		}
 	}
+	return DEFLATE_TO_PACK_OK;
 }
 
 int index_bulk_checkin(unsigned char *sha1,
 		       int fd, size_t size, enum object_type type,
 		       const char *path, unsigned flags)
 {
+	off_t seekback;
+	int status;
+
+again:
 	if (!state.f && (flags & HASH_WRITE_OBJECT)) {
 		state.f = create_tmp_packfile(&state.pack_tmp_name);
 		reset_pack_idx_option(&state.pack_idx_opts);
@@ -171,7 +186,17 @@ int index_bulk_checkin(unsigned char *sha1,
 			die_errno("unable to write pack header");
 	}
 
-	deflate_to_pack(&state, sha1, fd, size, type, path, flags);
+	seekback = lseek(fd, 0, SEEK_CUR);
+	if (seekback == (off_t) -1)
+		return error("cannot seek");
+	status = deflate_to_pack(&state, sha1, fd, size, type, path, flags);
+	if (status == DEFLATE_TO_PACK_TOOBIG) {
+		finish_bulk_checkin(&state);
+		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
+			return error("cannot seek back");
+		goto again;
+	}
+
 	if (!state.plugged)
 		finish_bulk_checkin(&state);
 	return 0;
diff --git a/cache.h b/cache.h
index 2e6ad36..b158d3e 100644
--- a/cache.h
+++ b/cache.h
@@ -598,6 +598,7 @@ extern size_t packed_git_window_size;
 extern size_t packed_git_limit;
 extern size_t delta_base_cache_limit;
 extern unsigned long big_file_threshold;
+extern unsigned long pack_size_limit_cfg;
 extern int read_replace_refs;
 extern int fsync_object_files;
 extern int core_preload_index;
diff --git a/config.c b/config.c
index edf9914..c736802 100644
--- a/config.c
+++ b/config.c
@@ -797,6 +797,10 @@ int git_default_config(const char *var, const char *value, void *dummy)
 		return 0;
 	}
 
+	if (!strcmp(var, "pack.packsizelimit")) {
+		pack_size_limit_cfg = git_config_ulong(var, value);
+		return 0;
+	}
 	/* Add other config variables here and to Documentation/config.txt. */
 	return 0;
 }
diff --git a/environment.c b/environment.c
index 0bee6a7..31e4284 100644
--- a/environment.c
+++ b/environment.c
@@ -60,6 +60,7 @@ char *notes_ref_name;
 int grafts_replace_parents = 1;
 int core_apply_sparse_checkout;
 struct startup_info *startup_info;
+unsigned long pack_size_limit_cfg;
 
 /* Parallel index stat data preload? */
 int core_preload_index = 0;
-- 
1.7.8.rc3.208.g1edbd

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]