[RFC PATCH] Automatically save packfiles created during git-push

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Some workflows encourage users to create loose objects in their
local repository and then upload them to a central location by
way of git-push.  During the git-push operation the end-user
expects network latency to be the dominating factor, and we are
also very likely to be packing mostly loose objects for transport,
because the user is probably pushing their recent work, which is
typically stored only in loose objects.

By saving a local copy of the packfile we are transferring over
the network, we can remove the corresponding loose objects from the
objects directory and immediately benefit from the packing work
that was done to perform the network transport.  This is a form
of `git gc --auto` that happens automatically anytime the user
performs a push.

Signed-off-by: Shawn O. Pearce <spearce@xxxxxxxxxxx>
---

 This is *NOT* meant for application to any current version (but
 it was developed on 'next' 4d83fa8).  It has only loosely been
 tested and only manually, not through the test suite.  As a patch
 series it's probably better broken up into a couple of changes
 (one for the csum-file teefd support, one for --save-pack, and
 one for enabling it in send-pack/push).

 It seems to do what I wanted, which is to let `git push --no-thin`
 save the packfile it is transporting so that we don't have to redo
 this work later in a `git gc --auto` invocation.

 Originally I started this patch out thinking that --no-thin was
 the default for git-push and that therefore this was almost a
 no-brainer change.  Then I found out it isn't the default, so now
 I'm not so sure about the value of this particular patch.

 My goal was to make `git gc --auto` cheaper in the long run for
 users who frequently use git-push, since most of the packing work
 is already done for them during their git-push operations.

 builtin-pack-objects.c |   50 +++++++++++++++++++++++++++++++++++++++--------
 csum-file.c            |   16 +++++++++++++-
 csum-file.h            |    2 +-
 send-pack.c            |    2 +
 4 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index b1c64be..37a42ad 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -14,6 +14,7 @@
 #include "revision.h"
 #include "list-objects.h"
 #include "progress.h"
+#include "exec_cmd.h"
 
 static const char pack_usage[] = "\
 git-pack-objects [{ -q | --progress | --all-progress }] \n\
@@ -21,7 +22,7 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\
 	[--window=N] [--window-memory=N] [--depth=N] \n\
 	[--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
 	[--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
-	[--stdout | base-name] [<ref-list | <object-list]";
+	[--stdout [--save-pack] | base-name] [<ref-list | <object-list]";
 
 struct object_entry {
 	struct pack_idx_entry idx;
@@ -68,7 +69,7 @@ static int progress = 1;
 static int window = 10;
 static uint32_t pack_size_limit;
 static int depth = 50;
-static int pack_to_stdout;
+static int pack_to_stdout, save_pack;
 static int num_preferred_base;
 static struct progress progress_state;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
@@ -385,7 +386,7 @@ static unsigned long write_object(struct sha1file *f,
 				 */
 				entry->delta->idx.offset ? 1 : 0;
 
-	if (!pack_to_stdout)
+	if (!pack_to_stdout || save_pack)
 		crc32_begin(f);
 
 	obj_type = entry->type;
@@ -509,7 +510,7 @@ static unsigned long write_object(struct sha1file *f,
 		offset = entry->in_pack_offset;
 		revidx = find_packed_object(p, offset);
 		datalen = revidx[1].offset - offset;
-		if (!pack_to_stdout && p->index_version > 1 &&
+		if ((!pack_to_stdout || save_pack) && p->index_version > 1 &&
 		    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
 			die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
 		offset += entry->in_pack_header_size;
@@ -537,7 +538,7 @@ static unsigned long write_object(struct sha1file *f,
 			sha1write(f, header, hdrlen);
 		}
 
-		if (!pack_to_stdout && p->index_version == 1 &&
+		if ((!pack_to_stdout || save_pack) && p->index_version == 1 &&
 		    check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
 			die("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
 		copy_pack_data(f, p, &w_curs, offset, datalen);
@@ -547,7 +548,7 @@ static unsigned long write_object(struct sha1file *f,
 	if (usable_delta)
 		written_delta++;
 	written++;
-	if (!pack_to_stdout)
+	if (!pack_to_stdout || save_pack)
 		entry->idx.crc32 = crc32_end(f);
 	return hdrlen + datalen;
 }
@@ -608,7 +609,12 @@ static void write_pack_file(void)
 	do {
 		unsigned char sha1[20];
 
-		if (pack_to_stdout) {
+		if (save_pack) {
+			int fd = open_object_dir_tmp("tmp_pack_XXXXXX");
+			pack_tmp_name = xstrdup(tmpname);
+			f = sha1fd(fd, pack_tmp_name);
+			f->teefd = 1; /* implies pack_to_stdout */
+		} else if (pack_to_stdout) {
 			f = sha1fd(1, "<stdout>");
 		} else {
 			int fd = open_object_dir_tmp("tmp_pack_XXXXXX");
@@ -644,7 +650,7 @@ static void write_pack_file(void)
 			close(f->fd);
 		}
 
-		if (!pack_to_stdout) {
+		if (!pack_to_stdout || save_pack) {
 			mode_t mode = umask(0);
 
 			umask(mode);
@@ -1556,6 +1562,21 @@ static void prepare_pack(int window, int depth)
 	if (!nr_objects || !window || !depth)
 		return;
 
+	if (save_pack) {
+		/* Only retain save_pack behavior if at least many of the
+		 * objects we are about to pack are currently loose */
+		n = 0;
+		for (i = 0; i < nr_objects; i++) {
+			struct object_entry *entry = objects + i;
+			if (!entry->in_pack)
+				n++;
+		}
+		if (n < (nr_objects / 2))
+			save_pack = 0;
+		else if (progress)
+			fprintf(stderr, "Also keeping saving packfile...\n");
+	}
+
 	delta_list = xmalloc(nr_objects * sizeof(*delta_list));
 	nr_deltas = n = 0;
 
@@ -1813,6 +1834,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			pack_to_stdout = 1;
 			continue;
 		}
+		if (!strcmp("--save-pack", arg)) {
+			save_pack = 1;
+			continue;
+		}
 		if (!strcmp("--revs", arg)) {
 			use_internal_rev_list = 1;
 			continue;
@@ -1868,12 +1893,17 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 
 	if (pack_to_stdout != !base_name)
 		usage(pack_usage);
+	if (!pack_to_stdout && save_pack)
+		usage(pack_usage);
 
 	if (pack_to_stdout && pack_size_limit)
 		die("--max-pack-size cannot be used to build a pack for transfer.");
 
-	if (!pack_to_stdout && thin)
+	if ((!pack_to_stdout || save_pack) && thin)
 		die("--thin cannot be used to build an indexable pack.");
+	if (save_pack)
+		base_name = xstrdup(mkpath("%s/pack/pack",
+			get_object_directory()));
 
 	prepare_packed_git();
 
@@ -1901,5 +1931,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	if (progress)
 		fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n",
 			written, written_delta, reused, reused_delta);
+	if (nr_result && save_pack)
+		return execl_git_cmd("prune-packed", NULL);
 	return 0;
 }
diff --git a/csum-file.c b/csum-file.c
index 9ab9971..8c9d8e3 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -10,12 +10,14 @@
 #include "cache.h"
 #include "csum-file.h"
 
-static void sha1flush(struct sha1file *f, unsigned int count)
+static void sha1flush_helper(struct sha1file *f,
+		int fd,
+		unsigned int count)
 {
 	void *buf = f->buffer;
 
 	for (;;) {
-		int ret = xwrite(f->fd, buf, count);
+		int ret = xwrite(fd, buf, count);
 		if (ret > 0) {
 			buf = (char *) buf + ret;
 			count -= ret;
@@ -29,6 +31,13 @@ static void sha1flush(struct sha1file *f, unsigned int count)
 	}
 }
 
+static void sha1flush(struct sha1file *f, unsigned int count)
+{
+	sha1flush_helper(f, f->fd, count);
+	if (f->teefd != -1)
+		sha1flush_helper(f, f->teefd, count);
+}
+
 int sha1close(struct sha1file *f, unsigned char *result, int final)
 {
 	unsigned offset = f->offset;
@@ -45,6 +54,8 @@ int sha1close(struct sha1file *f, unsigned char *result, int final)
 	sha1flush(f, 20);
 	if (close(f->fd))
 		die("%s: sha1 file error on close (%s)", f->name, strerror(errno));
+	if (f->teefd != -1 && close(f->teefd))
+		die("%s: sha1 file error on close (%s)", f->name, strerror(errno));
 	free(f);
 	return 0;
 }
@@ -87,6 +98,7 @@ struct sha1file *sha1fd(int fd, const char *name)
 	memcpy(f->name, name, len+1);
 
 	f->fd = fd;
+	f->teefd = -1;
 	f->error = 0;
 	f->offset = 0;
 	f->do_crc = 0;
diff --git a/csum-file.h b/csum-file.h
index c3c792f..2775dee 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -3,7 +3,7 @@
 
 /* A SHA1-protected file */
 struct sha1file {
-	int fd, error;
+	int fd, teefd, error;
 	unsigned int offset, namelen;
 	SHA_CTX ctx;
 	char name[PATH_MAX];
diff --git a/send-pack.c b/send-pack.c
index f74e66a..0826ad8 100644
--- a/send-pack.c
+++ b/send-pack.c
@@ -37,6 +37,8 @@ static int pack_objects(int fd, struct ref *refs)
 
 	if (use_thin_pack)
 		args[4] = "--thin";
+	else
+		args[4] = "--save-pack";
 	memset(&po, 0, sizeof(po));
 	po.argv = args;
 	po.in = -1;
-- 
1.5.3.1.880.g5a3ab
-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux