[PATCH] {fetch,receive}-pack: drop unpack-objects, delay loosing objects until the end

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Current code peaks into the transfered pack's header, if the number of
objects is under a limit, unpack-objects is called to handle the rest,
otherwise index-pack is. This patch makes fetch-pack use index-pack
unconditionally, then turn objects loose and remove the pack at the
end. unpack-objects is deprecated and may be removed in future.

There are a few reasons for this:

 - down to two code paths to maintain regarding pack reading
   (sha1_file.c and index-pack.c). When .pack v4 comes, we don't need
   to duplicate work in index-pack and unpack-objects.

 - the number of objects should not be the only indicator for
   unpacking. If there are a few large objects in the pack, the pack
   should be kept anyway. Keeping the pack lets us examine the whole
   pack later and make a better decision.

 - by going through index-pack first, then unpack, we pay extra cost
   for completing a thin pack into a full one. But compared to fetch's
   total time, it should not be noticeable because unpack-objects is
   only called when the pack contains a small number of objects.

 - unpack-objects does not support streaming large blobs. Granted
   force_object_loose(), which is used by this patch, does not either.
   But it'll be easier to do so because sha1_file.c interface supports
   streaming.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 builtin/receive-pack.c | 120 +++++++++++++------------------------------------
 cache.h                |   1 +
 fetch-pack.c           |  77 +++++++++++--------------------
 sha1_file.c            |  70 ++++++++++++++++++++++++++++-
 4 files changed, 128 insertions(+), 140 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index e3eb5fc..6eb4ffb 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -792,105 +792,49 @@ static struct command *read_head_info(void)
 	return commands;
 }
 
-static const char *parse_pack_header(struct pack_header *hdr)
-{
-	switch (read_pack_header(0, hdr)) {
-	case PH_ERROR_EOF:
-		return "eof before pack header was fully read";
-
-	case PH_ERROR_PACK_SIGNATURE:
-		return "protocol error (pack signature mismatch detected)";
-
-	case PH_ERROR_PROTOCOL:
-		return "protocol error (pack version unsupported)";
-
-	default:
-		return "unknown error in parse_pack_header";
-
-	case 0:
-		return NULL;
-	}
-}
-
 static const char *pack_lockfile;
 
 static const char *unpack(int err_fd)
 {
-	struct pack_header hdr;
-	const char *hdr_err;
-	char hdr_arg[38];
 	int fsck_objects = (receive_fsck_objects >= 0
 			    ? receive_fsck_objects
 			    : transfer_fsck_objects >= 0
 			    ? transfer_fsck_objects
 			    : 0);
 
-	hdr_err = parse_pack_header(&hdr);
-	if (hdr_err) {
-		if (err_fd > 0)
-			close(err_fd);
-		return hdr_err;
-	}
-	snprintf(hdr_arg, sizeof(hdr_arg),
-			"--pack_header=%"PRIu32",%"PRIu32,
-			ntohl(hdr.hdr_version), ntohl(hdr.hdr_entries));
-
-	if (ntohl(hdr.hdr_entries) < unpack_limit) {
-		int code, i = 0;
-		struct child_process child;
-		const char *unpacker[5];
-		unpacker[i++] = "unpack-objects";
-		if (quiet)
-			unpacker[i++] = "-q";
-		if (fsck_objects)
-			unpacker[i++] = "--strict";
-		unpacker[i++] = hdr_arg;
-		unpacker[i++] = NULL;
-		memset(&child, 0, sizeof(child));
-		child.argv = unpacker;
-		child.no_stdout = 1;
-		child.err = err_fd;
-		child.git_cmd = 1;
-		code = run_command(&child);
-		if (!code)
-			return NULL;
-		return "unpack-objects abnormal exit";
-	} else {
-		const char *keeper[7];
-		int s, status, i = 0;
-		char keep_arg[256];
-		struct child_process ip;
-
-		s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid());
-		if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
-			strcpy(keep_arg + s, "localhost");
-
-		keeper[i++] = "index-pack";
-		keeper[i++] = "--stdin";
-		if (fsck_objects)
-			keeper[i++] = "--strict";
-		keeper[i++] = "--fix-thin";
-		keeper[i++] = hdr_arg;
-		keeper[i++] = keep_arg;
-		keeper[i++] = NULL;
-		memset(&ip, 0, sizeof(ip));
-		ip.argv = keeper;
-		ip.out = -1;
-		ip.err = err_fd;
-		ip.git_cmd = 1;
-		status = start_command(&ip);
-		if (status) {
-			return "index-pack fork failed";
-		}
-		pack_lockfile = index_pack_lockfile(ip.out);
-		close(ip.out);
-		status = finish_command(&ip);
-		if (!status) {
-			reprepare_packed_git();
-			return NULL;
-		}
+	const char *keeper[7];
+	int s, status, i = 0;
+	char keep_arg[256];
+	struct child_process ip;
+
+	s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid());
+	if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
+		strcpy(keep_arg + s, "localhost");
+
+	keeper[i++] = "index-pack";
+	keeper[i++] = "--stdin";
+	if (fsck_objects)
+		keeper[i++] = "--strict";
+	keeper[i++] = "--fix-thin";
+	keeper[i++] = keep_arg;
+	keeper[i++] = NULL;
+	memset(&ip, 0, sizeof(ip));
+	ip.argv = keeper;
+	ip.out = -1;
+	ip.err = err_fd;
+	ip.git_cmd = 1;
+	status = start_command(&ip);
+	if (status) {
+		return "index-pack fork failed";
+	}
+	pack_lockfile = index_pack_lockfile(ip.out);
+	close(ip.out);
+	status = finish_command(&ip);
+	if (status)
 		return "index-pack abnormal exit";
-	}
+	if (!maybe_unpack_objects(pack_lockfile, unpack_limit))
+		pack_lockfile = NULL;
+	return NULL;
 }
 
 static const char *unpack_with_sideband(void)
diff --git a/cache.h b/cache.h
index 85b544f..0fff958 100644
--- a/cache.h
+++ b/cache.h
@@ -789,6 +789,7 @@ extern int hash_sha1_file(const void *buf, unsigned long len, const char *type,
 extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
 extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
 extern int force_object_loose(const unsigned char *sha1, time_t mtime);
+extern int maybe_unpack_objects(const char *pack_lockfile, int limit);
 extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
 extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
diff --git a/fetch-pack.c b/fetch-pack.c
index f5d99c1..9c81a15 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -687,9 +687,7 @@ static int get_pack(struct fetch_pack_args *args,
 	struct async demux;
 	const char *argv[22];
 	char keep_arg[256];
-	char hdr_arg[256];
 	const char **av;
-	int do_keep = args->keep_pack;
 	struct child_process cmd;
 	int ret;
 
@@ -711,54 +709,29 @@ static int get_pack(struct fetch_pack_args *args,
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.argv = argv;
+	cmd.out = -1;
 	av = argv;
-	*hdr_arg = 0;
-	if (!args->keep_pack && unpack_limit) {
-		struct pack_header header;
-
-		if (read_pack_header(demux.out, &header))
-			die("protocol error: bad pack header");
-		snprintf(hdr_arg, sizeof(hdr_arg),
-			 "--pack_header=%"PRIu32",%"PRIu32,
-			 ntohl(header.hdr_version), ntohl(header.hdr_entries));
-		if (ntohl(header.hdr_entries) < unpack_limit)
-			do_keep = 0;
-		else
-			do_keep = 1;
-	}
 
 	if (alternate_shallow_file) {
 		*av++ = "--shallow-file";
 		*av++ = alternate_shallow_file;
 	}
 
-	if (do_keep) {
-		if (pack_lockfile)
-			cmd.out = -1;
-		*av++ = "index-pack";
-		*av++ = "--stdin";
-		if (!args->quiet && !args->no_progress)
-			*av++ = "-v";
-		if (args->use_thin_pack)
-			*av++ = "--fix-thin";
-		if (args->lock_pack || unpack_limit) {
-			int s = sprintf(keep_arg,
-					"--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid());
-			if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
-				strcpy(keep_arg + s, "localhost");
-			*av++ = keep_arg;
-		}
-		if (args->check_self_contained_and_connected)
-			*av++ = "--check-self-contained-and-connected";
-	}
-	else {
-		*av++ = "unpack-objects";
-		if (args->quiet || args->no_progress)
-			*av++ = "-q";
-		args->check_self_contained_and_connected = 0;
-	}
-	if (*hdr_arg)
-		*av++ = hdr_arg;
+	*av++ = "index-pack";
+	*av++ = "--stdin";
+	if (!args->quiet && !args->no_progress)
+		*av++ = "-v";
+	if (args->use_thin_pack)
+		*av++ = "--fix-thin";
+	if (args->lock_pack || unpack_limit) {
+		int s = sprintf(keep_arg,
+				"--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid());
+		if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
+			strcpy(keep_arg + s, "localhost");
+		*av++ = keep_arg;
+	}
+	if (args->check_self_contained_and_connected)
+		*av++ = "--check-self-contained-and-connected";
 	if (fetch_fsck_objects >= 0
 	    ? fetch_fsck_objects
 	    : transfer_fsck_objects >= 0
@@ -771,10 +744,8 @@ static int get_pack(struct fetch_pack_args *args,
 	cmd.git_cmd = 1;
 	if (start_command(&cmd))
 		die("fetch-pack: unable to fork off %s", argv[0]);
-	if (do_keep && pack_lockfile) {
-		*pack_lockfile = index_pack_lockfile(cmd.out);
-		close(cmd.out);
-	}
+	*pack_lockfile = index_pack_lockfile(cmd.out);
+	close(cmd.out);
 
 	ret = finish_command(&cmd);
 	if (!ret || (args->check_self_contained_and_connected && ret == 1))
@@ -820,11 +791,12 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
 				 int fd[2],
 				 const struct ref *orig_ref,
 				 struct ref **sought, int nr_sought,
-				 char **pack_lockfile)
+				 char **pack_lockfile_p)
 {
 	struct ref *ref = copy_ref_list(orig_ref);
 	unsigned char sha1[20];
 	const char *agent_feature;
+	char *pack_lockfile = NULL;
 	int agent_len;
 
 	sort_ref_list(&ref, ref_compare_name);
@@ -899,9 +871,15 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
 		setup_alternate_shallow();
 	else
 		alternate_shallow_file = NULL;
-	if (get_pack(args, fd, pack_lockfile))
+	if (get_pack(args, fd, &pack_lockfile))
 		die("git fetch-pack: fetch failed.");
 
+	if (!maybe_unpack_objects(pack_lockfile,
+				  args->keep_pack ? 0 : unpack_limit))
+		pack_lockfile = NULL;
+	if (pack_lockfile_p)
+		*pack_lockfile_p = pack_lockfile;
+
  all_done:
 	return ref;
 }
@@ -997,6 +975,5 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 			commit_lock_file(&shallow_lock);
 	}
 
-	reprepare_packed_git();
 	return ref_cpy;
 }
diff --git a/sha1_file.c b/sha1_file.c
index 8e27db1..a0cdeae 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -61,6 +61,8 @@ static struct cached_object empty_tree = {
 };
 
 static struct packed_git *last_found_pack;
+/* temporarily skip one pack in find_pack_entry() */
+static struct packed_git *skip_this_pack;
 
 static struct cached_object *find_cached_object(const unsigned char *sha1)
 {
@@ -2376,11 +2378,15 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	if (!packed_git)
 		return 0;
 
-	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+	if (last_found_pack &&
+	    last_found_pack != skip_this_pack &&
+	    fill_pack_entry(sha1, e, last_found_pack))
 		return 1;
 
 	for (p = packed_git; p; p = p->next) {
-		if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+		if (p == last_found_pack ||
+		    p == skip_this_pack ||
+		    !fill_pack_entry(sha1, e, p))
 			continue;
 
 		last_found_pack = p;
@@ -3133,3 +3139,63 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
 		die("%s is not a valid '%s' object", sha1_to_hex(sha1),
 		    typename(expect));
 }
+
+static int has_sha1_except_in(struct packed_git *p,
+			      const unsigned char *sha1)
+{
+	int has;
+	skip_this_pack = p;
+	has = has_sha1_file(sha1);
+	skip_this_pack = NULL;
+	return has;
+}
+
+/*
+ * Unpack objects if the number of objects in the pack is lower than
+ * specified limit. Otherwise make sure the new pack is registered
+ * (including when pack_lockfile is NULL). Return 0 is the pack is
+ * removed.
+ */
+int maybe_unpack_objects(const char *pack_lockfile, int limit)
+{
+	const char *ext[] = { ".pack", ".idx", ".keep", NULL };
+	struct packed_git *p;
+	char *path;
+	int len, ret = 0;
+	uint32_t i;
+
+	reprepare_packed_git();
+	if (!pack_lockfile)
+		return -1;
+
+	/* must be enough for any extensions in ext[] */
+	path = xstrdup(pack_lockfile);
+	len = strlen(pack_lockfile) - strlen(".keep");
+	strcpy(path + len, ".pack");
+	for (p = packed_git; p; p = p->next)
+		if (!strcmp(p->pack_name, path))
+			break;
+	if (!p || open_pack_index(p))
+		die("unable to find pack %s", path);
+
+	if (p->num_objects >= limit) {
+		free(path);
+		return -1;
+	}
+
+	for (i = 0; i < p->num_objects; i++) {
+		const unsigned char *sha1 = nth_packed_object_sha1(p, i);
+		if (!has_sha1_except_in(p, sha1)) /* skip --fix-thin objects */
+			ret |= force_object_loose(sha1, p->mtime);
+	}
+
+	if (!ret) {
+		free_pack_by_name(p->pack_name);
+		for (i = 0; ext[i]; i++) {
+			strcpy(path + len, ext[i]);
+			unlink_or_warn(path);
+		}
+	}
+	free(path);
+	return ret;
+}
-- 
1.8.2.83.gc99314b

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]