[PATCH 25/32] update-index: new options to enable/disable split index mode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



If you have a large work tree but only make changes in a subset, then
$GIT_DIR/index's size should be stable after a while. If you change
branches that touch something else, $GIT_DIR/index's size may grow
large that it becomes as slow as the unified index. Do --split-index
again occasionally to force all changes back to the shared index and
keep $GIT_DIR/index small.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 Documentation/git-update-index.txt | 11 +++++++
 builtin/update-index.c             | 18 ++++++++++
 cache.h                            |  1 +
 read-cache.c                       | 67 ++++++++++++++++++++++++++++++++++----
 split-index.c                      | 23 +++++++++++++
 5 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index d6de4a0..dfc09d9 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -161,6 +161,17 @@ may not support it yet.
 	Only meaningful with `--stdin` or `--index-info`; paths are
 	separated with NUL character instead of LF.
 
+--split-index::
+--no-split-index::
+	Enable or disable split index mode. If enabled, the index is
+	split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.<SHA-1>.
+	Changes are accumulated in $GIT_DIR/index while the shared
+	index file contains all index entries stays unchanged. If
+	split-index mode is already enabled and `--split-index` is
+	given again, all changes in $GIT_DIR/index are pushed back to
+	the shared index file. This mode is designed for very large
+	indexes that take a signficant amount of time to read or write.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index f7a19c4..b0503f4 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -13,6 +13,7 @@
 #include "parse-options.h"
 #include "pathspec.h"
 #include "dir.h"
+#include "split-index.h"
 
 /*
  * Default to not allowing changes to the list of files. The
@@ -742,6 +743,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 	char set_executable_bit = 0;
 	struct refresh_params refresh_args = {0, &has_errors};
 	int lock_error = 0;
+	int split_index = -1;
 	struct lock_file *lock_file;
 	struct parse_opt_ctx_t ctx;
 	int parseopt_state = PARSE_OPT_UNKNOWN;
@@ -824,6 +826,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			resolve_undo_clear_callback},
 		OPT_INTEGER(0, "index-version", &preferred_index_format,
 			N_("write index in this format")),
+		OPT_BOOL(0, "split-index", &split_index,
+			N_("enable or disable split index")),
 		OPT_END()
 	};
 
@@ -917,6 +921,20 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		strbuf_release(&buf);
 	}
 
+	if (split_index > 0) {
+		init_split_index(&the_index);
+		the_index.cache_changed |= SPLIT_INDEX_ORDERED;
+	} else if (!split_index && the_index.split_index) {
+		/*
+		 * can't discard_split_index(&the_index); because that
+		 * will destroy split_index->base->cache[], which may
+		 * be shared with the_index.cache[]. So yeah we're
+		 * leaking a bit here.
+		 */
+		the_index.split_index = NULL;
+		the_index.cache_changed |= SOMETHING_CHANGED;
+	}
+
 	if (active_cache_changed) {
 		if (newfd < 0) {
 			if (refresh_args.flags & REFRESH_QUIET)
diff --git a/cache.h b/cache.h
index ddb7cd2..7523c28 100644
--- a/cache.h
+++ b/cache.h
@@ -278,6 +278,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CE_ENTRY_ADDED		(1 << 3)
 #define RESOLVE_UNDO_CHANGED	(1 << 4)
 #define CACHE_TREE_CHANGED	(1 << 5)
+#define SPLIT_INDEX_ORDERED	(1 << 6)
 
 struct split_index;
 struct index_state {
diff --git a/read-cache.c b/read-cache.c
index af475dc..8ecb959 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -15,6 +15,7 @@
 #include "strbuf.h"
 #include "varint.h"
 #include "split-index.h"
+#include "sigchain.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 					       unsigned int options);
@@ -39,7 +40,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
-		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED)
+		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
+		 SPLIT_INDEX_ORDERED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
@@ -1860,7 +1862,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
 		rollback_lock_file(lockfile);
 }
 
-static int do_write_index(struct index_state *istate, int newfd)
+static int do_write_index(struct index_state *istate, int newfd,
+			  int strip_extensions)
 {
 	git_SHA_CTX c;
 	struct cache_header hdr;
@@ -1923,7 +1926,7 @@ static int do_write_index(struct index_state *istate, int newfd)
 	strbuf_release(&previous_name_buf);
 
 	/* Write extension data here */
-	if (istate->split_index) {
+	if (!strip_extensions && istate->split_index) {
 		struct strbuf sb = STRBUF_INIT;
 
 		err = write_link_extension(&sb, istate) < 0 ||
@@ -1934,7 +1937,7 @@ static int do_write_index(struct index_state *istate, int newfd)
 		if (err)
 			return -1;
 	}
-	if (istate->cache_tree) {
+	if (!strip_extensions && istate->cache_tree) {
 		struct strbuf sb = STRBUF_INIT;
 
 		cache_tree_write(&sb, istate->cache_tree);
@@ -1944,7 +1947,7 @@ static int do_write_index(struct index_state *istate, int newfd)
 		if (err)
 			return -1;
 	}
-	if (istate->resolve_undo) {
+	if (!strip_extensions && istate->resolve_undo) {
 		struct strbuf sb = STRBUF_INIT;
 
 		resolve_undo_write(&sb, istate->resolve_undo);
@@ -1985,7 +1988,7 @@ static int commit_locked_index(struct lock_file *lk)
 static int do_write_locked_index(struct index_state *istate, struct lock_file *lock,
 				 unsigned flags)
 {
-	int ret = do_write_index(istate, lock->fd);
+	int ret = do_write_index(istate, lock->fd, 0);
 	if (ret)
 		return ret;
 	assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) !=
@@ -2009,6 +2012,52 @@ static int write_split_index(struct index_state *istate,
 	return ret;
 }
 
+static char *temporary_sharedindex;
+
+static void remove_temporary_sharedindex(void)
+{
+	if (temporary_sharedindex) {
+		unlink_or_warn(temporary_sharedindex);
+		free(temporary_sharedindex);
+		temporary_sharedindex = NULL;
+	}
+}
+
+static void remove_temporary_sharedindex_on_signal(int signo)
+{
+	remove_temporary_sharedindex();
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+static int write_shared_index(struct index_state *istate)
+{
+	struct split_index *si = istate->split_index;
+	static int installed_handler;
+	int fd, ret;
+
+	temporary_sharedindex = git_pathdup("sharedindex_XXXXXX");
+	fd = xmkstemp(temporary_sharedindex);
+	if (!installed_handler) {
+		atexit(remove_temporary_sharedindex);
+		sigchain_push_common(remove_temporary_sharedindex_on_signal);
+	}
+	move_cache_to_base_index(istate);
+	ret = do_write_index(si->base, fd, 1);
+	close(fd);
+	if (ret) {
+		remove_temporary_sharedindex();
+		return ret;
+	}
+	ret = rename(temporary_sharedindex,
+		     git_path("sharedindex.%s", sha1_to_hex(si->base->sha1)));
+	free(temporary_sharedindex);
+	temporary_sharedindex = NULL;
+	if (!ret)
+		hashcpy(si->base_sha1, si->base->sha1);
+	return ret;
+}
+
 int write_locked_index(struct index_state *istate, struct lock_file *lock,
 		       unsigned flags)
 {
@@ -2020,6 +2069,12 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
 		return do_write_locked_index(istate, lock, flags);
 	}
 
+	if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
+		int ret = write_shared_index(istate);
+		if (ret)
+			return ret;
+	}
+
 	return write_split_index(istate, lock, flags);
 }
 
diff --git a/split-index.c b/split-index.c
index ee3246f..21485e2 100644
--- a/split-index.c
+++ b/split-index.c
@@ -74,6 +74,29 @@ static void mark_base_index_entries(struct index_state *base)
 		base->cache[i]->index = i + 1;
 }
 
+void move_cache_to_base_index(struct index_state *istate)
+{
+	struct split_index *si = istate->split_index;
+	int i;
+
+	/*
+	 * do not delete old si->base, its index entries may be shared
+	 * with istate->cache[]. Accept a bit of leaking here because
+	 * this code is only used by short-lived update-index.
+	 */
+	si->base = xcalloc(1, sizeof(*si->base));
+	si->base->version = istate->version;
+	/* zero timestamp disables racy test in ce_write_index() */
+	si->base->timestamp = istate->timestamp;
+	ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
+	si->base->cache_nr = istate->cache_nr;
+	memcpy(si->base->cache, istate->cache,
+	       sizeof(*istate->cache) * istate->cache_nr);
+	mark_base_index_entries(si->base);
+	for (i = 0; i < si->base->cache_nr; i++)
+		si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
+}
+
 static void mark_entry_for_delete(size_t pos, void *data)
 {
 	struct index_state *istate = data;
-- 
1.9.1.346.ga2b5940

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]