[PATCH 4/4] diff-lib: parallelize run_diff_files for submodules

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



During the iteration of the index entries in run_diff_files, whenever
a submodule is found and needs its status checked, a subprocess is
spawned for it. Instead of spawning the subprocess immediately and
waiting for its completion to continue, hold onto all submodules and
relevant information in a list. Then use that list to create tasks for
run_processes_parallel. Finished subprocesses pipe their output to
status_finish which parses it and sets the relevant variables.

Add config option status.parallelSubmodules to set the maximum number
of parallel jobs. 

Signed-off-by: Calvin Wan <calvinwan@xxxxxxxxxx>
---
 Documentation/config/status.txt |   6 ++
 add-interactive.c               |   2 +-
 builtin/add.c                   |   4 +-
 builtin/commit.c                |   6 ++
 builtin/diff-files.c            |   2 +-
 builtin/diff.c                  |   2 +-
 builtin/merge.c                 |   2 +-
 builtin/stash.c                 |   2 +-
 builtin/submodule--helper.c     |   4 +-
 diff-lib.c                      |  55 +++++++++++--
 diff.h                          |   2 +-
 submodule.c                     | 142 ++++++++++++++++++++++++++++++++
 submodule.h                     |   9 ++
 wt-status.c                     |   6 +-
 wt-status.h                     |   1 +
 15 files changed, 226 insertions(+), 19 deletions(-)

diff --git a/Documentation/config/status.txt b/Documentation/config/status.txt
index 0fc704ab80..92c5511fec 100644
--- a/Documentation/config/status.txt
+++ b/Documentation/config/status.txt
@@ -75,3 +75,9 @@ status.submoduleSummary::
 	the --ignore-submodules=dirty command-line option or the 'git
 	submodule summary' command, which shows a similar output but does
 	not honor these settings.
+
+status.parallelSubmodules::
+	When linkgit:git-status[1] is run in a superproject with
+	submodules, a status subprocess is spawned for every submodule.
+	This option specifies the number of submodule status subprocesses
+	to run in parallel. If unset, it defaults to 1.
diff --git a/add-interactive.c b/add-interactive.c
index 22fcd3412c..3e38aa833d 100644
--- a/add-interactive.c
+++ b/add-interactive.c
@@ -565,7 +565,7 @@ static int get_modified_files(struct repository *r,
 			run_diff_index(&rev, 1);
 		else {
 			rev.diffopt.flags.ignore_dirty_submodules = 1;
-			run_diff_files(&rev, 0);
+			run_diff_files(&rev, 0, -1);
 		}
 
 		release_revisions(&rev);
diff --git a/builtin/add.c b/builtin/add.c
index f84372964c..094d59c1b4 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -148,7 +148,7 @@ int add_files_to_cache(const char *prefix,
 	 * may not have their own transaction active.
 	 */
 	begin_odb_transaction();
-	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
+	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED, -1);
 	end_odb_transaction();
 
 	release_revisions(&rev);
@@ -325,7 +325,7 @@ static int edit_patch(int argc, const char **argv, const char *prefix)
 	out = xopen(file, O_CREAT | O_WRONLY | O_TRUNC, 0666);
 	rev.diffopt.file = xfdopen(out, "w");
 	rev.diffopt.close_file = 1;
-	if (run_diff_files(&rev, 0))
+	if (run_diff_files(&rev, 0, -1))
 		die(_("Could not write patch"));
 
 	if (launch_editor(file, NULL, NULL))
diff --git a/builtin/commit.c b/builtin/commit.c
index fcf9c85947..c5147a7952 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1468,6 +1468,12 @@ static int git_status_config(const char *k, const char *v, void *cb)
 		s->detect_rename = git_config_rename(k, v);
 		return 0;
 	}
+	if (!strcmp(k, "status.parallelsubmodules")) {
+		s->parallel_jobs_submodules = git_config_int(k, v);
+		if (s->parallel_jobs_submodules < 0)
+			die(_("status.parallelsubmodules cannot be negative"));
+		return 0;
+	}
 	return git_diff_ui_config(k, v, NULL);
 }
 
diff --git a/builtin/diff-files.c b/builtin/diff-files.c
index 92cf6e1e92..eb1b576440 100644
--- a/builtin/diff-files.c
+++ b/builtin/diff-files.c
@@ -80,7 +80,7 @@ int cmd_diff_files(int argc, const char **argv, const char *prefix)
 		result = -1;
 		goto cleanup;
 	}
-	result = run_diff_files(&rev, options);
+	result = run_diff_files(&rev, options, -1);
 	result = diff_result_code(&rev.diffopt, result);
 cleanup:
 	release_revisions(&rev);
diff --git a/builtin/diff.c b/builtin/diff.c
index 54bb3de964..c3c532517b 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -275,7 +275,7 @@ static int builtin_diff_files(struct rev_info *revs, int argc, const char **argv
 		perror("read_cache_preload");
 		return -1;
 	}
-	return run_diff_files(revs, options);
+	return run_diff_files(revs, options, -1);
 }
 
 struct symdiff {
diff --git a/builtin/merge.c b/builtin/merge.c
index f7c92c0e64..ff373fa880 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1001,7 +1001,7 @@ static int evaluate_result(void)
 		DIFF_FORMAT_CALLBACK;
 	rev.diffopt.format_callback = count_diff_files;
 	rev.diffopt.format_callback_data = &cnt;
-	run_diff_files(&rev, 0);
+	run_diff_files(&rev, 0, -1);
 
 	/*
 	 * Check how many unmerged entries are
diff --git a/builtin/stash.c b/builtin/stash.c
index 30fa101460..097ea55214 100644
--- a/builtin/stash.c
+++ b/builtin/stash.c
@@ -1075,7 +1075,7 @@ static int check_changes_tracked_files(const struct pathspec *ps)
 		goto done;
 	}
 
-	result = run_diff_files(&rev, 0);
+	result = run_diff_files(&rev, 0, -1);
 	if (diff_result_code(&rev.diffopt, result)) {
 		ret = 1;
 		goto done;
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index fac52ade5e..b50dbf5e36 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -679,7 +679,7 @@ static void status_submodule(const char *path, const struct object_id *ce_oid,
 	diff_files_args.nr = setup_revisions(diff_files_args.nr,
 					     diff_files_args.v,
 					     &rev, NULL);
-	diff_files_result = run_diff_files(&rev, 0);
+	diff_files_result = run_diff_files(&rev, 0, -1);
 
 	if (!diff_result_code(&rev.diffopt, diff_files_result)) {
 		print_status(flags, ' ', path, ce_oid,
@@ -1143,7 +1143,7 @@ static int compute_summary_module_list(struct object_id *head_oid,
 	if (diff_cmd == DIFF_INDEX)
 		run_diff_index(&rev, info->cached);
 	else
-		run_diff_files(&rev, 0);
+		run_diff_files(&rev, 0, -1);
 	prepare_submodule_summary(info, &list);
 cleanup:
 	strvec_clear(&diff_args);
diff --git a/diff-lib.c b/diff-lib.c
index 2e148b79e6..ec745755fc 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -65,14 +65,19 @@ static int check_removed(const struct index_state *istate, const struct cache_en
  * Return 1 when changes are detected, 0 otherwise. If the DIRTY_SUBMODULES
  * option is set, the caller does not only want to know if a submodule is
  * modified at all but wants to know all the conditions that are met (new
- * commits, untracked content and/or modified content).
+ * commits, untracked content and/or modified content). If
+ * defer_submodule_status bit is set, dirty_submodule will be left to the
+ * caller to set. defer_submodule_status can also be set to 0 in this
+ * function if there is no need to check if the submodule is modified.
  */
 static int match_stat_with_submodule(struct diff_options *diffopt,
 				     const struct cache_entry *ce,
 				     struct stat *st, unsigned ce_option,
-				     unsigned *dirty_submodule)
+				     unsigned *dirty_submodule, int *defer_submodule_status,
+					 int *ignore_untracked_in_submodules)
 {
 	int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option);
+	int defer = 0;
 	struct diff_flags orig_flags = diffopt->flags;
 	if (!S_ISGITLINK(ce->ce_mode))
 		goto cleanup;
@@ -83,11 +88,20 @@ static int match_stat_with_submodule(struct diff_options *diffopt,
 		goto cleanup;
 	}
 	if (!diffopt->flags.ignore_dirty_submodules &&
-		(!changed || diffopt->flags.dirty_submodules))
+		(!changed || diffopt->flags.dirty_submodules)) {
+		if (defer_submodule_status && *defer_submodule_status) {
+			defer = 1;
+			*ignore_untracked_in_submodules =
+							diffopt->flags.ignore_untracked_in_submodules;
+		} else {
 			*dirty_submodule = is_submodule_modified(ce->name,
 							diffopt->flags.ignore_untracked_in_submodules);
+		}
+	}
 cleanup:
 	diffopt->flags = orig_flags;
+	if (defer_submodule_status)
+		*defer_submodule_status = defer;
 	return changed;
 }
 
@@ -117,7 +131,7 @@ static void finish_run_diff_files(struct rev_info *revs,
 			    ce->name, 0, dirty_submodule);
 }
 
-int run_diff_files(struct rev_info *revs, unsigned int option)
+int run_diff_files(struct rev_info *revs, unsigned int option, int parallel_jobs)
 {
 	int entries, i;
 	int diff_unmerged_stage = revs->max_count;
@@ -125,6 +139,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option)
 			      ? CE_MATCH_RACY_IS_DIRTY : 0);
 	uint64_t start = getnanotime();
 	struct index_state *istate = revs->diffopt.repo->index;
+	struct string_list submodules = STRING_LIST_INIT_NODUP;
 
 	diff_set_mnemonic_prefix(&revs->diffopt, "i/", "w/");
 
@@ -138,6 +153,9 @@ int run_diff_files(struct rev_info *revs, unsigned int option)
 		struct cache_entry *ce = istate->cache[i];
 		int changed;
 		unsigned dirty_submodule = 0;
+		int defer_submodule_status = revs && revs->repo &&
+							!strcmp(revs->repo->gitdir, ".git");
+		int ignore_untracked_in_submodules;
 
 		if (diff_can_quit_early(&revs->diffopt))
 			break;
@@ -269,11 +287,36 @@ int run_diff_files(struct rev_info *revs, unsigned int option)
 			}
 
 			changed = match_stat_with_submodule(&revs->diffopt, ce, &st,
-							    ce_option, &dirty_submodule);
+							ce_option, &dirty_submodule,
+							&defer_submodule_status,
+							&ignore_untracked_in_submodules);
 			newmode = ce_mode_from_stat(ce, st.st_mode);
+			if (defer_submodule_status) {
+				struct string_list_item *item =
+								string_list_append(&submodules, ce->name);
+				struct submodule_status_util *util = xmalloc(sizeof(*util));
+				util->changed = changed;
+				util->dirty_submodule = 0;
+				util->ignore_untracked = ignore_untracked_in_submodules;
+				util->newmode = newmode;
+				util->ce = ce;
+				item->util = util;
+				continue;
+			}
 		}
 		finish_run_diff_files(revs, ce, istate, changed, dirty_submodule, newmode);
 	}
+	if (submodules.nr > 0) {
+		if (get_submodules_status(revs->repo, &submodules,
+						parallel_jobs > 0 ? parallel_jobs : 1))
+			BUG("Submodule status failed");
+		for (int i = 0; i < submodules.nr; i++) {
+			struct submodule_status_util *util = submodules.items[i].util;
+
+			finish_run_diff_files(revs, util->ce, NULL, util->changed,
+							util->dirty_submodule, util->newmode);
+		}
+	}
 	diffcore_std(&revs->diffopt);
 	diff_flush(&revs->diffopt);
 	trace_performance_since(start, "diff-files");
@@ -321,7 +364,7 @@ static int get_stat_data(const struct index_state *istate,
 			return -1;
 		}
 		changed = match_stat_with_submodule(diffopt, ce, &st,
-						    0, dirty_submodule);
+						    0, dirty_submodule, NULL, NULL);
 		if (changed) {
 			mode = ce_mode_from_stat(ce, st.st_mode);
 			oid = null_oid();
diff --git a/diff.h b/diff.h
index 8ae18e5ab1..5a6a615381 100644
--- a/diff.h
+++ b/diff.h
@@ -627,7 +627,7 @@ void diff_get_merge_base(const struct rev_info *revs, struct object_id *mb);
 #define DIFF_SILENT_ON_REMOVED 01
 /* report racily-clean paths as modified */
 #define DIFF_RACY_IS_MODIFIED 02
-int run_diff_files(struct rev_info *revs, unsigned int option);
+int run_diff_files(struct rev_info *revs, unsigned int option, int parallel_jobs);
 
 #define DIFF_INDEX_CACHED 01
 #define DIFF_INDEX_MERGE_BASE 02
diff --git a/submodule.c b/submodule.c
index 91213ba83c..15729bb327 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1362,6 +1362,20 @@ int submodule_touches_in_range(struct repository *r,
 	return ret;
 }
 
+struct submodule_parallel_status {
+	int index_count;
+	int result;
+
+	struct string_list *submodule_names;
+	struct repository *r;
+
+	/* Pending statuses by OIDs */
+	struct status_task **oid_status_tasks;
+	int oid_status_tasks_nr, oid_status_tasks_alloc;
+};
+
+#define SPS_INIT { 0 }
+
 struct submodule_parallel_fetch {
 	/*
 	 * The index of the last index entry processed by
@@ -1444,6 +1458,12 @@ struct fetch_task {
 	struct oid_array *commits; /* Ensure these commits are fetched */
 };
 
+struct status_task {
+	struct repository *repo;
+	const char *path;
+	int ignore_untracked;
+};
+
 /**
  * When a submodule is not defined in .gitmodules, we cannot access it
  * via the regular submodule-config. Create a fake submodule, which we can
@@ -1547,6 +1567,41 @@ static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf
 	return NULL;
 }
 
+static struct status_task *
+get_status_task_from_index(struct submodule_parallel_status *sps,
+			  struct strbuf *err)
+{
+	for (; sps->index_count < sps->submodule_names->nr; sps->index_count++) {
+		struct submodule_status_util *util = sps->submodule_names->items[sps->index_count].util;
+		const struct cache_entry *ce = util->ce;
+		struct status_task *task;
+		struct strbuf buf = STRBUF_INIT;
+		const char *git_dir;
+
+		strbuf_addf(&buf, "%s/.git", ce->name);
+		git_dir = read_gitfile(buf.buf);
+		if (!git_dir)
+			git_dir = buf.buf;
+		if (!is_git_directory(git_dir)) {
+			if (is_directory(git_dir))
+				die(_("'%s' not recognized as a git repository"), git_dir);
+			strbuf_release(&buf);
+			/* The submodule is not checked out, so it is not modified */
+			util->dirty_submodule = 0;
+			continue;
+		}
+		strbuf_release(&buf);
+
+		task = xmalloc(sizeof(*task));
+		memset(task, 0, sizeof(*task));
+		task->path = ce->name;
+		task->ignore_untracked = util->ignore_untracked;
+		sps->index_count++;
+		return task;
+	}
+	return NULL;
+}
+
 static struct fetch_task *
 get_fetch_task_from_index(struct submodule_parallel_fetch *spf,
 			  struct strbuf *err)
@@ -1744,6 +1799,16 @@ static int fetch_start_failure(struct strbuf *err,
 	return 0;
 }
 
+static int status_start_failure(struct strbuf *err,
+			       void *cb, void *task_cb)
+{
+	struct submodule_parallel_status *sps = cb;
+
+	sps->result = 1;
+	return 0;
+}
+
+
 static int commit_missing_in_sub(const struct object_id *oid, void *data)
 {
 	struct repository *subrepo = data;
@@ -1790,6 +1855,30 @@ static int parse_status_porcelain(char *buf, unsigned *dirty_submodule, int igno
 	return 0;
 }
 
+static int status_finish(int retvalue, struct strbuf *err,
+			void *cb, void *task_cb)
+{
+	struct submodule_parallel_status *sps = cb;
+	struct status_task *task = task_cb;
+	struct string_list list = STRING_LIST_INIT_DUP;
+	struct string_list_item *it = string_list_lookup(sps->submodule_names,
+											task->path);
+	struct submodule_status_util *util = it->util;
+
+	int i;
+
+	string_list_split(&list, err->buf, '\n', -1);
+
+	for (i = 0; i < list.nr; i++) {
+		if (parse_status_porcelain(list.items[i].string,
+							&util->dirty_submodule, util->ignore_untracked))
+			break;
+	}
+	strbuf_reset(err);
+
+	return 0;
+}
+
 static int fetch_finish(int retvalue, struct strbuf *err,
 			void *cb, void *task_cb)
 {
@@ -1943,6 +2032,59 @@ unsigned is_submodule_modified(const char *path, int ignore_untracked)
 	return dirty_submodule;
 }
 
+static int get_next_submodule_status(struct child_process *cp,
+						struct strbuf *err, void *data, void **task_cb)
+{
+	struct submodule_parallel_status *sps = data;
+	struct status_task *task = get_status_task_from_index(sps, err);
+
+	int ignore_untracked;
+
+	if (!task) {
+		return 0;
+	}
+
+	ignore_untracked = task->ignore_untracked;
+
+	child_process_init(cp);
+	prepare_submodule_repo_env_in_gitdir(&cp->env);
+
+	strvec_init(&cp->args);
+	strvec_pushl(&cp->args, "status", "--porcelain=2", NULL);
+	if (ignore_untracked)
+		strvec_push(&cp->args, "-uno");
+
+	prepare_submodule_repo_env(&cp->env);
+	cp->git_cmd = 1;
+	cp->no_stdin = 1;
+	cp->dir = task->path;
+	*task_cb = task;
+	return 1;
+}
+
+int get_submodules_status(struct repository *r,
+			struct string_list *submodules,
+		    int max_parallel_jobs)
+{
+	struct submodule_parallel_status sps = SPS_INIT;
+
+	sps.r = r;
+
+	if (repo_read_index(r) < 0)
+		die(_("index file corrupt"));
+
+	sps.submodule_names = submodules;
+	string_list_sort(sps.submodule_names);
+	run_processes_parallel_pipe_output = 1;
+	run_processes_parallel_tr2(max_parallel_jobs,
+				get_next_submodule_status,
+				status_start_failure,
+				status_finish,
+				&sps,
+				"submodule", "parallel/status");
+	return sps.result;
+}
+
 int submodule_uses_gitfile(const char *path)
 {
 	struct child_process cp = CHILD_PROCESS_INIT;
diff --git a/submodule.h b/submodule.h
index bfaa9da186..18a42c64ce 100644
--- a/submodule.h
+++ b/submodule.h
@@ -41,6 +41,12 @@ struct submodule_update_strategy {
 	.type = SM_UPDATE_UNSPECIFIED, \
 }
 
+struct submodule_status_util {
+	int changed, ignore_untracked;
+	unsigned dirty_submodule, newmode;
+	struct cache_entry *ce;
+};
+
 int is_gitmodules_unmerged(struct index_state *istate);
 int is_writing_gitmodules_ok(void);
 int is_staging_gitmodules_ok(struct index_state *istate);
@@ -94,6 +100,9 @@ int fetch_submodules(struct repository *r,
 		     int command_line_option,
 		     int default_option,
 		     int quiet, int max_parallel_jobs);
+int get_submodules_status(struct repository *r,
+			 struct string_list *submodules,
+		     int max_parallel_jobs);
 unsigned is_submodule_modified(const char *path, int ignore_untracked);
 int submodule_uses_gitfile(const char *path);
 
diff --git a/wt-status.c b/wt-status.c
index 867e3e417e..9864484f81 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -615,7 +615,7 @@ static void wt_status_collect_changes_worktree(struct wt_status *s)
 	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
 	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
-	run_diff_files(&rev, 0);
+	run_diff_files(&rev, 0, s->parallel_jobs_submodules);
 	release_revisions(&rev);
 }
 
@@ -1149,7 +1149,7 @@ static void wt_longstatus_print_verbose(struct wt_status *s)
 		setup_work_tree();
 		rev.diffopt.a_prefix = "i/";
 		rev.diffopt.b_prefix = "w/";
-		run_diff_files(&rev, 0);
+		run_diff_files(&rev, 0, -1);
 	}
 	release_revisions(&rev);
 }
@@ -2544,7 +2544,7 @@ int has_unstaged_changes(struct repository *r, int ignore_submodules)
 	}
 	rev_info.diffopt.flags.quick = 1;
 	diff_setup_done(&rev_info.diffopt);
-	result = run_diff_files(&rev_info, 0);
+	result = run_diff_files(&rev_info, 0, -1);
 	result = diff_result_code(&rev_info.diffopt, result);
 	release_revisions(&rev_info);
 	return result;
diff --git a/wt-status.h b/wt-status.h
index ab9cc9d8f0..2ea2317715 100644
--- a/wt-status.h
+++ b/wt-status.h
@@ -131,6 +131,7 @@ struct wt_status {
 	enum wt_status_format status_format;
 	struct wt_status_state state;
 	struct object_id oid_commit; /* when not Initial */
+	int parallel_jobs_submodules;
 
 	/* These are computed during processing of the individual sections */
 	int committable;
-- 
2.37.3.998.g577e59143f-goog




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux