[PATCH v3 2/6] hook: allow parallel hook execution

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In many cases, there's no reason not to allow hooks to execute in
parallel, if more than one was provided. hook.c already calls
run_processes_parallel(), so all we need to do is allow the job count we
hand to run_processes_parallel() to be greater than 1.

If users have specified no alternative, we can use the processor count
from online_cpus() to run an efficient number of tasks at once. However,
users can also override this number if they want by configuring
'hook.jobs'.

To avoid looking up 'hook.jobs' in cases where we don't end up with any
hooks to run anyways, teach the hook runner commands to notice if
.jobs==0 and do a config or online_cpus() lookup if so, when we already
know we have jobs to run.

Serial execution can still be performed by setting .jobs == 1.

Signed-off-by: Emily Shaffer <emilyshaffer@xxxxxxxxxx>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>
---
 Documentation/config/hook.txt |  4 ++++
 Documentation/git-hook.txt    | 17 ++++++++++++++++-
 builtin/am.c                  |  4 ++--
 builtin/checkout.c            |  2 +-
 builtin/clone.c               |  2 +-
 builtin/hook.c                |  4 +++-
 builtin/merge.c               |  2 +-
 builtin/rebase.c              |  2 +-
 builtin/receive-pack.c        |  9 +++++----
 builtin/worktree.c            |  2 +-
 commit.c                      |  2 +-
 hook.c                        | 36 +++++++++++++++++++++++++++++++----
 hook.h                        | 24 ++++++++++++++++++-----
 read-cache.c                  |  2 +-
 refs.c                        |  2 +-
 reset.c                       |  3 ++-
 sequencer.c                   |  4 ++--
 transport.c                   |  2 +-
 18 files changed, 94 insertions(+), 29 deletions(-)
 create mode 100644 Documentation/config/hook.txt

diff --git a/Documentation/config/hook.txt b/Documentation/config/hook.txt
new file mode 100644
index 0000000000..96d3d6572c
--- /dev/null
+++ b/Documentation/config/hook.txt
@@ -0,0 +1,4 @@
+hook.jobs::
+	Specifies how many hooks can be run simultaneously during parallelized
+	hook execution. If unspecified, defaults to the number of processors on
+	the current system.
diff --git a/Documentation/git-hook.txt b/Documentation/git-hook.txt
index fa68c1f391..8bf82b5dd4 100644
--- a/Documentation/git-hook.txt
+++ b/Documentation/git-hook.txt
@@ -8,7 +8,8 @@ git-hook - run git hooks
 SYNOPSIS
 --------
 [verse]
-'git hook' run [--to-stdin=<path>] [--ignore-missing] <hook-name> [-- <hook-args>]
+'git hook' run [--to-stdin=<path>] [--ignore-missing] [(-j|--jobs) <n>]
+	<hook-name> [-- <hook-args>]
 
 DESCRIPTION
 -----------
@@ -42,6 +43,20 @@ OPTIONS
 	tools that want to do a blind one-shot run of a hook that may
 	or may not be present.
 
+-j::
+--jobs::
+	Only valid for `run`.
++
+Specify how many hooks to run simultaneously. If this flag is not specified, use
+the value of the `hook.jobs` config. If the config is not specified, use the
+number of CPUs on the current system. Some hooks may be ineligible for
+parallelization: for example, 'commit-msg' intends hooks modify the commit
+message body and cannot be parallelized.
+
+CONFIGURATION
+-------------
+include::config/hook.txt[]
+
 SEE ALSO
 --------
 linkgit:githooks[5]
diff --git a/builtin/am.c b/builtin/am.c
index 9e3d4d9ab4..c7ffc7eafc 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -446,7 +446,7 @@ static void am_destroy(const struct am_state *state)
 static int run_applypatch_msg_hook(struct am_state *state)
 {
 	int ret;
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SERIAL;
 
 	assert(state->msg);
 	strvec_push(&opt.args, am_path(state, "final-commit"));
@@ -467,7 +467,7 @@ static int run_applypatch_msg_hook(struct am_state *state)
  */
 static int run_post_rewrite_hook(const struct am_state *state)
 {
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 
 	strvec_push(&opt.args, "rebase");
 	opt.path_to_stdin = am_path(state, "rewritten");
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 6d69b4c011..2cb5c67f64 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -107,7 +107,7 @@ struct branch_info {
 static int post_checkout_hook(struct commit *old_commit, struct commit *new_commit,
 			      int changed)
 {
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SERIAL;
 
 	/* "new_commit" can be NULL when checking out from the index before
 	   a commit exists. */
diff --git a/builtin/clone.c b/builtin/clone.c
index 27fc05ee51..986c3b1932 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -776,7 +776,7 @@ static int checkout(int submodule_progress)
 	struct tree *tree;
 	struct tree_desc t;
 	int err = 0;
-	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_SERIAL;
 
 	if (option_no_checkout)
 		return 0;
diff --git a/builtin/hook.c b/builtin/hook.c
index e18357ba1f..4dd3617876 100644
--- a/builtin/hook.c
+++ b/builtin/hook.c
@@ -22,7 +22,7 @@ static const char * const builtin_hook_run_usage[] = {
 static int run(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SERIAL;
 	int ignore_missing = 0;
 	const char *hook_name;
 	struct list_head *hooks;
@@ -32,6 +32,8 @@ static int run(int argc, const char **argv, const char *prefix)
 			 N_("exit quietly with a zero exit code if the requested hook cannot be found")),
 		OPT_STRING(0, "to-stdin", &opt.path_to_stdin, N_("path"),
 			   N_("file to read into hooks' stdin")),
+		OPT_INTEGER('j', "jobs", &opt.jobs,
+			    N_("run up to <n> hooks simultaneously")),
 		OPT_END(),
 	};
 	int ret;
diff --git a/builtin/merge.c b/builtin/merge.c
index 9bd4a2532c..81d7ebbbf6 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -448,7 +448,7 @@ static void finish(struct commit *head_commit,
 		   const struct object_id *new_head, const char *msg)
 {
 	struct strbuf reflog_message = STRBUF_INIT;
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 	const struct object_id *head = &head_commit->object.oid;
 
 	if (!msg)
diff --git a/builtin/rebase.c b/builtin/rebase.c
index e7c668c99b..ad118c983c 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -1314,7 +1314,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix)
 	char *squash_onto_name = NULL;
 	int reschedule_failed_exec = -1;
 	int allow_preemptive_ff = 1;
-	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 	struct option builtin_rebase_options[] = {
 		OPT_STRING(0, "onto", &options.onto_name,
 			   N_("revision"),
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index ebec6f3bb1..7460124b74 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -909,7 +909,7 @@ static int run_receive_hook(struct command *commands,
 			    int skip_broken,
 			    const struct string_list *push_options)
 {
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 	struct receive_hook_feed_context ctx;
 	struct command *iter = commands;
 
@@ -948,7 +948,7 @@ static int run_receive_hook(struct command *commands,
 
 static int run_update_hook(struct command *cmd)
 {
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 
 	strvec_pushl(&opt.args,
 		     cmd->ref_name,
@@ -1432,7 +1432,8 @@ static const char *push_to_checkout(unsigned char *hash,
 				    struct strvec *env,
 				    const char *work_tree)
 {
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SERIAL;
+
 	opt.invoked_hook = invoked_hook;
 
 	strvec_pushf(env, "GIT_WORK_TREE=%s", absolute_path(work_tree));
@@ -1628,7 +1629,7 @@ static const char *update(struct command *cmd, struct shallow_info *si)
 static void run_update_post_hook(struct command *commands)
 {
 	struct command *cmd;
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 
 	for (cmd = commands; cmd; cmd = cmd->next) {
 		if (cmd->error_string || cmd->did_not_exist)
diff --git a/builtin/worktree.c b/builtin/worktree.c
index 330867c19b..3090506790 100644
--- a/builtin/worktree.c
+++ b/builtin/worktree.c
@@ -382,7 +382,7 @@ static int add_worktree(const char *path, const char *refname,
 	 * is_junk is cleared, but do return appropriate code when hook fails.
 	 */
 	if (!ret && opts->checkout) {
-		struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+		struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SERIAL;
 
 		strvec_pushl(&opt.env, "GIT_DIR", "GIT_WORK_TREE", NULL);
 		strvec_pushl(&opt.args,
diff --git a/commit.c b/commit.c
index 842e47beae..a38bd04752 100644
--- a/commit.c
+++ b/commit.c
@@ -1700,7 +1700,7 @@ int run_commit_hook(int editor_is_used, const char *index_file,
 		    int *invoked_hook,
 		    const char *name, ...)
 {
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SERIAL;
 	va_list args;
 	const char *arg;
 
diff --git a/hook.c b/hook.c
index 333cfd633a..b8420353fa 100644
--- a/hook.c
+++ b/hook.c
@@ -227,6 +227,28 @@ static int notify_hook_finished(int result,
 	return 0;
 }
 
+/*
+ * Determines how many jobs to use after we know we want to parallelize. First
+ * priority is the config 'hook.jobs' and second priority is the number of CPUs.
+ */
+static int configured_hook_jobs(void)
+{
+	/*
+	 * The config and the CPU count probably won't change during the process
+	 * lifetime, so cache the result in case we invoke multiple hooks during
+	 * one process.
+	 */
+	static int jobs = 0;
+	if (jobs)
+		return jobs;
+
+	if (git_config_get_int("hook.jobs", &jobs))
+		/* if the config isn't set, fall back to CPU count. */
+		jobs = online_cpus();
+
+	return jobs;
+}
+
 int run_hooks(const char *hook_name, struct list_head *hooks,
 		    struct run_hooks_opt *options)
 {
@@ -236,16 +258,18 @@ int run_hooks(const char *hook_name, struct list_head *hooks,
 		.options = options,
 		.invoked_hook = options->invoked_hook,
 	};
-	int jobs = 1;
 
 	if (!options)
 		BUG("a struct run_hooks_opt must be provided to run_hooks");
 
-
 	cb_data.head = hooks;
 	cb_data.run_me = list_first_entry(hooks, struct hook, list);
 
-	run_processes_parallel_tr2(jobs,
+	/* INIT_PARALLEL sets jobs to 0, so go look up how many to use. */
+	if (!options->jobs)
+		options->jobs = configured_hook_jobs();
+
+	run_processes_parallel_tr2(options->jobs,
 				   pick_next_hook,
 				   notify_start_failure,
 				   options->feed_pipe,
@@ -264,7 +288,11 @@ int run_hooks_oneshot(const char *hook_name, struct run_hooks_opt *options)
 {
 	struct list_head *hooks;
 	int ret = 0;
-	struct run_hooks_opt hook_opt_scratch = RUN_HOOKS_OPT_INIT;
+	/*
+	 * Turn on parallelism by default. Hooks which don't want it should
+	 * specify their options accordingly.
+	 */
+	struct run_hooks_opt hook_opt_scratch = RUN_HOOKS_OPT_INIT_PARALLEL;
 
 	if (!options)
 		options = &hook_opt_scratch;
diff --git a/hook.h b/hook.h
index 12b56616bb..cd3123d290 100644
--- a/hook.h
+++ b/hook.h
@@ -43,6 +43,13 @@ struct run_hooks_opt
 	/* Args to be passed to each hook */
 	struct strvec args;
 
+	/*
+	 * Number of threads to parallelize across. Set to 0 to use the
+	 * 'hook.jobs' config or, if that config is unset, the number of cores
+	 * on the system.
+	 */
+	int jobs;
+
 	/* Resolve and run the "absolute_path(hook)" instead of
 	 * "hook". Used for "git worktree" hooks
 	 */
@@ -85,11 +92,6 @@ struct run_hooks_opt
 	int *invoked_hook;
 };
 
-#define RUN_HOOKS_OPT_INIT { \
-	.env = STRVEC_INIT, \
-	.args = STRVEC_INIT, \
-}
-
 /*
  * To specify a 'struct string_list', set 'run_hooks_opt.feed_pipe_ctx' to the
  * string_list and set 'run_hooks_opt.feed_pipe' to 'pipe_from_string_list()'.
@@ -111,6 +113,18 @@ struct hook_cb_data {
 	int *invoked_hook;
 };
 
+#define RUN_HOOKS_OPT_INIT_SERIAL { \
+	.jobs = 1, \
+	.env = STRVEC_INIT, \
+	.args = STRVEC_INIT, \
+}
+
+#define RUN_HOOKS_OPT_INIT_PARALLEL { \
+	.jobs = 0, \
+	.env = STRVEC_INIT, \
+	.args = STRVEC_INIT, \
+}
+
 void run_hooks_opt_clear(struct run_hooks_opt *o);
 
 /**
diff --git a/read-cache.c b/read-cache.c
index 90099ca14d..f157e62531 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3068,7 +3068,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l
 {
 	int ret;
 	int was_full = !istate->sparse_index;
-	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 
 	ret = convert_to_sparse(istate);
 
diff --git a/refs.c b/refs.c
index 73d4a93926..5543b8cdab 100644
--- a/refs.c
+++ b/refs.c
@@ -2062,7 +2062,7 @@ int ref_update_reject_duplicates(struct string_list *refnames,
 static int run_transaction_hook(struct ref_transaction *transaction,
 				const char *state)
 {
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 	struct string_list to_stdin = STRING_LIST_INIT_NODUP;
 	int ret = 0, i;
 
diff --git a/reset.c b/reset.c
index 6499bc5127..1941adb771 100644
--- a/reset.c
+++ b/reset.c
@@ -128,7 +128,8 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action,
 					    reflog_head);
 	}
 	if (run_hook) {
-		struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+		struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
+
 		strvec_pushl(&opt.args,
 			     oid_to_hex(orig ? orig : null_oid()),
 			     oid_to_hex(oid),
diff --git a/sequencer.c b/sequencer.c
index f451e23d0c..30fbe79b8a 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1148,7 +1148,7 @@ int update_head_with_reflog(const struct commit *old_head,
 static int run_rewrite_hook(const struct object_id *oldoid,
 			    const struct object_id *newoid)
 {
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 	struct strbuf tmp = STRBUF_INIT;
 	struct string_list to_stdin = STRING_LIST_INIT_DUP;
 	int code;
@@ -4522,7 +4522,7 @@ static int pick_commits(struct repository *r,
 		if (!stat(rebase_path_rewritten_list(), &st) &&
 				st.st_size > 0) {
 			struct child_process notes_cp = CHILD_PROCESS_INIT;
-			struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT;
+			struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 
 			notes_cp.in = open(rebase_path_rewritten_list(), O_RDONLY);
 			notes_cp.git_cmd = 1;
diff --git a/transport.c b/transport.c
index 4ca8fc0391..33da71a108 100644
--- a/transport.c
+++ b/transport.c
@@ -1204,7 +1204,7 @@ static int run_pre_push_hook(struct transport *transport,
 			     struct ref *remote_refs)
 {
 	int ret = 0;
-	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_PARALLEL;
 	struct ref *r;
 	struct string_list to_stdin = STRING_LIST_INIT_NODUP;
 
-- 
2.33.0.rc2.250.ged5fa647cd-goog





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux