In many cases, there's no reason not to allow hooks to execute in parallel. run_processes_parallel() is well-suited - it's a task queue that runs its housekeeping in series, which means users don't need to worry about thread safety on their callback data. True multithreaded execution with the async_* functions isn't necessary here. Synchronous hook execution can be achieved by only allowing 1 job to run at a time. Teach run_hooks() to use that function for simple hooks which don't require stdin or capture of stderr. Signed-off-by: Emily Shaffer <emilyshaffer@xxxxxxxxxx> Helped-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> --- Documentation/config/hook.txt | 4 ++++ Documentation/git-hook.txt | 17 ++++++++++++++++- builtin/am.c | 4 ++-- builtin/checkout.c | 2 +- builtin/clone.c | 2 +- builtin/hook.c | 4 +++- builtin/merge.c | 2 +- builtin/rebase.c | 2 +- builtin/receive-pack.c | 9 +++++---- builtin/worktree.c | 2 +- commit.c | 2 +- hook.c | 36 +++++++++++++++++++++++++++++++---- hook.h | 24 ++++++++++++++++++----- read-cache.c | 2 +- refs.c | 2 +- reset.c | 3 ++- sequencer.c | 4 ++-- transport.c | 2 +- 18 files changed, 94 insertions(+), 29 deletions(-) create mode 100644 Documentation/config/hook.txt diff --git a/Documentation/config/hook.txt b/Documentation/config/hook.txt new file mode 100644 index 0000000000..96d3d6572c --- /dev/null +++ b/Documentation/config/hook.txt @@ -0,0 +1,4 @@ +hook.jobs:: + Specifies how many hooks can be run simultaneously during parallelized + hook execution. If unspecified, defaults to the number of processors on + the current system. diff --git a/Documentation/git-hook.txt b/Documentation/git-hook.txt index fa68c1f391..8bf82b5dd4 100644 --- a/Documentation/git-hook.txt +++ b/Documentation/git-hook.txt @@ -8,7 +8,8 @@ git-hook - run git hooks SYNOPSIS -------- [verse] -'git hook' run [--to-stdin=<path>] [--ignore-missing] <hook-name> [-- <hook-args>] +'git hook' run [--to-stdin=<path>] [--ignore-missing] [(-j|--jobs) <n>] + <hook-name> [-- <hook-args>] DESCRIPTION ----------- @@ -42,6 +43,20 @@ OPTIONS tools that want to do a blind one-shot run of a hook that may or may not be present. +-j:: +--jobs:: + Only valid for `run`. ++ +Specify how many hooks to run simultaneously. If this flag is not specified, use +the value of the `hook.jobs` config. If the config is not specified, use the +number of CPUs on the current system. Some hooks may be ineligible for +parallelization: for example, 'commit-msg' intends hooks modify the commit +message body and cannot be parallelized. + +CONFIGURATION +------------- +include::config/hook.txt[] + SEE ALSO -------- linkgit:githooks[5] diff --git a/builtin/am.c b/builtin/am.c index 9e3d4d9ab4..c24a27d6a1 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -446,7 +446,7 @@ static void am_destroy(const struct am_state *state) static int run_applypatch_msg_hook(struct am_state *state) { int ret; - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SYNC; assert(state->msg); strvec_push(&opt.args, am_path(state, "final-commit")); @@ -467,7 +467,7 @@ static int run_applypatch_msg_hook(struct am_state *state) */ static int run_post_rewrite_hook(const struct am_state *state) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; strvec_push(&opt.args, "rebase"); opt.path_to_stdin = am_path(state, "rewritten"); diff --git a/builtin/checkout.c b/builtin/checkout.c index 6d69b4c011..27166c0bb8 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -107,7 +107,7 @@ struct branch_info { static int post_checkout_hook(struct commit *old_commit, struct commit *new_commit, int changed) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SYNC; /* "new_commit" can be NULL when checking out from the index before a commit exists. */ diff --git a/builtin/clone.c b/builtin/clone.c index 27fc05ee51..599e7a7936 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -776,7 +776,7 @@ static int checkout(int submodule_progress) struct tree *tree; struct tree_desc t; int err = 0; - struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_SYNC; if (option_no_checkout) return 0; diff --git a/builtin/hook.c b/builtin/hook.c index 4d39c9e75e..12c9126032 100644 --- a/builtin/hook.c +++ b/builtin/hook.c @@ -22,7 +22,7 @@ static const char * const builtin_hook_run_usage[] = { static int run(int argc, const char **argv, const char *prefix) { int i; - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SYNC; int ignore_missing = 0; const char *hook_name; struct list_head *hooks; @@ -32,6 +32,8 @@ static int run(int argc, const char **argv, const char *prefix) N_("exit quietly with a zero exit code if the requested hook cannot be found")), OPT_STRING(0, "to-stdin", &opt.path_to_stdin, N_("path"), N_("file to read into hooks' stdin")), + OPT_INTEGER('j', "jobs", &opt.jobs, + N_("run up to <n> hooks simultaneously")), OPT_END(), }; int ret; diff --git a/builtin/merge.c b/builtin/merge.c index 9bd4a2532c..c749c382c3 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -448,7 +448,7 @@ static void finish(struct commit *head_commit, const struct object_id *new_head, const char *msg) { struct strbuf reflog_message = STRBUF_INIT; - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; const struct object_id *head = &head_commit->object.oid; if (!msg) diff --git a/builtin/rebase.c b/builtin/rebase.c index e7c668c99b..fecf248ed9 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -1314,7 +1314,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) char *squash_onto_name = NULL; int reschedule_failed_exec = -1; int allow_preemptive_ff = 1; - struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_ASYNC; struct option builtin_rebase_options[] = { OPT_STRING(0, "onto", &options.onto_name, N_("revision"), diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index ebec6f3bb1..b32dcc9000 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -909,7 +909,7 @@ static int run_receive_hook(struct command *commands, int skip_broken, const struct string_list *push_options) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; struct receive_hook_feed_context ctx; struct command *iter = commands; @@ -948,7 +948,7 @@ static int run_receive_hook(struct command *commands, static int run_update_hook(struct command *cmd) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; strvec_pushl(&opt.args, cmd->ref_name, @@ -1432,7 +1432,8 @@ static const char *push_to_checkout(unsigned char *hash, struct strvec *env, const char *work_tree) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SYNC; + opt.invoked_hook = invoked_hook; strvec_pushf(env, "GIT_WORK_TREE=%s", absolute_path(work_tree)); @@ -1628,7 +1629,7 @@ static const char *update(struct command *cmd, struct shallow_info *si) static void run_update_post_hook(struct command *commands) { struct command *cmd; - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; for (cmd = commands; cmd; cmd = cmd->next) { if (cmd->error_string || cmd->did_not_exist) diff --git a/builtin/worktree.c b/builtin/worktree.c index 330867c19b..efead564c1 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -382,7 +382,7 @@ static int add_worktree(const char *path, const char *refname, * is_junk is cleared, but do return appropriate code when hook fails. */ if (!ret && opts->checkout) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SYNC; strvec_pushl(&opt.env, "GIT_DIR", "GIT_WORK_TREE", NULL); strvec_pushl(&opt.args, diff --git a/commit.c b/commit.c index 842e47beae..0e6e5a5b27 100644 --- a/commit.c +++ b/commit.c @@ -1700,7 +1700,7 @@ int run_commit_hook(int editor_is_used, const char *index_file, int *invoked_hook, const char *name, ...) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_SYNC; va_list args; const char *arg; diff --git a/hook.c b/hook.c index 80e150548c..37f682c6d8 100644 --- a/hook.c +++ b/hook.c @@ -228,6 +228,28 @@ static int notify_hook_finished(int result, return 0; } +/* + * Determines how many jobs to use after we know we want to parallelize. First + * priority is the config 'hook.jobs' and second priority is the number of CPUs. + */ +static int configured_hook_jobs(void) +{ + /* + * The config and the CPU count probably won't change during the process + * lifetime, so cache the result in case we invoke multiple hooks during + * one process. + */ + static int jobs = 0; + if (jobs) + return jobs; + + if (git_config_get_int("hook.jobs", &jobs)) + /* if the config isn't set, fall back to CPU count. */ + jobs = online_cpus(); + + return jobs; +} + int run_hooks(const char *hook_name, struct list_head *hooks, struct run_hooks_opt *options) { @@ -237,16 +259,18 @@ int run_hooks(const char *hook_name, struct list_head *hooks, .options = options, .invoked_hook = options->invoked_hook, }; - int jobs = 1; if (!options) BUG("a struct run_hooks_opt must be provided to run_hooks"); - cb_data.head = hooks; cb_data.run_me = list_first_entry(hooks, struct hook, list); - run_processes_parallel_tr2(jobs, + /* INIT_ASYNC sets jobs to 0, so go look up how many to use. */ + if (!options->jobs) + options->jobs = configured_hook_jobs(); + + run_processes_parallel_tr2(options->jobs, pick_next_hook, notify_start_failure, options->feed_pipe, @@ -265,7 +289,11 @@ int run_hooks_oneshot(const char *hook_name, struct run_hooks_opt *options) { struct list_head *hooks; int ret = 0; - struct run_hooks_opt hook_opt_scratch = RUN_HOOKS_OPT_INIT; + /* + * Turn on parallelism by default. Hooks which don't want it should + * specify their options accordingly. + */ + struct run_hooks_opt hook_opt_scratch = RUN_HOOKS_OPT_INIT_ASYNC; if (!options) options = &hook_opt_scratch; diff --git a/hook.h b/hook.h index 7705e6a529..4f90228a0c 100644 --- a/hook.h +++ b/hook.h @@ -43,6 +43,13 @@ struct run_hooks_opt /* Args to be passed to each hook */ struct strvec args; + /* + * Number of threads to parallelize across. Set to 0 to use the + * 'hook.jobs' config or, if that config is unset, the number of cores + * on the system. + */ + int jobs; + /* Resolve and run the "absolute_path(hook)" instead of * "hook". Used for "git worktree" hooks */ @@ -85,11 +92,6 @@ struct run_hooks_opt int *invoked_hook; }; -#define RUN_HOOKS_OPT_INIT { \ - .env = STRVEC_INIT, \ - .args = STRVEC_INIT, \ -} - /* * To specify a 'struct string_list', set 'run_hooks_opt.feed_pipe_ctx' to the * string_list and set 'run_hooks_opt.feed_pipe' to 'pipe_from_string_list()'. @@ -111,6 +113,18 @@ struct hook_cb_data { int *invoked_hook; }; +#define RUN_HOOKS_OPT_INIT_SYNC { \ + .jobs = 1, \ + .env = STRVEC_INIT, \ + .args = STRVEC_INIT, \ +} + +#define RUN_HOOKS_OPT_INIT_ASYNC { \ + .jobs = 0, \ + .env = STRVEC_INIT, \ + .args = STRVEC_INIT, \ +} + void run_hooks_opt_clear(struct run_hooks_opt *o); /** diff --git a/read-cache.c b/read-cache.c index 90099ca14d..fd2bc67667 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3068,7 +3068,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l { int ret; int was_full = !istate->sparse_index; - struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_ASYNC; ret = convert_to_sparse(istate); diff --git a/refs.c b/refs.c index 73d4a93926..305a075746 100644 --- a/refs.c +++ b/refs.c @@ -2062,7 +2062,7 @@ int ref_update_reject_duplicates(struct string_list *refnames, static int run_transaction_hook(struct ref_transaction *transaction, const char *state) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; struct string_list to_stdin = STRING_LIST_INIT_NODUP; int ret = 0, i; diff --git a/reset.c b/reset.c index 6499bc5127..b93fe6a783 100644 --- a/reset.c +++ b/reset.c @@ -128,7 +128,8 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, reflog_head); } if (run_hook) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; + strvec_pushl(&opt.args, oid_to_hex(orig ? orig : null_oid()), oid_to_hex(oid), diff --git a/sequencer.c b/sequencer.c index f451e23d0c..f13a0cbfbe 100644 --- a/sequencer.c +++ b/sequencer.c @@ -1148,7 +1148,7 @@ int update_head_with_reflog(const struct commit *old_head, static int run_rewrite_hook(const struct object_id *oldoid, const struct object_id *newoid) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; struct strbuf tmp = STRBUF_INIT; struct string_list to_stdin = STRING_LIST_INIT_DUP; int code; @@ -4522,7 +4522,7 @@ static int pick_commits(struct repository *r, if (!stat(rebase_path_rewritten_list(), &st) && st.st_size > 0) { struct child_process notes_cp = CHILD_PROCESS_INIT; - struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_ASYNC; notes_cp.in = open(rebase_path_rewritten_list(), O_RDONLY); notes_cp.git_cmd = 1; diff --git a/transport.c b/transport.c index 4ca8fc0391..abf8785885 100644 --- a/transport.c +++ b/transport.c @@ -1204,7 +1204,7 @@ static int run_pre_push_hook(struct transport *transport, struct ref *remote_refs) { int ret = 0; - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_ASYNC; struct ref *r; struct string_list to_stdin = STRING_LIST_INIT_NODUP; -- 2.32.0.605.g8dce9f2422-goog