[PATCH v8 0/6] submodule: parallelize diff

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Original cover letter for context:
https://lore.kernel.org/git/20221011232604.839941-1-calvinwan@xxxxxxxxxx/

This reroll contains stylistic changes suggested by Avar and Phillip,
and includes a range-diff below.

Calvin Wan (6):
  run-command: add duplicate_output_fn to run_processes_parallel_opts
  submodule: strbuf variable rename
  submodule: move status parsing into function
  submodule: refactor is_submodule_modified()
  diff-lib: refactor out diff_change logic
  diff-lib: parallelize run_diff_files for submodules

 Documentation/config/submodule.txt |  12 ++
 diff-lib.c                         | 133 +++++++++++----
 run-command.c                      |  16 +-
 run-command.h                      |  25 +++
 submodule.c                        | 266 ++++++++++++++++++++++++-----
 submodule.h                        |   9 +
 t/helper/test-run-command.c        |  20 +++
 t/t0061-run-command.sh             |  39 +++++
 t/t4027-diff-submodule.sh          |  31 ++++
 t/t7506-status-submodule.sh        |  25 +++
 10 files changed, 497 insertions(+), 79 deletions(-)

Range-diff against v7:
1:  311b1abfbe ! 1:  5d51250c67 run-command: add duplicate_output_fn to run_processes_parallel_opts
    @@ run-command.c: static void pp_init(struct parallel_processes *pp,
      	if (!opts->get_next_task)
      		BUG("you need to specify a get_next_task function");
      
    -+	if (opts->duplicate_output && opts->ungroup)
    -+		BUG("duplicate_output and ungroup are incompatible with each other");
    ++	if (opts->ungroup) {
    ++		if (opts->duplicate_output)
    ++			BUG("duplicate_output and ungroup are incompatible with each other");
    ++	}
     +
      	CALLOC_ARRAY(pp->children, n);
      	if (!opts->ungroup)
    @@ run-command.c: static void pp_buffer_stderr(struct parallel_processes *pp,
     +			} else if (n < 0) {
      				if (errno != EAGAIN)
      					die_errno("read");
    -+			} else {
    -+				if (opts->duplicate_output)
    -+					opts->duplicate_output(&pp->children[i].err,
    -+					       strlen(pp->children[i].err.buf) - n,
    -+					       opts->data,
    -+					       pp->children[i].data);
    ++			} else if (opts->duplicate_output) {
    ++				opts->duplicate_output(&pp->children[i].err,
    ++					pp->children[i].err.len - n,
    ++					opts->data, pp->children[i].data);
     +			}
      		}
      	}
    @@ run-command.h: typedef int (*start_failure_fn)(struct strbuf *out,
     + *
     + * This function is incompatible with "ungroup"
     + */
    -+typedef void (*duplicate_output_fn)(struct strbuf *out,
    -+				    size_t offset,
    -+				    void *pp_cb,
    -+				    void *pp_task_cb);
    ++typedef void (*duplicate_output_fn)(struct strbuf *out, size_t offset,
    ++				    void *pp_cb, void *pp_task_cb);
     +
      /**
       * This callback is called on every child process that finished processing.
    @@ run-command.h: struct run_process_parallel_opts
      	start_failure_fn start_failure;
      
     +	/**
    -+	 * duplicate_output: See duplicate_output_fn() above. This should be
    -+	 * NULL unless process specific output is needed
    ++	 * duplicate_output: See duplicate_output_fn() above. Unless you need
    ++	 * to capture output from child processes, leave this as NULL.
     +	 */
     +	duplicate_output_fn duplicate_output;
     +
    @@ t/helper/test-run-command.c: static int no_job(struct child_process *cp,
     +			void *pp_task_cb UNUSED)
     +{
     +	struct string_list list = STRING_LIST_INIT_DUP;
    ++	struct string_list_item *item;
     +
     +	string_list_split(&list, out->buf + offset, '\n', -1);
    -+	for (size_t i = 0; i < list.nr; i++) {
    -+		if (strlen(list.items[i].string) > 0)
    -+			fprintf(stderr, "duplicate_output: %s\n", list.items[i].string);
    -+	}
    ++	for_each_string_list_item(item, &list)
    ++		fprintf(stderr, "duplicate_output: %s\n", item->string);
     +	string_list_clear(&list, 0);
     +}
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with m
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with a
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with m
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
2:  d00a18dd84 = 2:  6ded5b6788 submodule: strbuf variable rename
3:  dcda518922 = 3:  0c71cea8cd submodule: move status parsing into function
4:  c6fc5ba13b ! 4:  5c8cc93f9f submodule: refactor is_submodule_modified()
    @@ submodule.c: static int config_update_recurse_submodules = RECURSE_SUBMODULES_OF
      static int initialized_fetch_ref_tips;
      static struct oid_array ref_tips_before_fetch;
      static struct oid_array ref_tips_after_fetch;
    -+static const char *status_porcelain_start_error =
    -+	N_("could not run 'git status --porcelain=2' in submodule %s");
    -+static const char *status_porcelain_fail_error =
    -+	N_("'git status --porcelain=2' failed in submodule %s");
    ++#define STATUS_PORCELAIN_START_ERROR \
    ++	N_("could not run 'git status --porcelain=2' in submodule %s")
    ++#define STATUS_PORCELAIN_FAIL_ERROR \
    ++	N_("'git status --porcelain=2' failed in submodule %s")
      
      /*
       * Check if the .gitmodules file is unmerged. Parsing of the .gitmodules file
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +	prepare_status_porcelain(&cp, path, ignore_untracked);
      	if (start_command(&cp))
     -		die(_("Could not run 'git status --porcelain=2' in submodule %s"), path);
    -+		die(_(status_porcelain_start_error), path);
    ++		die(_(STATUS_PORCELAIN_START_ERROR), path);
      
      	fp = xfdopen(cp.out, "r");
      	while (strbuf_getwholeline(&buf, fp, '\n') != EOF) {
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
      
      	if (finish_command(&cp) && !ignore_cp_exit_code)
     -		die(_("'git status --porcelain=2' failed in submodule %s"), path);
    -+		die(_(status_porcelain_fail_error), path);
    ++		die(_(STATUS_PORCELAIN_FAIL_ERROR), path);
      
      	strbuf_release(&buf);
      	return dirty_submodule;
5:  1ea8eae9c9 = 5:  6c2b62abc8 diff-lib: refactor out diff_change logic
6:  0d35fcc38d < -:  ---------- diff-lib: refactor match_stat_with_submodule
7:  fd1eec974d ! 6:  bb25dadbe5 diff-lib: parallelize run_diff_files for submodules
    @@ diff-lib.c: static int check_removed(const struct index_state *istate, const str
     +				     unsigned *ignore_untracked)
      {
      	int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option);
    - 	struct diff_flags orig_flags;
    +-	if (S_ISGITLINK(ce->ce_mode)) {
    +-		struct diff_flags orig_flags = diffopt->flags;
    +-		if (!diffopt->flags.override_submodule_config)
    +-			set_diffopt_flags_from_submodule_config(diffopt, ce->name);
    +-		if (diffopt->flags.ignore_submodules)
    +-			changed = 0;
    +-		else if (!diffopt->flags.ignore_dirty_submodules &&
    +-			 (!changed || diffopt->flags.dirty_submodules))
    ++	struct diff_flags orig_flags;
     +	int defer = 0;
    - 
    - 	if (!S_ISGITLINK(ce->ce_mode))
    --		return changed;
    ++
    ++	if (!S_ISGITLINK(ce->ce_mode))
     +		goto ret;
    - 
    - 	orig_flags = diffopt->flags;
    - 	if (!diffopt->flags.override_submodule_config)
    -@@ diff-lib.c: static int match_stat_with_submodule(struct diff_options *diffopt,
    - 		goto cleanup;
    - 	}
    - 	if (!diffopt->flags.ignore_dirty_submodules &&
    --	    (!changed || diffopt->flags.dirty_submodules))
    --		*dirty_submodule = is_submodule_modified(ce->name,
    ++
    ++	orig_flags = diffopt->flags;
    ++	if (!diffopt->flags.override_submodule_config)
    ++		set_diffopt_flags_from_submodule_config(diffopt, ce->name);
    ++	if (diffopt->flags.ignore_submodules) {
    ++		changed = 0;
    ++		goto cleanup;
    ++	}
    ++	if (!diffopt->flags.ignore_dirty_submodules &&
     +	    (!changed || diffopt->flags.dirty_submodules)) {
     +		if (defer_submodule_status && *defer_submodule_status) {
     +			defer = 1;
     +			*ignore_untracked = diffopt->flags.ignore_untracked_in_submodules;
     +		} else {
    -+			*dirty_submodule = is_submodule_modified(ce->name,
    - 					 diffopt->flags.ignore_untracked_in_submodules);
    + 			*dirty_submodule = is_submodule_modified(ce->name,
    +-								 diffopt->flags.ignore_untracked_in_submodules);
    +-		diffopt->flags = orig_flags;
    ++					 diffopt->flags.ignore_untracked_in_submodules);
     +		}
    -+	}
    - cleanup:
    - 	diffopt->flags = orig_flags;
    + 	}
    ++cleanup:
    ++	diffopt->flags = orig_flags;
     +ret:
     +	if (defer_submodule_status)
     +		*defer_submodule_status = defer;
    @@ diff-lib.c: int run_diff_files(struct rev_info *revs, unsigned int option)
      				       changed, istate, ce))
      			continue;
      	}
    -+	if (submodules.nr > 0) {
    -+		int parallel_jobs;
    -+		if (git_config_get_int("submodule.diffjobs", &parallel_jobs))
    ++	if (submodules.nr) {
    ++		unsigned long parallel_jobs;
    ++		struct string_list_item *item;
    ++
    ++		if (git_config_get_ulong("submodule.diffjobs", &parallel_jobs))
     +			parallel_jobs = 1;
     +		else if (!parallel_jobs)
     +			parallel_jobs = online_cpus();
    -+		else if (parallel_jobs < 0)
    -+			die(_("submodule.diffjobs cannot be negative"));
     +
     +		if (get_submodules_status(&submodules, parallel_jobs))
     +			die(_("submodule status failed"));
    -+		for (size_t i = 0; i < submodules.nr; i++) {
    -+			struct submodule_status_util *util = submodules.items[i].util;
    ++		for_each_string_list_item(item, &submodules) {
    ++			struct submodule_status_util *util = item->util;
     +
     +			if (diff_change_helper(&revs->diffopt, util->newmode,
     +				       util->dirty_submodule, util->changed,
    @@ submodule.c: int submodule_touches_in_range(struct repository *r,
     +	int result;
     +
     +	struct string_list *submodule_names;
    -+
    -+	/* Pending statuses by OIDs */
    -+	struct status_task **oid_status_tasks;
    -+	int oid_status_tasks_nr, oid_status_tasks_alloc;
     +};
     +
      struct submodule_parallel_fetch {
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +	struct status_task *task = task_cb;
     +
     +	sps->result = 1;
    -+	strbuf_addf(err,
    -+	    _(status_porcelain_start_error),
    -+	    task->path);
    ++	strbuf_addf(err, _(STATUS_PORCELAIN_START_ERROR), task->path);
     +	return 0;
     +}
     +
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +
     +	if (retvalue) {
     +		sps->result = 1;
    -+		strbuf_addf(err,
    -+		    _(status_porcelain_fail_error),
    -+		    task->path);
    ++		strbuf_addf(err, _(STATUS_PORCELAIN_FAIL_ERROR), task->path);
     +	}
     +
     +	parse_status_porcelain_strbuf(&task->out,
-- 
2.39.1.519.gcb327c4b5f-goog




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux