Re: [PATCH v3 4/5] pack-objects: simplify --filter handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 29.11.22 um 14:27 schrieb Ævar Arnfjörð Bjarmason:
>
> On Tue, Nov 29 2022, René Scharfe wrote:
>
> First, I think 3/5 in this series looks really good in this iteration.
>
> I think this is also worth cleaning up, but depending on when we expect
> this to be queued maybe splitting these post-cleanups into their own
> series would make sense?
>
>> pack-objects uses OPT_PARSE_LIST_OBJECTS_FILTER_INIT() to initialize the
>> a rev_info struct lazily before populating its filter member using the
>> --filter option values.  It tracks whether the initialization is needed
>> using the .have_revs member of the callback data.
>>
>> There is a better way: Use a stand-alone list_objects_filter_options
>> struct and build a rev_info struct with its .filter member after option
>> parsing.  This allows using the simpler OPT_PARSE_LIST_OBJECTS_FILTER()
>> and getting rid of the extra callback mechanism.
>>
>> Even simpler would be using a struct rev_info as before 5cb28270a1
>> (pack-objects: lazily set up "struct rev_info", don't leak, 2022-03-28),
>> but that would expose a memory leak caused by repo_init_revisions()
>> followed by release_revisions() without a setup_revisions() call in
>> between.
>>
>> Using list_objects_filter_options also allows pushing the rev_info
>> struct into get_object_list(), where it arguably belongs. Either way,
>> this is all left for later.
>
> I think what you're missing here (or not explaining) is that *the
> reason* for us not being able to return to a pre-5cb28270a1 state is
> that if we initialize with REV_INFO_INIT we'll first of all find that
> the "filter" isn't initialized, because it's not in that macro.

The parent of 5cb28270a1 used repo_init_revisions(), not REV_INFO_INIT
directly, so that macro is not a direct reason.

But you mean that in a world where struct rev_info can be sufficiently
initialized with a macro we could use it without allocations, avoiding
leaks?  And for pack-objects just the filter member is missing to reach
that goal?

> And even if it were there, the repo_init_revisions() will wipe away
> anything we add to it once we call repo_init_revisions(), which we'll
> need to do any "real" work with it.
>
> But if we're really going to refactor this properly let's stop beating
> around the bush and just connect those dots. So first, the only reason
> for why "filter" isn't there yet is because a recent topic would have
> conflicted with it, so if this is cherry-picked after 3/5 we'll have
> that: https://github.com/avar/git/commit/6b29069d72f.patch

That patch looks good.

> Then the below change on top of that could replace this 4/5 and
> 5/5. I.e. also the 5/5 because the only reason we need the
> "opt_lof_init" is because of this init-ing edge case.
>
> 	 builtin/pack-objects.c        | 32 +++++++-------------------------
> 	 list-objects-filter-options.c |  4 ----
> 	 list-objects-filter-options.h | 29 ++++++++++-------------------
> 	 revision.c                    | 19 +++++++++++++------
> 	 revision.h                    |  9 +++++++++
> 	 5 files changed, 39 insertions(+), 54 deletions(-)
>
> 	diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> 	index c702c09dd45..d0c4c26f4e9 100644
> 	--- a/builtin/pack-objects.c
> 	+++ b/builtin/pack-objects.c
> 	@@ -4149,22 +4149,6 @@ static int option_parse_cruft_expiration(const struct option *opt,
> 	 	return 0;
> 	 }
>
> 	-struct po_filter_data {
> 	-	unsigned have_revs:1;
> 	-	struct rev_info revs;
> 	-};
> 	-
> 	-static struct list_objects_filter_options *po_filter_revs_init(void *value)
> 	-{
> 	-	struct po_filter_data *data = value;
> 	-
> 	-	if (!data->have_revs)
> 	-		repo_init_revisions(the_repository, &data->revs, NULL);
> 	-	data->have_revs = 1;
> 	-
> 	-	return &data->revs.filter;
> 	-}
> 	-
> 	 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
> 	 {
> 	 	int use_internal_rev_list = 0;
> 	@@ -4175,7 +4159,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
> 	 	int rev_list_index = 0;
> 	 	int stdin_packs = 0;
> 	 	struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
> 	-	struct po_filter_data pfd = { .have_revs = 0 };
> 	+	struct rev_info revs = REV_INFO_INIT;
>
> 	 	struct option pack_objects_options[] = {
> 	 		OPT_SET_INT('q', "quiet", &progress,
> 	@@ -4266,7 +4250,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
> 	 			      &write_bitmap_index,
> 	 			      N_("write a bitmap index if possible"),
> 	 			      WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
> 	-		OPT_PARSE_LIST_OBJECTS_FILTER_INIT(&pfd, po_filter_revs_init),
> 	+		OPT_PARSE_LIST_OBJECTS_FILTER(&revs.filter),
> 	 		OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
> 	 		  N_("handling for missing objects"), PARSE_OPT_NONEG,
> 	 		  option_parse_missing_action),
> 	@@ -4386,7 +4370,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
> 	 	if (!rev_list_all || !rev_list_reflog || !rev_list_index)
> 	 		unpack_unreachable_expiration = 0;
>
> 	-	if (pfd.have_revs && pfd.revs.filter.choice) {
> 	+	if (revs.filter.choice) {
> 	 		if (!pack_to_stdout)
> 	 			die(_("cannot use --filter without --stdout"));
> 	 		if (stdin_packs)
> 	@@ -4473,16 +4457,14 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
> 	 		read_cruft_objects();
> 	 	} else if (!use_internal_rev_list) {
> 	 		read_object_list_from_stdin();
> 	-	} else if (pfd.have_revs) {
> 	-		get_object_list(&pfd.revs, rp.nr, rp.v);
> 	-		release_revisions(&pfd.revs);
> 	+	} else if (revs.filter.choice) {
> 	+		repo_dynamic_init_revisions(the_repository, &revs, NULL);
> 	+		get_object_list(&revs, rp.nr, rp.v);
> 	 	} else {
> 	-		struct rev_info revs;
> 	-
> 	 		repo_init_revisions(the_repository, &revs, NULL);
> 	 		get_object_list(&revs, rp.nr, rp.v);
> 	-		release_revisions(&revs);
> 	 	}
> 	+	release_revisions(&revs);
> 	 	cleanup_preferred_base();
> 	 	if (include_tag && nr_result)
> 	 		for_each_tag_ref(add_ref_tag, NULL);
> 	diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
> 	index 53396602380..ee01bcd2cc3 100644
> 	--- a/list-objects-filter-options.c
> 	+++ b/list-objects-filter-options.c
> 	@@ -290,10 +290,6 @@ int opt_parse_list_objects_filter(const struct option *opt,
> 	 				  const char *arg, int unset)
> 	 {
> 	 	struct list_objects_filter_options *filter_options = opt->value;
> 	-	opt_lof_init init = (opt_lof_init)opt->defval;
> 	-
> 	-	if (init)
> 	-		filter_options = init(opt->value);
>
> 	 	if (unset || !arg)
> 	 		list_objects_filter_set_no_filter(filter_options);
> 	diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
> 	index 7eeadab2dd0..470b86113ac 100644
> 	--- a/list-objects-filter-options.h
> 	+++ b/list-objects-filter-options.h
> 	@@ -108,30 +108,21 @@ void parse_list_objects_filter(
> 	 	const char *arg);
>
> 	 /**
> 	- * The opt->value to opt_parse_list_objects_filter() is either a
> 	- * "struct list_objects_filter_option *" when using
> 	- * OPT_PARSE_LIST_OBJECTS_FILTER().
> 	+ * The opt->value to opt_parse_list_objects_filter() is a "struct
> 	+ * list_objects_filter_option *".
> 	  *
> 	- * Or, if using no "struct option" field is used by the callback,
> 	- * except the "defval" which is expected to be an "opt_lof_init"
> 	- * function, which is called with the "opt->value" and must return a
> 	- * pointer to the ""struct list_objects_filter_option *" to be used.
> 	- *
> 	- * The OPT_PARSE_LIST_OBJECTS_FILTER_INIT() can be used e.g. the
> 	- * "struct list_objects_filter_option" is embedded in a "struct
> 	- * rev_info", which the "defval" could be tasked with lazily
> 	- * initializing. See cmd_pack_objects() for an example.
> 	+ * When it's the "filter" member of a "struct rev_info" make sure to
> 	+ * initialize it first with "REV_INFO_INIT" or repo_init_revisions()
> 	+ * before manipulating the "filter". When using "REV_INFO_INIT" to
> 	+ * lazily initialize a "struct rev_info" use
> 	+ * repo_dynamic_init_revisions() before using that "struct rev_info"
> 	+ * with the revisions API.
> 	  */
> 	 int opt_parse_list_objects_filter(const struct option *opt,
> 	 				  const char *arg, int unset);
> 	-typedef struct list_objects_filter_options *(*opt_lof_init)(void *);
> 	-#define OPT_PARSE_LIST_OBJECTS_FILTER_INIT(fo, init) \
> 	-	{ OPTION_CALLBACK, 0, "filter", (fo), N_("args"), \
> 	-	  N_("object filtering"), 0, opt_parse_list_objects_filter, \
> 	-	  (intptr_t)(init) }
> 	-
> 	 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
> 	-	OPT_PARSE_LIST_OBJECTS_FILTER_INIT((fo), NULL)
> 	+	{ OPTION_CALLBACK, 0, "filter", (fo), N_("args"), \
> 	+	  N_("object filtering"), 0, opt_parse_list_objects_filter }
>
> 	 /*
> 	  * Translates abbreviated numbers in the filter's filter_spec into their
> 	diff --git a/revision.c b/revision.c
> 	index 47817da209a..435c864d7cd 100644
> 	--- a/revision.c
> 	+++ b/revision.c
> 	@@ -1888,13 +1888,10 @@ static int add_parents_only(struct rev_info *revs, const char *arg_, int flags,
> 	 	return 1;
> 	 }
>
> 	-void repo_init_revisions(struct repository *r,
> 	-			 struct rev_info *revs,
> 	-			 const char *prefix)
> 	+void repo_dynamic_init_revisions(struct repository *r,
> 	+				 struct rev_info *revs,
> 	+				 const char *prefix)
> 	 {
> 	-	struct rev_info blank = REV_INFO_INIT;
> 	-	memcpy(revs, &blank, sizeof(*revs));
> 	-
> 	 	revs->repo = r;
> 	 	revs->pruning.repo = r;
> 	 	revs->pruning.add_remove = file_add_remove;
> 	@@ -1914,6 +1911,16 @@ void repo_init_revisions(struct repository *r,
> 	 	init_display_notes(&revs->notes_opt);
> 	 }
>
> 	+void repo_init_revisions(struct repository *r,
> 	+			 struct rev_info *revs,
> 	+			 const char *prefix)
> 	+{
> 	+	struct rev_info blank = REV_INFO_INIT;
> 	+	memcpy(revs, &blank, sizeof(*revs));
> 	+
> 	+	repo_dynamic_init_revisions(r, revs, prefix);
> 	+}
> 	+
> 	 static void add_pending_commit_list(struct rev_info *revs,
> 	 				    struct commit_list *commit_list,
> 	 				    unsigned int flags)
> 	diff --git a/revision.h b/revision.h
> 	index 56c6b012abb..75982288db8 100644
> 	--- a/revision.h
> 	+++ b/revision.h
> 	@@ -417,6 +417,15 @@ struct rev_info {
> 	 void repo_init_revisions(struct repository *r,
> 	 			 struct rev_info *revs,
> 	 			 const char *prefix);
> 	+
> 	+/**
> 	+ * A subset of repo_init_revisions(), assumes that we've already
> 	+ * initialized with REV_INFO_INIT, and possibly manipulated the
> 	+ * members initialized therein.
> 	+ */
> 	+void repo_dynamic_init_revisions(struct repository *r,
> 	+				 struct rev_info *revs,
> 	+				 const char *prefix);
> 	 #ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
> 	 #define init_revisions(revs, prefix) repo_init_revisions(the_repository, revs, prefix)
> 	 #endif
>
> Now, that repo_dynamic_init_revisions() is rather nasty, but why is it
> that we need it in the first place? It turns out that this whole
> "dynamic init" business is only needed because we need to set
> "revs.repo" to "the_repository". So this on top passes all tests:
>
> 	diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> 	index d0c4c26f4e9..190e2f8ee0e 100644
> 	--- a/builtin/pack-objects.c
> 	+++ b/builtin/pack-objects.c
> 	@@ -4458,10 +4458,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
> 	 	} else if (!use_internal_rev_list) {
> 	 		read_object_list_from_stdin();
> 	 	} else if (revs.filter.choice) {
> 	+		xsetenv("GIT_TEST_RI", "1", 1);
> 	 		repo_dynamic_init_revisions(the_repository, &revs, NULL);
> 	 		get_object_list(&revs, rp.nr, rp.v);
> 	 	} else {
> 	-		repo_init_revisions(the_repository, &revs, NULL);
> 	+		xsetenv("GIT_TEST_RI", "1", 1);
> 	+		repo_dynamic_init_revisions(the_repository, &revs, NULL);
> 	 		get_object_list(&revs, rp.nr, rp.v);
> 	 	}
> 	 	release_revisions(&revs);
> 	diff --git a/revision.c b/revision.c
> 	index 435c864d7cd..cd832499d22 100644
> 	--- a/revision.c
> 	+++ b/revision.c
> 	@@ -1893,6 +1893,9 @@ void repo_dynamic_init_revisions(struct repository *r,
> 	 				 const char *prefix)
> 	 {
> 	 	revs->repo = r;
> 	+	if (getenv("GIT_TEST_RI"))
> 	+		return;
> 	+
> 	 	revs->pruning.repo = r;
> 	 	revs->pruning.add_remove = file_add_remove;
> 	 	revs->pruning.change = file_change;
>
> And of course we can in turn simplify that to something like the
> CHILD_PROCESS_INIT macro we discussed the other day in another thread,
> i.e. just:
>
> 	struct rev_info = REV_INFO_INIT_VA(
> 		.repo = the_repository,
> 	};
>
> So then the whole intermediate repo_dynamic_init_revisions() will also
> go away, and I suspect many current users of repo_init_revisions() can
> be made to just use the macro.

Converting existing users of repo_init_revisions() to partial
initialization using a macro seems like a lot of work to determine if
that subset is sufficient.  Testing alone won't be enough.

A macro that initializes the struct fully would be easier to use.  One
obstacle is that repo and prefix are duplicated into embedded structs
(.repo, .pruning.repo, .grep_filter.repo, .diffopt.repo; .prefix,
.diffopt.prefix).  Another is .diffopt.parseopts, which includes
pointers to other .diffopt members, whose initialization requires the
name of the rev_info variable.

Why do we want to get rid of repo_init_revisions()?  Because it
allocates .diffopt.parseopts and we sometimes forget to release it,
especially in cases where we don't actually need it.  We can avoid
that by leaving the allocations to the code paths that parse diff
options.  That's actually easy (for now?) because only two places
customize the option list and they do it by concatenation.  Patch
below.

> Regarding 5/5: I know I asked in the previous iteration if we couldn't
> split that J.5.7 change from the rest, but the 4/5 here just leaves it
> as a loose end. I.e. we've already made the macro unused, so in 5/5
> we're just cleaning up already dead code.
>
> On reflection, I don't know if splitting it up is worth it after all,
> but if it's going to be made an atomic change it'll surely have to
> happen the other way around.
>
> E.g. now we take an arbtirary "intptr_t", but once you get rid of the
> callback mechanism we stop using it, so we've already done the J.5.7
> change.
>
> Maybe it's not possible to split it up after all. In any case the real
> valuable thing is to split it from the bug fix in 3/5. If it then goes
> away as part of some general refactoring...
>
> I'd also be fine with just having this series as-is at this point,
> depending on your appetite to continue hacking on this.
>
> I've been slowly working towards making the "struct rev_info"
> initialization less crappy, so the changes above could also be made in
> some eventual REV_INFO_INIT series...

Merging the introduction or removal of infrastructure with the first or
last use is sometimes nicer, e.g. if one part is small.  Here I slightly
prefer to keep the different concerns separate, i.e. to not merge patch
4 and 5, because 4 is big enough already and 5 being a trivial dead code
removal is a good thing in my eyes.


diff --git a/builtin/range-diff.c b/builtin/range-diff.c
index e2a74efb42..757010069f 100644
--- a/builtin/range-diff.c
+++ b/builtin/range-diff.c
@@ -47,7 +47,7 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix)

 	repo_diff_setup(the_repository, &diffopt);

-	options = parse_options_concat(range_diff_options, diffopt.parseopts);
+	options = diff_parse_options_concat(range_diff_options, &diffopt);
 	argc = parse_options(argc, argv, prefix, options,
 			     builtin_range_diff_usage, PARSE_OPT_KEEP_DASHDASH);

diff --git a/diff-no-index.c b/diff-no-index.c
index 18edbdf4b5..0a240039a0 100644
--- a/diff-no-index.c
+++ b/diff-no-index.c
@@ -255,8 +255,7 @@ int diff_no_index(struct rev_info *revs,
 	};
 	struct option *options;

-	options = parse_options_concat(no_index_options,
-				       revs->diffopt.parseopts);
+	options = diff_parse_options_concat(no_index_options, &revs->diffopt);
 	argc = parse_options(argc, argv, revs->prefix, options,
 			     diff_no_index_usage, 0);
 	if (argc != 2) {
diff --git a/diff.c b/diff.c
index 1054a4b732..ebf9154eaf 100644
--- a/diff.c
+++ b/diff.c
@@ -4615,8 +4615,6 @@ static void run_checkdiff(struct diff_filepair *p, struct diff_options *o)
 	builtin_checkdiff(name, other, attr_path, p->one, p->two, o);
 }

-static void prep_parse_options(struct diff_options *options);
-
 void repo_diff_setup(struct repository *r, struct diff_options *options)
 {
 	memcpy(options, &default_diff_options, sizeof(*options));
@@ -4662,8 +4660,6 @@ void repo_diff_setup(struct repository *r, struct diff_options *options)

 	options->color_moved = diff_color_moved_default;
 	options->color_moved_ws_handling = diff_color_moved_ws_default;
-
-	prep_parse_options(options);
 }

 static const char diff_status_letters[] = {
@@ -4821,8 +4817,6 @@ void diff_setup_done(struct diff_options *options)
 			options->filter = ~filter_bit[DIFF_STATUS_FILTER_AON];
 		options->filter &= ~options->filter_not;
 	}
-
-	FREE_AND_NULL(options->parseopts);
 }

 int parse_long_opt(const char *opt, const char **argv,
@@ -5419,7 +5413,8 @@ static int diff_opt_rotate_to(const struct option *opt, const char *arg, int uns
 	return 0;
 }

-static void prep_parse_options(struct diff_options *options)
+struct option *diff_parse_options_concat(const struct option *extra,
+					 struct diff_options *options)
 {
 	struct option parseopts[] = {
 		OPT_GROUP(N_("Diff output format options")),
@@ -5689,22 +5684,24 @@ static void prep_parse_options(struct diff_options *options)
 		OPT_END()
 	};

-	ALLOC_ARRAY(options->parseopts, ARRAY_SIZE(parseopts));
-	memcpy(options->parseopts, parseopts, sizeof(parseopts));
+	return parse_options_concat(extra, parseopts);
 }

 int diff_opt_parse(struct diff_options *options,
 		   const char **av, int ac, const char *prefix)
 {
+	struct option *opts = diff_parse_options_concat(NULL, options);
+
 	if (!prefix)
 		prefix = "";

-	ac = parse_options(ac, av, prefix, options->parseopts, NULL,
+	ac = parse_options(ac, av, prefix, opts, NULL,
 			   PARSE_OPT_KEEP_DASHDASH |
 			   PARSE_OPT_KEEP_UNKNOWN_OPT |
 			   PARSE_OPT_NO_INTERNAL_HELP |
 			   PARSE_OPT_ONE_SHOT |
 			   PARSE_OPT_STOP_AT_NON_OPTION);
+	free(opts);

 	return ac;
 }
@@ -6513,7 +6510,6 @@ void diff_free(struct diff_options *options)
 	diff_free_file(options);
 	diff_free_ignore_regex(options);
 	clear_pathspec(&options->pathspec);
-	FREE_AND_NULL(options->parseopts);
 }

 void diff_flush(struct diff_options *options)
diff --git a/diff.h b/diff.h
index fd33caeb25..f2ab97b8b4 100644
--- a/diff.h
+++ b/diff.h
@@ -394,7 +394,6 @@ struct diff_options {
 	unsigned color_moved_ws_handling;

 	struct repository *repo;
-	struct option *parseopts;
 	struct strmap *additional_path_headers;

 	int no_free;
@@ -539,6 +538,8 @@ int git_diff_ui_config(const char *var, const char *value, void *cb);
 #define diff_setup(diffopts) repo_diff_setup(the_repository, diffopts)
 #endif
 void repo_diff_setup(struct repository *, struct diff_options *);
+struct option *diff_parse_options_concat(const struct option *extra,
+					 struct diff_options *options);
 int diff_opt_parse(struct diff_options *, const char **, int, const char *);
 void diff_setup_done(struct diff_options *);
 int git_config_rename(const char *var, const char *value);






[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux