[PATCH v1 5/5] list-objects-filter-options: allow mult. --filter

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Allow combining of multiple filters by simply repeating the --filter
flag. Before this patch, the user had to combine them in a single flag
somewhat awkwardly (e.g. --filter=combine:FOO+BAR), including
URL-encoding the individual filters.

To make this work, in the --filter flag parsing callback, rather than
error out when we detect that the filter_options struct is already
populated, we modify it in-place to contain the added sub-filter. The
existing sub-filter becomes the lhs of the combined filter, and the
next sub-filter becomes the rhs. We also have to URL-encode the LHS and
RHS sub-filters.

We can simplify the operation if the LHS is already a combine: filter.
In that case, we just append the URL-encoded RHS sub-filter to the LHS
spec to get the new spec.

Signed-off-by: Matthew DeVore <matvore@xxxxxxxxxx>
---
 Documentation/rev-list-options.txt  | 16 +++---
 builtin/fetch-pack.c                |  5 +-
 builtin/rev-list.c                  |  5 +-
 fetch-pack.c                        |  5 +-
 list-objects-filter-options.c       | 83 +++++++++++++++++++++++++----
 list-objects-filter-options.h       |  3 +-
 t/t5616-partial-clone.sh            | 19 +++++++
 t/t6112-rev-list-filters-objects.sh | 38 ++++++++++++-
 transport.c                         |  5 +-
 upload-pack.c                       | 10 +++-
 10 files changed, 164 insertions(+), 25 deletions(-)

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 4fb0c4fbb0..2be5f3a6d1 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -731,27 +731,29 @@ specification contained in <path>.
 The form '--filter=tree:<depth>' omits all blobs and trees whose depth
 from the root tree is >= <depth> (minimum depth if an object is located
 at multiple depths in the commits traversed). <depth>=0 will not include
 any trees or blobs unless included explicitly in the command-line (or
 standard input when --stdin is used). <depth>=1 will include only the
 tree and blobs which are referenced directly by a commit reachable from
 <commit> or an explicitly-given object. <depth>=2 is like <depth>=1
 while also including trees and blobs one more level removed from an
 explicitly-given commit or tree.
 +
-The form '--filter=combine:<filter1>+<filter2>+...<filterN>' combines
-several filters. Only objects which are accepted by every filter are
-included. Filters are joined by '{plus}' and individual filters are %-encoded
-(i.e. URL-encoded). Besides the '{plus}' and '%' characters, the following
-characters are reserved and also must be encoded:
-`~!@#$^&*()[]{}\;",<>?`+&#39;&#96;+ as well as all characters with ASCII code
-&lt;= `0x20`, which includes space and newline.
+Multiple '--filter=' flags can be specified to combine filters. Only
+objects which are accepted by every filter are included.
++
+The form '--filter=combine:<filter1>+<filter2>+...<filterN>' can also be
+used to combine filters. Filters are joined by '{plus}' and individual
+filters are %-encoded (i.e. URL-encoded). Besides the '{plus}' and '%'
+characters, the following characters are reserved and also must be
+encoded: `~!@#$^&*()[]{}\;",<>?`+&#39;&#96;+ as well as all characters
+with ASCII code &lt;= `0x20`, which includes space and newline.
 +
 Other arbitrary characters can also be encoded. For instance,
 'combine:tree:3+blob:none' and 'combine:tree%3A2+blob%3Anone' are
 equivalent.
 
 --no-filter::
 	Turn off any previous `--filter=` argument.
 
 --filter-print-omitted::
 	Only useful with `--filter=`; prints a list of the objects omitted
diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index dc1485c8aa..cadcb2b915 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -151,21 +151,24 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
 		}
 		if (!strcmp("--from-promisor", arg)) {
 			args.from_promisor = 1;
 			continue;
 		}
 		if (!strcmp("--no-dependents", arg)) {
 			args.no_dependents = 1;
 			continue;
 		}
 		if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
-			parse_list_objects_filter(&args.filter_options, arg);
+			parse_list_objects_filter(
+				&args.filter_options,
+				arg,
+				/*allow_implicit_combine=*/1);
 			continue;
 		}
 		if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
 			list_objects_filter_set_no_filter(&args.filter_options);
 			continue;
 		}
 		usage(fetch_pack_usage);
 	}
 	if (deepen_not.nr)
 		args.deepen_not = &deepen_not;
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 9f31837d30..e584e7d1ac 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -454,21 +454,24 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 		if (!strcmp(arg, "--test-bitmap")) {
 			test_bitmap_walk(&revs);
 			return 0;
 		}
 		if (skip_prefix(arg, "--progress=", &arg)) {
 			show_progress = arg;
 			continue;
 		}
 
 		if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
-			parse_list_objects_filter(&filter_options, arg);
+			parse_list_objects_filter(
+				&filter_options,
+				arg,
+				/*allow_implicit_combine=*/1);
 			if (filter_options.choice && !revs.blob_objects)
 				die(_("object filtering requires --objects"));
 			if (filter_options.choice == LOFC_SPARSE_OID &&
 			    !filter_options.sparse_oid_value)
 				die(_("invalid sparse value '%s'"),
 				    filter_options.filter_spec);
 			continue;
 		}
 		if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
 			list_objects_filter_set_no_filter(&filter_options);
diff --git a/fetch-pack.c b/fetch-pack.c
index 3f24d0c8a6..c58fd9148a 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1661,21 +1661,24 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 		/*
 		 * The protocol does not support requesting that only the
 		 * wanted objects be sent, so approximate this by setting a
 		 * "blob:none" filter if no filter is already set. This works
 		 * for all object types: note that wanted blobs will still be
 		 * sent because they are directly specified as a "want".
 		 *
 		 * NEEDSWORK: Add an option in the protocol to request that
 		 * only the wanted objects be sent, and implement it.
 		 */
-		parse_list_objects_filter(&args->filter_options, "blob:none");
+		parse_list_objects_filter(
+			&args->filter_options,
+			"blob:none",
+			/*allow_implicit_combine=*/0);
 	}
 
 	if (version != protocol_v2 && !ref) {
 		packet_flush(fd[1]);
 		die(_("no matching remote head"));
 	}
 	if (version == protocol_v2) {
 		if (shallow->nr)
 			BUG("Protocol V2 does not provide shallows at this point in the fetch");
 		memset(&si, 0, sizeof(si));
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 647b2b220e..a0cc87c62b 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -1,18 +1,19 @@
 #include "cache.h"
 #include "commit.h"
 #include "config.h"
 #include "revision.h"
 #include "argv-array.h"
 #include "list-objects.h"
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
+#include "trace.h"
 
 static int parse_combine_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf);
 
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
  *       --filter=<arg>
@@ -229,43 +230,107 @@ static int parse_combine_filter(
 
 cleanup:
 	strbuf_list_free(sub_specs);
 	if (result) {
 		list_objects_filter_release(filter_options);
 		memset(filter_options, 0, sizeof(*filter_options));
 	}
 	return result;
 }
 
-int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
-			      const char *arg)
+static void add_url_encoded(struct strbuf *dest, const char *s)
 {
-	struct strbuf buf = STRBUF_INIT;
-	if (filter_options->choice)
-		die(_("multiple filter-specs cannot be combined"));
-	filter_options->filter_spec = strdup(arg);
-	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
-		die("%s", buf.buf);
+	while (*s) {
+		if (*s <= ' ' || strchr(RESERVED_NON_WS, *s) ||
+			*s == '%' || *s == '+')
+			strbuf_addf(dest, "%%%02X", (int)*s);
+		else
+			strbuf_addf(dest, "%c", *s);
+		s++;
+	}
+}
+
+/*
+ * Returns a new filter-spec string by combining (with combine:) the two
+ * sub-specs. The caller gains ownership of a new string, and lhs and rhs are
+ * not freed.
+ */
+static char *combine_specs(const char *lhs, const char *rhs)
+{
+	struct strbuf combined = STRBUF_INIT;
+	if (starts_with(lhs, "combine:")) {
+		strbuf_addf(&combined, "%s", lhs);
+	} else {
+		strbuf_addf(&combined, "combine:");
+		add_url_encoded(&combined, lhs);
+	}
+	strbuf_addf(&combined, "+");
+
+	add_url_encoded(&combined, rhs);
+	trace_printf("Generated composite filter-spec: %s\n", combined.buf);
+	return strbuf_detach(&combined, NULL);
+}
+
+int parse_list_objects_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	int allow_implicit_combine)
+{
+	struct strbuf errbuf = STRBUF_INIT;
+	if (filter_options->choice) {
+		struct list_objects_filter_options *lhs;
+
+		if (!allow_implicit_combine)
+			die(_("multiple filter-specs cannot be combined"));
+
+		lhs = xcalloc(1, sizeof(*lhs));
+		*lhs = *filter_options;
+		memset(filter_options, 0, sizeof(*filter_options));
+
+		filter_options->lhs = lhs;
+		filter_options->rhs = xcalloc(1, sizeof(*filter_options->rhs));
+		filter_options->choice = LOFC_COMBINE;
+
+		/*
+		 * Build up the filter-spec string using the already-parsed
+		 * portion (the lhs) and the to-be-parsed portion (the rhs).
+		 */
+		filter_options->filter_spec = combine_specs(
+			lhs->filter_spec, arg);
+		FREE_AND_NULL(lhs->filter_spec);
+
+		/*
+		 * The gentle parse function below will populate the rhs of the
+		 * combined filter. But the caller of *this* function sees
+		 * filter_options as the combined filter.
+		 */
+		filter_options = filter_options->rhs;
+	} else {
+		filter_options->filter_spec = strdup(arg);
+	}
+	if (gently_parse_list_objects_filter(filter_options, arg, &errbuf))
+		die("%s", errbuf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
 	if (unset || !arg) {
 		list_objects_filter_set_no_filter(filter_options);
 		return 0;
 	}
 
-	return parse_list_objects_filter(filter_options, arg);
+	return parse_list_objects_filter(
+		filter_options, arg, /*allow_implicit_combine=*/1);
 }
 
 void expand_list_objects_filter_spec(
 	const struct list_objects_filter_options *filter,
 	struct strbuf *expanded_spec)
 {
 	strbuf_init(expanded_spec, strlen(filter->filter_spec));
 	if (filter->choice == LOFC_BLOB_LIMIT)
 		strbuf_addf(expanded_spec, "blob:limit=%lu",
 			    filter->blob_limit_value);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 6c0f0ecd08..a3ad00fde2 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -55,21 +55,22 @@ struct list_objects_filter_options {
 	/*
 	 * END choice-specific parsed values.
 	 */
 };
 
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
 int parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
-	const char *arg);
+	const char *arg,
+	int allow_implicit_combine);
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
 	  opt_parse_list_objects_filter }
 
 /*
diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 9a8f9886b3..11536f4028 100755
--- a/t/t5616-partial-clone.sh
+++ b/t/t5616-partial-clone.sh
@@ -201,20 +201,39 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr
 	test_line_count = 70 fetched_objects &&
 
 	awk -f print_1.awk fetched_objects |
 	xargs -n1 git -C dst cat-file -t >fetched_types &&
 
 	sort -u fetched_types >unique_types.observed &&
 	test_write_lines blob commit tree >unique_types.expected &&
 	test_cmp unique_types.expected unique_types.observed
 '
 
+test_expect_success 'implicitly construct combine: filter with repeated flags' '
+	GIT_TRACE=$(pwd)/trace git clone --bare \
+		--filter=blob:none --filter=tree:1 \
+		"file://$(pwd)/srv.bare" pc2 &&
+	grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \
+		trace &&
+	git -C pc2 rev-list --objects --missing=allow-any HEAD >objects &&
+
+	# We should have gotten some root trees.
+	grep " $" objects &&
+	# Should not have gotten any non-root trees or blobs.
+	! grep " ." objects &&
+
+	xargs -n 1 git -C pc2 cat-file -t <objects >types &&
+	sort -u types >unique_types.actual &&
+	test_write_lines commit tree >unique_types.expected &&
+	test_cmp unique_types.expected unique_types.actual
+'
+
 test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
 	rm -rf src dst &&
 	git init src &&
 	test_commit -C src x &&
 	test_config -C src uploadpack.allowfilter 1 &&
 	test_config -C src uploadpack.allowanysha1inwant 1 &&
 
 	# Create a tag pointing to a blob.
 	BLOB=$(echo blob-contents | git -C src hash-object --stdin -w) &&
 	git -C src tag myblob "$BLOB" &&
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index ddfacb1a1a..104248c73d 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -357,21 +357,30 @@ test_expect_success 'verify tree:3 includes everything expected' '
 
 test_expect_success 'combine:... for a simple combination' '
 	git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
 		>actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD dir1 &&
 
 	# There are also 2 commit objects
-	test_line_count = 5 actual
+	test_line_count = 5 actual &&
+
+	cp actual expected &&
+
+	# Try again using repeated --filter - this is equivalent to a manual
+	# combine with "combine:...+..."
+	git -C r3 rev-list --objects --filter=combine:tree:2 \
+		--filter=blob:none HEAD >actual &&
+
+	test_cmp expected actual
 '
 
 test_expect_success 'combine:... with URL encoding' '
 	git -C r3 rev-list --objects \
 		--filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD dir1 &&
 
@@ -459,21 +468,46 @@ test_expect_success 'combine:... with more than two sub-filters' '
 	test_line_count = 7 actual &&
 
 	# Try again, this time making sure the last sub-filter is only
 	# URL-decoded once.
 	cp pattern1 pattern1+renamed% &&
 	cp actual expect &&
 
 	git -C r3 rev-list --objects \
 		--filter=combine:tree:3+blob:limit=40+sparse:path=../pattern1%2brenamed%25 \
 		HEAD >actual &&
-	test_cmp expect actual
+	test_cmp expect actual &&
+
+	# Use the same composite filter again, but with a pattern file name that
+	# requires encoding multiple characters, and use implicit filter
+	# combining.
+	cp pattern1 "p;at%ter+n" &&
+	GIT_TRACE=$(pwd)/trace git -C r3 rev-list --objects \
+		--filter=tree:3 --filter=blob:limit=40 \
+		--filter=sparse:path="../p;at%ter+n" \
+		HEAD >actual &&
+
+	test_cmp expect actual &&
+	grep "Generated composite filter-spec: combine:tree:3+blob:limit=40+sparse:path=../p%3Bat%25ter%2B" \
+		trace &&
+
+	# Repeat the above test, but this time, the characters to encode are in
+	# the LHS of the combined filter.
+	cp pattern1 "^~pattern" &&
+	GIT_TRACE=$(pwd)/trace git -C r3 rev-list --objects \
+		--filter=sparse:path="../^~pattern" \
+		--filter=tree:3 --filter=blob:limit=40 \
+		HEAD >actual &&
+
+	test_cmp expect actual &&
+	grep "Generated composite filter-spec: combine:sparse:path=../%5E%7Epattern+tree:3+blob:limit=40" \
+		trace
 '
 
 # Test provisional omit collection logic with a repo that has objects appearing
 # at multiple depths - first deeper than the filter's threshold, then shallow.
 
 test_expect_success 'setup r4' '
 	git init r4 &&
 
 	echo foo > r4/foo &&
 	mkdir r4/subdir &&
diff --git a/transport.c b/transport.c
index f1fcd2c4b0..63100da143 100644
--- a/transport.c
+++ b/transport.c
@@ -217,21 +217,24 @@ static int set_git_option(struct git_transport_options *opts,
 	} else if (!strcmp(name, TRANS_OPT_DEEPEN_RELATIVE)) {
 		opts->deepen_relative = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_FROM_PROMISOR)) {
 		opts->from_promisor = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_NO_DEPENDENTS)) {
 		opts->no_dependents = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) {
-		parse_list_objects_filter(&opts->filter_options, value);
+		parse_list_objects_filter(
+			&opts->filter_options,
+			value,
+			/*allow_implicit_combine=*/0);
 		return 0;
 	}
 	return 1;
 }
 
 static int connect_setup(struct transport *transport, int for_push)
 {
 	struct git_transport_data *data = transport->data;
 	int flags = transport->verbose > 0 ? CONNECT_VERBOSE : 0;
 
diff --git a/upload-pack.c b/upload-pack.c
index d2ea5eb20d..e3f2618600 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -877,21 +877,24 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
 		if (process_deepen(reader->line, &depth))
 			continue;
 		if (process_deepen_since(reader->line, &deepen_since, &deepen_rev_list))
 			continue;
 		if (process_deepen_not(reader->line, &deepen_not, &deepen_rev_list))
 			continue;
 
 		if (skip_prefix(reader->line, "filter ", &arg)) {
 			if (!filter_capability_requested)
 				die("git upload-pack: filtering capability not negotiated");
-			parse_list_objects_filter(&filter_options, arg);
+			parse_list_objects_filter(
+				&filter_options,
+				arg,
+				/*allow_implicit_combine=*/0);
 			continue;
 		}
 
 		if (!skip_prefix(reader->line, "want ", &arg) ||
 		    parse_oid_hex(arg, &oid_buf, &features))
 			die("git upload-pack: protocol error, "
 			    "expected to get object ID, not '%s'", reader->line);
 
 		if (parse_feature_request(features, "deepen-relative"))
 			deepen_relative = 1;
@@ -1296,21 +1299,24 @@ static void process_args(struct packet_reader *request,
 			continue;
 		if (process_deepen_not(arg, &data->deepen_not,
 				       &data->deepen_rev_list))
 			continue;
 		if (!strcmp(arg, "deepen-relative")) {
 			data->deepen_relative = 1;
 			continue;
 		}
 
 		if (allow_filter && skip_prefix(arg, "filter ", &p)) {
-			parse_list_objects_filter(&filter_options, p);
+			parse_list_objects_filter(
+				&filter_options,
+				p,
+				/*allow_implicit_combine=*/0);
 			continue;
 		}
 
 		if ((git_env_bool("GIT_TEST_SIDEBAND_ALL", 0) ||
 		     allow_sideband_all) &&
 		    !strcmp(arg, "sideband-all")) {
 			data->writer.use_sideband = 1;
 			continue;
 		}
 
-- 
2.21.0




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux