[PATCH 2/5] list-objects-filter: implement filter only:commits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Teach list-objects the "only:commits" filter, which filters out every
object that is neither a commit nor an annotated tag (unless the user
explicitly specifies other objects). The purpose of this patch is to
allow smaller partial clones.

The name of this filter - only:commits - is a bit inaccurate because it
still allows annotated tags to pass through. I chose it because it was
the only concise name I could think of that was reasonably descriptive.
I considered and decided against "tree:none" because the code and
documentation for filters seem to lack the concept of "you're filtering
this, so we'll implicitly filter all referents of this." So "tree:none"
is vague: some may think it filters blobs too, while others may not.
"only:commits" is specific, which makes it easier to match to a
potential use case.

Signed-off-by: Matthew DeVore <matvore@xxxxxxxxxx>
---
 Documentation/rev-list-options.txt     |  2 ++
 list-objects-filter-options.c          |  4 +++
 list-objects-filter-options.h          |  1 +
 list-objects-filter.c                  | 43 ++++++++++++++++++--------
 t/t5317-pack-objects-filter-objects.sh | 30 ++++++++++++++++++
 t/t6112-rev-list-filters-objects.sh    | 13 ++++++++
 6 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 7b273635d..3a60a490a 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -743,6 +743,8 @@ specification contained in <path>.
 	A debug option to help with future "partial clone" development.
 	This option specifies how missing objects are handled.
 +
+The form '--filter=only:commits' omits all blobs and trees that the user did not explicitly specify.
++
 The form '--missing=error' requests that rev-list stop with an error if
 a missing object is encountered.  This is the default action.
 +
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index c0e2bd6a0..aaaaae508 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -69,6 +69,10 @@ static int gently_parse_list_objects_filter(
 		filter_options->choice = LOFC_SPARSE_PATH;
 		filter_options->sparse_path_value = strdup(v0);
 		return 0;
+
+	} else if (!strcmp(arg, "only:commits")) {
+		filter_options->choice = LOFC_ONLY_COMMITS;
+		return 0;
 	}
 
 	if (errbuf) {
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 0000a61f8..a68df42c8 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -12,6 +12,7 @@ enum list_objects_filter_choice {
 	LOFC_BLOB_LIMIT,
 	LOFC_SPARSE_OID,
 	LOFC_SPARSE_PATH,
+	LOFC_ONLY_COMMITS,
 	LOFC__COUNT /* must be last */
 };
 
diff --git a/list-objects-filter.c b/list-objects-filter.c
index a0ba78b20..f0a064b4b 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -26,38 +26,39 @@
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
 /*
- * A filter for list-objects to omit ALL blobs from the traversal.
- * And to OPTIONALLY collect a list of the omitted OIDs.
+ * A filter for list-objects to omit ALL blobs from the traversal, and possibly
+ * trees as well.
+ * Can OPTIONALLY collect a list of the omitted OIDs.
  */
-struct filter_blobs_none_data {
+struct filter_none_of_type_data {
+	unsigned omit_trees : 1;
 	struct oidset *omits;
 };
 
-static enum list_objects_filter_result filter_blobs_none(
+static enum list_objects_filter_result filter_none_of_type(
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
 	void *filter_data_)
 {
-	struct filter_blobs_none_data *filter_data = filter_data_;
+	struct filter_none_of_type_data *filter_data = filter_data_;
 
 	switch (filter_situation) {
 	default:
 		die("unknown filter_situation");
 		return LOFR_ZERO;
 
-	case LOFS_BEGIN_TREE:
-		assert(obj->type == OBJ_TREE);
-		/* always include all tree objects */
-		return LOFR_MARK_SEEN | LOFR_DO_SHOW;
-
 	case LOFS_END_TREE:
 		assert(obj->type == OBJ_TREE);
 		return LOFR_ZERO;
 
+	case LOFS_BEGIN_TREE:
+		assert(obj->type == OBJ_TREE);
+		if (!filter_data->omit_trees)
+			return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+
 	case LOFS_BLOB:
-		assert(obj->type == OBJ_BLOB);
 		assert((obj->flags & SEEN) == 0);
 
 		if (filter_data->omits)
@@ -72,10 +73,25 @@ static void *filter_blobs_none__init(
 	filter_object_fn *filter_fn,
 	filter_free_fn *filter_free_fn)
 {
-	struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
+	struct filter_none_of_type_data *d = xcalloc(1, sizeof(*d));
+	d->omits = omitted;
+
+	*filter_fn = filter_none_of_type;
+	*filter_free_fn = free;
+	return d;
+}
+
+static void* filter_only_commits__init(
+	struct oidset *omitted,
+	struct list_objects_filter_options *filter_options,
+	filter_object_fn *filter_fn,
+	filter_free_fn *filter_free_fn)
+{
+	struct filter_none_of_type_data *d = xcalloc(1, sizeof(*d));
+	d->omit_trees = 1;
 	d->omits = omitted;
 
-	*filter_fn = filter_blobs_none;
+	*filter_fn = filter_none_of_type;
 	*filter_free_fn = free;
 	return d;
 }
@@ -376,6 +392,7 @@ static filter_init_fn s_filters[] = {
 	filter_blobs_limit__init,
 	filter_sparse_oid__init,
 	filter_sparse_path__init,
+	filter_only_commits__init,
 };
 
 void *list_objects_filter__init(
diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh
index 6710c8bc8..600d153f9 100755
--- a/t/t5317-pack-objects-filter-objects.sh
+++ b/t/t5317-pack-objects-filter-objects.sh
@@ -59,6 +59,36 @@ test_expect_success 'verify normal and blob:none packfiles have same commits/tre
 	test_cmp observed expected
 '
 
+test_expect_success 'setup for tests of only:commits' '
+	mkdir r1/subtree &&
+	echo "This is a file in a subtree" > r1/subtree/file &&
+	git -C r1 add subtree/file &&
+	git -C r1 commit -m subtree
+'
+
+test_expect_success 'verify only:commits packfile has no blobs or trees' '
+	git -C r1 pack-objects --rev --stdout --filter=only:commits >commitsonly.pack <<-EOF &&
+	HEAD
+	EOF
+	git -C r1 index-pack ../commitsonly.pack &&
+	git -C r1 verify-pack -v ../commitsonly.pack \
+		| grep -E "tree|blob" \
+		| sort >observed &&
+	test_line_count = 0 observed
+'
+
+test_expect_success 'grab tree directly when using only:commits' '
+	# We should get the tree specified directly but not its blobs or subtrees.
+	git -C r1 pack-objects --rev --stdout --filter=only:commits >commitsonly.pack <<-EOF &&
+	HEAD:
+	EOF
+	git -C r1 index-pack ../commitsonly.pack &&
+	git -C r1 verify-pack -v ../commitsonly.pack \
+		| grep -E "tree|blob" \
+		| sort >observed &&
+	test_line_count = 1 observed
+'
+
 # Test blob:limit=<n>[kmg] filter.
 # We boundary test around the size parameter.  The filter is strictly less than
 # the value, so size 500 and 1000 should have the same results, but 1001 should
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index 0a37dd5f9..6dbd9477c 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -196,6 +196,19 @@ test_expect_success 'verify sparse:oid=oid-ish omits top-level files' '
 	test_cmp observed expected
 '
 
+# Test only:commits filter.
+
+test_expect_success 'verify only:commits includes trees in "filtered" output' '
+	git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=only:commits \
+		| awk -f print_1.awk \
+		| sed s/~// \
+		| xargs -n1 git -C r3 cat-file -t \
+		| sort -u >filtered_types &&
+	printf "blob\ntree\n" > expected &&
+	test_cmp filtered_types expected
+'
+
+
 # Delete some loose objects and use rev-list, but WITHOUT any filtering.
 # This models previously omitted objects that we did not receive.
 
-- 
2.18.0.597.ga71716f1ad-goog




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux