Teach list-objects the "only:commits" filter, which filters out all objects other than commits and annotated tags (unless other objects are explicitly specified by the user). The purpose of this patch is to allow smaller partial clones. The name of this filter - only:commits - is a bit inaccurate because it still allows annotated tags to pass through. I chose it because it was the only concise name I could think of that was pretty descriptive. I considered and decided against "tree:none" because the code and documentation for filters seem to lack the concept of "you're filtering this, so we'll implicitly filter all referents of this." So "tree:none" is vague, since some may think it filters blobs too, while some may not. "only:commits" is specific and makes it easier to match it to a potential use case. Signed-off-by: Matthew DeVore <matvore@xxxxxxxxxx> --- Documentation/rev-list-options.txt | 2 ++ list-objects-filter-options.c | 4 +++ list-objects-filter-options.h | 1 + list-objects-filter.c | 43 ++++++++++++++++++-------- t/t5317-pack-objects-filter-objects.sh | 30 ++++++++++++++++++ t/t6112-rev-list-filters-objects.sh | 13 ++++++++ 6 files changed, 80 insertions(+), 13 deletions(-) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 7b273635d..3a60a490a 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -743,6 +743,8 @@ specification contained in <path>. A debug option to help with future "partial clone" development. This option specifies how missing objects are handled. + +The form '--filter=only:commits' omits all blobs and trees. ++ The form '--missing=error' requests that rev-list stop with an error if a missing object is encountered. This is the default action. 
+ diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index c0e2bd6a0..aaaaae508 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -69,6 +69,10 @@ static int gently_parse_list_objects_filter( filter_options->choice = LOFC_SPARSE_PATH; filter_options->sparse_path_value = strdup(v0); return 0; + + } else if (!strcmp(arg, "only:commits")) { + filter_options->choice = LOFC_ONLY_COMMITS; + return 0; } if (errbuf) { diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 0000a61f8..a68df42c8 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -12,6 +12,7 @@ enum list_objects_filter_choice { LOFC_BLOB_LIMIT, LOFC_SPARSE_OID, LOFC_SPARSE_PATH, + LOFC_ONLY_COMMITS, LOFC__COUNT /* must be last */ }; diff --git a/list-objects-filter.c b/list-objects-filter.c index a0ba78b20..f0a064b4b 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -26,38 +26,39 @@ #define FILTER_SHOWN_BUT_REVISIT (1<<21) /* - * A filter for list-objects to omit ALL blobs from the traversal. - * And to OPTIONALLY collect a list of the omitted OIDs. + * A filter for list-objects to omit ALL blobs from the traversal, and possibly + * trees as well. + * Can OPTIONALLY collect a list of the omitted OIDs. 
*/ -struct filter_blobs_none_data { +struct filter_none_of_type_data { + unsigned omit_trees : 1; struct oidset *omits; }; -static enum list_objects_filter_result filter_blobs_none( +static enum list_objects_filter_result filter_none_of_type( enum list_objects_filter_situation filter_situation, struct object *obj, const char *pathname, const char *filename, void *filter_data_) { - struct filter_blobs_none_data *filter_data = filter_data_; + struct filter_none_of_type_data *filter_data = filter_data_; switch (filter_situation) { default: die("unknown filter_situation"); return LOFR_ZERO; - case LOFS_BEGIN_TREE: - assert(obj->type == OBJ_TREE); - /* always include all tree objects */ - return LOFR_MARK_SEEN | LOFR_DO_SHOW; - case LOFS_END_TREE: assert(obj->type == OBJ_TREE); return LOFR_ZERO; + case LOFS_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + if (!filter_data->omit_trees) + return LOFR_MARK_SEEN | LOFR_DO_SHOW; + case LOFS_BLOB: - assert(obj->type == OBJ_BLOB); assert((obj->flags & SEEN) == 0); if (filter_data->omits) @@ -72,10 +73,25 @@ static void *filter_blobs_none__init( filter_object_fn *filter_fn, filter_free_fn *filter_free_fn) { - struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d)); + struct filter_none_of_type_data *d = xcalloc(1, sizeof(*d)); + d->omits = omitted; + + *filter_fn = filter_none_of_type; + *filter_free_fn = free; + return d; +} + +static void* filter_only_commits__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn) +{ + struct filter_none_of_type_data *d = xcalloc(1, sizeof(*d)); + d->omit_trees = 1; d->omits = omitted; - *filter_fn = filter_blobs_none; + *filter_fn = filter_none_of_type; *filter_free_fn = free; return d; } @@ -376,6 +392,7 @@ static filter_init_fn s_filters[] = { filter_blobs_limit__init, filter_sparse_oid__init, filter_sparse_path__init, + filter_only_commits__init, }; void *list_objects_filter__init( diff --git 
a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 6710c8bc8..600d153f9 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -59,6 +59,36 @@ test_expect_success 'verify normal and blob:none packfiles have same commits/tre test_cmp observed expected ' +test_expect_success 'setup for tests of only:commits' ' + mkdir r1/subtree && + echo "This is a file in a subtree" > r1/subtree/file && + git -C r1 add subtree/file && + git -C r1 commit -m subtree +' + +test_expect_success 'verify only:commits packfile has no blobs or trees' ' + git -C r1 pack-objects --rev --stdout --filter=only:commits >commitsonly.pack <<-EOF && + HEAD + EOF + git -C r1 index-pack ../commitsonly.pack && + git -C r1 verify-pack -v ../commitsonly.pack \ + | grep -E "tree|blob" \ + | sort >observed && + test_line_count = 0 observed +' + +test_expect_success 'grab tree directly when using only:commits' ' + # We should get the tree specified directly but not its blobs or subtrees. + git -C r1 pack-objects --rev --stdout --filter=only:commits >commitsonly.pack <<-EOF && + HEAD: + EOF + git -C r1 index-pack ../commitsonly.pack && + git -C r1 verify-pack -v ../commitsonly.pack \ + | grep -E "tree|blob" \ + | sort >observed && + test_line_count = 1 observed +' + # Test blob:limit=<n>[kmg] filter. # We boundary test around the size parameter. The filter is strictly less than # the value, so size 500 and 1000 should have the same results, but 1001 should diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 0a37dd5f9..6dbd9477c 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -196,6 +196,19 @@ test_expect_success 'verify sparse:oid=oid-ish omits top-level files' ' test_cmp observed expected ' +# Test only:commits filter. 
+ +test_expect_success 'verify only:commits includes trees in "filtered" output' ' + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=only:commits \ + | awk -f print_1.awk \ + | sed s/~// \ + | xargs -n1 git -C r3 cat-file -t \ + | sort -u >filtered_types && + printf "blob\ntree\n" > expected && + test_cmp filtered_types expected +' + + # Delete some loose objects and use rev-list, but WITHOUT any filtering. # This models previously omitted objects that we did not receive. -- 2.18.0.597.ga71716f1ad-goog