From: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx> Teach pack-objects command to accept --max-blob-size=<n> argument and use a traverse_commit_list filter-proc to omit unwanted blobs from the resulting packfile. This filter-proc always includes special files matching ".git*" (such as ".gitignore") and blobs smaller than <n>. <n> is a magnitude value and accepts [kmg] suffixes. A value of zero can be used to omit all blobs (except for special files). There are 2 placeholder TODOs in this code to talk about building an omitted-blob list for the client. Signed-off-by: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx> --- builtin/pack-objects.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 50e01aa..cdcd4d6 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -77,6 +77,8 @@ static unsigned long cache_max_small_delta_size = 1000; static unsigned long window_memory_limit = 0; +static signed long max_blob_size = -1; + /* * stats */ @@ -2519,6 +2521,7 @@ static void read_object_list_from_stdin(void) } #define OBJECT_ADDED (1u<<20) +#define BLOB_OMITTED (1u<<21) static void show_commit(struct commit *commit, void *data) { @@ -2536,6 +2539,70 @@ static void show_object(struct object *obj, const char *name, void *data) obj->flags |= OBJECT_ADDED; } +/* + * Filter blobs by pathname or size. + * Return 1 to mark the blob SEEN so that it will not be reported again. + * Return 0 to allow it to be presented again. + */ +static int filter_blob( + struct object *obj, + const char *pathname, + const char *entryname, + void *data) +{ + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + assert((obj->flags & OBJECT_ADDED) == 0); + assert(max_blob_size >= 0); + + /* + * Always include blobs for special files of the form ".git*". + */ + if ((strncmp(entryname, ".git", 4) == 0) && entryname[4]) { + if (obj->flags & BLOB_OMITTED) { + /* + * TODO + * TODO Remove this blob from the omitted blob list. + * TODO + */ + obj->flags &= ~BLOB_OMITTED; + } + show_object(obj, pathname, data); + return 1; + } + + /* + * We already know the blob is too big because it was previously + * omitted. We still don't want it yet. DO NOT mark it SEEN + * in case it is associated with a ".git*" path in another tree + * or commit. + */ + if (obj->flags & BLOB_OMITTED) + return 0; + + /* + * We only want blobs that are LESS THAN the maximum. + * This allows zero to mean NO BLOBS. + */ + if (max_blob_size > 0) { + unsigned long s; + enum object_type t = sha1_object_info(obj->oid.hash, &s); + assert(t == OBJ_BLOB); + if (s < max_blob_size) { + show_object(obj, pathname, data); + return 1; + } + } + + /* + * TODO + * TODO (Provisionally) add this blob to the omitted blob list. + * TODO + */ + obj->flags |= BLOB_OMITTED; + return 0; +} + static void show_edge(struct commit *commit) { add_preferred_base(commit->object.oid.hash); @@ -2800,7 +2867,12 @@ static void get_object_list(int ac, const char **av) if (prepare_revision_walk(&revs)) die("revision walk setup failed"); mark_edges_uninteresting(&revs, show_edge); - traverse_commit_list(&revs, show_commit, show_object, NULL); + + if (max_blob_size == -1) + traverse_commit_list(&revs, show_commit, show_object, NULL); + else + traverse_commit_list_filtered(&revs, show_commit, show_object, + filter_blob, NULL); if (unpack_unreachable_expiration) { revs.ignore_missing_links = 1; @@ -2936,6 +3008,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) N_("use a bitmap index if available to speed up counting objects")), OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index, N_("write a bitmap index together with the pack index")), + OPT_MAGNITUDE(0, "max-blob-size", (unsigned long *)&max_blob_size, + N_("omit large blobs from packfile")), OPT_END(), }; -- 2.9.3