Add a --format option to ls-tree. It has an existing default output, and then --long and --name-only options to emit the default output along with the objectsize and, or to only emit object paths. Rather than add --type-only, --object-only etc. we can just support a --format using a strbuf_expand() similar to "for-each-ref --format". We might still add such options in the future for convenience. The --format implementation is slower than the existing code, but this change does not cause any performance regressions. We'll leave the existing show_tree() unchanged, and only run show_tree_format() in if a --format different than the hardcoded built-in ones corresponding to the existing modes is provided. "Slower" here can bee seen via the the following "hyperfine" command. This uses GIT_TEST_LS_TREE_FORMAT_BACKEND=<bool> to force the use of the new backend: $ hyperfine -L env false,true -L f "-r,-r -l,-r --name-only,-r --format='%(objectname)'" 'GIT_TEST_LS_TREE_FORMAT_BACKEND={env} ./git -C ~/g/linux ls-tree {f} HEAD' -r 10 Benchmark 1: GIT_TEST_LS_TREE_FORMAT_BACKEND=false ./git -C ~/g/linux ls-tree -r HEAD Time (mean ± σ): 86.1 ms ± 0.6 ms [User: 65.2 ms, System: 20.9 ms] Range (min … max): 85.2 ms … 87.5 ms 10 runs Benchmark 2: GIT_TEST_LS_TREE_FORMAT_BACKEND=true ./git -C ~/g/linux ls-tree -r HEAD Time (mean ± σ): 122.5 ms ± 0.6 ms [User: 101.3 ms, System: 21.1 ms] Range (min … max): 121.8 ms … 123.4 ms 10 runs Benchmark 3: GIT_TEST_LS_TREE_FORMAT_BACKEND=false ./git -C ~/g/linux ls-tree -r -l HEAD Time (mean ± σ): 277.7 ms ± 1.3 ms [User: 234.6 ms, System: 43.0 ms] Range (min … max): 275.9 ms … 279.7 ms 10 runs Benchmark 4: GIT_TEST_LS_TREE_FORMAT_BACKEND=true ./git -C ~/g/linux ls-tree -r -l HEAD Time (mean ± σ): 332.8 ms ± 2.6 ms [User: 282.0 ms, System: 50.7 ms] Range (min … max): 329.6 ms … 338.2 ms 10 runs Benchmark 5: GIT_TEST_LS_TREE_FORMAT_BACKEND=false ./git -C ~/g/linux ls-tree -r --name-only HEAD Time (mean ± σ): 71.8 ms ± 0.4 ms [User: 54.1 ms, System: 17.6 ms] Range (min … max): 71.2 ms … 72.5 ms 10 runs Benchmark 6: GIT_TEST_LS_TREE_FORMAT_BACKEND=true ./git -C ~/g/linux ls-tree -r --name-only HEAD Time (mean ± σ): 86.6 ms ± 0.5 ms [User: 65.7 ms, System: 20.7 ms] Range (min … max): 85.9 ms … 87.4 ms 10 runs Benchmark 7: GIT_TEST_LS_TREE_FORMAT_BACKEND=false ./git -C ~/g/linux ls-tree -r --format='%(objectname)' HEAD Time (mean ± σ): 85.8 ms ± 0.6 ms [User: 66.2 ms, System: 19.5 ms] Range (min … max): 85.0 ms … 86.9 ms 10 runs Benchmark 8: GIT_TEST_LS_TREE_FORMAT_BACKEND=true ./git -C ~/g/linux ls-tree -r --format='%(objectname)' HEAD Time (mean ± σ): 85.3 ms ± 0.2 ms [User: 66.6 ms, System: 18.7 ms] Range (min … max): 85.0 ms … 85.7 ms 10 runs Summary 'GIT_TEST_LS_TREE_FORMAT_BACKEND=false ./git -C ~/g/linux ls-tree -r --name-only HEAD' ran 1.19 ± 0.01 times faster than 'GIT_TEST_LS_TREE_FORMAT_BACKEND=true ./git -C ~/g/linux ls-tree -r --format='%(objectname)' HEAD' 1.19 ± 0.01 times faster than 'GIT_TEST_LS_TREE_FORMAT_BACKEND=false ./git -C ~/g/linux ls-tree -r --format='%(objectname)' HEAD' 1.20 ± 0.01 times faster than 'GIT_TEST_LS_TREE_FORMAT_BACKEND=false ./git -C ~/g/linux ls-tree -r HEAD' 1.21 ± 0.01 times faster than 'GIT_TEST_LS_TREE_FORMAT_BACKEND=true ./git -C ~/g/linux ls-tree -r --name-only HEAD' 1.71 ± 0.01 times faster than 'GIT_TEST_LS_TREE_FORMAT_BACKEND=true ./git -C ~/g/linux ls-tree -r HEAD' 3.87 ± 0.03 times faster than 'GIT_TEST_LS_TREE_FORMAT_BACKEND=false ./git -C ~/g/linux ls-tree -r -l HEAD' 4.64 ± 0.05 times faster than 'GIT_TEST_LS_TREE_FORMAT_BACKEND=true ./git -C ~/g/linux ls-tree -r -l HEAD' I.e. something like the "--long" output would be much slower with this, mainly due to how we need to allocate various things to do with quote.c instead of spewing the output directly to stdout. But even a --format='%(objectname)' is fast with the new backend, so this is viable as a replacement for adding new formats, and we'll pay for this added complexity as a one-off, and not again every time a new format needs to be added. See [1] for an example of what it would otherwise take to add an --object-name flag. 1. https://lore.kernel.org/git/2e449d1c792ff81da5f22c8bf65ed33c393d62f8.1639721750.git.dyroneteng@xxxxxxxxx/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> --- builtin/ls-tree.c | 167 +++++++++++++++++++++++++++++++++++++- t/t3105-ls-tree-format.sh | 49 +++++++++++ 2 files changed, 215 insertions(+), 1 deletion(-) create mode 100755 t/t3105-ls-tree-format.sh diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c index df8312408da..efd85cab088 100644 --- a/builtin/ls-tree.c +++ b/builtin/ls-tree.c @@ -26,11 +26,34 @@ static struct pathspec pathspec; static int chomp_prefix; static const char *ls_tree_prefix; +/* + * The format equivalents that show_tree() is prepared to handle. + */ +static const char *ls_tree_format_d = "%(objectmode) %(objecttype) %(objectname)%x09%(path)"; +static const char *ls_tree_format_l = "%(objectmode) %(objecttype) %(objectname) %(objectsize:padded)%x09%(path)"; +static const char *ls_tree_format_n = "%(path)"; + static const char * const ls_tree_usage[] = { N_("git ls-tree [<options>] <tree-ish> [<path>...]"), NULL }; +struct read_tree_ls_tree_data { + const char *format; + struct strbuf sb_scratch; + struct strbuf sb_tmp; +}; + +struct expand_ls_tree_data { + unsigned mode; + enum object_type type; + const struct object_id *oid; + const char *pathname; + const char *basebuf; + struct strbuf *sb_scratch; + struct strbuf *sb_tmp; +}; + static int show_recursive(const char *base, size_t baselen, const char *pathname) { int i; @@ -61,6 +84,76 @@ static int show_recursive(const char *base, size_t baselen, const char *pathname return 0; } +static void expand_objectsize(struct strbuf *sb, + const struct object_id *oid, + const enum object_type type, + unsigned int padded) +{ + if (type == OBJ_BLOB) { + unsigned long size; + if (oid_object_info(the_repository, oid, &size) < 0) + die(_("could not get object info about '%s'"), oid_to_hex(oid)); + if (padded) + strbuf_addf(sb, "%7"PRIuMAX, (uintmax_t)size); + else + strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)size); + } else if (padded) { + strbuf_addf(sb, "%7s", "-"); + } else { + strbuf_addstr(sb, "-"); + } +} + +static size_t expand_show_tree(struct strbuf *sb, + const char *start, + void *context) +{ + struct expand_ls_tree_data *data = context; + const char *end; + const char *p; + size_t len; + + len = strbuf_expand_literal_cb(sb, start, NULL); + if (len) + return len; + + if (*start != '(') + die(_("bad format as of '%s'"), start); + end = strchr(start + 1, ')'); + if (!end) + die(_("ls-tree format element '%s' does not end in ')'"), + start); + len = end - start + 1; + + if (skip_prefix(start, "(objectmode)", &p)) { + strbuf_addf(sb, "%06o", data->mode); + } else if (skip_prefix(start, "(objecttype)", &p)) { + strbuf_addstr(sb, type_name(data->type)); + } else if (skip_prefix(start, "(objectsize:padded)", &p)) { + expand_objectsize(sb, data->oid, data->type, 1); + } else if (skip_prefix(start, "(objectsize)", &p)) { + expand_objectsize(sb, data->oid, data->type, 0); + } else if (skip_prefix(start, "(objectname)", &p)) { + strbuf_addstr(sb, find_unique_abbrev(data->oid, abbrev)); + } else if (skip_prefix(start, "(path)", &p)) { + const char *name = data->basebuf; + const char *prefix = chomp_prefix ? ls_tree_prefix : NULL; + + if (prefix) + name = relative_path(name, prefix, data->sb_scratch); + quote_c_style(name, data->sb_tmp, NULL, 0); + strbuf_add(sb, data->sb_tmp->buf, data->sb_tmp->len); + + strbuf_reset(data->sb_tmp); + /* The relative_path() function resets "scratch" */ + } else { + unsigned int errlen = (unsigned long)len; + die(_("bad ls-tree format specifiec %%%.*s"), errlen, start); + } + + return len; +} + static int show_tree_init(enum object_type *type, struct strbuf *base, const char *pathname, unsigned mode, int *retval) { @@ -79,6 +172,38 @@ static int show_tree_init(enum object_type *type, struct strbuf *base, return 0; } +static int show_tree_fmt(const struct object_id *oid, struct strbuf *base, + const char *pathname, unsigned mode, void *context) +{ + struct read_tree_ls_tree_data *data = context; + struct expand_ls_tree_data my_data = { + .mode = mode, + .type = OBJ_BLOB, + .oid = oid, + .pathname = pathname, + .sb_scratch = &data->sb_scratch, + .sb_tmp = &data->sb_tmp, + }; + struct strbuf sb = STRBUF_INIT; + int retval = 0; + size_t baselen; + + if (show_tree_init(&my_data.type, base, pathname, mode, &retval)) + return retval; + + baselen = base->len; + strbuf_addstr(base, pathname); + strbuf_reset(&sb); + my_data.basebuf = base->buf; + + strbuf_expand(&sb, data->format, expand_show_tree, &my_data); + strbuf_addch(&sb, line_termination); + fwrite(sb.buf, sb.len, 1, stdout); + strbuf_setlen(base, baselen); + + return retval; +} + static int show_tree(const struct object_id *oid, struct strbuf *base, const char *pathname, unsigned mode, void *context) { @@ -125,6 +250,12 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix) struct object_id oid; struct tree *tree; int i, full_tree = 0; + const char *implicit_format = NULL; + const char *format = NULL; + struct read_tree_ls_tree_data read_tree_cb_data = { + .sb_scratch = STRBUF_INIT, + .sb_tmp = STRBUF_INIT, + }; const struct option ls_tree_options[] = { OPT_BIT('d', NULL, &ls_options, N_("only show trees"), LS_TREE_ONLY), @@ -145,9 +276,12 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix) OPT_BOOL(0, "full-tree", &full_tree, N_("list entire tree; not just current directory " "(implies --full-name)")), + OPT_STRING_F(0 , "format", &format, N_("format"), + N_("format to use for the output"), PARSE_OPT_NONEG), OPT__ABBREV(&abbrev), OPT_END() }; + read_tree_fn_t fn = show_tree; git_config(git_default_config, NULL); ls_tree_prefix = prefix; @@ -164,6 +298,18 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix) if ( (LS_TREE_ONLY|LS_RECURSIVE) == ((LS_TREE_ONLY|LS_RECURSIVE) & ls_options)) ls_options |= LS_SHOW_TREES; + if (ls_options & LS_NAME_ONLY) + implicit_format = ls_tree_format_n; + if (ls_options & LS_SHOW_SIZE) + implicit_format = ls_tree_format_l; + + if (format && implicit_format) + usage_msg_opt(_("providing --format cannot be combined with other format-altering options"), + ls_tree_usage, ls_tree_options); + if (implicit_format) + format = implicit_format; + if (!format) + format = ls_tree_format_d; if (argc < 1) usage_with_options(ls_tree_usage, ls_tree_options); @@ -186,6 +332,25 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix) tree = parse_tree_indirect(&oid); if (!tree) die("not a tree object"); + + /* + * The generic show_tree_fmt() is slower than show_tree(), so + * take the fast path if possible. + */ + if (format && (!strcmp(format, ls_tree_format_d) || + !strcmp(format, ls_tree_format_l) || + !strcmp(format, ls_tree_format_n))) + fn = show_tree; + else if (format) + fn = show_tree_fmt; + /* + * Allow forcing the show_tree_fmt(), to test that it can + * handle the test suite. + */ + if (git_env_bool("GIT_TEST_LS_TREE_FORMAT_BACKEND", 0)) + fn = show_tree_fmt; + + read_tree_cb_data.format = format; return !!read_tree(the_repository, tree, - &pathspec, show_tree, NULL); + &pathspec, fn, &read_tree_cb_data); } diff --git a/t/t3105-ls-tree-format.sh b/t/t3105-ls-tree-format.sh new file mode 100755 index 00000000000..79817260ce8 --- /dev/null +++ b/t/t3105-ls-tree-format.sh @@ -0,0 +1,49 @@ +#!/bin/sh + +test_description='ls-tree --format' + +TEST_PASSES_SANITIZE_LEAK=true +. ./test-lib.sh + +test_expect_success 'ls-tree --format usage' ' + test_expect_code 129 git ls-tree --format=fmt -l && + test_expect_code 129 git ls-tree --format=fmt --name-only && + test_expect_code 129 git ls-tree --format=fmt --name-status +' + +test_expect_success 'setup' ' + mkdir dir && + test_commit dir/sub-file && + test_commit top-file +' + +test_ls_tree_format () { + format=$1 && + opts=$2 && + shift 2 && + git ls-tree $opts -r HEAD >expect.raw && + sed "s/^/> /" >expect <expect.raw && + git ls-tree --format="> $format" -r HEAD >actual && + test_cmp expect actual +} + +test_expect_success 'ls-tree --format=<default-like>' ' + test_ls_tree_format \ + "%(objectmode) %(objecttype) %(objectname)%x09%(path)" \ + "" +' + +test_expect_success 'ls-tree --format=<long-like>' ' + test_ls_tree_format \ + "%(objectmode) %(objecttype) %(objectname) %(objectsize:padded)%x09%(path)" \ + "--long" +' + +test_expect_success 'ls-tree --format=<name-only-like>' ' + test_ls_tree_format \ + "%(path)" \ + "--name-only" + +' + +test_done -- 2.34.1.1119.g7a3fc8778ee