[PATCH v4 3/3] ref-filter: add support for %(contents:size)

Christian Couder <christian.couder@xxxxxxxxx> · Fri, 10 Jul 2020 18:47:39 +0200

It's useful and efficient to be able to get the size of the
contents directly without having to pipe through `wc -c`.

Also the result of the following:

`git for-each-ref --format='%(contents)' refs/heads/my-branch | wc -c`

is off by one as `git for-each-ref` appends a newline character
after the contents, which can be seen by comparing its output
with the output from `git cat-file`.

As with %(contents), %(contents:size) is silently ignored, if a
ref points to something other than a commit or a tag:

```
$ git update-ref refs/mytrees/first HEAD^{tree}
$ git for-each-ref --format='%(contents)' refs/mytrees/first

$ git for-each-ref --format='%(contents:size)' refs/mytrees/first

```

Signed-off-by: Christian Couder <chriscool@xxxxxxxxxxxxx>
---
 Documentation/git-for-each-ref.txt |  3 +++
 ref-filter.c                       |  7 ++++++-
 t/t6300-for-each-ref.sh            | 19 +++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index b739412c30..2ea71c5f6c 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -235,6 +235,9 @@ and `date` to extract the named component.
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
 
+contents:size::
+	The size in bytes of the commit or tag message.
+
 contents:subject::
 	The first paragraph of the message, which typically is a
 	single line, is taken as the "subject" of the commit or the
diff --git a/ref-filter.c b/ref-filter.c
index 8447cb09be..73d8bfa86d 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -127,7 +127,8 @@ static struct used_atom {
 			unsigned int nobracket : 1, push : 1, push_remote : 1;
 		} remote_ref;
 		struct {
-			enum { C_BARE, C_BODY, C_BODY_DEP, C_LINES, C_SIG, C_SUB, C_TRAILERS } option;
+			enum { C_BARE, C_BODY, C_BODY_DEP, C_LENGTH,
+			       C_LINES, C_SIG, C_SUB, C_TRAILERS } option;
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
 		} contents;
@@ -338,6 +339,8 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 		atom->u.contents.option = C_BARE;
 	else if (!strcmp(arg, "body"))
 		atom->u.contents.option = C_BODY;
+	else if (!strcmp(arg, "size"))
+		atom->u.contents.option = C_LENGTH;
 	else if (!strcmp(arg, "signature"))
 		atom->u.contents.option = C_SIG;
 	else if (!strcmp(arg, "subject"))
@@ -1253,6 +1256,8 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			v->s = copy_subject(subpos, sublen);
 		else if (atom->u.contents.option == C_BODY_DEP)
 			v->s = xmemdupz(bodypos, bodylen);
+		else if (atom->u.contents.option == C_LENGTH)
+			v->s = xstrfmt("%"PRIuMAX, (uintmax_t)strlen(subpos));
 		else if (atom->u.contents.option == C_BODY)
 			v->s = xmemdupz(bodypos, nonsiglen);
 		else if (atom->u.contents.option == C_SIG)
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index e9f468d360..467871ac10 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -52,6 +52,25 @@ test_atom() {
 		sanitize_pgp <actual >actual.clean &&
 		test_cmp expected actual.clean
 	"
+	# Automatically test "contents:size" atom after testing "contents"
+	if test "$2" = "contents"
+	then
+		case "$1" in
+		refs/tags/signed-*)
+			# We cannot use $3 as it expects sanitize_pgp to run
+			expect=$(git cat-file tag $ref | tail -n +6 | wc -c) ;;
+		refs/mytrees/* | refs/myblobs/*)
+			expect='' ;;
+		*)
+			expect=$(printf '%s' "$3" | wc -c) ;;
+		esac
+		# Leave $expect unquoted to lose possible leading whitespaces
+		echo $expect >expected
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2:size" "
+			git for-each-ref --format='%($2:size)' $ref >actual &&
+			test_cmp expected actual
+		"
+	fi
 }
 
 hexlen=$(test_oid hexsz)
-- 
2.27.0.347.gb620d8b0da