Re: [PATCH v14 07/13] ref-filter: add support for %(contents:lines=X)

Eric Sunshine <sunshine@xxxxxxxxxxxxxx> · Sun, 30 Aug 2015 03:53:31 -0400

On Sat, Aug 29, 2015 at 10:12 AM, Karthik Nayak <karthik.188@xxxxxxxxx> wrote:
> In 'tag.c' we can print N lines from the annotation of the tag using
> the '-n<num>' option. Copy code from 'tag.c' to 'ref-filter' and
> modify it to support appending of N lines from the annotation of tags
> to the given strbuf.
>
> Implement %(contents:lines=X) where X lines of the given object are
> obtained.
>
> Add documentation and test for the same.
>
> Signed-off-by: Karthik Nayak <karthik.188@xxxxxxxxx>
> ---
> diff --git a/builtin/tag.c b/builtin/tag.c
> index 471d6b1..0fc7557 100644
> --- a/builtin/tag.c
> +++ b/builtin/tag.c
> @@ -63,6 +64,11 @@ struct align {
>         unsigned int width;
>  };
>
> +struct contents {
> +       unsigned int lines;
> +       struct object_id oid;
> +};
> +
>  #define REF_FORMATTING_STATE_INIT  { 0, NULL }
>
>  struct ref_formatting_stack {
> @@ -80,6 +86,7 @@ struct ref_formatting_state {
>  struct atom_value {
>         const char *s;
>         struct align *align;
> +       struct contents *contents;

Same question as for 'align': Does 'contents' need to be
heap-allocated because it must exist beyond the lifetime of
'atom_value'? If not, making it just a plain member of 'atom_value'
would simplify memory management (no need to free it).

Also, will 'align' and 'contents' ever be used at the same time? If
not, it might make sense to place them in a 'union' (not for the
memory saving, but to make it clear to the reader that their use is
mutually exclusive).

More below.

>         void (*handler)(struct atom_value *atomv, struct ref_formatting_state *state);
>         unsigned long ul; /* used for sorting when not FIELD_STR */
>  };
> @@ -569,6 +576,61 @@ static void find_subpos(const char *buf, unsigned long sz,
>         *nonsiglen = *sig - buf;
>  }
>
> +/*
> + * If 'lines' is greater than 0, append that many lines from the given
> + * object_id 'oid' to the given strbuf.
> + */
> +static void append_tag_lines(struct strbuf *out, const struct object_id *oid, int lines)
> +{
> +       int i;
> +       unsigned long size;
> +       enum object_type type;
> +       char *buf, *sp, *eol;
> +       size_t len;
> +
> +       buf = read_sha1_file(oid->hash, &type, &size);
> +       if (!buf)
> +               die_errno("unable to read object %s", oid_to_hex(oid));
> +       if (type != OBJ_COMMIT && type != OBJ_TAG)
> +               goto free_return;
> +       if (!size)
> +               die("an empty %s object %s?",
> +                   typename(type), oid_to_hex(oid));
> +
> +       /* skip header */
> +       sp = strstr(buf, "\n\n");
> +       if (!sp)
> +               goto free_return;
> +
> +       /* only take up to "lines" lines, and strip the signature from a tag */
> +       if (type == OBJ_TAG)
> +               size = parse_signature(buf, size);
> +       for (i = 0, sp += 2; i < lines && sp < buf + size; i++) {
> +               if (i)
> +                       strbuf_addstr(out, "\n    ");
> +               eol = memchr(sp, '\n', size - (sp - buf));
> +               len = eol ? eol - sp : size - (sp - buf);
> +               strbuf_add(out, sp, len);
> +               if (!eol)
> +                       break;
> +               sp = eol + 1;
> +       }
> +free_return:
> +       free(buf);
> +}

I understand that you want to re-use this code from
tag.c:show_tag_lines(), but (from a very cursory read) isn't this
duplicating logic and processing already done elsewhere in
ref-filter.c? More about this below.

> +
> +static void contents_lines_handler(struct atom_value *atomv, struct ref_formatting_state *state)
> +{
> +       struct contents *contents = (struct contents *)atomv->contents;

Why is this cast needed?

> +       struct strbuf s = STRBUF_INIT;
> +
> +       append_tag_lines(&s, &contents->oid, contents->lines);
> +       quote_formatting(&state->stack->output, s.buf, state->quote_style);
> +       strbuf_release(&s);
> +
> +       free(contents);
> +}
> @@ -588,7 +651,8 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct obj
>                     strcmp(name, "contents") &&
>                     strcmp(name, "contents:subject") &&
>                     strcmp(name, "contents:body") &&
> -                   strcmp(name, "contents:signature"))
> +                   strcmp(name, "contents:signature") &&
> +                   !starts_with(name, "contents:lines="))
>                         continue;
>                 if (!subpos)
>                         find_subpos(buf, sz,
> @@ -608,6 +672,15 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct obj
>                         v->s = xmemdupz(sigpos, siglen);
>                 else if (!strcmp(name, "contents"))
>                         v->s = xstrdup(subpos);
> +               else if (skip_prefix(name, "contents:lines=", &valp)) {
> +                       struct contents *contents = xmalloc(sizeof(struct contents));
> +
> +                       if (strtoul_ui(valp, 10, &contents->lines))
> +                               die(_("positive width expected align:%s"), valp);
> +                       hashcpy(contents->oid.hash, obj->sha1);

The logic in append_tag_lines() which was copied from
tag.c:show_tag_lines() goes through the effort of loading the object
and parsing it, but hasn't the object already been loaded and parsed
by the time you get to this point? Assuming I'm reading this
correctly, wouldn't it make more sense to take advantage of the work
already done loading and parsing the object rather than repeating it
all inside append_tag_lines()?

> +                       v->handler = contents_lines_handler;
> +                       v->contents = contents;
> +               }
>         }
>  }
> diff --git a/t/t6302-for-each-ref-filter.sh b/t/t6302-for-each-ref-filter.sh
> index cef7a41..0277498 100755
> --- a/t/t6302-for-each-ref-filter.sh
> +++ b/t/t6302-for-each-ref-filter.sh
> @@ -166,4 +166,20 @@ test_expect_success 'nested alignment' '
>         test_cmp expect actual
>  '
>
> +test_expect_success 'check `%(contents:lines=X)`' '
> +       cat >expect <<-\EOF &&
> +       master three
> +       side four
> +       odd/spot three
> +       double-tag Annonated doubly
> +       four four
> +       one one
> +       signed-tag A signed tag message
> +       three three
> +       two two
> +       EOF
> +       git for-each-ref --format="%(refname:short) %(contents:lines=1)" >actual &&

Maybe also test some edge cases, such as line=0, lines=-1 (an invalid
value), lines=2, lines=9999999 (a value larger than the number of
lines in any object).

> +       test_cmp expect actual
> +'
> +
>  test_done
> --
> 2.5.0
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html