Re: [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type

Eric Sunshine <sunshine@xxxxxxxxxxxxxx> · Tue, 7 Apr 2015 16:46:33 -0400

On Sun, Apr 5, 2015 at 2:28 PM, Karthik Nayak <karthik.188@xxxxxxxxx> wrote:
> Update sha1_loose_object_info() to optionally allow it to read
> from a loose object file of unknown/bogus type; as the function
> usually returns the type of the object it read in the form of enum
> for known types, add an optional "typename" field to receive the
> name of the type in textual form and a flag to indicate the reading
> of a loose object file of unknown/bogus type.
>
> Add parse_sha1_header_extended() which acts as a wrapper around
> parse_sha1_header() allowing more information to be obtained.
>
> Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of
> unknown/corrupt objects which have an unknown sha1 header size to
> a strbuf structure. This was written by Junio C Hamano but tested
> by me.
>
> Helped-by: Junio C Hamano <gitster@xxxxxxxxx>
> Helped-by: Eric Sunshine <sunshine@xxxxxxxxxxxxxx>
> Signed-off-by: Karthik Nayak <karthik.188@xxxxxxxxx>
> ---
> diff --git a/sha1_file.c b/sha1_file.c
> index 980ce6b..ac8fffc 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -1614,27 +1642,24 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
>   * too permissive for what we want to check. So do an anal
>   * object header parse by hand.
>   */
> -int parse_sha1_header(const char *hdr, unsigned long *sizep)
> +int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
> +                              unsigned int flags)
>  {
> -       char type[10];
> -       int i;
> +       struct strbuf typename = STRBUF_INIT;
>         unsigned long size;
> +       int type;
>
>         /*
>          * The type can be at most ten bytes (including the
>          * terminating '\0' that we add), and is followed by
>          * a space.
>          */
> -       i = 0;
>         for (;;) {
>                 char c = *hdr++;
>                 if (c == ' ')
>                         break;
> -               type[i++] = c;
> -               if (i >= sizeof(type))
> -                       return -1;
> +               strbuf_addch(&typename, c);
>         }
> -       type[i] = 0;
>
>         /*
>          * The length must follow immediately, and be in canonical
> @@ -1652,12 +1677,39 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
>                         size = size * 10 + c;
>                 }
>         }
> -       *sizep = size;
> +
> +       type = type_from_string_gently(typename.buf, typename.len, 1);
> +       if (oi->sizep)
> +               *oi->sizep = size;
> +       if (oi->typename)
> +               strbuf_addbuf(oi->typename, &typename);
> +       strbuf_release(&typename);
>
>         /*
> +        * Set type to 0 if it's an unknown object and
> +        * we're obtaining the type using '--literally'
> +        * option.
> +        */
> +       if ((flags & LOOKUP_LITERALLY) && (type == -1))
> +               type = 0;
> +       else if (type == -1)
> +               die("invalid object type");
> +       if (oi->typep)
> +               *oi->typep = type;

This unnecessary intermixing of 'type'/'typename' and 'size'
processing makes the code more confusing than it ought to be. Why not
do all the processing related to 'type'/'typename' before the
processing of 'size'?

> +       /*
>          * The length must be followed by a zero byte
>          */
> -       return *hdr ? -1 : type_from_string(type);
> +       return *hdr ? -1 : type;
> +}
> +
> +int parse_sha1_header(const char *hdr, unsigned long *sizep)
> +{
> +       struct object_info oi;
> +
> +       oi.sizep = sizep;
> +       oi.typename = NULL;
> +       oi.typep = NULL;
> +       return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
>  }
>
>  static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html