Re: [PATCH v9 17/17] fsck: report invalid object type-path combinations

Junio C Hamano <gitster@xxxxxxxxx> · Thu, 30 Sep 2021 14:01:19 -0700

Ævar Arnfjörð Bjarmason  <avarab@xxxxxxxxx> writes:

> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index 623f8fc3194..980c26e3b25 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -598,23 +598,30 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data)
>  	struct object *obj;
>  	enum object_type type;
>  	unsigned long size;
> -	void *contents;
> +	unsigned char *contents = NULL;
>  	int eaten;
>  	struct strbuf sb = STRBUF_INIT;
>  	struct object_info oi = OBJECT_INFO_INIT;
> -	int err = 0;
> +	struct object_id real_oid = *null_oid();
> +	int ret;
>  
>  	oi.type_name = &sb;
>  	oi.sizep = &size;
>  	oi.typep = &type;
>  
> -	if (read_loose_object(path, oid, &contents, &oi) < 0)
> -		err = error(_("%s: object corrupt or missing: %s"),
> -			    oid_to_hex(oid), path);
> +	ret = read_loose_object(path, oid, &real_oid, (void **)&contents, &oi);
> +	if (ret < 0) {
> +		if (contents && !oideq(&real_oid, oid))
> +			error(_("%s: hash-path mismatch, found at: %s"),
> +			      oid_to_hex(&real_oid), path);
> +		else
> +			error(_("%s: object corrupt or missing: %s"),
> +			      oid_to_hex(oid), path);
> +	}
>  	if (type < 0)
> -		err = error(_("%s: object is of unknown type '%s': %s"),
> -			    oid_to_hex(oid), sb.buf, path);
> -	if (err) {
> +		ret = error(_("%s: object is of unknown type '%s': %s"),
> +			    oid_to_hex(&real_oid), sb.buf, path);
> +	if (ret < 0) {
>  		errors_found |= ERROR_OBJECT;
>  		goto cleanup;
>  	}

This is immediately touching up what 16/17 has introduced, which is
making it a bit harder to follow than necessary, so let's take the
whole postimage of 16+17.

> static int fsck_loose(const struct object_id *oid, const char *path, void *data)
> {
> 	struct object *obj;
> 	enum object_type type;
> 	unsigned long size;
> 	unsigned char *contents = NULL;
> 	int eaten;
> 	struct strbuf sb = STRBUF_INIT;
> 	struct object_info oi = OBJECT_INFO_INIT;
> 	struct object_id real_oid = *null_oid();
> 	int ret;
> 
> 	oi.type_name = &sb;
> 	oi.sizep = &size;
> 	oi.typep = &type;
> 
> 	ret = read_loose_object(path, oid, &real_oid, (void **)&contents, &oi);
> 	if (ret < 0) {
> 		if (contents && !oideq(&real_oid, oid))
> 			error(_("%s: hash-path mismatch, found at: %s"),
> 			      oid_to_hex(&real_oid), path);
> 		else
> 			error(_("%s: object corrupt or missing: %s"),
> 			      oid_to_hex(oid), path);

We can emit an error() message from either one of these.  contents
may or may not be NULL, ret is negative, and we continue.  Do we
know anything about the value of type at this point?  IOW, will we
get into the body of the next "if (type < 0)" statement to overwrite
ret with -1?

> 	}
> 	if (type < 0)
> 		ret = error(_("%s: object is of unknown type '%s': %s"),
> 			    oid_to_hex(&real_oid), sb.buf, path);
> 	if (ret < 0) {
> 		errors_found |= ERROR_OBJECT;
> 		goto cleanup;

In any case, we'd jump to clean-up if any of the above hold, so we'd
avoid hittign the next BUG().

> 	}
> 
> 	if (!contents && type != OBJ_BLOB)
> 		BUG("read_loose_object streamed a non-blob");
> 
> 	obj = parse_object_buffer(the_repository, oid, type, size,
> 				  contents, &eaten);
> 
> 	if (!obj) {
> 		errors_found |= ERROR_OBJECT;
> 		error(_("%s: object could not be parsed: %s"),
> 		      oid_to_hex(oid), path);
> 		goto cleanup_eaten;
> 	}
> 
> 	obj->flags &= ~(REACHABLE | SEEN);
> 	obj->flags |= HAS_OBJ;
> 	if (fsck_obj(obj, contents, size))
> 		errors_found |= ERROR_OBJECT;
> 
> cleanup_eaten:
> 	if (!eaten)
> 		free(contents);
> cleanup:
> 	strbuf_release(&sb);

In the "goto cleanup" error case above, we haven't done anything
that would have caused the object contents eaten, and contents may
either point at an allocated memory or NULL (in the "hash-path
mismatch" case, we may have contents allocated but nobody has freed
it yet, leaking it).

I am wondering if we initialized "eaten" to false, we can get rid of
one of the two labels we added in this series, which would fix this
leak as well, no?

> 	return 0; /* keep checking other objects, even if we saw an error */
> }