Re: [PATCH 2/5] evaluate: check variadic argument types against formatting info

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Nov 01, 2018 at 06:11:14PM +0000, Ben Dooks wrote:
> The variadic argumnet code did not check any of the variadic arguments
> as it did not previously know the possible type. Now we have the possible
> formatting information stored in the ctype, we can do some checks on the
> printf formatting types.
> 
> Signed-off-by: Ben Dooks <ben.dooks@xxxxxxxxxxxxxxx>
> ---
> Fixes since v1:
> - Split out the format-string -> symbol code
> - Use symbol_list for the symbols from format parsing
> - Changed to follow the new parsing code and ctype use
> - Merged the unsigned-int/long types together
> 
> Fixes since v2:
> - Check for printf_va_start before checking variadic-list
> - Tidy the type code and fix a couple of bugs with %l and %ll
> - Fix function names in working through printf arguments.
> - Tidy documentation
> 
> Fixes since v3:
> - Added positional arguments
> - Also added precision and width specifiers
> 
> Notes:
> - Awaiting new pointer-list accessing code
> - %p still generates an address-space mismatch
> - how do we deal with the kernel's attempt to make printk format all types?
> 
> expr: updated handling for formats
> ---
>  evaluate.c | 297 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 297 insertions(+)
> 
> diff --git a/evaluate.c b/evaluate.c
> index b96696d..ffb4af6 100644
> --- a/evaluate.c
> +++ b/evaluate.c
> @@ -2243,6 +2243,297 @@ static struct symbol *evaluate_alignof(struct expression *expr)
>  	return size_t_ctype;
>  }
>  
> +struct format_type {
> +	const char	*format;
> +	int		(*test)(void *data, struct expression **expr, struct symbol *ctype, struct symbol **target, const char **typediff);
> +	void		*data;

Does this need to be a void pointer? Isn't it always a 'struct symbol *'?

> +};
> +
> +struct format_state {
> +	struct expression	*expr;
> +	unsigned int		va_start;
> +	unsigned int		fmt_index;
> +	unsigned int		arg_index;
> +	unsigned int		used_position: 1;
> +};
> +
> +static int printf_fmt_inttype(void *data, struct expression **expr, struct symbol *ctype, struct symbol **target, const char **typediff)
> +{
> +	struct symbol *type = data;
> +	*target = type;
> +	return ctype == type;

This will be fine as a first step but it won't work if thz argument
has some modifiers. It will be best, I think, to use something like
is_int_type() and type_difference.

> +}
> +
> +// todo - comparing pointers isn't easy...
> +static int printf_fmt_string(void *data, struct expression **expr, struct symbol *ctype, struct symbol **target, const char **typediff)
> +{
> +	*target = &string_ctype;

Same as above for integers. For example what if the arg
is a 'const char *'?

> +	return check_assignment_types(*target, expr, typediff);
> +}
> +
> +static int printf_fmt_pointer(void *data, struct expression **expr, struct symbol *ctype, struct symbol **target, const char **typediff)
> +{
> +	*target = &ptr_ctype;
> +	// todo - deal with not caring about address-space //

What about:
	if (is_ptr_type(ctype))
		...

> +static struct format_type *parse_printf_get_fmt(char *msg, char **msgout)
> +{
> +	struct format_type *type;
> +	char *end;
> +	int len;
> +
> +	for (end = msg; *end > ' '; end++);
I don't understand this line. Please explain.

> +	*msgout = end+1;
> +
> +	len = (end - msg);
> +	for (type = printf_fmts; type->format != NULL; type++)
> +		if (!strncmp(msg, type->format, len))
> +			return type;
> +
> +	return NULL;
> +}
> +
> +static int is_printf_flag(char ch)
> +{
> +	return ch == '0' || ch == '+' || ch == '-' || ch == ' ' || ch == '#';
> +}
> +
> +static int printf_check_position(char **fmt)
> +{
> +	char *ptr= *fmt;
> +
> +	while (isdigit(*ptr))
> +		ptr++;
> +	if (*ptr == '$') {
> +		char *pos = *fmt;
> +		*fmt = ptr+1;
> +		return atoi(pos);

Use strtol(), please. Also (and related) "%$d" is invalid (or is
this checked by parse_format_printf_checkpos() ?).

> +	}
> +	return -1;
> +}
> +
> +static void parse_format_printf_checkpos(struct format_state *state, const char *which)
> +{
> +	if (state->used_position)
> +		warning(state->expr->pos,
> +			"format %d: %s: no position specified",
> +			state->arg_index-1, which);
> +}
> +
> +static int parse_format_printf_argfield(char **fmtptr, struct format_state *state, struct expression_list *head, int *pos, const char *which)
> +{
> +	struct expression *expr;
> +	char *fmt = *fmtptr;
> +	int argpos = -1;
> +
> +	/* check for simple digit-string width/precision specifier first */
> +	if (*fmt != '*') {
> +		while (isdigit(*fmt))
> +			fmt++;
> +		*fmtptr = fmt;
> +		return 0;
> +	}
> +
> +	fmt++;
> +	argpos = printf_check_position(&fmt);
> +
> +	if (argpos > 0) {
> +		argpos += state->va_start - 1;
> +		state->used_position = 1;
> +	} else {
> +		argpos = (*pos)++;
> +		state->arg_index++;
> +		parse_format_printf_checkpos(state, which);
> +	}
> +
> +	*fmtptr = fmt;
> +	expr = get_expression_n(head, argpos-1);
> +	if (!expr) {
> +		warning(state->expr->pos, "%s: no argument at position %d", which, argpos);
> +		return 1;
> +	}
> +
> +	/* todo - verify we got an actual integer argument */
> +	return 0;
> +}
> +
> +/* printf format parsing code
> + *
> + * TODO:
> + * - fix up type checking of castable types (such as int vs long vs long long)
> + * - validate all arguments specified are also used...
> + * - possibly remove return code from this function
> + */
> +static int parse_format_printf(const char **fmtstring, struct format_state *state, struct expression_list *head)
> +{
> +	char buff[strlen(*fmtstring)+1];
> +	struct format_type *type;
> +	struct expression *expr;
> +	const char *string = *fmtstring;
> +	char *fmtpost = NULL, *fmt = buff;
> +	int pos = state->arg_index;
> +	int error = 0;
> +	int ret;
> +
> +	/* trivial check for %% */
> +	string++;
> +	if (string[0] == '%') {
> +		*fmtstring = string;
> +		return 0;
> +	}
> +
> +	state->arg_index++;
> +	state->fmt_index++;
> +
> +	strcpy(buff, string);
Is a copy really needed?

> +	ret = printf_check_position(&fmt);
> +	if (ret == 0) {
> +		/* we got an invalid position argument */
> +		error++;
> +	} else if (ret < 0) {
> +		parse_format_printf_checkpos(state, "position");
> +	} else {
> +		state->used_position = 1;
> +		pos = ret + state->va_start - 1;
> +	}
> +
> +	/* get rid of any formatting flag bits */
> +	while (is_printf_flag(*fmt))
> +		fmt++;
> +
> +	/* now there is the posibility of a width specifier */
> +	if (parse_format_printf_argfield(&fmt, state, head, &pos, "width"))
> +		error++;
> +
> +	/* now we might have the precision specifier */
> +	if (*fmt == '.') {
> +		fmt++;
> +		if (parse_format_printf_argfield(&fmt, state, head, &pos, "position"))
> +			error++;
> +	}
> +
> +	type = parse_printf_get_fmt(fmt, &fmtpost);
> +	*fmtstring += fmtpost - buff;
> +	fmtpost[-1] = '\0';
> +
> +	if (!type && fmt[0] == 'p')
> +		type = &printf_fmt_ptr_ref;	/* probably some extension */
> +
> +	if (type) {
> +		struct symbol *ctype, *source, *target = NULL;
> +		const char *typediff = "different types";
> +		int ret;
> +
> +		expr = get_expression_n(head, pos-1);
> +		if (!expr)
> +			return -2;
> +
> +		ctype = evaluate_expression(expr);
> +		if (!ctype)
> +			return -3;
> +
> +		source = degenerate(expr);
> +		ret = (type->test)(type->data, &expr, ctype, &target, &typediff);
> +
> +		if (ret == 0) {
> +			warning(expr->pos, "incorrect type in argument %d (%s)", pos, typediff);
> +			info(expr->pos, "   expected %s", show_typename(target));
> +			info(expr->pos, "   got %s", show_typename(source));
> +		}
> +	} else {
> +		warning(expr->pos, "cannot evaluate type '%s'", buff);
> +		return -1;
> +	}
> +
> +	return 1;
> +}
> +
> +/* attempt to run through a printf format string and work out the types
> + * it specifies. The format is parsed from the __attribute__(format())
> + * in the parser code which stores the positions of the message and arg
> + * start in the ctype.
> + */
> +static void evaluate_format_printf(struct symbol *fn, struct expression_list *head)

Rename 'head' to something like 'args'.

> +{
> +	struct format_state state = { };
> +	struct expression *expr;
> +	const char *fmt_string = NULL;
> +
> +	expr = get_expression_n(head, fn->ctype.printf_msg-1);
> +	if (!expr)
> +		return;
> +	if (expr->string && expr->string->length)
You must first check if (expr->type == EXPR_STRING)

> +		fmt_string = expr->string->data;
> +	if (!fmt_string) {
> +		struct symbol *sym = evaluate_expression(expr);
> +
> +		/* attempt to find initialiser for this */
> +		if (sym && sym->initializer && sym->initializer->string)
> +			fmt_string = sym->initializer->string->data;
Same here.
But don't want only check literal strings, so this last part
is not needed/wrong.

> +	}
> +
> +	state.expr = expr;
> +	state.va_start = fn->ctype.printf_va_start;
> +	state.arg_index = fn->ctype.printf_va_start;
> +
> +	if (!fmt_string) {
> +		warning(expr->pos, "not a format string?");
> +	} else {
> +		const char *string = fmt_string;
> +
> +		for (; string[0] != '\0'; string++) {
> +			if (string[0] != '%')
> +				continue;
> +			parse_format_printf(&string, &state, head);

I tend to think that this would make parse_format_printf() simpler:
			string = parse_format_printf(string, &state, head);
but it's not important.

> +		}
> +	}
> +}
> +
>  static int evaluate_arguments(struct symbol *fn, struct expression_list *head)
>  {
>  	struct expression *expr;
> @@ -2250,6 +2541,12 @@ static int evaluate_arguments(struct symbol *fn, struct expression_list *head)
>  	struct symbol *argtype;
>  	int i = 1;
>  
> +	/* do this first, otherwise the arugment info may get lost or changed
> +	 * later on in the evaluation loop.
> +	 */
Please, add an explanation (which info can be lost or changed and how).
I would much much prefer to make the format checks *after* the normal
evaluation loop; This would allow to have the args already evaluated
(in the sparse usual sense) with their corresponding warnings (for the
non-variadic part) coming before the format warnings.

> +	if (fn->ctype.printf_va_start)
> +		evaluate_format_printf(fn, head);
> +
>  	PREPARE_PTR_LIST(argument_types, argtype);
>  	FOR_EACH_PTR (head, expr) {
>  		struct expression **p = THIS_ADDRESS(expr);

Kind regards,
-- Luc



[Index of Archives]     [Newbies FAQ]     [LKML]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Trinity Fuzzer Tool]

  Powered by Linux