On Thu, Nov 01, 2018 at 06:11:14PM +0000, Ben Dooks wrote: > The variadic argumnet code did not check any of the variadic arguments > as it did not previously know the possible type. Now we have the possible > formatting information stored in the ctype, we can do some checks on the > printf formatting types. > > Signed-off-by: Ben Dooks <ben.dooks@xxxxxxxxxxxxxxx> > --- > Fixes since v1: > - Split out the format-string -> symbol code > - Use symbol_list for the symbols from format parsing > - Changed to follow the new parsing code and ctype use > - Merged the unsigned-int/long types together > > Fixes since v2: > - Check for printf_va_start before checking variadic-list > - Tidy the type code and fix a couple of bugs with %l and %ll > - Fix function names in working through printf arguments. > - Tidy documentation > > Fixes since v3: > - Added positional arguments > - Also added precision and width specifiers > > Notes: > - Awaiting new pointer-list accessing code > - %p still generates an address-space mismatch > - how do we deal with the kernel's attempt to make printk format all types? > > expr: updated handling for formats > --- > evaluate.c | 297 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 297 insertions(+) > > diff --git a/evaluate.c b/evaluate.c > index b96696d..ffb4af6 100644 > --- a/evaluate.c > +++ b/evaluate.c > @@ -2243,6 +2243,297 @@ static struct symbol *evaluate_alignof(struct expression *expr) > return size_t_ctype; > } > > +struct format_type { > + const char *format; > + int (*test)(void *data, struct expression **expr, struct symbol *ctype, struct symbol **target, const char **typediff); > + void *data; Does this need to be a void pointer? Isn't it always a 'struct symbol *'? > +}; > + > +struct format_state { > + struct expression *expr; > + unsigned int va_start; > + unsigned int fmt_index; > + unsigned int arg_index; > + unsigned int used_position: 1; > +}; > + > +static int printf_fmt_inttype(void *data, struct expression **expr, struct symbol *ctype, struct symbol **target, const char **typediff) > +{ > + struct symbol *type = data; > + *target = type; > + return ctype == type; This will be fine as a first step but it won't work if thz argument has some modifiers. It will be best, I think, to use something like is_int_type() and type_difference. > +} > + > +// todo - comparing pointers isn't easy... > +static int printf_fmt_string(void *data, struct expression **expr, struct symbol *ctype, struct symbol **target, const char **typediff) > +{ > + *target = &string_ctype; Same as above for integers. For example what if the arg is a 'const char *'? > + return check_assignment_types(*target, expr, typediff); > +} > + > +static int printf_fmt_pointer(void *data, struct expression **expr, struct symbol *ctype, struct symbol **target, const char **typediff) > +{ > + *target = &ptr_ctype; > + // todo - deal with not caring about address-space // What about: if (is_ptr_type(ctype)) ... > +static struct format_type *parse_printf_get_fmt(char *msg, char **msgout) > +{ > + struct format_type *type; > + char *end; > + int len; > + > + for (end = msg; *end > ' '; end++); I don't understand this line. Please explain. > + *msgout = end+1; > + > + len = (end - msg); > + for (type = printf_fmts; type->format != NULL; type++) > + if (!strncmp(msg, type->format, len)) > + return type; > + > + return NULL; > +} > + > +static int is_printf_flag(char ch) > +{ > + return ch == '0' || ch == '+' || ch == '-' || ch == ' ' || ch == '#'; > +} > + > +static int printf_check_position(char **fmt) > +{ > + char *ptr= *fmt; > + > + while (isdigit(*ptr)) > + ptr++; > + if (*ptr == '$') { > + char *pos = *fmt; > + *fmt = ptr+1; > + return atoi(pos); Use strtol(), please. Also (and related) "%$d" is invalid (or is this checked by parse_format_printf_checkpos() ?). > + } > + return -1; > +} > + > +static void parse_format_printf_checkpos(struct format_state *state, const char *which) > +{ > + if (state->used_position) > + warning(state->expr->pos, > + "format %d: %s: no position specified", > + state->arg_index-1, which); > +} > + > +static int parse_format_printf_argfield(char **fmtptr, struct format_state *state, struct expression_list *head, int *pos, const char *which) > +{ > + struct expression *expr; > + char *fmt = *fmtptr; > + int argpos = -1; > + > + /* check for simple digit-string width/precision specifier first */ > + if (*fmt != '*') { > + while (isdigit(*fmt)) > + fmt++; > + *fmtptr = fmt; > + return 0; > + } > + > + fmt++; > + argpos = printf_check_position(&fmt); > + > + if (argpos > 0) { > + argpos += state->va_start - 1; > + state->used_position = 1; > + } else { > + argpos = (*pos)++; > + state->arg_index++; > + parse_format_printf_checkpos(state, which); > + } > + > + *fmtptr = fmt; > + expr = get_expression_n(head, argpos-1); > + if (!expr) { > + warning(state->expr->pos, "%s: no argument at position %d", which, argpos); > + return 1; > + } > + > + /* todo - verify we got an actual integer argument */ > + return 0; > +} > + > +/* printf format parsing code > + * > + * TODO: > + * - fix up type checking of castable types (such as int vs long vs long long) > + * - validate all arguments specified are also used... > + * - possibly remove return code from this function > + */ > +static int parse_format_printf(const char **fmtstring, struct format_state *state, struct expression_list *head) > +{ > + char buff[strlen(*fmtstring)+1]; > + struct format_type *type; > + struct expression *expr; > + const char *string = *fmtstring; > + char *fmtpost = NULL, *fmt = buff; > + int pos = state->arg_index; > + int error = 0; > + int ret; > + > + /* trivial check for %% */ > + string++; > + if (string[0] == '%') { > + *fmtstring = string; > + return 0; > + } > + > + state->arg_index++; > + state->fmt_index++; > + > + strcpy(buff, string); Is a copy really needed? > + ret = printf_check_position(&fmt); > + if (ret == 0) { > + /* we got an invalid position argument */ > + error++; > + } else if (ret < 0) { > + parse_format_printf_checkpos(state, "position"); > + } else { > + state->used_position = 1; > + pos = ret + state->va_start - 1; > + } > + > + /* get rid of any formatting flag bits */ > + while (is_printf_flag(*fmt)) > + fmt++; > + > + /* now there is the posibility of a width specifier */ > + if (parse_format_printf_argfield(&fmt, state, head, &pos, "width")) > + error++; > + > + /* now we might have the precision specifier */ > + if (*fmt == '.') { > + fmt++; > + if (parse_format_printf_argfield(&fmt, state, head, &pos, "position")) > + error++; > + } > + > + type = parse_printf_get_fmt(fmt, &fmtpost); > + *fmtstring += fmtpost - buff; > + fmtpost[-1] = '\0'; > + > + if (!type && fmt[0] == 'p') > + type = &printf_fmt_ptr_ref; /* probably some extension */ > + > + if (type) { > + struct symbol *ctype, *source, *target = NULL; > + const char *typediff = "different types"; > + int ret; > + > + expr = get_expression_n(head, pos-1); > + if (!expr) > + return -2; > + > + ctype = evaluate_expression(expr); > + if (!ctype) > + return -3; > + > + source = degenerate(expr); > + ret = (type->test)(type->data, &expr, ctype, &target, &typediff); > + > + if (ret == 0) { > + warning(expr->pos, "incorrect type in argument %d (%s)", pos, typediff); > + info(expr->pos, " expected %s", show_typename(target)); > + info(expr->pos, " got %s", show_typename(source)); > + } > + } else { > + warning(expr->pos, "cannot evaluate type '%s'", buff); > + return -1; > + } > + > + return 1; > +} > + > +/* attempt to run through a printf format string and work out the types > + * it specifies. The format is parsed from the __attribute__(format()) > + * in the parser code which stores the positions of the message and arg > + * start in the ctype. > + */ > +static void evaluate_format_printf(struct symbol *fn, struct expression_list *head) Rename 'head' to something like 'args'. > +{ > + struct format_state state = { }; > + struct expression *expr; > + const char *fmt_string = NULL; > + > + expr = get_expression_n(head, fn->ctype.printf_msg-1); > + if (!expr) > + return; > + if (expr->string && expr->string->length) You must first check if (expr->type == EXPR_STRING) > + fmt_string = expr->string->data; > + if (!fmt_string) { > + struct symbol *sym = evaluate_expression(expr); > + > + /* attempt to find initialiser for this */ > + if (sym && sym->initializer && sym->initializer->string) > + fmt_string = sym->initializer->string->data; Same here. But don't want only check literal strings, so this last part is not needed/wrong. > + } > + > + state.expr = expr; > + state.va_start = fn->ctype.printf_va_start; > + state.arg_index = fn->ctype.printf_va_start; > + > + if (!fmt_string) { > + warning(expr->pos, "not a format string?"); > + } else { > + const char *string = fmt_string; > + > + for (; string[0] != '\0'; string++) { > + if (string[0] != '%') > + continue; > + parse_format_printf(&string, &state, head); I tend to think that this would make parse_format_printf() simpler: string = parse_format_printf(string, &state, head); but it's not important. > + } > + } > +} > + > static int evaluate_arguments(struct symbol *fn, struct expression_list *head) > { > struct expression *expr; > @@ -2250,6 +2541,12 @@ static int evaluate_arguments(struct symbol *fn, struct expression_list *head) > struct symbol *argtype; > int i = 1; > > + /* do this first, otherwise the arugment info may get lost or changed > + * later on in the evaluation loop. > + */ Please, add an explanation (which info can be lost or changed and how). I would much much prefer to make the format checks *after* the normal evaluation loop; This would allow to have the args already evaluated (in the sparse usual sense) with their corresponding warnings (for the non-variadic part) coming before the format warnings. > + if (fn->ctype.printf_va_start) > + evaluate_format_printf(fn, head); > + > PREPARE_PTR_LIST(argument_types, argtype); > FOR_EACH_PTR (head, expr) { > struct expression **p = THIS_ADDRESS(expr); Kind regards, -- Luc