Add array type support for probe events. This allows user to get arraied types from memory address. The array type syntax is TYPE[N] Where TYPE is one of types (u8/16/32/64,s8/16/32/64, x8/16/32/64, symbol, string) and N is a fixed value less than 64. The string array type is a bit different from other types. For other base types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is same as +0(%di):x32.) But string[1] is not equal to string. The string type itself represents "char array", but string array type represents "char * array". So, for example, +0(%di):string[1] is equal to +0(+0(%di)):string. Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx> --- Changes in v4: - Fix to use calculated size correctly for field definition. (Thank you Namhyung!) Changes in v2: - Add array description in README file - Fix to init s3 code out of loop. - Fix to proceed code when the last code is OP_ARRAY. - Add string array type and bitfield array type. --- Documentation/trace/kprobetrace.txt | 13 ++++ kernel/trace/trace.c | 3 + kernel/trace/trace_probe.c | 130 +++++++++++++++++++++++++++-------- kernel/trace/trace_probe.h | 14 ++++ kernel/trace/trace_probe_tmpl.h | 63 +++++++++++++++-- 5 files changed, 183 insertions(+), 40 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 1d082f8ffeee..8bf752dfc072 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -65,9 +65,22 @@ in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32' or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and x86-64 uses x64). +These value types can be an array. To record array data, you can add '[N]' +(where N is a fixed number, less than 64) to the base type. +E.g. 'x16[4]' means an array of x16 (2bytes hex) with 4 elements. +Note that the array can be applied to memory type fetchargs, you can not +apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is +wrong, but '+8($stack):x8[8]' is OK.) + String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. +The string array type is a bit different from other types. For other base +types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is same +as +0(%di):x32.) But string[1] is not equal to string. The string type itself +represents "char array", but string array type represents "char * array". +So, for example, +0(%di):string[1] is equal to +0(+0(%di)):string. + Bitfield is another special type, which takes 3 parameters, bit-width, bit- offset, and container-size (usually 32). The syntax is; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bcd1fd87082d..b7c6698265e5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4614,7 +4614,8 @@ static const char readme_msg[] = "\t $stack<index>, $stack, $retval, $comm\n" #endif "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" - "\t b<bit-width>@<bit-offset>/<container-size>\n" + "\t b<bit-width>@<bit-offset>/<container-size>,\n" + "\t <type>[<array-size>]\n" #endif " events/\t\t- Directory containing all trace event subsystems:\n" " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 9458800f394a..0e185050e492 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -363,9 +363,9 @@ static int __parse_bitfield_probe_arg(const char *bf, int traceprobe_parse_probe_arg(char *arg, ssize_t *size, struct probe_arg *parg, unsigned int flags) { - struct fetch_insn *code, *tmp = NULL; - const char *t; - int ret; + struct fetch_insn *code, *scode, *tmp = NULL; + char *t, *t2; + int ret, len; if (strlen(arg) > MAX_ARGSTR_LEN) { pr_info("Argument is too long.: %s\n", arg); @@ -376,24 +376,42 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, pr_info("Failed to allocate memory for command '%s'.\n", arg); return -ENOMEM; } - t = strchr(parg->comm, ':'); + t = strchr(arg, ':'); if (t) { - arg[t - parg->comm] = '\0'; - t++; + *t = '\0'; + t2 = strchr(++t, '['); + if (t2) { + *t2 = '\0'; + parg->count = simple_strtoul(t2 + 1, &t2, 0); + if (strcmp(t2, "]") || parg->count == 0) + return -EINVAL; + if (parg->count > MAX_ARRAY_LEN) + return -E2BIG; + } } /* * The default type of $comm should be "string", and it can't be * dereferenced. */ if (!t && strcmp(arg, "$comm") == 0) - t = "string"; - parg->type = find_fetch_type(t); + parg->type = find_fetch_type("string"); + else + parg->type = find_fetch_type(t); if (!parg->type) { pr_info("Unsupported type: %s\n", t); return -EINVAL; } parg->offset = *size; - *size += parg->type->size; + *size += parg->type->size * (parg->count ?: 1); + + if (parg->count) { + len = strlen(parg->type->fmttype) + 6; + parg->fmt = kmalloc(len, GFP_KERNEL); + if (!parg->fmt) + return -ENOMEM; + snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, + parg->count); + } code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL); if (!code) @@ -413,10 +431,20 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, ret = -EINVAL; goto fail; } - /* Since IMM or COMM must be the 1st insn, this is safe */ - if (code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) + if (code->op != FETCH_OP_DEREF || parg->count) { + /* + * IMM and COMM is pointing actual address, those must + * be kept, and if parg->count != 0, this is an array + * of string pointers instead of string address itself. + */ code++; + if (code->op != FETCH_OP_NOP) { + ret = -E2BIG; + goto fail; + } + } code->op = FETCH_OP_ST_STRING; /* In DEREF case, replace it */ + code->size = parg->type->size; parg->dynamic = true; } else if (code->op == FETCH_OP_DEREF) { code->op = FETCH_OP_ST_MEM; @@ -430,12 +458,29 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, code->op = FETCH_OP_ST_RAW; code->size = parg->type->size; } + scode = code; /* Modify operation */ if (t != NULL) { ret = __parse_bitfield_probe_arg(t, parg->type, &code); if (ret) goto fail; } + /* Loop(Array) operation */ + if (parg->count) { + if (scode->op != FETCH_OP_ST_MEM && + scode->op != FETCH_OP_ST_STRING) { + pr_info("array only accepts memory or address\n"); + ret = -EINVAL; + goto fail; + } + code++; + if (code->op != FETCH_OP_NOP) { + ret = -E2BIG; + goto fail; + } + code->op = FETCH_OP_LP_ARRAY; + code->param = parg->count; + } code++; code->op = FETCH_OP_END; @@ -474,14 +519,17 @@ void traceprobe_free_probe_arg(struct probe_arg *arg) kfree(arg->code); kfree(arg->name); kfree(arg->comm); + kfree(arg->fmt); } +/* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? len - pos : 0) static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, bool is_return) { - int i; + struct probe_arg *parg; + int i, j; int pos = 0; - const char *fmt, *arg; if (!is_return) { @@ -492,33 +540,49 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; } - /* When len=0, we just calculate the needed length */ -#define LEN_OR_ZERO (len ? len - pos : 0) - pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); for (i = 0; i < tp->nr_args; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", - tp->args[i].name, tp->args[i].type->fmt); + parg = tp->args + i; + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=", parg->name); + if (parg->count) { + pos += snprintf(buf + pos, LEN_OR_ZERO, "{%s", + parg->type->fmt); + for (j = 1; j < parg->count; j++) + pos += snprintf(buf + pos, LEN_OR_ZERO, ",%s", + parg->type->fmt); + pos += snprintf(buf + pos, LEN_OR_ZERO, "}"); + } else + pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", + parg->type->fmt); } pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); for (i = 0; i < tp->nr_args; i++) { - if (strcmp(tp->args[i].type->name, "string") == 0) + parg = tp->args + i; + if (parg->count) { + if (strcmp(parg->type->name, "string") == 0) + fmt = ", __get_str(%s[%d])"; + else + fmt = ", REC->%s[%d]"; + for (j = 0; j < parg->count; j++) + pos += snprintf(buf + pos, LEN_OR_ZERO, + fmt, parg->name, j); + } else { + if (strcmp(parg->type->name, "string") == 0) + fmt = ", __get_str(%s)"; + else + fmt = ", REC->%s"; pos += snprintf(buf + pos, LEN_OR_ZERO, - ", __get_str(%s)", - tp->args[i].name); - else - pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", - tp->args[i].name); + fmt, parg->name); + } } -#undef LEN_OR_ZERO - /* return the length of print_fmt */ return pos; } +#undef LEN_OR_ZERO int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return) { @@ -546,11 +610,15 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, /* Set argument names as fields */ for (i = 0; i < tp->nr_args; i++) { struct probe_arg *parg = &tp->args[i]; - - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, - offset + parg->offset, - parg->type->size, + const char *fmt = parg->type->fmttype; + int size = parg->type->size; + + if (parg->fmt) + fmt = parg->fmt; + if (parg->count) + size *= parg->count; + ret = trace_define_field(event_call, fmt, parg->name, + offset + parg->offset, size, parg->type->is_signed, FILTER_OTHER); if (ret) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index ff91faf70887..d256a19ee6d1 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -43,6 +43,7 @@ #define MAX_TRACE_ARGS 128 #define MAX_ARGSTR_LEN 63 +#define MAX_ARRAY_LEN 64 #define MAX_STRING_SIZE PATH_MAX /* Reserved field names */ @@ -78,6 +79,14 @@ static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) return (u8 *)ent + get_loc_offs(*dl); } +static nokprobe_inline u32 update_data_loc(u32 loc, int consumed) +{ + u32 maxlen = get_loc_len(loc); + u32 offset = get_loc_offs(loc); + + return make_data_loc(maxlen - consumed, offset + consumed); +} + /* Printing function type */ typedef int (*print_type_func_t)(struct trace_seq *, void *, void *); @@ -100,6 +109,8 @@ enum fetch_op { FETCH_OP_ST_STRING, /* String: .offset, .size */ // Stage 4 (modify) op FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */ + // Stage 5 (loop) op + FETCH_OP_LP_ARRAY, /* Array: .param = loop count */ FETCH_OP_END, }; @@ -189,6 +200,7 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(symbol); _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, atype) #define ASSIGN_FETCH_TYPE_END {} +#define MAX_ARRAY_LEN 64 #ifdef CONFIG_KPROBE_EVENTS bool trace_kprobe_on_func_entry(struct trace_event_call *call); @@ -209,8 +221,10 @@ struct probe_arg { struct fetch_insn *code; bool dynamic;/* Dynamic array (string) is used */ unsigned int offset; /* Offset from argument entry */ + unsigned int count; /* Array count */ const char *name; /* Name of this argument */ const char *comm; /* Command of this argument */ + char *fmt; /* Format string if needed */ const struct fetch_type *type; /* Type of this argument */ }; diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 32ae2fc78190..edc09f2cd6b2 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -67,10 +67,15 @@ static nokprobe_inline int process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, void *dest, void *base) { - int ret = 0; + struct fetch_insn *s3 = NULL; + int total = 0, ret = 0, i = 0; + u32 loc = 0; + unsigned long lval = val; +stage2: /* 2nd stage: dereference memory if needed */ while (code->op == FETCH_OP_DEREF) { + lval = val; ret = probe_mem_read(&val, (void *)val + code->offset, sizeof(val)); if (ret) @@ -78,11 +83,15 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, code++; } + s3 = code; +stage3: /* 3rd stage: store value to buffer */ if (unlikely(!dest)) { - if (code->op == FETCH_OP_ST_STRING) - return fetch_store_strlen(val + code->offset); - else + if (code->op == FETCH_OP_ST_STRING) { + ret += fetch_store_strlen(val + code->offset); + code++; + goto array; + } else return -EILSEQ; } @@ -94,6 +103,7 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, probe_mem_read(dest, (void *)val + code->offset, code->size); break; case FETCH_OP_ST_STRING: + loc = *(u32 *)dest; ret = fetch_store_string(val + code->offset, dest, base); break; default: @@ -107,6 +117,29 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, code++; } +array: + /* the last stage: Loop on array */ + if (code->op == FETCH_OP_LP_ARRAY) { + total += ret; + if (++i < code->param) { + code = s3; + if (s3->op != FETCH_OP_ST_STRING) { + dest += s3->size; + val += s3->size; + goto stage3; + } + code--; + val = lval + sizeof(char *); + if (dest) { + dest += sizeof(u32); + *(u32 *)dest = update_data_loc(loc, ret); + } + goto stage2; + } + code++; + ret = total; + } + return code->op == FETCH_OP_END ? ret : -EILSEQ; } @@ -156,12 +189,26 @@ static inline int print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args, u8 *data, void *field) { - int i; + void *p; + int i, j; for (i = 0; i < nr_args; i++) { - trace_seq_printf(s, " %s=", args[i].name); - if (!args[i].type->print(s, data + args[i].offset, field)) - return -ENOMEM; + struct probe_arg *a = args + i; + + trace_seq_printf(s, " %s=", a->name); + if (likely(!a->count)) { + if (!a->type->print(s, data + a->offset, field)) + return -ENOMEM; + continue; + } + trace_seq_putc(s, '{'); + p = data + a->offset; + for (j = 0; j < a->count; j++) { + if (!a->type->print(s, p, field)) + return -ENOMEM; + trace_seq_putc(s, j == a->count - 1 ? '}' : ','); + p += a->type->size; + } } return 0; } -- To unsubscribe from this list: send the line "unsubscribe linux-trace-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html