Re: [PATCH bpf-next 05/10] bpf: implement accurate raw_tp context access via BTF

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 10/7/19 5:35 PM, Andrii Nakryiko wrote:
> On Fri, Oct 4, 2019 at 10:04 PM Alexei Starovoitov <ast@xxxxxxxxxx> wrote:
>>
>> libbpf analyzes bpf C program, searches in-kernel BTF for given type name
>> and stores it into expected_attach_type.
>> The kernel verifier expects this btf_id to point to something like:
>> typedef void (*btf_trace_kfree_skb)(void *, struct sk_buff *skb, void *loc);
>> which represents signature of raw_tracepoint "kfree_skb".
>>
>> Then btf_ctx_access() matches ctx+0 access in bpf program with 'skb'
>> and 'ctx+8' access with 'loc' arguments of "kfree_skb" tracepoint.
>> In first case it passes btf_id of 'struct sk_buff *' back to the verifier core
>> and 'void *' in second case.
>>
>> Then the verifier tracks PTR_TO_BTF_ID as any other pointer type.
>> Like PTR_TO_SOCKET points to 'struct bpf_sock',
>> PTR_TO_TCP_SOCK points to 'struct bpf_tcp_sock', and so on.
>> PTR_TO_BTF_ID points to in-kernel structs.
>> If 1234 is btf_id of 'struct sk_buff' in vmlinux's BTF
>> then PTR_TO_BTF_ID#1234 points to one of in kernel skbs.
>>
>> When PTR_TO_BTF_ID#1234 is dereferenced (like r2 = *(u64 *)r1 + 32)
>> the btf_struct_access() checks which field of 'struct sk_buff' is
>> at offset 32. Checks that size of access matches type definition
>> of the field and continues to track the dereferenced type.
>> If that field was a pointer to 'struct net_device' the r2's type
>> will be PTR_TO_BTF_ID#456. Where 456 is btf_id of 'struct net_device'
>> in vmlinux's BTF.
>>
>> Such verifier anlaysis prevents "cheating" in BPF C program.
> 
> typo: analysis

I did ran spellcheck, but couldn't interpret its input :)

> 
>> The program cannot cast arbitrary pointer to 'struct sk_buff *'
>> and access it. C compiler would allow type cast, of course,
>> but the verifier will notice type mismatch based on BPF assembly
>> and in-kernel BTF.
>>
>> Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
>> ---
>>   include/linux/bpf.h          |  15 ++-
>>   include/linux/bpf_verifier.h |   2 +
>>   kernel/bpf/btf.c             | 179 +++++++++++++++++++++++++++++++++++
>>   kernel/bpf/verifier.c        |  69 +++++++++++++-
>>   kernel/trace/bpf_trace.c     |   2 +-
>>   5 files changed, 262 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 5b9d22338606..2dc3a7c313e9 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -281,6 +281,7 @@ enum bpf_reg_type {
>>          PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
>>          PTR_TO_TP_BUFFER,        /* reg points to a writable raw tp's buffer */
>>          PTR_TO_XDP_SOCK,         /* reg points to struct xdp_sock */
>> +       PTR_TO_BTF_ID,
> 
> comments for consistency? ;)

fixed

>>   };
>>
>>   /* The information passed from prog-specific *_is_valid_access
>> @@ -288,7 +289,11 @@ enum bpf_reg_type {
>>    */
>>   struct bpf_insn_access_aux {
>>          enum bpf_reg_type reg_type;
>> -       int ctx_field_size;
>> +       union {
>> +               int ctx_field_size;
>> +               u32 btf_id;
>> +       };
>> +       struct bpf_verifier_env *env; /* for verbose logs */
>>   };
>>
>>   static inline void
>> @@ -747,6 +752,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
>>   int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
>>                                       const union bpf_attr *kattr,
>>                                       union bpf_attr __user *uattr);
>> +bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>> +                   const struct bpf_prog *prog,
>> +                   struct bpf_insn_access_aux *info);
>> +int btf_struct_access(struct bpf_verifier_env *env,
>> +                     const struct btf_type *t, int off, int size,
>> +                     enum bpf_access_type atype,
>> +                     u32 *next_btf_id);
>> +
>>   #else /* !CONFIG_BPF_SYSCALL */
>>   static inline struct bpf_prog *bpf_prog_get(u32 ufd)
>>   {
>> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
>> index 432ba8977a0a..e21782f49c45 100644
>> --- a/include/linux/bpf_verifier.h
>> +++ b/include/linux/bpf_verifier.h
>> @@ -52,6 +52,8 @@ struct bpf_reg_state {
>>                   */
>>                  struct bpf_map *map_ptr;
>>
>> +               u32 btf_id; /* for PTR_TO_BTF_ID */
>> +
>>                  /* Max size from any of the above. */
>>                  unsigned long raw;
>>          };
>> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
>> index 848f9d4b9d7e..61ff8a54ca22 100644
>> --- a/kernel/bpf/btf.c
>> +++ b/kernel/bpf/btf.c
>> @@ -3433,6 +3433,185 @@ struct btf *btf_parse_vmlinux(void)
>>          return ERR_PTR(err);
>>   }
>>
>> +extern struct btf *btf_vmlinux;
>> +
>> +bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>> +                   const struct bpf_prog *prog,
>> +                   struct bpf_insn_access_aux *info)
>> +{
>> +       u32 btf_id = prog->expected_attach_type;
>> +       const struct btf_param *args;
>> +       const struct btf_type *t;
>> +       const char prefix[] = "btf_trace_";
>> +       const char *tname;
>> +       u32 nr_args;
>> +
>> +       if (!btf_id)
>> +               return true;
>> +
>> +       if (IS_ERR(btf_vmlinux)) {
>> +               bpf_verifier_log_write(info->env, "btf_vmlinux is malformed\n");
>> +               return false;
>> +       }
>> +
>> +       t = btf_type_by_id(btf_vmlinux, btf_id);
>> +       if (!t || BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) {
>> +               bpf_verifier_log_write(info->env, "btf_id is invalid\n");
>> +               return false;
>> +       }
>> +
>> +       tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
>> +       if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
>> +               bpf_verifier_log_write(info->env,
>> +                                      "btf_id points to wrong type name %s\n",
>> +                                      tname);
>> +               return false;
>> +       }
>> +       tname += sizeof(prefix) - 1;
>> +
>> +       t = btf_type_by_id(btf_vmlinux, t->type);
>> +       if (!btf_type_is_ptr(t))
>> +               return false;
>> +       t = btf_type_by_id(btf_vmlinux, t->type);
>> +       if (!btf_type_is_func_proto(t))
>> +               return false;
> 
> All negative cases but these two have helpful log messages, please add
> two more for these.

no. not here. This is a part of typedef construction from patch 1.
It cannot be anything else. If btf_id points to typedef and
typedef has btf_trace_ prefix it has to be correct.
Above two checks are checking sanity of kernel build.

> 
>> +
>> +       args = (const struct btf_param *)(t + 1);
> 
> IMO, doing args++ (and leaving comment why) here instead of adjusting
> `off/8 + 1` below is cleaner.

I tried your suggestion and it doesn't look any better, but why not.
Since I've coded it anyway.

>> +       /* skip first 'void *__data' argument in btf_trace_* */
>> +       nr_args = btf_type_vlen(t) - 1;
>> +       if (off >= nr_args * 8) {
> 
> Looks like you forgot to check that `off % 8 == 0`?

great catch. yes. fixed

>> +               bpf_verifier_log_write(info->env,
>> +                                      "raw_tp '%s' doesn't have %d-th argument\n",
>> +                                      tname, off / 8);
>> +               return false;
>> +       }
>> +
>> +       /* raw tp arg is off / 8, but typedef has extra 'void *', hence +1 */
>> +       t = btf_type_by_id(btf_vmlinux, args[off / 8 + 1].type);
>> +       if (btf_type_is_int(t))
> 
> this is too limiting, you need to strip const/volatile/restrict and
> resolve typedef's (e.g., size_t, __u64 -- that's all typedefs).

right. done.

> also probably want to allow enums.

eventually yes. I prefer to walk first.

> btw, atomic_t is a struct, so might want to allow up to 8 byte
> struct/unions (passed by value) reads? might never happen for
> tracepoint, not sure

may be in the future. walk first.

> 
>> +               /* accessing a scalar */
>> +               return true;
>> +       if (!btf_type_is_ptr(t)) {
> 
> similar to above, modifiers and typedef resolution has to happen first

done.

>> +               bpf_verifier_log_write(info->env,
>> +                                      "raw_tp '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
>> +                                      tname, off / 8,
>> +                                      __btf_name_by_offset(btf_vmlinux, t->name_off),
>> +                                      btf_kind_str[BTF_INFO_KIND(t->info)]);
>> +               return false;
>> +       }
>> +       if (t->type == 0)
>> +               /* This is a pointer to void.
>> +                * It is the same as scalar from the verifier safety pov.
>> +                * No further pointer walking is allowed.
>> +                */
>> +               return true;
>> +
>> +       /* this is a pointer to another type */
>> +       info->reg_type = PTR_TO_BTF_ID;
>> +       info->btf_id = t->type;
>> +
>> +       t = btf_type_by_id(btf_vmlinux, t->type);
>> +       bpf_verifier_log_write(info->env,
>> +                              "raw_tp '%s' arg%d has btf_id %d type %s '%s'\n",
>> +                              tname, off / 8, info->btf_id,
>> +                              btf_kind_str[BTF_INFO_KIND(t->info)],
>> +                              __btf_name_by_offset(btf_vmlinux, t->name_off));
>> +       return true;
>> +}
>> +
>> +int btf_struct_access(struct bpf_verifier_env *env,
>> +                     const struct btf_type *t, int off, int size,
>> +                     enum bpf_access_type atype,
>> +                     u32 *next_btf_id)
>> +{
>> +       const struct btf_member *member;
>> +       const struct btf_type *mtype;
>> +       const char *tname, *mname;
>> +       int i, moff = 0, msize;
>> +
>> +again:
>> +       tname = btf_name_by_offset(btf_vmlinux, t->name_off);
>> +       if (!btf_type_is_struct(t)) {
> 
> see above about typedef/modifiers resolution

here actually skipping is not necessary.

> 
>> +               bpf_verifier_log_write(env, "Type '%s' is not a struct", tname);
>> +               return -EINVAL;
>> +       }
>> +       if (btf_type_vlen(t) < 1) {
>> +               bpf_verifier_log_write(env, "struct %s doesn't have fields", tname);
>> +               return -EINVAL;
>> +       }
> 
> kind of redundant check...

I wanted to give helpful message, but since you asked.
There are 394 struct FOO {}; in the kernel.
And probably none of them are going to appear in bpf tracing,
so I deleted that check.

> 
>> +
>> +       for_each_member(i, t, member) {
>> +
>> +               /* offset of the field */
>> +               moff = btf_member_bit_offset(t, member);
> 
> what do you want to do with bitfields?

they're scalars.

>> +
>> +               if (off < moff / 8)
>> +                       continue;
>> +
>> +               /* type of the field */
>> +               mtype = btf_type_by_id(btf_vmlinux, member->type);
>> +               mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
> 
> nit: you mix btf_name_by_offset and __btf_name_by_offset, any reason
> to not stick to just one of them (__btf_name_by_offset is safer, so
> that one, probably)?

I tried to use btf_name_by_offset() in verifier.c and
__btf_name_by_offset() in btf.c consistently.
Looks like I missed one spot.
Fixed.

> 
>> +
>> +               /* skip typedef, volotile modifiers */
> 
> typo: volatile
> 
> nit: also, volatile is not special, so either mention
> const/volatile/restrict or just "modifiers"?

fixed

>> +               while (btf_type_is_modifier(mtype))
>> +                       mtype = btf_type_by_id(btf_vmlinux, mtype->type);
>> +
>> +               if (btf_type_is_array(mtype))
>> +                       /* array deref is not supported yet */
>> +                       continue;
>> +
>> +               if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) {
>> +                       bpf_verifier_log_write(env,
>> +                                              "field %s doesn't have size\n",
>> +                                              mname);
>> +                       return -EFAULT;
>> +               }
>> +               if (btf_type_is_ptr(mtype))
>> +                       msize = 8;
>> +               else
>> +                       msize = mtype->size;
>> +               if (off >= moff / 8 + msize)
>> +                       /* rare case, must be a field of the union with smaller size,
>> +                        * let's try another field
>> +                        */
>> +                       continue;
>> +               /* the 'off' we're looking for is either equal to start
>> +                * of this field or inside of this struct
>> +                */
>> +               if (btf_type_is_struct(mtype)) {
>> +                       /* our field must be inside that union or struct */
>> +                       t = mtype;
>> +
>> +                       /* adjust offset we're looking for */
>> +                       off -= moff / 8;
>> +                       goto again;
>> +               }
>> +               if (msize != size) {
>> +                       /* field access size doesn't match */
>> +                       bpf_verifier_log_write(env,
>> +                                              "cannot access %d bytes in struct %s field %s that has size %d\n",
>> +                                              size, tname, mname, msize);
>> +                       return -EACCES;
>> +               }
>> +
>> +               if (btf_type_is_ptr(mtype)) {
>> +                       const struct btf_type *stype;
>> +
>> +                       stype = btf_type_by_id(btf_vmlinux, mtype->type);
>> +                       if (btf_type_is_struct(stype)) {
> 
> again, resolving modifiers/typedefs? though in this case it might be
> too eager?...

done

>> +                               *next_btf_id = mtype->type;
>> +                               return PTR_TO_BTF_ID;
>> +                       }
>> +               }
>> +               /* all other fields are treated as scalars */
>> +               return SCALAR_VALUE;
>> +       }
>> +       bpf_verifier_log_write(env,
>> +                              "struct %s doesn't have field at offset %d\n",
>> +                              tname, off);
>> +       return -EINVAL;
>> +}
>> +
>>   void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
>>                         struct seq_file *m)
>>   {
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 91c4db4d1c6a..3c155873ffea 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -406,6 +406,7 @@ static const char * const reg_type_str[] = {
>>          [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
>>          [PTR_TO_TP_BUFFER]      = "tp_buffer",
>>          [PTR_TO_XDP_SOCK]       = "xdp_sock",
>> +       [PTR_TO_BTF_ID]         = "ptr_",
>>   };
>>
>>   static char slot_type_char[] = {
>> @@ -460,6 +461,10 @@ static void print_verifier_state(struct bpf_verifier_env *env,
>>                          /* reg->off should be 0 for SCALAR_VALUE */
>>                          verbose(env, "%lld", reg->var_off.value + reg->off);
>>                  } else {
>> +                       if (t == PTR_TO_BTF_ID)
>> +                               verbose(env, "%s",
>> +                                       btf_name_by_offset(btf_vmlinux,
>> +                                                          btf_type_by_id(btf_vmlinux, reg->btf_id)->name_off));
>>                          verbose(env, "(id=%d", reg->id);
>>                          if (reg_type_may_be_refcounted_or_null(t))
>>                                  verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
>> @@ -2337,10 +2342,12 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
>>
>>   /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
>>   static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
>> -                           enum bpf_access_type t, enum bpf_reg_type *reg_type)
>> +                           enum bpf_access_type t, enum bpf_reg_type *reg_type,
>> +                           u32 *btf_id)
>>   {
>>          struct bpf_insn_access_aux info = {
>>                  .reg_type = *reg_type,
>> +               .env = env,
>>          };
>>
>>          if (env->ops->is_valid_access &&
>> @@ -2354,7 +2361,10 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
>>                   */
>>                  *reg_type = info.reg_type;
>>
>> -               env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
>> +               if (*reg_type == PTR_TO_BTF_ID)
>> +                       *btf_id = info.btf_id;
>> +               else
>> +                       env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
> 
> ctx_field_size is passed through bpf_insn_access_aux, but btf_id is
> returned like this. Is there a reason to do it in two different ways?

insn_aux_data is permanent. Meaning that any executing path into
this instruction got to have the same size and offset of ctx access.
I think btf based ctx access doesn't have to be.
There is a check later to make sure r1=ctx dereferenceing btf is
permanent, but r1=btf dereferencing btf is clearly note.
I'm still not sure whether former will be permanent forever.
So went with quick hack above to reduce amount of potential
refactoring later. I'll think about it more.

>>                  /* remember the offset of last byte accessed in ctx */
>>                  if (env->prog->aux->max_ctx_offset < off + size)
>>                          env->prog->aux->max_ctx_offset = off + size;
>> @@ -2745,6 +2755,53 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
>>          reg->smax_value = reg->umax_value;
>>   }
>>
>> +static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
>> +                                  struct bpf_reg_state *regs,
>> +                                  int regno, int off, int size,
>> +                                  enum bpf_access_type atype,
>> +                                  int value_regno)
>> +{
>> +       struct bpf_reg_state *reg = regs + regno;
>> +       const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
>> +       const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
>> +       u32 btf_id;
>> +       int ret;
>> +
>> +       if (atype != BPF_READ) {
>> +               verbose(env, "only read is supported\n");
>> +               return -EACCES;
>> +       }
>> +
>> +       if (off < 0) {
>> +               verbose(env,
>> +                       "R%d is ptr_%s negative access %d is not allowed\n",
> 
> totally nit: but for consistency sake (following variable offset error
> below): R%d is ptr_%s negative access: off=%d\n"?

fixed

>> +                       regno, tname, off);
>> +               return -EACCES;
>> +       }
>> +       if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
> 
> why so strict about reg->var_off.value?

It's variable part of register access.
There is no fixed offset to pass into btf_struct_access().
In other words 'arrays of pointers to btf_id' are not supported yet.
Walk first :)





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux