Re: [PATCH bpf-next 1/3] bpf: add writable context for raw tracepoints

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2019-04-04 at 18:17 -0700, Alexei Starovoitov wrote:
> On Wed, Apr 03, 2019 at 06:39:12PM +0000, Matt Mullins wrote:
> > On Mon, 2019-04-01 at 22:40 +0200, Daniel Borkmann wrote:
> > > On 03/29/2019 01:07 AM, Matt Mullins wrote:
> > > > This is an opt-in interface that allows a tracepoint to provide a safe
> > > > buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program.
> > > > The size of the buffer must be a compile-time constant, and is checked
> > > > before allowing a BPF program to attach to a tracepoint that uses this
> > > > feature.
> > > > 
> > > > The pointer to this buffer will be the first argument of tracepoints
> > > > that opt in; the buffer is readable by both BPF_PROG_TYPE_RAW_TRACEPOINT
> > > > and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a
> > > > tracepoint, but the buffer to which it points may only be written by the
> > > > latter.
> > > > 
> > > > bpf_probe: assert that writable tracepoint size is correct
> > > 
> > > Maybe also add a kselftest into bpf test suite to i) demo it and ii) make
> > > sure it's continuously been tested by bots running the suite?
> > 
> > Will do.
> > 
> > > 
> > > > Signed-off-by: Matt Mullins <mmullins@xxxxxx>
> > > > ---
> > > >  include/linux/bpf.h             |  2 ++
> > > >  include/linux/bpf_types.h       |  1 +
> > > >  include/linux/tracepoint-defs.h |  1 +
> > > >  include/trace/bpf_probe.h       | 27 +++++++++++++++++++++++++--
> > > >  include/uapi/linux/bpf.h        |  1 +
> > > >  kernel/bpf/syscall.c            |  8 ++++++--
> > > >  kernel/bpf/verifier.c           | 11 +++++++++++
> > > >  kernel/trace/bpf_trace.c        | 21 +++++++++++++++++++++
> > > >  8 files changed, 68 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index a2132e09dc1c..d3c71fd67476 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -263,6 +263,7 @@ enum bpf_reg_type {
> > > >  	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
> > > >  	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
> > > >  	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
> > > > +	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
> > > >  };
> > > >  
> > > >  /* The information passed from prog-specific *_is_valid_access
> > > > @@ -352,6 +353,7 @@ struct bpf_prog_aux {
> > > >  	u32 used_map_cnt;
> > > >  	u32 max_ctx_offset;
> > > >  	u32 max_pkt_offset;
> > > > +	u32 max_tp_access;
> > > >  	u32 stack_depth;
> > > >  	u32 id;
> > > >  	u32 func_cnt; /* used by non-func prog as the number of func progs */
> > > > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> > > > index 08bf2f1fe553..c766108608cb 100644
> > > > --- a/include/linux/bpf_types.h
> > > > +++ b/include/linux/bpf_types.h
> > > > @@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
> > > >  BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
> > > >  BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
> > > >  BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
> > > > +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
> > > >  #endif
> > > >  #ifdef CONFIG_CGROUP_BPF
> > > >  BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
> > > > diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
> > > > index 49ba9cde7e4b..b29950a19205 100644
> > > > --- a/include/linux/tracepoint-defs.h
> > > > +++ b/include/linux/tracepoint-defs.h
> > > > @@ -45,6 +45,7 @@ struct bpf_raw_event_map {
> > > >  	struct tracepoint	*tp;
> > > >  	void			*bpf_func;
> > > >  	u32			num_args;
> > > > +	u32			writable_size;
> > > >  } __aligned(32);
> > > >  
> > > >  #endif
> > > > diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
> > > > index 505dae0bed80..d6e556c0a085 100644
> > > > --- a/include/trace/bpf_probe.h
> > > > +++ b/include/trace/bpf_probe.h
> > > > @@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto)					\
> > > >   * to make sure that if the tracepoint handling changes, the
> > > >   * bpf probe will fail to compile unless it too is updated.
> > > >   */
> > > > -#undef DEFINE_EVENT
> > > > -#define DEFINE_EVENT(template, call, proto, args)			\
> > > > +#define __DEFINE_EVENT(template, call, proto, args, size)		\
> > > >  static inline void bpf_test_probe_##call(void)				\
> > > >  {									\
> > > >  	check_trace_callback_type_##call(__bpf_trace_##template);	\
> > > > @@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = {						\
> > > >  	.tp		= &__tracepoint_##call,				\
> > > >  	.bpf_func	= (void *)__bpf_trace_##template,		\
> > > >  	.num_args	= COUNT_ARGS(args),				\
> > > > +	.writable_size	= size,						\
> > > >  };
> > > >  
> > > > +#define FIRST(x, ...) x
> > > > +
> > > > +#undef DEFINE_EVENT_WRITABLE
> > > > +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size)	\
> > > > +static inline void bpf_test_buffer_##call(void)				\
> > > > +{									\
> > > > +	/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
> > > > +	 * BUILD_BUG_ON_ZERO() uses a different mechanism that is not	\
> > > > +	 * dead-code-eliminated.					\
> > > > +	 */								\
> > > > +	FIRST(proto);							\
> > > > +	(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args)));		\
> > > > +}									\
> > > > +__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
> > > > +
> > > > +#undef DEFINE_EVENT
> > > > +#define DEFINE_EVENT(template, call, proto, args)			\
> > > > +	__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
> > > >  
> > > >  #undef DEFINE_EVENT_PRINT
> > > >  #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
> > > >  	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
> > > >  
> > > >  #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
> > > > +
> > > > +#undef DEFINE_EVENT_WRITABLE
> > > > +#undef __DEFINE_EVENT
> > > > +#undef FIRST
> > > > +
> > > >  #endif /* CONFIG_BPF_EVENTS */
> > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > index 3c38ac9a92a7..c5335d53ce82 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -166,6 +166,7 @@ enum bpf_prog_type {
> > > >  	BPF_PROG_TYPE_LIRC_MODE2,
> > > >  	BPF_PROG_TYPE_SK_REUSEPORT,
> > > >  	BPF_PROG_TYPE_FLOW_DISSECTOR,
> > > > +	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
> > > >  };
> > > >  
> > > >  enum bpf_attach_type {
> > > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > > > index 62f6bced3a3c..27e2f22879a4 100644
> > > > --- a/kernel/bpf/syscall.c
> > > > +++ b/kernel/bpf/syscall.c
> > > > @@ -1720,12 +1720,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
> > > >  	}
> > > >  	raw_tp->btp = btp;
> > > >  
> > > > -	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
> > > > -				 BPF_PROG_TYPE_RAW_TRACEPOINT);
> > > > +	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
> > > >  	if (IS_ERR(prog)) {
> > > >  		err = PTR_ERR(prog);
> > > >  		goto out_free_tp;
> > > >  	}
> > > > +	if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
> > > > +	    prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
> > > 
> > > I don't think we'd gain a lot by making this an extra prog type which can do the
> > > same as BPF_PROG_TYPE_RAW_TRACEPOINT modulo optional writing. Why not integrating
> > > this directly into BPF_PROG_TYPE_RAW_TRACEPOINT then? The actual opt-in comes from
> > > the DEFINE_EVENT_WRITABLE(), not from the prog type.
> > 
> > I did that to separate the hook into
> > raw_tp_writable_prog_is_valid_access, which (compared to
> > raw_tp_prog_is_valid_access):
> > 
> >   1) permits writes, and
> >   2) encodes the assumption than the context begins with the pointer to
> > that writable buffer
> > 
> > I'm not sure those are appropriate for all users of
> > BPF_PROG_TYPE_RAW_TRACEPOINT, but I can't immediately point out any
> > harm in doing so -- some dereferences of ctx that have historically
> > returned a SCALAR_VALUE would end up tagged as a PTR_TO_TP_BUFFER, but
> > they still won't be able to access through that pointer unless they're
> > attached to the right tracepoint.
> > 
> > I'll try to unify the two and see what I get.
> 
> I think combining raw_tp prog type with raw_tp_writeable is possible,
> but too cumbersome to use from user pov.
> Since such raw_tp will be accepted at prog load time,
> but only at the time of attach it will be rejected in __bpf_probe_register.
> 
> raw_tp_writable_prog_is_valid_access() doesn't know future attach point.
> we cannot use bpf_attr.expected_attach_type here.

We could use any load-time flag of sorts, but I overlooked
expected_attach_type since that should match the enum bpf_attach_type
values, and we don't use BPF_PROG_ATTACH for tracepoints.

> That's why it simply does:
> +       if (off == 0 && size == sizeof(u64))
> +               info->reg_type = PTR_TO_TP_BUFFER;
> 
> essentially hard coding first argument of writeable tp to be the buffer.

It appears to be possible to xdo this to all
BPF_PROG_TYPE_RAW_TRACEPOINTs, but I think it's clearer to keep a
separate bpf_prog_type for programs that assume ctx[0] is the pointer
to the buffer.

> 
> TP invocation is also new.
> Like in patch 2:
> trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
> 
> Patches 1,2,3 actually look fine to me,
> but I agree that selftests are necessary.
> 
> Matt, could you add new writeable tracepoint somewhere in bpf_test_run()
> and corresponding selftest?

Yep, I've just got that working, will be included in v2 (along with
some other minor bugfixes).

> 
> Thanks
> 




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux