On Thu, Apr 04, 2019 at 11:51:31AM -0700, Stanislav Fomichev wrote: > Add new set of arguments to bpf_attr for BPF_PROG_TEST_RUN: > * ctx_in/ctx_size_in - input context > * ctx_out/ctx_size_out - output context > > The intended use case is to pass some meta data to the test runs that > operate on skb (this has being brought up on recent LPC). > > For programs that use bpf_prog_test_run_skb, support __sk_buff input and > output. Initially, from input __sk_buff, copy _only_ cb and priority into > skb, all other non-zero fields are prohibited (with EINVAL). > If the user has set ctx_out/ctx_size_out, copy the potentially modified > __sk_buff back to the userspace. > > We require all fields of input __sk_buff except the ones we explicitly > support to be set to zero. The expectation is that in the future we might > add support for more fields and we want to fail explicitly if the user > runs the program on the kernel where we don't yet support them. > > The API is intentionally vague (i.e. we don't explicitly add __sk_buff > to bpf_attr, but ctx_in) to potentially let other test_run types use > this interface in the future (this can be xdp_md for xdp types for > example). > > Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx> > --- > include/uapi/linux/bpf.h | 7 +++ > kernel/bpf/syscall.c | 2 +- > net/bpf/test_run.c | 133 +++++++++++++++++++++++++++++++++++++-- > 3 files changed, 136 insertions(+), 6 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 837024512baf..8e96f99cebf8 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -396,6 +396,13 @@ union bpf_attr { > __aligned_u64 data_out; > __u32 repeat; > __u32 duration; > + __u32 ctx_size_in; /* input: len of ctx_in */ > + __u32 ctx_size_out; /* input/output: len of ctx_out > + * returns ENOSPC if ctx_out > + * is too small. > + */ > + __aligned_u64 ctx_in; > + __aligned_u64 ctx_out; > } test; > > struct { /* anonymous struct used by BPF_*_GET_*_ID */ > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 1d65e56594db..4ad754e2943a 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -1949,7 +1949,7 @@ static int bpf_prog_query(const union bpf_attr *attr, > return cgroup_bpf_prog_query(attr, uattr); > } > > -#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration > +#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out > > static int bpf_prog_test_run(const union bpf_attr *attr, > union bpf_attr __user *uattr) > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c > index fab142b796ef..593912a66d64 100644 > --- a/net/bpf/test_run.c > +++ b/net/bpf/test_run.c > @@ -123,12 +123,121 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, > return data; > } > > +static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) > +{ > + void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); > + u32 size = kattr->test.ctx_size_in; > + void *data; > + > + if ((kattr->test.ctx_size_in && !kattr->test.ctx_in) || > + (!kattr->test.ctx_size_in && kattr->test.ctx_in)) > + return ERR_PTR(-EINVAL); > + > + if ((kattr->test.ctx_size_out && !kattr->test.ctx_out) || > + (!kattr->test.ctx_size_out && kattr->test.ctx_out)) > + return ERR_PTR(-EINVAL); These attr checks belong to bpf_prog_test_run() in syscall.c. > + > + if (!size) > + return NULL; > + > + if (size > max_size) Not allowed even the tailing (size - max_size) bytes are zero? > + return ERR_PTR(-EINVAL); The convention should be -E2BIG if the userspace passed in something bigger with tailing non-zero that the kernel cannot understand. > + > + data = kzalloc(max_size, GFP_USER); > + if (!data) > + return ERR_PTR(-ENOMEM); > + > + if (copy_from_user(data, data_in, size)) { > + kfree(data); > + return ERR_PTR(-EFAULT); > + } > + return data; > +} > + > +static int bpf_ctx_finish(const union bpf_attr *kattr, > + union bpf_attr __user *uattr, const void *data, > + u32 size) > +{ > + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); > + u32 copy_size = size; Is the "u32 copy_size = size;" needed? I don't see "size" is changed. > + > + if (!kattr->test.ctx_size_out) > + return 0; > + > + if (copy_size > kattr->test.ctx_size_out) If I read both bpf_ctx_init() and btf_ctx_finish() correctly, the kattr->test.ctx_size_in (input) can be smaller than sizeof(struct __sk_buff) but the kattr->test.ctx_size_out (output) cannot be smaller than sizeof(struct __sk_buff)? > + return -ENOSPC; > + > + if (data_out && copy_to_user(data_out, data, copy_size)) "data_out" check is not needed. > + return -EFAULT; > + if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size))) > + return -EFAULT; > + > + return 0; > +} > + > +/** > + * range_is_zero - test whether buffer is initialized > + * @buf: buffer to check > + * @from: check from this position > + * @to: check up until (excluding) this position > + * > + * This function returns true if the there is a non-zero byte > + * in the buf in the range [from,to). > + */ > +static inline bool range_is_zero(void *buf, size_t from, size_t to) > +{ > + return !memchr_inv((u8 *)buf + from, 0, to - from); > +} > + > +static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) > +{ > + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; > + > + if (!__skb) > + return 0; > + > + /* make sure the fields we don't use are zeroed */ > + if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority))) > + return -EINVAL; > + > + /* priority is allowed */ > + > + if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) + > + FIELD_SIZEOF(struct __sk_buff, priority), > + offsetof(struct __sk_buff, cb))) > + return -EINVAL; > + > + /* cb is allowed */ > + > + if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) + > + FIELD_SIZEOF(struct __sk_buff, cb), > + sizeof(struct __sk_buff))) > + return -EINVAL; > + > + skb->priority = __skb->priority; > + memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); > + > + return 0; > +} > + > +static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) > +{ > + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; > + > + if (!__skb) > + return; > + > + __skb->priority = skb->priority; > + memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); > +} > + > int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > union bpf_attr __user *uattr) > { > bool is_l2 = false, is_direct_pkt_access = false; > u32 size = kattr->test.data_size_in; > u32 repeat = kattr->test.repeat; > + struct __sk_buff *ctx = NULL; > u32 retval, duration; > int hh_len = ETH_HLEN; > struct sk_buff *skb; > @@ -141,6 +250,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > if (IS_ERR(data)) > return PTR_ERR(data); > > + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); > + if (IS_ERR(ctx)) { > + kfree(data); > + return PTR_ERR(ctx); > + } > + > switch (prog->type) { > case BPF_PROG_TYPE_SCHED_CLS: > case BPF_PROG_TYPE_SCHED_ACT: > @@ -158,6 +273,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > sk = kzalloc(sizeof(struct sock), GFP_USER); > if (!sk) { > kfree(data); > + kfree(ctx); > return -ENOMEM; > } > sock_net_set(sk, current->nsproxy->net_ns); > @@ -166,6 +282,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > skb = build_skb(data, 0); > if (!skb) { > kfree(data); > + kfree(ctx); > kfree(sk); > return -ENOMEM; > } > @@ -180,12 +297,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > __skb_push(skb, hh_len); > if (is_direct_pkt_access) > bpf_compute_data_pointers(skb); > + ret = convert___skb_to_skb(skb, ctx); > + if (ret) > + goto out; > ret = bpf_test_run(prog, skb, repeat, &retval, &duration); > - if (ret) { > - kfree_skb(skb); > - kfree(sk); > - return ret; > - } > + if (ret) > + goto out; > if (!is_l2) { > if (skb_headroom(skb) < hh_len) { > int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); I think the following needs kfree(ctx) also (or goto out). Copying the context here: if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { kfree_skb(skb); kfree(sk); return -ENOMEM; } > @@ -198,14 +315,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > } > memset(__skb_push(skb, hh_len), 0, hh_len); > } > + convert_skb_to___skb(skb, ctx); > > size = skb->len; > /* bpf program can never convert linear skb to non-linear */ > if (WARN_ON_ONCE(skb_is_nonlinear(skb))) > size = skb_headlen(skb); > ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); > + if (!ret) > + ret = bpf_ctx_finish(kattr, uattr, ctx, > + sizeof(struct __sk_buff)); > +out: > kfree_skb(skb); > kfree(sk); > + kfree(ctx); > return ret; > } > > -- > 2.21.0.392.gf8f6787159e-goog >