On Fri, Apr 26, 2019 at 10:55:02AM -0700, Stanislav Fomichev wrote: > On 04/26, Martin KaFai Lau wrote: > > This patch rides on an existing BPF_PROG_TYPE_CGROUP_SKB test > > (test_sock_fields.c) to do a TCP end-to-end test on the new > > bpf_sk_storage_* helpers. > > > > Signed-off-by: Martin KaFai Lau <kafai@xxxxxx> > > --- > > tools/testing/selftests/bpf/bpf_helpers.h | 5 + > > .../bpf/progs/test_sock_fields_kern.c | 49 ++++++++ > > .../testing/selftests/bpf/test_sock_fields.c | 115 +++++++++++++++--- > > 3 files changed, 153 insertions(+), 16 deletions(-) > > > > diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h > > index 59e221586cf7..6e80b66d7fb1 100644 > > --- a/tools/testing/selftests/bpf/bpf_helpers.h > > +++ b/tools/testing/selftests/bpf/bpf_helpers.h > > @@ -211,6 +211,11 @@ static int (*bpf_strtol)(const char *buf, unsigned long long buf_len, > > static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len, > > unsigned long long flags, unsigned long *res) = > > (void *) BPF_FUNC_strtoul; > > +static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, > > + void *value, __u64 flags) = > > + (void *) BPF_FUNC_sk_storage_get; > > +static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = > > + (void *)BPF_FUNC_sk_storage_delete; > > > > /* llvm builtin functions that eBPF C program may use to > > * emit BPF_LD_ABS and BPF_LD_IND instructions > > diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > > index 37328f148538..93ef3e039da3 100644 > > --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > > +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > > @@ -55,6 +55,31 @@ struct bpf_map_def SEC("maps") linum_map = { > > .max_entries = __NR_BPF_LINUM_ARRAY_IDX, > > }; > > > > +struct bpf_spinlock_cnt { > > + struct bpf_spin_lock lock; > > + __u32 cnt; > > +}; > > + > > +struct bpf_map_def SEC("maps") sk_pkt_out_cnt = { > > + .type = BPF_MAP_TYPE_SK_STORAGE, > > + .key_size = sizeof(int), > > + .value_size = sizeof(struct bpf_spinlock_cnt), > > + .max_entries = 0, > > + .map_flags = BPF_F_NO_PREALLOC, > > +}; > > + > > +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt); > > + > > +struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = { > > + .type = BPF_MAP_TYPE_SK_STORAGE, > > + .key_size = sizeof(int), > > + .value_size = sizeof(struct bpf_spinlock_cnt), > > + .max_entries = 0, > > + .map_flags = BPF_F_NO_PREALLOC, > > +}; > > + > > +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt); > > + > > static bool is_loopback6(__u32 *a6) > > { > > return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); > > @@ -120,7 +145,9 @@ static void tpcpy(struct bpf_tcp_sock *dst, > > SEC("cgroup_skb/egress") > > int egress_read_sock_fields(struct __sk_buff *skb) > > { > > + struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; > > __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; > > + struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; > > struct sockaddr_in6 *srv_sa6, *cli_sa6; > > struct bpf_tcp_sock *tp, *tp_ret; > > struct bpf_sock *sk, *sk_ret; > > @@ -161,6 +188,28 @@ int egress_read_sock_fields(struct __sk_buff *skb) > > skcpy(sk_ret, sk); > > tpcpy(tp_ret, tp); > > > > + if (result_idx == EGRESS_SRV_IDX) { > > + /* The userspace has created it for srv sk */ > > + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); > > + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, > > + 0, 0); > > + } else { > > + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, > > + &cli_cnt_init, > > + BPF_SK_STORAGE_GET_F_CREATE); > > + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, > > + sk, &cli_cnt_init, > > + BPF_SK_STORAGE_GET_F_CREATE); > > + } > > + > > + if (!pkt_out_cnt || !pkt_out_cnt10) > > + RETURN; > > + > [..] > > + pkt_out_cnt->cnt += 1; > > + bpf_spin_lock(&pkt_out_cnt10->lock); > > + pkt_out_cnt10->cnt += 10; > > + bpf_spin_unlock(&pkt_out_cnt10->lock); > Any reason only cnt10 is spin-locked? I see both of them use > bpf_spinlock_cnt struct. Intentionally one takes lock and one does not as a test. I will add one comment here. > > > + > > RETURN; > > } > > > > diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c > > index dcae7f664dce..550baf8aa1ca 100644 > > --- a/tools/testing/selftests/bpf/test_sock_fields.c > > +++ b/tools/testing/selftests/bpf/test_sock_fields.c > > @@ -35,6 +35,11 @@ enum bpf_linum_array_idx { > > __NR_BPF_LINUM_ARRAY_IDX, > > }; > > > > +struct bpf_spinlock_cnt { > > + struct bpf_spin_lock lock; > > + __u32 cnt; > > +}; > > + > > #define CHECK(condition, tag, format...) ({ \ > > int __ret = !!(condition); \ > > if (__ret) { \ > > @@ -50,6 +55,8 @@ enum bpf_linum_array_idx { > > #define DATA_LEN sizeof(DATA) > > > > static struct sockaddr_in6 srv_sa6, cli_sa6; > > +static int sk_pkt_out_cnt10_fd; > > +static int sk_pkt_out_cnt_fd; > > static int linum_map_fd; > > static int addr_map_fd; > > static int tp_map_fd; > > @@ -220,28 +227,90 @@ static void check_result(void) > > "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", > > ingress_linum); > > > > - CHECK(srv_tp.data_segs_out != 1 || > > + CHECK(srv_tp.data_segs_out != 2 || > > srv_tp.data_segs_in || > > srv_tp.snd_cwnd != 10 || > > srv_tp.total_retrans || > > - srv_tp.bytes_acked != DATA_LEN, > > + srv_tp.bytes_acked != 2 * DATA_LEN, > > "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", > > egress_linum); > > > > CHECK(cli_tp.data_segs_out || > > - cli_tp.data_segs_in != 1 || > > + cli_tp.data_segs_in != 2 || > > cli_tp.snd_cwnd != 10 || > > cli_tp.total_retrans || > > - cli_tp.bytes_received != DATA_LEN, > > + cli_tp.bytes_received != 2 * DATA_LEN, > > "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", > > egress_linum); > > } > > > > +static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) > > +{ > > + struct bpf_spinlock_cnt pkt_out_cnt = {} , pkt_out_cnt10 = {}; > > + int err; > > + > > + pkt_out_cnt.cnt = ~0; > > + pkt_out_cnt10.cnt = ~0; > > + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); > > + if (!err) > > + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, > > + &pkt_out_cnt10); > > + > > + /* The bpf prog only counts for fullsock and > > + * passive conneciton did not become fullsock until 3WHS > > + * had been finished. > > + * The bpf prog only counted two data packet out but we > > + * specially init accept_fd's pkt_out_cnt by 2 in > > + * init_sk_storage(). Hence, 4 here. > > + */ > > + CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, > > + "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", > > + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", > > + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); > > + > > + pkt_out_cnt.cnt = ~0; > > + pkt_out_cnt10.cnt = ~0; > > + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); > > + if (!err) > > + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, > > + &pkt_out_cnt10); > > + /* Active connection is fullsock from the beginning. > > + * 1 SYN and 1 ACK during 3WHS > > + * 2 Acks on data packet. > > + * > > + * The bpf_prog initialized it to 0xeB9F. > > + */ > > + CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || > > + pkt_out_cnt10.cnt != 0xeB9F + 40, > > + "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", > > + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", > > + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); > > +} > > + > > +static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) > > +{ > > + struct bpf_spinlock_cnt scnt = {}; > > + int err; > > + > > + scnt.cnt = pkt_out_cnt; > > + err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, > > + BPF_NOEXIST); > > + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", > > + "err:%d errno:%d", err, errno); > > + > > + scnt.cnt *= 10; > > + err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, > > + BPF_NOEXIST); > > + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", > > + "err:%d errno:%d", err, errno); > > +} > > + > > static void test(void) > > { > > int listen_fd, cli_fd, accept_fd, epfd, err; > > struct epoll_event ev; > > socklen_t addrlen; > > + int i; > > > > addrlen = sizeof(struct sockaddr_in6); > > ev.events = EPOLLIN; > > @@ -308,24 +377,30 @@ static void test(void) > > accept_fd, errno); > > close(listen_fd); > > > > - /* Send some data from accept_fd to cli_fd */ > > - err = send(accept_fd, DATA, DATA_LEN, 0); > > - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", > > - err, errno); > > - > > - /* Have some timeout in recv(cli_fd). Just in case. */ > > ev.data.fd = cli_fd; > > err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); > > CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", > > err, errno); > > > > - err = epoll_wait(epfd, &ev, 1, 1000); > > - CHECK(err != 1 || ev.data.fd != cli_fd, > > - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", > > - err, errno, ev.data.fd, cli_fd); > > + init_sk_storage(accept_fd, 2); > > > > - err = recv(cli_fd, NULL, 0, MSG_TRUNC); > > - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); > > + for (i = 0; i < 2; i++) { > > + /* Send some data from accept_fd to cli_fd */ > > + err = send(accept_fd, DATA, DATA_LEN, 0); > > + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", > > + err, errno); > > + > > + /* Have some timeout in recv(cli_fd). Just in case. */ > > + err = epoll_wait(epfd, &ev, 1, 1000); > > + CHECK(err != 1 || ev.data.fd != cli_fd, > > + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", > > + err, errno, ev.data.fd, cli_fd); > > + > > + err = recv(cli_fd, NULL, 0, MSG_TRUNC); > > + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); > > + } > > + > > + check_sk_pkt_out_cnt(accept_fd, cli_fd); > > > > close(epfd); > > close(accept_fd); > > @@ -395,6 +470,14 @@ int main(int argc, char **argv) > > CHECK(!map, "cannot find linum_map", "(null)"); > > linum_map_fd = bpf_map__fd(map); > > > > + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); > > + CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); > > + sk_pkt_out_cnt_fd = bpf_map__fd(map); > > + > > + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); > > + CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); > > + sk_pkt_out_cnt10_fd = bpf_map__fd(map); > > + > > test(); > > > > bpf_object__close(obj); > > -- > > 2.17.1 > >