On 8/9/19 6:10 PM, Stanislav Fomichev wrote:
Add new helper bpf_sk_storage_clone which optionally clones sk storage and call it from sk_clone_lock. Cc: Martin KaFai Lau <kafai@xxxxxx> Cc: Yonghong Song <yhs@xxxxxx> Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
[...]
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) +{ + struct bpf_sk_storage *new_sk_storage = NULL; + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + int ret; + + RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); + + rcu_read_lock(); + sk_storage = rcu_dereference(sk->sk_bpf_storage); + + if (!sk_storage || hlist_empty(&sk_storage->list)) + goto out; + + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { + struct bpf_sk_storage_elem *copy_selem; + struct bpf_sk_storage_map *smap; + struct bpf_map *map; + int refold; + + smap = rcu_dereference(SDATA(selem)->smap); + if (!(smap->map.map_flags & BPF_F_CLONE)) + continue; + + map = bpf_map_inc_not_zero(&smap->map, false); + if (IS_ERR(map)) + continue; + + copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem); + if (!copy_selem) { + ret = -ENOMEM; + bpf_map_put(map); + goto err; + } + + if (new_sk_storage) { + selem_link_map(smap, copy_selem); + __selem_link_sk(new_sk_storage, copy_selem); + } else { + ret = sk_storage_alloc(newsk, smap, copy_selem); + if (ret) { + kfree(copy_selem); + atomic_sub(smap->elem_size, + &newsk->sk_omem_alloc); + bpf_map_put(map); + goto err; + } + + new_sk_storage = rcu_dereference(copy_selem->sk_storage); + } + bpf_map_put(map);
The map get/put combination /under/ RCU read lock seems a bit odd to me, could you exactly describe the race that this would be preventing?
+ } + +out: + rcu_read_unlock(); + return 0; + +err: + rcu_read_unlock(); + + bpf_sk_storage_free(newsk); + return ret; +}
Thanks, Daniel