On Wed, Aug 07, 2019 at 05:05:33PM -0700, Stanislav Fomichev wrote: [ ... ] > > > +int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) > > > +{ > > > + struct bpf_sk_storage *new_sk_storage = NULL; > > > + struct bpf_sk_storage *sk_storage; > > > + struct bpf_sk_storage_elem *selem; > > > + int ret; > > > + > > > + RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); > > > + > > > + rcu_read_lock(); > > > + sk_storage = rcu_dereference(sk->sk_bpf_storage); > > > + > > > + if (!sk_storage || hlist_empty(&sk_storage->list)) > > > + goto out; > > > + > > > + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { > > > + struct bpf_sk_storage_map *smap; > > > + struct bpf_sk_storage_elem *copy_selem; > > > + > > > + if (!selem->clone) > > > + continue; > > > + > > > + smap = rcu_dereference(SDATA(selem)->smap); > > > + if (!smap) > > > + continue; > > > + > > > + copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem); > > > + if (IS_ERR(copy_selem)) { > > > + ret = PTR_ERR(copy_selem); > > > + goto err; > > > + } > > > + > > > + if (!new_sk_storage) { > > > + ret = sk_storage_alloc(newsk, smap, copy_selem); > > > + if (ret) { > > > + kfree(copy_selem); > > > + atomic_sub(smap->elem_size, > > > + &newsk->sk_omem_alloc); > > > + goto err; > > > + } > > > + > > > + new_sk_storage = rcu_dereference(copy_selem->sk_storage); > > > + continue; > > > + } > > > + > > > + raw_spin_lock_bh(&new_sk_storage->lock); > > > + selem_link_map(smap, copy_selem); > > > + __selem_link_sk(new_sk_storage, copy_selem); > > > + raw_spin_unlock_bh(&new_sk_storage->lock); > > > > Considering in this particular case, new socket is not visible to > > outside world yet (both kernel and user space), map_delete/map_update > > operations are not applicable in this situation, so > > the above raw_spin_lock_bh() probably not needed. > I agree, it's doing nothing, but __selem_link_sk has the following comment: > /* sk_storage->lock must be held and sk_storage->list cannot be empty */ I believe I may have forgotten to remove this comment after reusing it in sk_storage_alloc() which also has a similar no-lock-needed situation like here. I would also prefer not to acquire the new_sk_storage->lock here to avoid confusion when investigating racing bugs in the future. > > Just wanted to keep that invariant for this call site as well (in case > we add some lockdep enforcement or smth else). WDYT? > > > > + } > > > + > > > +out: > > > + rcu_read_unlock(); > > > + return 0; > > > + > > > +err: > > > + rcu_read_unlock(); > > > + > > > + bpf_sk_storage_free(newsk); > > > + return ret; > > > +} > > > + > > > BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, > > > void *, value, u64, flags) > > > { > > > struct bpf_sk_storage_data *sdata; > > > > > > - if (flags > BPF_SK_STORAGE_GET_F_CREATE) > > > + if (flags & ~BPF_SK_STORAGE_GET_F_MASK) > > > + return (unsigned long)NULL; > > > + > > > + if ((flags & BPF_SK_STORAGE_GET_F_CLONE) && > > > + !(flags & BPF_SK_STORAGE_GET_F_CREATE)) > > > return (unsigned long)NULL; > > > > > > sdata = sk_storage_lookup(sk, map, true); > > > if (sdata) > > > return (unsigned long)sdata->data; > > > > > > - if (flags == BPF_SK_STORAGE_GET_F_CREATE && > > > + if ((flags & BPF_SK_STORAGE_GET_F_CREATE) && > > > /* Cannot add new elem to a going away sk. > > > * Otherwise, the new elem may become a leak > > > * (and also other memory issues during map > > > @@ -762,6 +853,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, > > > /* sk must be a fullsock (guaranteed by verifier), > > > * so sock_gen_put() is unnecessary. > > > */ > > > + if (!IS_ERR(sdata)) > > > + SELEM(sdata)->clone = > > > + !!(flags & BPF_SK_STORAGE_GET_F_CLONE); > > > sock_put(sk); > > > return IS_ERR(sdata) ? > > > (unsigned long)NULL : (unsigned long)sdata->data; > > > diff --git a/net/core/sock.c b/net/core/sock.c > > > index d57b0cc995a0..f5e801a9cea4 100644 > > > --- a/net/core/sock.c > > > +++ b/net/core/sock.c > > > @@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) > > > goto out; > > > } > > > RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); > > > -#ifdef CONFIG_BPF_SYSCALL > > > - RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); > > > -#endif > > > + > > > + if (bpf_sk_storage_clone(sk, newsk)) { > > > + sk_free_unlock_clone(newsk); > > > + newsk = NULL; > > > + goto out; > > > + } > > > > > > newsk->sk_err = 0; > > > newsk->sk_err_soft = 0; > > >