On 12/2/24 16:07, Antonio Quartulli wrote: > +void ovpn_tcp_socket_detach(struct socket *sock) > +{ > + struct ovpn_socket *ovpn_sock; > + struct ovpn_peer *peer; > + > + if (!sock) > + return; > + > + rcu_read_lock(); > + ovpn_sock = rcu_dereference_sk_user_data(sock->sk); > + > + if (!ovpn_sock->peer) { > + rcu_read_unlock(); > + return; > + } > + > + peer = ovpn_sock->peer; > + strp_stop(&peer->tcp.strp); > + > + skb_queue_purge(&peer->tcp.user_queue); > + > + /* restore CBs that were saved in ovpn_sock_set_tcp_cb() */ > + sock->sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready; > + sock->sk->sk_write_space = peer->tcp.sk_cb.sk_write_space; > + sock->sk->sk_prot = peer->tcp.sk_cb.prot; > + sock->sk->sk_socket->ops = peer->tcp.sk_cb.ops; > + /* drop reference to peer */ > + rcu_assign_sk_user_data(sock->sk, NULL); > + > + rcu_read_unlock(); > + > + barrier(); It's unclear to me the role of the above barrier. A comment would help > + /* cancel any ongoing work. Done after removing the CBs so that these > + * workers cannot be re-armed > + */ > + cancel_work_sync(&peer->tcp.tx_work); > + strp_done(&peer->tcp.strp); > + skb_queue_purge(&peer->tcp.out_queue); > + > + ovpn_peer_put(peer); > +} > + > +static void ovpn_tcp_send_sock(struct ovpn_peer *peer) > +{ > + struct sk_buff *skb = peer->tcp.out_msg.skb; > + > + if (!skb) > + return; > + > + if (peer->tcp.tx_in_progress) > + return; > + > + peer->tcp.tx_in_progress = true; > + > + do { > + int ret = skb_send_sock_locked(peer->sock->sock->sk, skb, > + peer->tcp.out_msg.offset, > + peer->tcp.out_msg.len); > + if (unlikely(ret < 0)) { > + if (ret == -EAGAIN) > + goto out; > + > + net_warn_ratelimited("%s: TCP error to peer %u: %d\n", > + netdev_name(peer->ovpn->dev), > + peer->id, ret); > + > + /* in case of TCP error we can't recover the VPN > + * stream therefore we abort the connection > + */ > + ovpn_peer_del(peer, > + OVPN_DEL_PEER_REASON_TRANSPORT_ERROR); > + break; > + } > + > + peer->tcp.out_msg.len -= ret; > + peer->tcp.out_msg.offset += ret; > + } while (peer->tcp.out_msg.len > 0); > + > + if (!peer->tcp.out_msg.len) > + dev_sw_netstats_tx_add(peer->ovpn->dev, 1, skb->len); > + > + kfree_skb(peer->tcp.out_msg.skb); > + peer->tcp.out_msg.skb = NULL; > + peer->tcp.out_msg.len = 0; > + peer->tcp.out_msg.offset = 0; > + > +out: > + peer->tcp.tx_in_progress = false; > +} > + > +static void ovpn_tcp_tx_work(struct work_struct *work) > +{ > + struct ovpn_peer *peer; > + > + peer = container_of(work, struct ovpn_peer, tcp.tx_work); > + > + lock_sock(peer->sock->sock->sk); > + ovpn_tcp_send_sock(peer); > + release_sock(peer->sock->sock->sk); > +} > + > +static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sk_buff *skb) > +{ > + if (peer->tcp.out_msg.skb) > + ovpn_tcp_send_sock(peer); > + > + if (peer->tcp.out_msg.skb) { > + dev_core_stats_rx_dropped_inc(peer->ovpn->dev); > + kfree_skb(skb); > + return; > + } > + > + peer->tcp.out_msg.skb = skb; > + peer->tcp.out_msg.len = skb->len; > + peer->tcp.out_msg.offset = 0; > + ovpn_tcp_send_sock(peer); > +} > + > +void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sk_buff *skb) > +{ > + u16 len = skb->len; > + > + *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len); > + > + bh_lock_sock(peer->sock->sock->sk); Are you sure this runs in BH context? AFAICS we reach here from an AEAD callback. > + if (sock_owned_by_user(peer->sock->sock->sk)) { > + if (skb_queue_len(&peer->tcp.out_queue) >= > + READ_ONCE(net_hotdata.max_backlog)) { > + dev_core_stats_rx_dropped_inc(peer->ovpn->dev); > + kfree_skb(skb); > + goto unlock; > + } > + __skb_queue_tail(&peer->tcp.out_queue, skb); > + } else { > + ovpn_tcp_send_sock_skb(peer, skb); > + } > +unlock: > + bh_unlock_sock(peer->sock->sock->sk); > +} [...] > +static void ovpn_tcp_build_protos(struct proto *new_prot, > + struct proto_ops *new_ops, > + const struct proto *orig_prot, > + const struct proto_ops *orig_ops); > + > +/* Set TCP encapsulation callbacks */ > +int ovpn_tcp_socket_attach(struct socket *sock, struct ovpn_peer *peer) > +{ > + struct strp_callbacks cb = { > + .rcv_msg = ovpn_tcp_rcv, > + .parse_msg = ovpn_tcp_parse, > + }; > + int ret; > + > + /* make sure no pre-existing encapsulation handler exists */ > + if (sock->sk->sk_user_data) > + return -EBUSY; > + > + /* sanity check */ > + if (sock->sk->sk_protocol != IPPROTO_TCP) { > + net_err_ratelimited("%s: provided socket is not TCP as expected\n", > + netdev_name(peer->ovpn->dev)); > + return -EINVAL; > + } > + > + /* only a fully connected socket are expected. Connection should be > + * handled in userspace > + */ > + if (sock->sk->sk_state != TCP_ESTABLISHED) { > + net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n", > + netdev_name(peer->ovpn->dev), > + sock->sk->sk_state); > + return -EINVAL; > + } > + > + lock_sock(sock->sk); > + > + ret = strp_init(&peer->tcp.strp, sock->sk, &cb); > + if (ret < 0) { > + DEBUG_NET_WARN_ON_ONCE(1); > + release_sock(sock->sk); > + return ret; > + } > + > + INIT_WORK(&peer->tcp.tx_work, ovpn_tcp_tx_work); > + __sk_dst_reset(sock->sk); > + skb_queue_head_init(&peer->tcp.user_queue); > + skb_queue_head_init(&peer->tcp.out_queue); > + > + /* save current CBs so that they can be restored upon socket release */ > + peer->tcp.sk_cb.sk_data_ready = sock->sk->sk_data_ready; > + peer->tcp.sk_cb.sk_write_space = sock->sk->sk_write_space; > + peer->tcp.sk_cb.prot = sock->sk->sk_prot; > + peer->tcp.sk_cb.ops = sock->sk->sk_socket->ops; > + > + /* assign our static CBs and prot/ops */ > + sock->sk->sk_data_ready = ovpn_tcp_data_ready; > + sock->sk->sk_write_space = ovpn_tcp_write_space; > + > + if (sock->sk->sk_family == AF_INET) { > + sock->sk->sk_prot = &ovpn_tcp_prot; > + sock->sk->sk_socket->ops = &ovpn_tcp_ops; > + } else { > + mutex_lock(&tcp6_prot_mutex); > + if (!ovpn_tcp6_prot.recvmsg) > + ovpn_tcp_build_protos(&ovpn_tcp6_prot, &ovpn_tcp6_ops, > + sock->sk->sk_prot, > + sock->sk->sk_socket->ops); > + mutex_unlock(&tcp6_prot_mutex); This looks like an hack to avoid a build dependency on IPV6, I think the explicit #if IS_ENABLED(CONFIG_IPV6) at init time should be preferable > + > + sock->sk->sk_prot = &ovpn_tcp6_prot; > + sock->sk->sk_socket->ops = &ovpn_tcp6_ops; > + } [...] > +static void ovpn_tcp_close(struct sock *sk, long timeout) > +{ > + struct ovpn_socket *sock; > + > + rcu_read_lock(); > + sock = rcu_dereference_sk_user_data(sk); > + > + strp_stop(&sock->peer->tcp.strp); > + barrier(); Again, is not clear to me the role of the above barrier, please document it. Thanks, Paolo