Re: [PATCH net-next v18 20/25] ovpn: implement peer add/get/dump/delete via netlink

Sabrina Dubroca <sd@xxxxxxxxxxxxxxx> · Fri, 17 Jan 2025 12:48:54 +0100

2025-01-13, 10:31:39 +0100, Antonio Quartulli wrote:
>  int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info)
>  {
> -	return -EOPNOTSUPP;
> +	struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
> +	struct ovpn_priv *ovpn = info->user_ptr[0];
> +	struct ovpn_socket *ovpn_sock;
> +	struct socket *sock = NULL;
> +	struct ovpn_peer *peer;
> +	u32 sockfd, peer_id;
> +	int ret;
> +
> +	/* peers can only be added when the interface is up and running */
> +	if (!netif_running(ovpn->dev))
> +		return -ENETDOWN;

Since we're not under rtnl_lock here, the device could go down while
we're creating this peer, and we may end up with a down device that
has a peer anyway.

I'm not sure what this (and the peer flushing on NETDEV_DOWN) is
trying to accomplish. Is it a problem to keep peers when the netdevice
is down?

> +
> +	if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
> +		return -EINVAL;
> +
> +	ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
> +			       ovpn_peer_nl_policy, info->extack);
> +	if (ret)
> +		return ret;
> +
> +	ret = ovpn_nl_peer_precheck(ovpn, info, attrs);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
> +			      OVPN_A_PEER_SOCKET))
> +		return -EINVAL;
> +
> +	peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
> +	peer = ovpn_peer_new(ovpn, peer_id);
> +	if (IS_ERR(peer)) {
> +		NL_SET_ERR_MSG_FMT_MOD(info->extack,
> +				       "cannot create new peer object for peer %u: %ld",
> +				       peer_id, PTR_ERR(peer));
> +		return PTR_ERR(peer);
> +	}
> +
> +	/* lookup the fd in the kernel table and extract the socket object */
> +	sockfd = nla_get_u32(attrs[OVPN_A_PEER_SOCKET]);
> +	/* sockfd_lookup() increases sock's refcounter */
> +	sock = sockfd_lookup(sockfd, &ret);
> +	if (!sock) {
> +		NL_SET_ERR_MSG_FMT_MOD(info->extack,
> +				       "cannot lookup peer socket (fd=%u): %d",
> +				       sockfd, ret);
> +		return -ENOTSOCK;

All those returns should be "goto peer_release" (and setting ret) so
that we don't leak peer.

> +	}
> +
> +	/* Only when using UDP as transport protocol the remote endpoint
> +	 * can be configured so that ovpn knows where to send packets to.
> +	 *
> +	 * In case of TCP, the socket is connected to the peer and ovpn
> +	 * will just send bytes over it, without the need to specify a
> +	 * destination.
> +	 */
> +	if (sock->sk->sk_protocol != IPPROTO_UDP &&
> +	    (attrs[OVPN_A_PEER_REMOTE_IPV4] ||
> +	     attrs[OVPN_A_PEER_REMOTE_IPV6])) {
> +		NL_SET_ERR_MSG_FMT_MOD(info->extack,
> +				       "unexpected remote IP address for non UDP socket");
> +		sockfd_put(sock);
> +		return -EINVAL;

goto peer_release

> +	}
> +
> +	ovpn_sock = ovpn_socket_new(sock, peer);
> +	if (IS_ERR(ovpn_sock)) {
> +		NL_SET_ERR_MSG_FMT_MOD(info->extack,
> +				       "cannot encapsulate socket: %ld",
> +				       PTR_ERR(ovpn_sock));
> +		sockfd_put(sock);
> +		return -ENOTSOCK;

goto peer_release

> +	}
> +
> +	peer->sock = ovpn_sock;
> +
> +	ret = ovpn_nl_peer_modify(peer, info, attrs);
> +	if (ret < 0)
> +		goto peer_release;
> +
> +	ret = ovpn_peer_add(ovpn, peer);
> +	if (ret < 0) {
> +		NL_SET_ERR_MSG_FMT_MOD(info->extack,
> +				       "cannot add new peer (id=%u) to hashtable: %d\n",
> +				       peer->id, ret);
> +		goto peer_release;
> +	}
> +
> +	return 0;
> +
> +peer_release:
> +	/* release right away because peer is not used in any context */
> +	ovpn_peer_release(peer);
> +
> +	return ret;
>  }


[...]
>  int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info)
>  {
> -	return -EOPNOTSUPP;
> +	struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
> +	struct ovpn_priv *ovpn = info->user_ptr[0];
> +	struct ovpn_peer *peer;
> +	u32 peer_id;
> +	int ret;
> +
> +	if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
> +		return -EINVAL;
> +
> +	ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
> +			       ovpn_peer_nl_policy, info->extack);
> +	if (ret)
> +		return ret;
> +
> +	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
> +			      OVPN_A_PEER_ID))
> +		return -EINVAL;
> +
> +	peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
> +	peer = ovpn_peer_get_by_id(ovpn, peer_id);
> +	if (!peer) {
> +		NL_SET_ERR_MSG_FMT_MOD(info->extack,
> +				       "cannot find peer with id %u", peer_id);
> +		return -ENOENT;
> +	}
> +
> +	netdev_dbg(ovpn->dev, "del peer %u\n", peer->id);
> +	ret = ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_USERSPACE);

With the delayed socket release (which is similar to what was in v11,
but now with refcounting on the netdevice which should make
rtnl_link_unregister in ovpn_cleanup wait [*]), we may return to
userspace as if the peer was gone, but the socket hasn't been detached
yet.

A userspace application that tries to remove the peer and immediately
re-create it with the same socket could get EBUSY if the workqueue
hasn't done its job yet. That would be quite confusing to the
application.

So I would add a completion to wait here until the socket has been
fully detached. Something like below.

[*] I don't think the current refcounting fully protects against that,
I'll comment on 05/25


-------- 8< --------

diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c
index 72357bb5f30b..19aa4ee6d468 100644
--- a/drivers/net/ovpn/netlink.c
+++ b/drivers/net/ovpn/netlink.c
@@ -733,6 +733,9 @@ int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info)
 
 	netdev_dbg(ovpn->dev, "del peer %u\n", peer->id);
 	ret = ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_USERSPACE);
+	if (ret >= 0 && peer->sock)
+		wait_for_completion(&peer->sock_detach);
+
 	ovpn_peer_put(peer);
 
 	return ret;
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index b032390047fe..6120521d0c32 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -92,6 +92,7 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
 	ovpn_peer_stats_init(&peer->vpn_stats);
 	ovpn_peer_stats_init(&peer->link_stats);
 	INIT_WORK(&peer->keepalive_work, ovpn_peer_keepalive_send);
+	init_completion(&peer->sock_detach);
 
 	ret = dst_cache_init(&peer->dst_cache, GFP_KERNEL);
 	if (ret < 0) {
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
index 7a062cc5a5a4..8c54bf5709ef 100644
--- a/drivers/net/ovpn/peer.h
+++ b/drivers/net/ovpn/peer.h
@@ -112,6 +112,7 @@ struct ovpn_peer {
 	struct rcu_head rcu;
 	struct work_struct remove_work;
 	struct work_struct keepalive_work;
+	struct completion sock_detach;
 };
 
 /**
diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
index a5c3bc834a35..7cefac42c3be 100644
--- a/drivers/net/ovpn/socket.c
+++ b/drivers/net/ovpn/socket.c
@@ -31,6 +31,8 @@ static void ovpn_socket_release_kref(struct kref *kref)
 
 	sockfd_put(sock->sock);
 	kfree_rcu(sock, rcu);
+
+	complete(&sock->peer->sock_detach);
 }
 
 /**
@@ -181,12 +183,12 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
 
 	ovpn_sock->sock = sock;
 	kref_init(&ovpn_sock->refcount);
+	ovpn_sock->peer = peer;
 
 	/* TCP sockets are per-peer, therefore they are linked to their unique
 	 * peer
 	 */
 	if (sock->sk->sk_protocol == IPPROTO_TCP) {
-		ovpn_sock->peer = peer;
 		ovpn_peer_hold(peer);
 	} else if (sock->sk->sk_protocol == IPPROTO_UDP) {
 		/* in UDP we only link the ovpn instance since the socket is
diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
index 15827e347f53..3f5a35fd9048 100644
--- a/drivers/net/ovpn/socket.h
+++ b/drivers/net/ovpn/socket.h
@@ -28,12 +28,12 @@ struct ovpn_peer;
  * @rcu: member used to schedule RCU destructor callback
  */
 struct ovpn_socket {
+	struct ovpn_peer *peer;
 	union {
 		struct {
 			struct ovpn_priv *ovpn;
 			netdevice_tracker dev_tracker;
 		};
-		struct ovpn_peer *peer;
 	};
 
 	struct socket *sock;


-- 
Sabrina