This is a note to let you know that I've just added the patch titled net: add a refcount tracker for kernel sockets to the 6.1-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: net-add-a-refcount-tracker-for-kernel-sockets.patch and it can be found in the queue-6.1 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit b13e4063fb63f721a1343f2a866d63ad13587cd3 Author: Eric Dumazet <edumazet@xxxxxxxxxx> Date: Thu Oct 20 23:20:18 2022 +0000 net: add a refcount tracker for kernel sockets [ Upstream commit 0cafd77dcd032d1687efaba5598cf07bce85997f ] Commit ffa84b5ffb37 ("net: add netns refcount tracker to struct sock") added a tracker to sockets, but did not track kernel sockets. We still have syzbot reports hinting about netns being destroyed while some kernel TCP sockets had not been dismantled. This patch tracks kernel sockets, and adds a ref_tracker_dir_print() call to net_free() right before the netns is freed. Normally, each layer is responsible for properly releasing its kernel sockets before last call to net_free(). This debugging facility is enabled with CONFIG_NET_NS_REFCNT_TRACKER=y Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx> Reviewed-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx> Tested-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx> Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx> Stable-dep-of: 0f6ede9fbc74 ("net: defer final 'struct net' free in netns dismantle") Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 8c3587d5c308..78beaa765c73 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -92,7 +92,9 @@ struct net { struct ns_common ns; struct ref_tracker_dir refcnt_tracker; - + struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not + * refcounted against netns + */ struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -320,19 +322,31 @@ static inline int check_net(const struct net *net) #endif -static inline void netns_tracker_alloc(struct net *net, - netns_tracker *tracker, gfp_t gfp) +static inline void __netns_tracker_alloc(struct net *net, + netns_tracker *tracker, + bool refcounted, + gfp_t gfp) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER - ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp); + ref_tracker_alloc(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, + tracker, gfp); #endif } -static inline void netns_tracker_free(struct net *net, - netns_tracker *tracker) +static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, + gfp_t gfp) +{ + __netns_tracker_alloc(net, tracker, true, gfp); +} + +static inline void __netns_tracker_free(struct net *net, + netns_tracker *tracker, + bool refcounted) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER - ref_tracker_free(&net->refcnt_tracker, tracker); + ref_tracker_free(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, tracker); #endif } @@ -346,7 +360,7 @@ static inline struct net *get_net_track(struct net *net, static inline void put_net_track(struct net *net, netns_tracker *tracker) { - netns_tracker_free(net, tracker); + __netns_tracker_free(net, tracker, true); put_net(net); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1d95a5adce4e..0fe9fd2cf4e2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -319,6 +319,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128); + ref_tracker_dir_init(&net->notrefcnt_tracker, 128); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); @@ -439,6 +440,10 @@ static void net_free(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); + + /* There should not be any trackers left there. */ + ref_tracker_dir_exit(&net->notrefcnt_tracker); + kmem_cache_free(net_cachep, net); } } diff --git a/net/core/sock.c b/net/core/sock.c index dce8f878f638..2ba6385e39fd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2111,6 +2111,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) { get_net_track(net, &sk->ns_tracker, priority); sock_inuse_add(net, 1); + } else { + __netns_tracker_alloc(net, &sk->ns_tracker, + false, priority); } sock_net_set(sk, net); @@ -2166,6 +2169,9 @@ static void __sk_destruct(struct rcu_head *head) if (likely(sk->sk_net_refcnt)) put_net_track(sock_net(sk), &sk->ns_tracker); + else + __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); + sk_prot_free(sk->sk_prot_creator, sk); } @@ -2254,6 +2260,14 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) if (likely(newsk->sk_net_refcnt)) { get_net_track(sock_net(newsk), &newsk->ns_tracker, priority); sock_inuse_add(sock_net(newsk), 1); + } else { + /* Kernel sockets are not elevating the struct net refcount. + * Instead, use a tracker to more easily detect if a layer + * is not properly dismantling its kernel sockets at netns + * destroy time. + */ + __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, + false, priority); } sk_node_init(&newsk->sk_node); sock_lock_init(newsk); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8a74847dacaf..3bc862fd89a5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -797,6 +797,17 @@ static int netlink_release(struct socket *sock) } sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); + + /* Because struct net might disappear soon, do not keep a pointer. */ + if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { + __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); + /* Because of deferred_put_nlk_sk and use of work queue, + * it is possible netns will be freed before this socket. + */ + sock_net_set(sk, &init_net); + __netns_tracker_alloc(&init_net, &sk->ns_tracker, + false, GFP_KERNEL); + } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 4444fd82b66d..c5b86066ff66 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -503,6 +503,9 @@ bool rds_tcp_tune(struct socket *sock) release_sock(sk); return false; } + /* Update ns_tracker to current stack trace and refcounted tracker */ + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1);