__ip_vs_mutex is used by both the netns shutdown (cleanup) code and the startup code, and both paths also implicitly take the sk_lock-AF_INET mutex, but in the opposite order. Cleanup on CPU-1: ip_vs_dst_event() holds sk_lock-AF_INET and then takes __ip_vs_mutex. Startup on CPU-2: ip_vs_genl_set_cmd() holds __ip_vs_mutex and then takes sk_lock-AF_INET. * DEAD LOCK * This can be solved either by making the ip_vs_mutex per netns, or by avoiding that lock when starting/stopping the sync threads, i.e. by just adding a starting/stopping flag (the approach taken in this patch). A per-namespace ip_vs_mutex seems to be the more future-proof solution. Which one should be used? Signed-off-by: Hans Schillstrom <hans.schillstrom@xxxxxxxxxxxx> --- include/net/ip_vs.h | 2 ++ net/netfilter/ipvs/ip_vs_ctl.c | 15 ++++++++++----- net/netfilter/ipvs/ip_vs_sync.c | 30 +++++++++++++++++++++++++----- 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 34a6fa8..e82fa8d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -895,6 +895,8 @@ struct netns_ipvs { struct sockaddr_in sync_mcast_addr; struct task_struct *master_thread; struct task_struct *backup_thread; + atomic_t master_flg; /* Start-up flag*/ + atomic_t backup_flg; int send_mesg_maxlen; int recv_mesg_maxlen; volatile int sync_state; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 699c79a..21c541f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2318,13 +2318,17 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; + /* Unlock since a global socket lock will be taken later */ + mutex_unlock(&__ip_vs_mutex); ret = start_sync_thread(net, dm->state, dm->mcast_ifn, dm->syncid); - goto out_unlock; + goto out_dec; } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; + /* Unlock since a global socket lock will be taken later */ + mutex_unlock(&__ip_vs_mutex); ret =
stop_sync_thread(net, dm->state); - goto out_unlock; + goto out_dec; } usvc_compat = (struct ip_vs_service_user *)arg; @@ -3305,12 +3309,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ret = -EINVAL; goto out; } - + /* Unlock since a global socket lock will be taken later */ + mutex_unlock(&__ip_vs_mutex); if (cmd == IPVS_CMD_NEW_DAEMON) ret = ip_vs_genl_new_daemon(net, daemon_attrs); else ret = ip_vs_genl_del_daemon(net, daemon_attrs); - goto out; + goto out_nounlock; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { ret = ip_vs_zero_all(net); @@ -3382,7 +3387,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) out: mutex_unlock(&__ip_vs_mutex); - +out_nounlock: return ret; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e292e5b..7a996dc 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1540,30 +1540,37 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) char *name, *buf = NULL; int (*threadfn)(void *data); int result = -ENOMEM; + atomic_t *run_flg; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* master/backup_flag is used to protect for multiple starts + * the ip_vs_mutex can't be used here due to deadlock problems.*/ if (state == IP_VS_STATE_MASTER) { - if (ipvs->master_thread) + if (ipvs->master_thread || + !atomic_dec_and_test(&ipvs->master_flg)) return -EEXIST; strlcpy(ipvs->master_mcast_ifn, mcast_ifn, sizeof(ipvs->master_mcast_ifn)); ipvs->master_syncid = syncid; realtask = &ipvs->master_thread; + run_flg = &ipvs->master_flg; name = "ipvs_master:%d"; threadfn = sync_thread_master; sock = make_send_sock(net); } else if (state == IP_VS_STATE_BACKUP) { - if (ipvs->backup_thread) + if (ipvs->backup_thread || + !atomic_dec_and_test(&ipvs->backup_flg)) return 
-EEXIST; strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, sizeof(ipvs->backup_mcast_ifn)); ipvs->backup_syncid = syncid; realtask = &ipvs->backup_thread; + run_flg = &ipvs->backup_flg; name = "ipvs_backup:%d"; threadfn = sync_thread_backup; sock = make_receive_sock(net); @@ -1600,7 +1607,8 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) /* mark as active */ *realtask = task; ipvs->sync_state |= state; - + /* Free to use again */ + atomic_set(run_flg, 1); /* increase the module use count */ ip_vs_use_count_inc(); @@ -1613,6 +1621,7 @@ outbuf: outsocket: sk_release_kernel(sock->sk); out: + atomic_set(run_flg, -1); return result; } @@ -1621,11 +1630,15 @@ int stop_sync_thread(struct net *net, int state) { struct netns_ipvs *ipvs = net_ipvs(net); int retc = -EINVAL; + atomic_t *run_flg; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); + /* master/backup_flag is used to protect for multiple shutdowns + * the ip_vs_mutex can't be used here due to deadlock problems.*/ if (state == IP_VS_STATE_MASTER) { - if (!ipvs->master_thread) + if (!ipvs->master_thread || + !atomic_dec_and_test(&ipvs->master_flg)) return -ESRCH; pr_info("stopping master sync thread %d ...\n", @@ -1642,8 +1655,11 @@ int stop_sync_thread(struct net *net, int state) spin_unlock_bh(&ipvs->sync_lock); retc = kthread_stop(ipvs->master_thread); ipvs->master_thread = NULL; + /* Free to use again */ + atomic_set(&ipvs->master_flg, 1); } else if (state == IP_VS_STATE_BACKUP) { - if (!ipvs->backup_thread) + if (!ipvs->backup_thread || + !atomic_dec_and_test(&ipvs->backup_flg)) return -ESRCH; pr_info("stopping backup sync thread %d ...\n", @@ -1652,6 +1668,8 @@ int stop_sync_thread(struct net *net, int state) ipvs->sync_state &= ~IP_VS_STATE_BACKUP; retc = kthread_stop(ipvs->backup_thread); ipvs->backup_thread = NULL; + /* Free to use again */ + atomic_set(&ipvs->backup_flg, 1); } /* decrease the module use count */ @@ -1674,6 +1692,8 @@ int __net_init 
__ip_vs_sync_init(struct net *net) ipvs->sync_mcast_addr.sin_family = AF_INET; ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT); ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); + atomic_set(&ipvs->master_flg, 1); + atomic_set(&ipvs->backup_flg, 1); return 0; } -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe lvs-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html