[PATCH 1/2] md-cluster: fix hanging issue while adding a new disk

Heming Zhao <heming.zhao@xxxxxxxx> · Wed, 12 Jun 2024 10:19:10 +0800

The commit 1bbe254e4336 ("md-cluster: check for timeout while a
new disk adding") is syntactically correct but does not suit the
real clustered code logic.

When a timeout occurs while adding a new disk, if recv_daemon()
bypasses the unlock for ack_lockres:CR, another node will be left
waiting to grab the EX lock. This will cause the cluster to hang
indefinitely.

How to fix:

1. In dlm_lock_sync(), change the wait behaviour from waiting forever
   to waiting with a timeout. This could avoid the hanging issue when
   another node fails to handle a cluster msg. Another result of this
   change is that if another node receives an unknown msg (e.g. a new
   msg_type), the old code will hang, whereas the new code will time
   out and fail. This could help cluster_md handle a new msg_type from
   different nodes with different kernel/module versions (e.g. the
   user only updates one leg's kernel and monitors the stability of
   the new kernel).
2. By design, the old code for __sendmsg() always returns 0 (success),
   because it must successfully unlock ->message_lockres before
   returning. This commit makes the function return an error number
   when an error occurs.

Fixes: 1bbe254e4336 ("md-cluster: check for timeout while a new disk adding")
Signed-off-by: Heming Zhao <heming.zhao@xxxxxxxx>
Reviewed-by: Su Yue <glass.su@xxxxxxxx>
---
 drivers/md/md-cluster.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 8e36a0feec09..27eaaf9fef94 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -130,8 +130,13 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
 			0, sync_ast, res, res->bast);
 	if (ret)
 		return ret;
-	wait_event(res->sync_locking, res->sync_locking_done);
+	ret = wait_event_timeout(res->sync_locking, res->sync_locking_done,
+				60 * HZ);
 	res->sync_locking_done = false;
+	if (!ret) {
+		pr_err("locking DLM '%s' timeout!\n", res->name);
+		return -EBUSY;
+	}
 	if (res->lksb.sb_status == 0)
 		res->mode = mode;
 	return res->lksb.sb_status;
@@ -744,12 +749,14 @@ static void unlock_comm(struct md_cluster_info *cinfo)
 static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 {
 	int error;
+	int ret = 0;
 	int slot = cinfo->slot_number - 1;
 
 	cmsg->slot = cpu_to_le32(slot);
 	/*get EX on Message*/
 	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
 	if (error) {
+		ret = error;
 		pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
 		goto failed_message;
 	}
@@ -759,6 +766,7 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 	/*down-convert EX to CW on Message*/
 	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CW);
 	if (error) {
+		ret = error;
 		pr_err("md-cluster: failed to convert EX to CW on MESSAGE(%d)\n",
 				error);
 		goto failed_ack;
@@ -767,6 +775,7 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 	/*up-convert CR to EX on Ack*/
 	error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_EX);
 	if (error) {
+		ret = error;
 		pr_err("md-cluster: failed to convert CR to EX on ACK(%d)\n",
 				error);
 		goto failed_ack;
@@ -775,6 +784,7 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 	/*down-convert EX to CR on Ack*/
 	error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR);
 	if (error) {
+		ret = error;
 		pr_err("md-cluster: failed to convert EX to CR on ACK(%d)\n",
 				error);
 		goto failed_ack;
@@ -789,7 +799,7 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 		goto failed_ack;
 	}
 failed_message:
-	return error;
+	return ret;
 }
 
 static int sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg,
-- 
2.35.3