+ ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources.patch added to -mm tree

The patch titled
     Subject: ocfs2/dlm: wait for dlm recovery done when migrating all lock resources
has been added to the -mm tree.  Its filename is
     ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included in linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: piaojun <piaojun@xxxxxxxxxx>
Subject: ocfs2/dlm: wait for dlm recovery done when migrating all lock resources

Wait for dlm recovery to finish when migrating all lock resources;
otherwise a new lock resource may be left behind after the node has left
the dlm domain, and that leftover lock resource will cause other nodes to
BUG.

      NodeA                       NodeB                NodeC

umount:
  dlm_unregister_domain()
    dlm_migrate_all_locks()

                                 NodeB down

do recovery for NodeB
and collect a new lockres
from other live nodes:

  dlm_do_recovery
    dlm_remaster_locks
      dlm_request_all_locks:

  dlm_mig_lockres_handler
    dlm_new_lockres
      __dlm_insert_lockres

at last NodeA becomes the
master of the new lockres
and leaves the domain:
  dlm_leave_domain()

                                                  mount:
                                                    dlm_join_domain()

                                                  touch a file and request
                                                  the owner of the new
                                                  lockres, but all the
                                                  other nodes say 'NO',
                                                  so NodeC decides to be
                                                  the owner, and sends an
                                                  assert msg to the other
                                                  nodes:
                                                  dlmlock()
                                                    dlm_get_lock_resource()
                                                      dlm_do_assert_master()

                                                  the other nodes receive the
                                                  msg and find that two
                                                  masters exist, which at
                                                  last causes a BUG in
                                                  dlm_assert_master_handler()
                                                  -->BUG();
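
For context on why returning -EAGAIN is enough: the umount path already
retries migration in a loop, so a pass that bails out while recovery is
active is simply repeated once recovery has finished.  A simplified sketch
of that existing caller loop (paraphrased from dlm_unregister_domain() in
fs/ocfs2/dlm/dlmdomain.c; the real function carries more shutdown state
handling than shown here):

	/* shutdown path, simplified: keep migrating until nothing is left */
	while (dlm_migrate_all_locks(dlm)) {
		/* give dlm_thread time to purge the lockres', and give
		 * any in-flight dlm recovery a chance to complete */
		msleep(500);
		mlog(0, "%s: more migration to do\n", dlm->name);
	}
	dlm_mark_domain_leaving(dlm);
	dlm_leave_domain(dlm);

With this patch, dlm_migrate_all_locks() keeps that loop spinning (via
-EAGAIN) while DLM_RECO_STATE_ACTIVE is set, and only sets migrate_done
once a pass completes with no recovery in progress, so dlm_do_recovery()
can no longer create a new lockres on a node that is about to leave the
domain.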

Link: http://lkml.kernel.org/r/5AAA6E25.7090303@xxxxxxxxxx
Fixes: bc9838c4d44a ("dlm: allow dlm do recovery during shutdown")
Signed-off-by: Jun Piao <piaojun@xxxxxxxxxx>
Reviewed-by: Alex Chen <alex.chen@xxxxxxxxxx>
Reviewed-by: Yiwen Jiang <jiangyiwen@xxxxxxxxxx>
Cc: Mark Fasheh <mfasheh@xxxxxxxxxxx>
Cc: Joel Becker <jlbec@xxxxxxxxxxxx>
Cc: Junxiao Bi <junxiao.bi@xxxxxxxxxx>
Cc: Joseph Qi <jiangqi903@xxxxxxxxx>
Cc: Changwei Ge <ge.changwei@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 fs/ocfs2/dlm/dlmcommon.h   |    1 +
 fs/ocfs2/dlm/dlmdomain.c   |   15 +++++++++++++++
 fs/ocfs2/dlm/dlmrecovery.c |   13 ++++++++++---
 3 files changed, 26 insertions(+), 3 deletions(-)

diff -puN fs/ocfs2/dlm/dlmcommon.h~ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources fs/ocfs2/dlm/dlmcommon.h
--- a/fs/ocfs2/dlm/dlmcommon.h~ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources
+++ a/fs/ocfs2/dlm/dlmcommon.h
@@ -140,6 +140,7 @@ struct dlm_ctxt
 	u8 node_num;
 	u32 key;
 	u8  joining_node;
+	u8 migrate_done; /* set to 1 means node has migrated all lock resources */
 	wait_queue_head_t dlm_join_events;
 	unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
 	unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff -puN fs/ocfs2/dlm/dlmdomain.c~ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources fs/ocfs2/dlm/dlmdomain.c
--- a/fs/ocfs2/dlm/dlmdomain.c~ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources
+++ a/fs/ocfs2/dlm/dlmdomain.c
@@ -461,6 +461,19 @@ redo_bucket:
 		cond_resched_lock(&dlm->spinlock);
 		num += n;
 	}
+
+	if (!num) {
+		if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
+			mlog(0, "%s: perhaps there are more lock resources "
+			     "need to be migrated after dlm recovery\n", dlm->name);
+			ret = -EAGAIN;
+		} else {
+			mlog(0, "%s: we won't do dlm recovery after migrating "
+			     "all lock resources\n", dlm->name);
+			dlm->migrate_done = 1;
+		}
+	}
+
 	spin_unlock(&dlm->spinlock);
 	wake_up(&dlm->dlm_thread_wq);
 
@@ -2038,6 +2051,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(c
 	dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
 	init_waitqueue_head(&dlm->dlm_join_events);
 
+	dlm->migrate_done = 0;
+
 	dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
 	dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
 
diff -puN fs/ocfs2/dlm/dlmrecovery.c~ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources fs/ocfs2/dlm/dlmrecovery.c
--- a/fs/ocfs2/dlm/dlmrecovery.c~ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources
+++ a/fs/ocfs2/dlm/dlmrecovery.c
@@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ct
 
 static void dlm_begin_recovery(struct dlm_ctxt *dlm)
 {
-	spin_lock(&dlm->spinlock);
+	assert_spin_locked(&dlm->spinlock);
 	BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
 	printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
 	       dlm->name, dlm->reco.dead_node);
 	dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
-	spin_unlock(&dlm->spinlock);
 }
 
 static void dlm_end_recovery(struct dlm_ctxt *dlm)
@@ -456,6 +455,13 @@ static int dlm_do_recovery(struct dlm_ct
 
 	spin_lock(&dlm->spinlock);
 
+	if (dlm->migrate_done) {
+		mlog(0, "%s: no need do recovery after migrating all "
+		     "lock resources\n", dlm->name);
+		spin_unlock(&dlm->spinlock);
+		return 0;
+	}
+
 	/* check to see if the new master has died */
 	if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
 	    test_bit(dlm->reco.new_master, dlm->recovery_map)) {
@@ -490,12 +496,13 @@ static int dlm_do_recovery(struct dlm_ct
 	mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
 	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
 	     dlm->reco.dead_node);
-	spin_unlock(&dlm->spinlock);
 
 	/* take write barrier */
 	/* (stops the list reshuffling thread, proxy ast handling) */
 	dlm_begin_recovery(dlm);
 
+	spin_unlock(&dlm->spinlock);
+
 	if (dlm->reco.new_master == dlm->node_num)
 		goto master_here;
 
_

Patches currently in -mm which might be from piaojun@xxxxxxxxxx are

ocfs2-use-osb-instead-of-ocfs2_sb.patch
ocfs2-use-oi-instead-of-ocfs2_i.patch
ocfs2-clean-up-some-unused-function-declaration.patch
ocfs2-dlm-dont-handle-migrate-lockres-if-already-in-shutdown.patch
ocfs2-dlm-dont-handle-migrate-lockres-if-already-in-shutdown-v3.patch
ocfs2-remove-unnecessary-null-pointer-check-before-kmem_cache_destroy.patch
ocfs2-dlm-wait-for-dlm-recovery-done-when-migrating-all-lock-resources.patch
