Re: [PATCH] FIX: Cannot remove failed disk from container

NeilBrown <neilb@xxxxxxx> · Tue, 3 Jan 2012 11:40:57 +1100

On Tue, 3 Jan 2012 10:31:04 +1100 NeilBrown <neilb@xxxxxxx> wrote:

> On Thu, 29 Dec 2011 14:27:38 +0100 Adam Kwolek <adam.kwolek@xxxxxxxxx> wrote:
> 
> > When disk is failed by mdadm e.g.:
> >     mdadm -f /dev/md/raid_array /dev/sdX
> > and then it is tried to be removed from container e.g.:
> >     mdadm --remove /dev/md/container /dev/sdX
> > 
> > mdadm refuses it with information:
> >     mdadm: /dev/sdX is still in use, cannot remove.
> > 
> > Problem was introduced in commit:
> >     monitor: don't unblock a device that isn't blocked.
> >     /2011-12-06/
> > Disk without unblocking it cannot be really removed from array
> > and reference to if is still reported under 'holders' sysfs entry.
> > 
> > As this commit is necessary for managing degraded array during
> > reshape and rebuild code for unconditional unblocking disk on removal
> > is added.
> > Guard for setting DS_UNBLOCK during reshape/rebuild avoids process
> > performance degradation.
> 
> You seem to be addressing the symptom rather than understanding the real
> problem.
> 
> If a device isn't marked as 'blocked' then it simply doesn't make any sense
> to "unblock" it - that cannot do anything useful.
> 
> If the commit you identify broke things for you, then we need to understand
> exactly why.  What exactly is the problem, how was the code previously
> allowing things to work?  What is the minimal thing we need to do to allow
> things to work again?
> 
> Just setting DS_UNBLOCK because it seems to work but without a clear
> justification isn't acceptable.
> 

The problem was that when were try to "remove" a device that can fail and the
comment said we assume another event would happen soon which would trigger a
second attempt to remove the device.  That apparently isn't true.

Your patch that caused it to "unblock" again only worked because that
triggered another event immediately - so you were relying on a side-effect.

I have committed the following two patches which fix the problem 'properly'.

Thanks,
NeilBrown

From 77b3ac8c6521d836dd3c6ef247c252293920fd52 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@xxxxxxx>
Date: Tue, 3 Jan 2012 11:18:59 +1100
Subject: [PATCH 1/2] monitor: make return from read_and_act more symbolic.

Rather than just a number, use a named flag.

This makes the code easier to understand and allows room for returning
more flags later.

Signed-off-by: NeilBrown <neilb@xxxxxxx>
---
 monitor.c |   20 ++++++++++----------
 1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/monitor.c b/monitor.c
index 29bde18..cfe4178 100644
--- a/monitor.c
+++ b/monitor.c
@@ -211,6 +211,7 @@ static void signal_manager(void)
  *
  */
 
+#define ARRAY_DIRTY 1
 static int read_and_act(struct active_array *a)
 {
 	unsigned long long sync_completed;
@@ -218,7 +219,7 @@ static int read_and_act(struct active_array *a)
 	int check_reshape = 0;
 	int deactivate = 0;
 	struct mdinfo *mdi;
-	int dirty = 0;
+	int ret = 0;
 	int count = 0;
 
 	a->next_state = bad_word;
@@ -254,14 +255,14 @@ static int read_and_act(struct active_array *a)
 	if (a->curr_state == write_pending) {
 		a->container->ss->set_array_state(a, 0);
 		a->next_state = active;
-		dirty = 1;
+		ret |= ARRAY_DIRTY;
 	}
 	if (a->curr_state == active_idle) {
 		/* Set array to 'clean' FIRST, then mark clean
 		 * in the metadata
 		 */
 		a->next_state = clean;
-		dirty = 1;
+		ret |= ARRAY_DIRTY;
 	}
 	if (a->curr_state == clean) {
 		a->container->ss->set_array_state(a, 1);
@@ -269,7 +270,7 @@ static int read_and_act(struct active_array *a)
 	if (a->curr_state == active ||
 	    a->curr_state == suspended ||
 	    a->curr_state == bad_word)
-		dirty = 1;
+		ret |= ARRAY_DIRTY;
 	if (a->curr_state == readonly) {
 		/* Well, I'm ready to handle things.  If readonly
 		 * wasn't requested, transition to read-auto.
@@ -284,7 +285,7 @@ static int read_and_act(struct active_array *a)
 				a->next_state = read_auto; /* array is clean */
 			else {
 				a->next_state = active; /* Now active for recovery etc */
-				dirty = 1;
+				ret |= ARRAY_DIRTY;
 			}
 		}
 	}
@@ -459,7 +460,7 @@ static int read_and_act(struct active_array *a)
 	if (deactivate)
 		a->container = NULL;
 
-	return dirty;
+	return ret;
 }
 
 static struct mdinfo *
@@ -629,7 +630,6 @@ static int wait_and_act(struct supertype *container, int nowait)
 	rv = 0;
 	dirty_arrays = 0;
 	for (a = *aap; a ; a = a->next) {
-		int is_dirty;
 
 		if (a->replaces && !discard_this) {
 			struct active_array **ap;
@@ -644,14 +644,14 @@ static int wait_and_act(struct supertype *container, int nowait)
 			signal_manager();
 		}
 		if (a->container && !a->to_remove) {
-			is_dirty = read_and_act(a);
+			int ret = read_and_act(a);
 			rv |= 1;
-			dirty_arrays += is_dirty;
+			dirty_arrays += !!(ret & ARRAY_DIRTY);
 			/* when terminating stop manipulating the array after it
 			 * is clean, but make sure read_and_act() is given a
 			 * chance to handle 'active_idle'
 			 */
-			if (sigterm && !is_dirty)
+			if (sigterm && !(ret & ARRAY_DIRTY))
 				a->container = NULL; /* stop touching this array */
 		}
 	}
-- 
1.7.7


From 68226a80812cd9fce63dbd14d2225ffdf16a781b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@xxxxxxx>
Date: Tue, 3 Jan 2012 00:36:23 +1100
Subject: [PATCH 2/2] monitor: ensure we retry soon when 'remove' fails.

If a 'remove' fails there is no certainty that another event will
happen soon, so make sure we retry soon anyway.

Reported-by: Adam Kwolek <adam.kwolek@xxxxxxxxx>
Signed-off-by: NeilBrown <neilb@xxxxxxx>
---
 mdadm.h   |    1 +
 monitor.c |   16 ++++++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/mdadm.h b/mdadm.h
index 3bcd052..381ef86 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -867,6 +867,7 @@ struct supertype {
 			*  external:/md0/12
 			*/
 	int devcnt;
+	int retry_soon;
 
 	struct mdinfo *devs;
 
diff --git a/monitor.c b/monitor.c
index cfe4178..c987d10 100644
--- a/monitor.c
+++ b/monitor.c
@@ -212,6 +212,7 @@ static void signal_manager(void)
  */
 
 #define ARRAY_DIRTY 1
+#define ARRAY_BUSY 2
 static int read_and_act(struct active_array *a)
 {
 	unsigned long long sync_completed;
@@ -419,9 +420,9 @@ static int read_and_act(struct active_array *a)
 		if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) {
 			int remove_result;
 
-			/* the kernel may not be able to immediately remove the
-			 * disk, we can simply wait until the next event to try
-			 * again.
+			/* The kernel may not be able to immediately remove the
+			 * disk.  In that case we wait a little while and
+			 * try again.
 			 */
 			remove_result = write_attr("remove", mdi->state_fd);
 			if (remove_result > 0) {
@@ -429,7 +430,8 @@ static int read_and_act(struct active_array *a)
 				close(mdi->state_fd);
 				close(mdi->recovery_fd);
 				mdi->state_fd = -1;
-			}
+			} else
+				ret |= ARRAY_BUSY;
 		}
 		if (mdi->next_state & DS_INSYNC) {
 			write_attr("+in_sync", mdi->state_fd);
@@ -597,7 +599,7 @@ static int wait_and_act(struct supertype *container, int nowait)
 		struct timespec ts;
 		ts.tv_sec = 24*3600;
 		ts.tv_nsec = 0;
-		if (*aap == NULL) {
+		if (*aap == NULL || container->retry_soon) {
 			/* just waiting to get O_EXCL access */
 			ts.tv_sec = 0;
 			ts.tv_nsec = 20000000ULL;
@@ -612,7 +614,7 @@ static int wait_and_act(struct supertype *container, int nowait)
 		#ifdef DEBUG
 		dprint_wake_reasons(&rfds);
 		#endif
-
+		container->retry_soon = 0;
 	}
 
 	if (update_queue) {
@@ -653,6 +655,8 @@ static int wait_and_act(struct supertype *container, int nowait)
 			 */
 			if (sigterm && !(ret & ARRAY_DIRTY))
 				a->container = NULL; /* stop touching this array */
+			if (ret & ARRAY_BUSY)
+				container->retry_soon = 1;
 		}
 	}
 
-- 
1.7.7



Attachment:
signature.asc

Description: PGP signature