[mdadm PATCH 8/9] mdmon: rework startup and takeover logic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Rework the startup and takeover logic to make it more robust.

Signed-off-by: Luca Berra <bluca@xxxxxxxxxx>
---
 mdmon.c |   45 +++++++++++++++++++++------------------------
 1 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/mdmon.c b/mdmon.c
index eef4bfa..b823a8c 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -71,7 +71,7 @@ int mon_tid, mgr_tid;
 
 int sigterm;
 
-static char *pid_dir = VAR_RUN;
+static char *pid_dir = NULL;
 
 int run_child(void *v)
 {
@@ -189,9 +189,6 @@ void remove_pidfile(char *devname)
 	unlink(buf);
 	sprintf(buf, "%s/%s.sock", pid_dir, devname);
 	unlink(buf);
-	if (strcmp(pid_dir, ALT_RUN) == 0)
-		/* try to clean up when we are finished with this dir */
-		rmdir(pid_dir);
 }
 
 static int make_control_sock(char *devname)
@@ -443,26 +440,20 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover)
 	sigaction(SIGPIPE, &act, NULL);
 
 	if (takeover) {
-		victim = mdmon_pid(container->devnum, pid_dir);
-		if (victim < 0) {
-			pid_dir = ALT_RUN;
-			victim = mdmon_pid(container->devnum, pid_dir);
+		/* Check the parent process by socket, not pid, which could be
+		 * stale, then get the pid from the same pid_dir */
+		char * victim_pid_dir = VAR_RUN;
+		victim_sock = connect_monitor(container->devname, victim_pid_dir);
+		if (victim_sock < 0) {
+			victim_pid_dir = ALT_RUN;
+			victim_sock = connect_monitor(container->devname, victim_pid_dir);
 		}
-		if (victim >= 0)
-			victim_sock = connect_monitor(container->devname, pid_dir);
+		if (victim_sock >= 0)
+ 			victim = mdmon_pid(container->devnum, victim_pid_dir);
 	}
 
 	ignore = chdir("/");
 	if (victim < 0) {
-		pid_dir = ALT_RUN;
-		if (ping_monitor(container->devname) == 0) {
-			fprintf(stderr, "mdmon: %s already managed\n",
-				container->devname);
-			if (!takeover)
-				fprintf(stderr, "\trun mdmon --takeover instead\n");
-			exit(3);
-		}
-		pid_dir = VAR_RUN;
 		if (ping_monitor(container->devname) == 0) {
 			fprintf(stderr, "mdmon: %s already managed\n",
 				container->devname);
@@ -479,14 +470,20 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover)
 	close(mdfd);
 
 	/* Ok, this is close enough. We can say goodbye to our parent now.
+	 * take care to remove all possible pid files and sockets.
 	 */
-	if (victim > 0)
+	if (mkdir(ALT_RUN, 0600) >= 0 || (errno == EEXIST && access(ALT_RUN, W_OK) >= 0)) {
+		pid_dir = ALT_RUN;
 		remove_pidfile(devname);
-	if (mkdir(VAR_RUN, 0600) >= 0 || (errno == EEXIST && access(VAR_RUN, W_OK) >= 0))
+	}
+	if (mkdir(VAR_RUN, 0600) >= 0 || (errno == EEXIST && access(VAR_RUN, W_OK) >= 0)) {
+		/* try to clean up when we are finished with ALT_RUN dir */
+		if (pid_dir != NULL)
+			rmdir(pid_dir);
 		pid_dir = VAR_RUN;
-	else if (mkdir(ALT_RUN, 0600) >= 0 || (errno == EEXIST && access(ALT_RUN, W_OK) >= 0))
-		pid_dir = ALT_RUN;
-	else {
+		remove_pidfile(devname);
+	}
+	if (pid_dir == NULL) {
 		fprintf(stderr, "mdmon: Neither %s nor %s are writable\n"
 			"       cannot create .pid or .sock files.  Aborting\n",
 			VAR_RUN, ALT_RUN);
--
1.7.0


--
Luca Berra -- bluca@xxxxxxxxxx
        Communication Media & Services S.r.l.
 /"\
 \ /     ASCII RIBBON CAMPAIGN
  X        AGAINST HTML MAIL
 / \
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux