On Sun, 28 Feb 2010 15:45:45 +0100 Luca Berra <bluca@xxxxxxxxxx> wrote: > rework startup and takeover logic in order to make it more robust With the other things I dropped, this no longer applied, and I didn't feel up to picking it apart to see what you were really trying to do. Again, if you feel something here is still needed, please explain. Thanks, NeilBrown > > Signed-off-by: Luca Berra <bluca@xxxxxxxxxx> > --- > mdmon.c | 45 +++++++++++++++++++++------------------------ > 1 files changed, 21 insertions(+), 24 deletions(-) > > diff --git a/mdmon.c b/mdmon.c > index eef4bfa..b823a8c 100644 > --- a/mdmon.c > +++ b/mdmon.c > @@ -71,7 +71,7 @@ int mon_tid, mgr_tid; > > int sigterm; > > -static char *pid_dir = VAR_RUN; > +static char *pid_dir = NULL; > > int run_child(void *v) > { > @@ -189,9 +189,6 @@ void remove_pidfile(char *devname) > unlink(buf); > sprintf(buf, "%s/%s.sock", pid_dir, devname); > unlink(buf); > - if (strcmp(pid_dir, ALT_RUN) == 0) > - /* try to clean up when we are finished with this dir */ > - rmdir(pid_dir); > } > > static int make_control_sock(char *devname) > @@ -443,26 +440,20 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover) > sigaction(SIGPIPE, &act, NULL); > > if (takeover) { > - victim = mdmon_pid(container->devnum, pid_dir); > - if (victim < 0) { > - pid_dir = ALT_RUN; > - victim = mdmon_pid(container->devnum, pid_dir); > + /* Check the parent process by socket, not pid, which could be > + * stale, then get the pid from the same pid_dir */ > + char * victim_pid_dir = VAR_RUN; > + victim_sock = connect_monitor(container->devname, victim_pid_dir); > + if (victim_sock < 0) { > + victim_pid_dir = ALT_RUN; > + victim_sock = connect_monitor(container->devname, victim_pid_dir); > } > - if (victim >= 0) > - victim_sock = connect_monitor(container->devname, pid_dir); > + if (victim_sock >= 0) > + victim = mdmon_pid(container->devnum, victim_pid_dir); > } > > ignore = chdir("/"); > if (victim < 0) { > - pid_dir = 
ALT_RUN; > - if (ping_monitor(container->devname) == 0) { > - fprintf(stderr, "mdmon: %s already managed\n", > - container->devname); > - if (!takeover) > - fprintf(stderr, "\trun mdmon --takeover instead\n"); > - exit(3); > - } > - pid_dir = VAR_RUN; > if (ping_monitor(container->devname) == 0) { > fprintf(stderr, "mdmon: %s already managed\n", > container->devname); > @@ -479,14 +470,20 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover) > close(mdfd); > > /* Ok, this is close enough. We can say goodbye to our parent now. > + * take care to remove all possible pid files and sockets. > */ > - if (victim > 0) > + if (mkdir(ALT_RUN, 0600) >= 0 || (errno == EEXIST && access(ALT_RUN, W_OK) >= 0)) { > + pid_dir = ALT_RUN; > remove_pidfile(devname); > - if (mkdir(VAR_RUN, 0600) >= 0 || (errno == EEXIST && access(VAR_RUN, W_OK) >= 0)) > + } > + if (mkdir(VAR_RUN, 0600) >= 0 || (errno == EEXIST && access(VAR_RUN, W_OK) >= 0)) { > + /* try to clean up when we are finished with ALT_RUN dir */ > + if (pid_dir != NULL) > + rmdir(pid_dir); > pid_dir = VAR_RUN; > - else if (mkdir(ALT_RUN, 0600) >= 0 || (errno == EEXIST && access(ALT_RUN, W_OK) >= 0)) > - pid_dir = ALT_RUN; > - else { > + remove_pidfile(devname); > + } > + if (pid_dir == NULL) { > fprintf(stderr, "mdmon: Neither %s nor %s are writable\n" > " cannot create .pid or .sock files. Aborting\n", > VAR_RUN, ALT_RUN); -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html