Re: mdadm/Create wait_for_zero_forks is stuck

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Logan

Thanks for your suggestion. I tried to create signalfd before fork()
but it still can't work. And I call sleep(2) before child exits, the
problem still can happen sometimes. This is what I tried. If you have
time to have a look, it's great. It's not a hurry thing.

diff --git a/Create.c b/Create.c
index d94253b1..a40ed027 100644
--- a/Create.c
+++ b/Create.c
@@ -168,15 +168,16 @@ static pid_t write_zeroes_fork(int fd, struct
shape *s, struct supertype *st,
                size_bytes -= sz;
        }

+       printf("sleeping 2 seconds\n");
+       sleep(2);
        exit(ret);
 }

-static int wait_for_zero_forks(int *zero_pids, int count)
+static int wait_for_zero_forks(int *zero_pids, int count, int sfd)
 {
-       int wstatus, ret = 0, i, sfd, wait_count = 0;
+       int wstatus, ret = 0, i, wait_count = 0;
        struct signalfd_siginfo fdsi;
        bool interrupted = false;
-       sigset_t sigset;
        ssize_t s;

        for (i = 0; i < count; i++)
@@ -185,17 +186,6 @@ static int wait_for_zero_forks(int *zero_pids, int count)
        if (!wait_count)
                return 0;

-       sigemptyset(&sigset);
-       sigaddset(&sigset, SIGINT);
-       sigaddset(&sigset, SIGCHLD);
-       sigprocmask(SIG_BLOCK, &sigset, NULL);
-
-       sfd = signalfd(-1, &sigset, 0);
-       if (sfd < 0) {
-               pr_err("Unable to create signalfd: %s\n", strerror(errno));
-               return 1;
-       }
-
        while (1) {
                s = read(sfd, &fdsi, sizeof(fdsi));
                if (s != sizeof(fdsi)) {
@@ -208,10 +198,13 @@ static int wait_for_zero_forks(int *zero_pids, int count)
                        printf("\n");
                        pr_info("Interrupting zeroing processes,
please wait...\n");
                        interrupted = true;
+                       break;
                } else if (fdsi.ssi_signo == SIGCHLD) {
+                       printf("SIGCHILD wait count %d\n", wait_count);
                        if (!--wait_count)
                                break;
-               }
+               } else
+                       printf("invalide %d\n", wait_count);
        }

        close(sfd);
@@ -377,6 +370,39 @@ static int update_metadata(int mdfd, struct shape
*s, struct supertype *st,
        return 0;
 }

+static int prepare_write_zeros(struct shape *s, sigset_t *orig_sigset)
+{
+       sigset_t sigset;
+       int sfd = -1;
+
+       if (!s->write_zeroes)
+               return 0;
+
+       /*
+        * Block SIGINT so the main thread will always wait for the
+        * zeroing processes when being interrupted. Otherwise the
+        * zeroing processes will finish their work in the background
+        * keeping the disk busy.
+        */
+       sigemptyset(&sigset);
+       sigaddset(&sigset, SIGINT);
+       sigaddset(&sigset, SIGCHLD);
+       sigprocmask(SIG_BLOCK, &sigset, orig_sigset);
+
+       sfd = signalfd(-1, &sigset, 0);
+       if (sfd < 0) {
+               pr_err("Unable to create signalfd: %s\n", strerror(errno));
+               return -EACCES;
+       }
+
+       return sfd;
+}
+
+static void restore_sigset(sigset_t *sigset)
+{
+       sigprocmask(SIG_SETMASK, sigset, NULL);
+}
+
 static int add_disks(int mdfd, struct mdinfo *info, struct shape *s,
                     struct context *c, struct supertype *st,
                     struct map_ent **map, struct mddev_dev *devlist,
@@ -388,18 +414,12 @@ static int add_disks(int mdfd, struct mdinfo
*info, struct shape *s,
        int zero_pids[total_slots];
        struct mddev_dev *dv;
        struct mdinfo *infos;
-       sigset_t sigset, orig_sigset;
-       int ret = 0;
+       int ret = 0, sfd = -1;
+       sigset_t orig_sigset;

-       /*
-        * Block SIGINT so the main thread will always wait for the
-        * zeroing processes when being interrupted. Otherwise the
-        * zeroing processes will finish their work in the background
-        * keeping the disk busy.
-        */
-       sigemptyset(&sigset);
-       sigaddset(&sigset, SIGINT);
-       sigprocmask(SIG_BLOCK, &sigset, &orig_sigset);
+       sfd = prepare_write_zeros(s, &orig_sigset);
+       if (sfd < 0)
+               return sfd;
        memset(zero_pids, 0, sizeof(zero_pids));

        infos = xmalloc(sizeof(*infos) * total_slots);
@@ -461,7 +481,8 @@ static int add_disks(int mdfd, struct mdinfo
*info, struct shape *s,
                }

                if (pass == 1) {
-                       ret = wait_for_zero_forks(zero_pids, total_slots);
+                       printf("wait for zero forks\n");
+                       ret = wait_for_zero_forks(zero_pids, total_slots, sfd);
                        if (ret)
                                goto out;

@@ -474,9 +495,9 @@ static int add_disks(int mdfd, struct mdinfo
*info, struct shape *s,

 out:
        if (ret)
-               wait_for_zero_forks(zero_pids, total_slots);
+               wait_for_zero_forks(zero_pids, total_slots, sfd);
+       restore_sigset(&orig_sigset);
        free(infos);
-       sigprocmask(SIG_SETMASK, &orig_sigset, NULL);
        return ret;
 }

Best Regards
Xiao

On Wed, May 22, 2024 at 12:57 AM Logan Gunthorpe <logang@xxxxxxxxxxxx> wrote:
>
> Hi Xiao,
>
> I don't have time to dig into this myself, but my guess would be that
> the signal for one of the children come too quickly, before the
> sigprocmask() call in wait_for_zero_forks().
>
> Seems like SIGCHLD should be blocked before the first call to
> write_zeroes_fork(). I'm really not sure why I put in a block to SIGINT
> and then a block to SIGCHLD after the processes started. I suspect
> adding SIGCHLD to the sigprocmask in add_disks() and just removing the
> sigprocmask in write_zeroes_fork() might fix the issue.
>
> Thanks,
>
> Logan
>
>
>
> On 2024-05-21 01:05, Xiao Ni wrote:
> > Hi Logan
> >
> > I'm trying to fix errors of mdadm regression failures. There is a
> > failure in 00raid5-zero sometimes. I added some logs:
> >
> > In function write_zeroes_fork:
> >                 if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
> >                               offset_bytes, sz)) {
> >                         pr_err("zeroing %s failed: %s\n", dv->devname,
> >                                strerror(errno));
> >                         ret = 1;
> >                         break;
> >                 } else
> >                         printf("zeroing good\n");
> >
> > In function wait_for_zero_forks:
> >                 if (fdsi.ssi_signo == SIGINT) {
> >                         printf("\n");
> >                         pr_info("Interrupting zeroing processes,
> > please wait...\n");
> >                         interrupted = true;
> >                         break;
> >                 } else if (fdsi.ssi_signo == SIGCHLD) {
> >                         printf("one child finishes, wait count %d\n",
> > wait_count);
> >                         if (!--wait_count)
> >                                 break;
> >                 }
> >
> > while [ 1 ]; do
> >   /usr/sbin/mdadm -CfR /dev/md0 -l 5 -n3 /dev/loop0 /dev/loop1
> > /dev/loop2 --write-zeroes --auto=yes -v
> >   mdadm --wait /dev/md0
> >   mdadm -Ss
> >   sleep 1
> > done
> >
> > zeroing good
> > zeroing good
> > zeroing good
> > one child finishes, wait count 3
> > one child finishes, wait count 2
> >
> > It looks like the farther process misses one child signal.
> >
> > root      174247  0.0  0.0   3628  2552 pts/0    S+   02:52   0:00  |
> >              \_ /usr/sbin/mdadm -CfR /dev/md0 -l 5 -n3 /dev/loop0
> > /dev/loop1 /dev/loop2 --write-zeroes --auto=yes -v
> > root      174248  0.0  0.0      0     0 pts/0    Z+   02:52   0:00  |
> >                  \_ [mdadm] <defunct>
> > root      174249  0.0  0.0      0     0 pts/0    Z+   02:52   0:00  |
> >                  \_ [mdadm] <defunct>
> > root      174250  0.0  0.0      0     0 pts/0    Z+   02:52   0:00  |
> >                  \_ [mdadm] <defunct>
> >
> > ]# cat /proc/174247/stack
> > [<0>] signalfd_dequeue+0x14d/0x170
> > [<0>] signalfd_read_iter+0x7b/0xd0
> > [<0>] vfs_read+0x201/0x330
> > [<0>] ksys_read+0x5f/0xe0
> > [<0>] do_syscall_64+0x7b/0x160
> > [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> >
> > Any ideas for this?
> >
> > Best Regards
> > Xiao
> >
>






[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux