Hi Logan Thanks for your suggestion. I tried to create signalfd before fork() but it still can't work. And I call sleep(2) before child exits, the problem still can happen sometimes. This is what I tried. If you have time to have a look, it's great. It's not a hurry thing. diff --git a/Create.c b/Create.c index d94253b1..a40ed027 100644 --- a/Create.c +++ b/Create.c @@ -168,15 +168,16 @@ static pid_t write_zeroes_fork(int fd, struct shape *s, struct supertype *st, size_bytes -= sz; } + printf("sleeping 2 seconds\n"); + sleep(2); exit(ret); } -static int wait_for_zero_forks(int *zero_pids, int count) +static int wait_for_zero_forks(int *zero_pids, int count, int sfd) { - int wstatus, ret = 0, i, sfd, wait_count = 0; + int wstatus, ret = 0, i, wait_count = 0; struct signalfd_siginfo fdsi; bool interrupted = false; - sigset_t sigset; ssize_t s; for (i = 0; i < count; i++) @@ -185,17 +186,6 @@ static int wait_for_zero_forks(int *zero_pids, int count) if (!wait_count) return 0; - sigemptyset(&sigset); - sigaddset(&sigset, SIGINT); - sigaddset(&sigset, SIGCHLD); - sigprocmask(SIG_BLOCK, &sigset, NULL); - - sfd = signalfd(-1, &sigset, 0); - if (sfd < 0) { - pr_err("Unable to create signalfd: %s\n", strerror(errno)); - return 1; - } - while (1) { s = read(sfd, &fdsi, sizeof(fdsi)); if (s != sizeof(fdsi)) { @@ -208,10 +198,13 @@ static int wait_for_zero_forks(int *zero_pids, int count) printf("\n"); pr_info("Interrupting zeroing processes, please wait...\n"); interrupted = true; + break; } else if (fdsi.ssi_signo == SIGCHLD) { + printf("SIGCHILD wait count %d\n", wait_count); if (!--wait_count) break; - } + } else + printf("invalide %d\n", wait_count); } close(sfd); @@ -377,6 +370,39 @@ static int update_metadata(int mdfd, struct shape *s, struct supertype *st, return 0; } +static int prepare_write_zeros(struct shape *s, sigset_t *orig_sigset) +{ + sigset_t sigset; + int sfd = -1; + + if (!s->write_zeroes) + return 0; + + /* + * Block SIGINT so the main thread will always wait for the + * zeroing processes when being interrupted. Otherwise the + * zeroing processes will finish their work in the background + * keeping the disk busy. + */ + sigemptyset(&sigset); + sigaddset(&sigset, SIGINT); + sigaddset(&sigset, SIGCHLD); + sigprocmask(SIG_BLOCK, &sigset, orig_sigset); + + sfd = signalfd(-1, &sigset, 0); + if (sfd < 0) { + pr_err("Unable to create signalfd: %s\n", strerror(errno)); + return -EACCES; + } + + return sfd; +} + +static void restore_sigset(sigset_t *sigset) +{ + sigprocmask(SIG_SETMASK, sigset, NULL); +} + static int add_disks(int mdfd, struct mdinfo *info, struct shape *s, struct context *c, struct supertype *st, struct map_ent **map, struct mddev_dev *devlist, @@ -388,18 +414,12 @@ static int add_disks(int mdfd, struct mdinfo *info, struct shape *s, int zero_pids[total_slots]; struct mddev_dev *dv; struct mdinfo *infos; - sigset_t sigset, orig_sigset; - int ret = 0; + int ret = 0, sfd = -1; + sigset_t orig_sigset; - /* - * Block SIGINT so the main thread will always wait for the - * zeroing processes when being interrupted. Otherwise the - * zeroing processes will finish their work in the background - * keeping the disk busy. - */ - sigemptyset(&sigset); - sigaddset(&sigset, SIGINT); - sigprocmask(SIG_BLOCK, &sigset, &orig_sigset); + sfd = prepare_write_zeros(s, &orig_sigset); + if (sfd < 0) + return sfd; memset(zero_pids, 0, sizeof(zero_pids)); infos = xmalloc(sizeof(*infos) * total_slots); @@ -461,7 +481,8 @@ static int add_disks(int mdfd, struct mdinfo *info, struct shape *s, } if (pass == 1) { - ret = wait_for_zero_forks(zero_pids, total_slots); + printf("wait for zero forks\n"); + ret = wait_for_zero_forks(zero_pids, total_slots, sfd); if (ret) goto out; @@ -474,9 +495,9 @@ static int add_disks(int mdfd, struct mdinfo *info, struct shape *s, out: if (ret) - wait_for_zero_forks(zero_pids, total_slots); + wait_for_zero_forks(zero_pids, total_slots, sfd); + restore_sigset(&orig_sigset); free(infos); - sigprocmask(SIG_SETMASK, &orig_sigset, NULL); return ret; } Best Regards Xiao On Wed, May 22, 2024 at 12:57 AM Logan Gunthorpe <logang@xxxxxxxxxxxx> wrote: > > Hi Xiao, > > I don't have time to dig into this myself, but my guess would be that > the signal for one of the children come too quickly, before the > sigprocmask() call in wait_for_zero_forks(). > > Seems like SIGCHLD should be blocked before the first call to > write_zeroes_fork(). I'm really not sure why I put in a block to SIGINT > and then a block to SIGCHLD after the processes started. I suspect > adding SIGCHLD to the sigprocmask in add_disks() and just removing the > sigprocmask in write_zeroes_fork() might fix the issue. > > Thanks, > > Logan > > > > On 2024-05-21 01:05, Xiao Ni wrote: > > Hi Logan > > > > I'm trying to fix errors of mdadm regression failures. There is a > > failure in 00raid5-zero sometimes. I added some logs: > > > > In function write_zeroes_fork: > > if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, > > offset_bytes, sz)) { > > pr_err("zeroing %s failed: %s\n", dv->devname, > > strerror(errno)); > > ret = 1; > > break; > > } else > > printf("zeroing good\n"); > > > > In function wait_for_zero_forks: > > if (fdsi.ssi_signo == SIGINT) { > > printf("\n"); > > pr_info("Interrupting zeroing processes, > > please wait...\n"); > > interrupted = true; > > break; > > } else if (fdsi.ssi_signo == SIGCHLD) { > > printf("one child finishes, wait count %d\n", > > wait_count); > > if (!--wait_count) > > break; > > } > > > > while [ 1 ]; do > > /usr/sbin/mdadm -CfR /dev/md0 -l 5 -n3 /dev/loop0 /dev/loop1 > > /dev/loop2 --write-zeroes --auto=yes -v > > mdadm --wait /dev/md0 > > mdadm -Ss > > sleep 1 > > done > > > > zeroing good > > zeroing good > > zeroing good > > one child finishes, wait count 3 > > one child finishes, wait count 2 > > > > It looks like the farther process misses one child signal. > > > > root 174247 0.0 0.0 3628 2552 pts/0 S+ 02:52 0:00 | > > \_ /usr/sbin/mdadm -CfR /dev/md0 -l 5 -n3 /dev/loop0 > > /dev/loop1 /dev/loop2 --write-zeroes --auto=yes -v > > root 174248 0.0 0.0 0 0 pts/0 Z+ 02:52 0:00 | > > \_ [mdadm] <defunct> > > root 174249 0.0 0.0 0 0 pts/0 Z+ 02:52 0:00 | > > \_ [mdadm] <defunct> > > root 174250 0.0 0.0 0 0 pts/0 Z+ 02:52 0:00 | > > \_ [mdadm] <defunct> > > > > ]# cat /proc/174247/stack > > [<0>] signalfd_dequeue+0x14d/0x170 > > [<0>] signalfd_read_iter+0x7b/0xd0 > > [<0>] vfs_read+0x201/0x330 > > [<0>] ksys_read+0x5f/0xe0 > > [<0>] do_syscall_64+0x7b/0x160 > > [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e > > > > Any ideas for this? > > > > Best Regards > > Xiao > > >