/*
 * On 02/28, Sapkal, Swapnil wrote:
 * >
 * > Yes, I was able to reproduce the problem with the below patch.
 * ...
 * > I found a case in the dump where the pipe is empty still both reader and
 * > writer are waiting on it.
 * >
 * > [ 1397.829761] E=1 F=0; W=1719147 R=1719147
 * > [ 1397.837843] RD=1 WR=1
 *
 * Thanks! and I see no more "WR=1" in the full dump. This means that all
 * live writes hang on the same pipe. So maybe the trivial program below can
 * also reproduce the problem on your machine? Say, with GROUPS=16 and
 * WRITERS=20 ... or maybe even with GROUPS=1 and WRITERS=320 ...
 *
 * Oleg.
 */

/*
 * Pipe stress reproducer.
 *
 * Usage: prog GROUPS WRITERS
 *
 * Starts GROUPS threads. Each thread creates one pipe, forks WRITERS child
 * processes that write fixed-size messages into it forever, and then reads
 * those messages forever, flagging itself alive after every successful read.
 * Once per second main() reports every group that made no progress since the
 * previous check.
 *
 * All system calls are checked explicitly rather than inside assert(), so
 * building with -DNDEBUG cannot compile the actual work away. A failed check
 * still terminates via abort(), as a failed assert() would.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>

enum {
	MAX_GROUPS = 1024,	/* capacity of ALIVE[] */
	MSG_SIZE = 100		/* bytes per pipe read/write */
};

static int GROUPS, WRITERS;

/* Per-group progress flag: set by the group's reader, cleared by main(). */
static atomic_int ALIVE[MAX_GROUPS];

/* Report a failed call using errno and terminate abnormally. */
static void die(const char *what)
{
	perror(what);
	abort();
}

/* Abort unless a read()/write() transferred exactly `want` bytes. */
static void check_xfer(ssize_t got, size_t want, const char *what)
{
	if (got < 0)
		die(what);
	if ((size_t)got != want) {
		fprintf(stderr, "%s: short transfer (%zd of %zu bytes)\n",
			what, got, want);
		abort();
	}
}

/*
 * Parse a decimal command-line count in [1, max].
 * Prints a diagnostic and exits on malformed or out-of-range input.
 */
static int parse_count(const char *s, const char *name, long max)
{
	char *end;
	long v;

	errno = 0;
	v = strtol(s, &end, 10);
	if (end == s || *end != '\0' || errno == ERANGE || v < 1 || v > max) {
		fprintf(stderr, "%s must be an integer in [1, %ld], got '%s'\n",
			name, max, s);
		exit(EXIT_FAILURE);
	}
	return (int)v;
}

/*
 * Thread body for one group; never returns.
 *
 * arg carries the group index (an integer smuggled through the pointer).
 */
void *group(void *arg)
{
	int fd[2], n, id = (int)(long)arg;
	char buf[MSG_SIZE] = {0};	/* payload content is irrelevant */

	if (pipe(fd) != 0)
		die("pipe");

	for (n = 0; n < WRITERS; ++n) {
		pid_t pid = fork();

		if (pid < 0)
			die("fork");
		if (pid)
			continue;

		/* Child: pure writer, it never reads from the pipe. */
		close(fd[0]);
		for (;;)
			check_xfer(write(fd[1], buf, sizeof(buf)),
				   sizeof(buf), "write");
	}

	/* Parent thread: drain the pipe and record progress. */
	for (;;) {
		check_xfer(read(fd[0], buf, sizeof(buf)), sizeof(buf), "read");
		atomic_store_explicit(&ALIVE[id], 1, memory_order_relaxed);
	}
}

int main(int argc, const char *argv[])
{
	pthread_t pt;
	int n;

	if (argc != 3) {
		fprintf(stderr, "usage: %s GROUPS WRITERS\n",
			argc > 0 ? argv[0] : "prog");
		return EXIT_FAILURE;
	}

	GROUPS = parse_count(argv[1], "GROUPS", MAX_GROUPS);
	WRITERS = parse_count(argv[2], "WRITERS", INT_MAX);

	for (n = 0; n < GROUPS; ++n) {
		/* pthread_create() returns the error number; errno is not set. */
		int err = pthread_create(&pt, NULL, group, (void *)(long)n);

		if (err) {
			fprintf(stderr, "pthread_create: %s\n", strerror(err));
			abort();
		}
	}

	for (;;) {
		sleep(1);

		for (n = 0; n < GROUPS; ++n) {
			/* Test-and-clear in one step so no update is lost. */
			if (atomic_exchange_explicit(&ALIVE[n], 0,
						     memory_order_relaxed) == 0)
				printf("!!! thread %d stuck?\n", n);
		}
	}
}