It semms that my suggestion will still not always fix the problem. There is still an ABA problem, Because the descriptor that just died could already be reused for a different purpose. So I changed the code to retry several times until it fails. This fixed the deadlocks so far. Unfortunately the native OS/2 select implementation has a different interface. So the code is not 1:1 portable. On 17.03.12 10.13, Marcel M?ller wrote: > If select() in the implementation of pa_poll returns an error because of > an invalid socket, pa_poll tries to find out which descriptor is bad. > And then retries the select. But this is still in not synchronized > context. So a second descriptor may become bad on the second call to > select. This is not captured. And consequently pa_poll fails and the > mainloop terminates, resulting in deadlocks if further activity is on > the way. > It seems that I managed to trigger this problem on OS/2 from time to > time when closing a pa_stream and a pa_context and opening another > context at almost the same time. > > I would recommend to jump to the previous select() call instead of > executing another one without error checking. To avoid infinite loops > this should only be done if the check removed at least one descriptor. ----- #if defined(OS_IS_OS2) int pa_poll (struct pollfd *fds, unsigned long int nfds, int timeout) { int* selectfds; int nselectfds; struct pollfd* f; struct pollfd* fdsend = fds + nfds; int nreads, nwrites, nexcepts; int* pselectfd; int ready; int nretry = nfds; /* First check how many descriptors we need */ nselectfds = 0; for (f = fds; f != fdsend; ++f) { int events = f->events; nselectfds += !!(events & POLLIN) + !!(events & POLLPRI) + !!(events & POLLOUT); f->revents = 0; } selectfds = alloca(nselectfds * sizeof *selectfds); retry: /* populate selectfds */ nreads = 0; nwrites = 0; nexcepts = 0; pselectfd = selectfds; for (f = fds; f != fdsend; ++f) { if (f->events & POLLIN) { *pselectfd++ = f->fd; ++nreads; } } for (f = fds; f != fdsend; ++f) { if (f->events & POLLOUT) { *pselectfd++ = f->fd; ++nwrites; } } for (f = fds; f != fdsend; ++f) { if (f->events & POLLPRI) { *pselectfd++ = f->fd; ++nexcepts; } } /* execute select */ ready = os2_select(selectfds, nreads, nwrites, nexcepts, timeout); if (ready == -1) { errno = sock_errno(); /* One of the descriptors is bad. Find out which one. */ int removed = 0; for (f = fds; f != fdsend; ++f) if (f->events != 0) { if (os2_select(&f->fd, 1, 0, 0, 0) == -1) { /* do not care any more */ f->events = 0; ++removed; } } DEBUGLOG(("pa_poll - recover? %i\n", removed)); if (nretry) { --nretry; goto retry; } } else if (ready) { /* Find out the descriptors that got ready. */ ready = 0; pselectfd = selectfds; for (f = fds; f != fdsend; ++f) { if (f->events & POLLIN) { if (*pselectfd++ != -1) { ++ready; f->revents = POLLIN; } } } for (f = fds; f != fdsend; ++f) { if (f->events & POLLOUT) { if (*pselectfd++ != -1) { if (!f->revents) ++ready; f->revents |= POLLOUT; } } } for (f = fds; f != fdsend; ++f) { if (f->events & POLLPRI) { if (*pselectfd++ != -1) { if (!f->revents) ++ready; f->revents |= POLLPRI; } } } } return ready; } #elif !defined(HAVE_POLL_H) || defined(OS_IS_DARWIN)