Hello!
There's a bug in the select_tut man-page in the chapter "Combining
Signal and Data Events", which should illustrate pselect()-usage.
See the attachments for detailed information, proof-code and a proposed fix.
Regards,
Seb.
/*
This is a demonstration of how the code provided
in the Linux man-page select_tut(2) / "Combining Signal and Data Events"
for counting SIGCHLD-events is not correct.
The problem is that the example-code assumes that
normal signals get queued somehow, which is not the case; quote from signal(7):
"By contrast, if multiple instances of a standard signal are delivered
while that signal is currently blocked, then only one instance is queued."
Bottom line:
Setting a global flag when a signal has occured will do the job
and that's what pselect() is intended for -- but counting the occurences
of a standard signal as shown in the example DOES NOT WORK.
Compile and run this file to see how the example code will result in zombies
if multiple SIGCHLDs are delivered at the same time;
compile with -DCORRECT to get the correct solution for this case.
See select_tut-badcount-fix.patch for a proposal to fix the documentation,.
Comments to Sebastian Kienzl <seb at riot dot organisation>
*/
#include <stdio.h>
#include <errno.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <unistd.h>
#include <assert.h>
volatile int child_events = 0;
void child_sig_handler( int x )
{
child_events++;
signal( SIGCHLD, child_sig_handler );
}
void child_start()
{
const char* prog = "/bin/cat";
pid_t pid = fork();
assert( pid >= 0 );
if( !pid ) {
/* child */
execl( prog, prog, NULL );
assert( 0 );
}
}
int main( int argc, char**argv )
{
int i;
sigset_t sigmask, orig_sigmask;
/* make a process group so we can kill all our children at once easily */
setpgid( 0, 0 );
sigemptyset( &sigmask );
sigaddset( &sigmask, SIGCHLD );
sigprocmask( SIG_BLOCK, &sigmask, &orig_sigmask );
signal( SIGCHLD, child_sig_handler );
/* start 10 children */
for( i = 0; i < 10; i++ )
child_start();
signal( SIGTERM, SIG_IGN );
for( i = 0; i < 4; i++ ) { /* main loop */
struct timespec timeout = { tv_sec: 2, tv_nsec: 0 };
int pselect_ret;
{
int status;
pid_t pid;
#ifndef CORRECT
for( ; child_events > 0; --child_events ) {
/* do event work here */
pid = wait( &status );
#else
while( ( pid = waitpid( -1, &status, WNOHANG ) ) > 0 ) {
#endif
printf( "child event pid %d status %d\n", pid, status );
}
}
pselect_ret = pselect( 0, 0, 0, 0, &timeout, &orig_sigmask );
assert( pselect_ret <= 0 );
if( pselect_ret < 0 ) {
assert( errno == EINTR );
printf( "pselect interrupted by a signal\n" );
}
else if( pselect_ret == 0 ) {
printf( "pselect timeout\n" );
kill( 0, SIGTERM );
}
}
{
int status, zombies = 0;
while( waitpid( -1, &status, WNOHANG ) > 0 )
zombies++;
printf( "%d zombies reaped\n", zombies );
}
return 0;
}
diff -ur man-pages.orig/man2/select_tut.2 man-pages/man2/select_tut.2
--- man-pages.orig/man2/select_tut.2 2009-01-17 12:52:00.000000000 +0100
+++ man-pages/man2/select_tut.2 2009-01-17 14:51:50.000000000 +0100
@@ -276,23 +276,73 @@
.BR pselect ()
call.
For instance, let us say that the event in question
-was the exit of a child process.
-Before the start of the main loop, we
-would block \fBSIGCHLD\fP using
+was the delivery of a \fBSIGTERM\fP which should cause
+our application to exit. Before the start of the main loop, we
+would block \fBSIGTERM\fP using
.BR sigprocmask (2).
Our
.BR pselect ()
-call would enable \fBSIGCHLD\fP by using the virgin signal mask.
+call would enable \fBSIGTERM\fP by using the virgin signal mask.
Our
program would look like:
.PP
.nf
-int child_events = 0;
+int terminate = 0;
void
+sigterm_handler(int x)
+{
+ terminate = 1;
+ signal(SIGTERM, sigterm_handler);
+}
+
+int
+main(int argc, char **argv)
+{
+ sigset_t sigmask, orig_sigmask;
+
+ sigemptyset(&sigmask);
+ sigaddset(&sigmask, SIGTERM);
+ sigprocmask(SIG_BLOCK, &sigmask, &orig_sigmask);
+
+ signal(SIGTERM, sigterm_handler);
+
+ for (;;) { /* main loop */
+
+ if( terminate )
+ break;
+
+ /* if we'd use select() instead of pselect, receiving an
+ unblocked SIGTERM at this point would make select() block
+ until something happened on the file descriptors,
+ which is not what we want!
+ */
+
+ r = pselect(nfds, &rd, &wr, &er, 0, &orig_sigmask);
+
+ /* main body of program */
+ }
+}
+.fi
+.PP
+It is also important to be aware of the fact that standard signals are
+not queued (see
+.BR signal (7)
+) which means that if multiple instances of the same signal occured while
+the signal is blocked (which is anywhere except in
+.BR pselect ()
+in this case), the signal-handler will only get called once.
+.PP
+Thus, if your program is dealing with multiple children whose exit-status
+should be fetched with
+.BR wait (2)
+, it doesn't work to count \fBSIGCHLD\fP events as illustrated in earlier
+versions of this man-page. Rather, your code should look something like this:
+
+.nf
+void
child_sig_handler(int x)
{
- child_events++;
signal(SIGCHLD, child_sig_handler);
}
@@ -306,15 +356,28 @@
sigprocmask(SIG_BLOCK, &sigmask, &orig_sigmask);
signal(SIGCHLD, child_sig_handler);
-
+
for (;;) { /* main loop */
- for (; child_events > 0; child_events\-\-) {
- /* do event work here */
+
+ /* get all state changes of all children */
+ for (;;) {
+ pid_t pid;
+ int status;
+
+ pid = waitpid( -1, &status, WNOHANG );
+ if( pid > 0 ) {
+ /* do event work here */
+ }
+ else {
+ /* no more state changes, break loop */
+ break;
+ }
}
+
r = pselect(nfds, &rd, &wr, &er, 0, &orig_sigmask);
/* main body of program */
- }
+ }
}
.fi
.SS Practical