Hi, I use the "script" command to save the output of certain jobs on a heavily loaded Linux cluster. Every now and then, the "script" command hangs. I made some modifications to util-linux 2.23 and have been using the patched "script" command without problems for about a year. I have attached the patches (rebased onto the current master) for review. Best regards, Csaba Kos
From fad482427ddd819a92d1e636e20bbf8adaf721dd Mon Sep 17 00:00:00 2001 From: Csaba Kos <csaba.kos@xxxxxxxxx> Date: Fri, 30 May 2014 14:33:32 +0900 Subject: [PATCH 1/2] script: fix a rare deadlock after child termination --- term-utils/script.c | 52 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/term-utils/script.c b/term-utils/script.c index e5d239c..32906d0 100644 --- a/term-utils/script.c +++ b/term-utils/script.c @@ -36,6 +36,9 @@ * - added Native Language Support * * 2000-07-30 Per Andreas Buer <per@xxxxxxxxx> - added "q"-option + * + * 2014-05-30 Csaba Kos <csaba.kos@xxxxxxxxx> + * - fixed a rare deadlock after child termination */ /* @@ -114,6 +117,8 @@ int tflg = 0; int forceflg = 0; int isterm; +sigset_t block_mask, unblock_mask; + int die; int resized; @@ -306,6 +311,7 @@ doinput(void) { int errsv = 0; ssize_t cc = 0; char ibuf[BUFSIZ]; + fd_set readfds; /* close things irrelevant for this process */ if (fscript) @@ -314,14 +320,27 @@ doinput(void) { fclose(timingfd); fscript = timingfd = NULL; + FD_ZERO(&readfds); + + /* block SIGCHLD */ + sigprocmask(SIG_SETMASK, &block_mask, &unblock_mask); + while (die == 0) { - if ((cc = read(STDIN_FILENO, ibuf, BUFSIZ)) > 0) { - if (write_all(master, ibuf, cc)) { - warn (_("write failed")); - fail(); + FD_SET(STDIN_FILENO, &readfds); + + /* wait for input or signal (including SIGCHLD) */ + if ((cc = pselect(STDIN_FILENO + 1, &readfds, NULL, NULL, NULL, + &unblock_mask)) > 0) { + + if ((cc = read(STDIN_FILENO, ibuf, BUFSIZ)) > 0) { + if (write_all(master, ibuf, cc)) { + warn (_("write failed")); + fail(); + } } } - else if (cc < 0 && errno == EINTR && resized) + + if (cc < 0 && errno == EINTR && resized) { /* transmit window change information to the child */ if (isterm) { @@ -330,12 +349,15 @@ doinput(void) { } resized = 0; - } else { + } else if (cc <= 0) { errsv = errno; break; } } + /* unblock SIGCHLD */ + sigprocmask(SIG_SETMASK, &unblock_mask, 
NULL); + /* To be sure that we don't miss any data */ wait_for_empty_fd(slave); wait_for_empty_fd(master); @@ -404,6 +426,7 @@ dooutput(void) { struct timeval tv; double oldtime=time(NULL), newtime; int errsv = 0; + fd_set readfds; close(STDIN_FILENO); #ifdef HAVE_LIBUTIL @@ -416,6 +439,8 @@ dooutput(void) { my_strftime(obuf, sizeof obuf, "%c\n", localtime(&tvec)); fprintf(fscript, _("Script started on %s"), obuf); + FD_ZERO(&readfds); + do { if (die || errsv == EINTR) { struct pollfd fds[] = {{ .fd = master, .events = POLLIN }}; @@ -423,10 +448,23 @@ dooutput(void) { break; } + /* block SIGCHLD */ + sigprocmask(SIG_SETMASK, &block_mask, &unblock_mask); + + FD_SET(master, &readfds); errno = 0; - cc = read(master, obuf, sizeof (obuf)); + + /* wait for input or signal (including SIGCHLD) */ + if ((cc = pselect(master+1, &readfds, NULL, NULL, NULL, + &unblock_mask)) > 0) { + + cc = read(master, obuf, sizeof (obuf)); + } errsv = errno; + /* unblock SIGCHLD */ + sigprocmask(SIG_SETMASK, &unblock_mask, NULL); + if (tflg) gettimeofday(&tv, NULL); -- 1.8.5.rc3.2.gc302941
From a2dd4df349f426c6605e4b151aafccce4b2ea8e7 Mon Sep 17 00:00:00 2001 From: Csaba Kos <csaba.kos@xxxxxxxxx> Date: Fri, 30 May 2014 14:51:38 +0900 Subject: [PATCH 2/2] script: fix spurious exit from input read loop on EINTR. --- term-utils/script.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/term-utils/script.c b/term-utils/script.c index 32906d0..63913c8 100644 --- a/term-utils/script.c +++ b/term-utils/script.c @@ -328,6 +328,7 @@ doinput(void) { while (die == 0) { FD_SET(STDIN_FILENO, &readfds); + errno = 0; /* wait for input or signal (including SIGCHLD) */ if ((cc = pselect(STDIN_FILENO + 1, &readfds, NULL, NULL, NULL, &unblock_mask)) > 0) { @@ -349,7 +350,7 @@ doinput(void) { } resized = 0; - } else if (cc <= 0) { + } else if (cc <= 0 && errno != EINTR) { errsv = errno; break; } -- 1.8.5.rc3.2.gc302941