[PATCH] deadlock in script

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I use the "script" command to save the output of certain jobs on a
heavily loaded Linux cluster. Every now and then the "script" command
hangs.

I made some modifications to util-linux 2.23 and have been using the
patched "script" command without problems for about a year.

I'm attaching the patches (updated to the current master) for review.

Best regards,

Csaba Kos
From fad482427ddd819a92d1e636e20bbf8adaf721dd Mon Sep 17 00:00:00 2001
From: Csaba Kos <csaba.kos@xxxxxxxxx>
Date: Fri, 30 May 2014 14:33:32 +0900
Subject: [PATCH 1/2] script: fix a rare deadlock after child termination

---
 term-utils/script.c | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/term-utils/script.c b/term-utils/script.c
index e5d239c..32906d0 100644
--- a/term-utils/script.c
+++ b/term-utils/script.c
@@ -36,6 +36,9 @@
  * - added Native Language Support
  *
  * 2000-07-30 Per Andreas Buer <per@xxxxxxxxx> - added "q"-option
+ *
+ * 2014-05-30 Csaba Kos <csaba.kos@xxxxxxxxx>
+ * - fixed a rare deadlock after child termination
  */
 
 /*
@@ -114,6 +117,8 @@ int	tflg = 0;
 int	forceflg = 0;
 int	isterm;
 
+sigset_t block_mask, unblock_mask;
+
 int die;
 int resized;
 
@@ -306,6 +311,7 @@ doinput(void) {
 	int errsv = 0;
 	ssize_t cc = 0;
 	char ibuf[BUFSIZ];
+	fd_set readfds;
 
 	/* close things irrelevant for this process */
 	if (fscript)
@@ -314,14 +320,27 @@ doinput(void) {
 		fclose(timingfd);
 	fscript = timingfd = NULL;
 
+	FD_ZERO(&readfds);
+
+	/* block SIGCHLD */
+	sigprocmask(SIG_SETMASK, &block_mask, &unblock_mask);
+
 	while (die == 0) {
-		if ((cc = read(STDIN_FILENO, ibuf, BUFSIZ)) > 0) {
-			if (write_all(master, ibuf, cc)) {
-				warn (_("write failed"));
-				fail();
+		FD_SET(STDIN_FILENO, &readfds);
+
+		/* wait for input or signal (including SIGCHLD) */
+		if ((cc = pselect(STDIN_FILENO + 1, &readfds, NULL, NULL, NULL,
+			&unblock_mask)) > 0) {
+
+			if ((cc = read(STDIN_FILENO, ibuf, BUFSIZ)) > 0) {
+				if (write_all(master, ibuf, cc)) {
+					warn (_("write failed"));
+					fail();
+				}
 			}
 		}
-		else if (cc < 0 && errno == EINTR && resized)
+
+		if (cc < 0 && errno == EINTR && resized)
 		{
 			/* transmit window change information to the child */
 			if (isterm) {
@@ -330,12 +349,15 @@ doinput(void) {
 			}
 			resized = 0;
 
-		} else {
+		} else if (cc <= 0) {
 			errsv = errno;
 			break;
 		}
 	}
 
+	/* unblock SIGCHLD */
+	sigprocmask(SIG_SETMASK, &unblock_mask, NULL);
+
 	/* To be sure that we don't miss any data */
 	wait_for_empty_fd(slave);
 	wait_for_empty_fd(master);
@@ -404,6 +426,7 @@ dooutput(void) {
 	struct timeval tv;
 	double oldtime=time(NULL), newtime;
 	int errsv = 0;
+	fd_set readfds;
 
 	close(STDIN_FILENO);
 #ifdef HAVE_LIBUTIL
@@ -416,6 +439,8 @@ dooutput(void) {
 	my_strftime(obuf, sizeof obuf, "%c\n", localtime(&tvec));
 	fprintf(fscript, _("Script started on %s"), obuf);
 
+	FD_ZERO(&readfds);
+
 	do {
 		if (die || errsv == EINTR) {
 			struct pollfd fds[] = {{ .fd = master, .events = POLLIN }};
@@ -423,10 +448,23 @@ dooutput(void) {
 				break;
 		}
 
+		/* block SIGCHLD */
+		sigprocmask(SIG_SETMASK, &block_mask, &unblock_mask);
+
+		FD_SET(master, &readfds);
 		errno = 0;
-		cc = read(master, obuf, sizeof (obuf));
+
+		/* wait for input or signal (including SIGCHLD) */
+		if ((cc = pselect(master+1, &readfds, NULL, NULL, NULL,
+			&unblock_mask)) > 0) {
+
+			cc = read(master, obuf, sizeof (obuf));
+		}
 		errsv = errno;
 
+		/* unblock SIGCHLD */
+		sigprocmask(SIG_SETMASK, &unblock_mask, NULL);
+
 		if (tflg)
 			gettimeofday(&tv, NULL);
 
-- 
1.8.5.rc3.2.gc302941

From a2dd4df349f426c6605e4b151aafccce4b2ea8e7 Mon Sep 17 00:00:00 2001
From: Csaba Kos <csaba.kos@xxxxxxxxx>
Date: Fri, 30 May 2014 14:51:38 +0900
Subject: [PATCH 2/2] script: fix spurious exit from input read loop on EINTR.

---
 term-utils/script.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/term-utils/script.c b/term-utils/script.c
index 32906d0..63913c8 100644
--- a/term-utils/script.c
+++ b/term-utils/script.c
@@ -328,6 +328,7 @@ doinput(void) {
 	while (die == 0) {
 		FD_SET(STDIN_FILENO, &readfds);
 
+		errno = 0;
 		/* wait for input or signal (including SIGCHLD) */
 		if ((cc = pselect(STDIN_FILENO + 1, &readfds, NULL, NULL, NULL,
 			&unblock_mask)) > 0) {
@@ -349,7 +350,7 @@ doinput(void) {
 			}
 			resized = 0;
 
-		} else if (cc <= 0) {
+		} else if (cc <= 0 && errno != EINTR) {
 			errsv = errno;
 			break;
 		}
-- 
1.8.5.rc3.2.gc302941


[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux