[PATCH 3/4] fsck: Add -O option to force-kill fscks that run too long.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch adds a "-O" option giving the maximum number of seconds each
fsck is allowed to run.  It can be used to prevent very long-running
fscks from keeping the system out of service for too long.

Signed-off-by: Frank Mayhar <fmayhar@xxxxxxxxxx>

 fsck/fsck.8 |   14 +++++-
 fsck/fsck.c |  155 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 fsck/fsck.h |    6 ++-
 3 files changed, 161 insertions(+), 14 deletions(-)

diff --git a/fsck/fsck.8 b/fsck/fsck.8
index 6253de4..d56b0d7 100644
--- a/fsck/fsck.8
+++ b/fsck/fsck.8
@@ -14,6 +14,8 @@ fsck \- check and repair a Linux filesystem
 .IR fstype ]
 .RB [ \-L
 .IR path ]
+.RB [ \-O
+.RI seconds ]
 .RI [ filesys ...]
 .RB [ \-\- ]
 .RI [ fs-specific-options ]
@@ -70,6 +72,9 @@ Usage or syntax error
 .B 32
 Fsck canceled by user request
 .TP
+.B 64
+Fsck canceled due to timeout
+.TP
 .B 128
 Shared-library error
 .PD
@@ -102,7 +107,7 @@ as two lines, each with the device path prepended. For example:
 .br
 \	/dev/hdc1 status 0 maxrss 92828
 .br
-\	/dev/hdc1 user 2.677592 system 0.861868 elapsed 4
+\	/dev/hdc1 user 2.677592 system 0.861868 elapsed 4.014111
 .TP
 .B \-l
 Lock the whole-disk device by an exclusive
@@ -296,6 +301,13 @@ for mounted filesystems.
 .B \-N
 Don't execute, just show what would be done.
 .TP
+.BI \-O " seconds"
+Allow each fsck to run for a maximum of
+.IR seconds
+seconds, after which time the fsck is considered to have "timed out" and is
+killed with SIGKILL.  This can be used to prevent long fscks from keeping the
+system out of service for an inordinately long time.
+.TP
 .B \-P
 When the
 .B \-A
diff --git a/fsck/fsck.c b/fsck/fsck.c
index e004802..28b7016 100644
--- a/fsck/fsck.c
+++ b/fsck/fsck.c
@@ -31,6 +31,7 @@
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <sys/file.h>
+#include <sys/time.h>
 #include <sys/resource.h>
 #include <fcntl.h>
 #include <limits.h>
@@ -134,6 +135,10 @@ struct fsck_instance *instance_list;
 const char fsck_prefix_path[] = FS_SEARCH_PATH;
 char *fsck_path = 0;
 
+int force_timeout = 0;
+int timeout_secs = 0;
+int timeout_active = 0;
+
 int log_output = 0;
 char *log_path = NULL;
 
@@ -535,6 +540,109 @@ static int progress_active(NOARGS)
 }
 
 /*
+ * Store X - Y in *RESULT (skipped when RESULT is NULL).  Y is modified in
+ * place by the carry.  Return 1 if the difference is negative, otherwise 0.
+ */
+static int timeval_diff(struct timeval *result,
+		struct timeval *x, struct timeval *y)
+{
+	/* Perform the carry for the later subtraction by updating y. */
+	if (x->tv_usec < y->tv_usec) {
+		int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
+		y->tv_usec -= 1000000 * nsec;
+		y->tv_sec += nsec;
+	}
+	if (x->tv_usec - y->tv_usec > 1000000) {
+		int nsec = (x->tv_usec - y->tv_usec) / 1000000;
+		y->tv_usec += 1000000 * nsec;
+		y->tv_sec -= nsec;
+	}
+	if (result) {
+		result->tv_sec = x->tv_sec - y->tv_sec;
+		result->tv_usec = x->tv_usec - y->tv_usec;
+	}
+
+	/* Return 1 if result is negative. */
+	return (x->tv_sec < y->tv_sec) ||
+		(x->tv_sec == y->tv_sec && x->tv_usec < y->tv_usec);
+}
+
+/* Forward reference. */
+static void restart_earliest_timeout(NOARGS);
+
+/*
+ * SIGALRM handler: SIGKILL every instance on instance_list whose deadline
+ * (end_time) has been reached, then re-arm the timer for the next deadline.
+ */
+static void catch_timeout(int i)
+{
+	struct timeval now;
+	struct fsck_instance *inst;
+
+	(void)i;	/* signal number is not needed */
+	timeout_active = 0;
+	gettimeofday(&now, NULL);
+	for (inst = instance_list; inst; inst = inst->next) {
+		/* Deadline still in the future: this one has not timed out. */
+		if (inst->end_time.tv_sec > now.tv_sec)
+			continue;
+		/* Instance timed out.  Flag it so wait_one() can tell, kill it. */
+		inst->flags |= FLAG_TIMEOUT;
+		kill(inst->pid, SIGKILL);
+	}
+	/* Restart timer if necessary. */
+	restart_earliest_timeout();
+	return;
+}
+
+/*
+ * Arm the one-shot SIGALRM timer to fire at absolute time end_time (seconds).
+ */
+static void start_timeout(time_t end_time)
+{
+	struct timeval now;
+	static time_t last_end = 0;
+	struct itimerval itv;
+
+	gettimeofday(&now, NULL);
+	/*
+	 * Set the timer only if end_time is in the future and either no
+	 * timer is active or end_time is earlier than the one already set.
+	 */
+	if (now.tv_sec < end_time && (end_time < last_end || !timeout_active)) {
+		timeout_active = 1;
+		last_end = end_time;
+		signal(SIGALRM, catch_timeout);
+		itv.it_interval.tv_sec = itv.it_interval.tv_usec = 0;
+		itv.it_value.tv_sec = end_time - now.tv_sec;
+		itv.it_value.tv_usec = 0;
+		setitimer(ITIMER_REAL, &itv, NULL);
+	}
+}
+
+/*
+ * Find the earliest deadline still in the future among the instances on
+ * instance_list and arm the timer for it; does nothing when there is none.
+ */
+static void restart_earliest_timeout(NOARGS)
+{
+	struct timeval now;
+	time_t min_end = 0;	/* 0 = none found; not static, recomputed per call */
+	struct fsck_instance *inst;
+
+	gettimeofday(&now, NULL);
+	for (inst = instance_list; inst; inst = inst->next) {
+		if (inst->end_time.tv_sec <= now.tv_sec)
+			continue;	/* deadline already reached */
+		/* Keep the smallest future deadline seen so far. */
+		if (!min_end || inst->end_time.tv_sec < min_end)
+			min_end = inst->end_time.tv_sec;
+	}
+	if (min_end)
+		start_timeout(min_end);
+}
+
+/*
  * Put together a logfile name from the log path and passed device string.
  */
 static void setup_logfile(struct fsck_instance *inst, const char *device)
@@ -583,7 +691,7 @@ static void start_logging(struct fsck_instance *inst)
 static void report_fsck_stats(struct fsck_instance *inst)
 {
 	FILE *fl = NULL;
-	time_t time_diff;
+	struct timeval time_diff;
 
 	if (!inst || !report_stats || noexecute)
 		return;
@@ -591,16 +699,16 @@ static void report_fsck_stats(struct fsck_instance *inst)
 		fl = fdopen(inst->log_fd, "a");
 	if (!fl)
 		fl = stdout;
-	time_diff = inst->end_time - inst->start_time;
+	timeval_diff(&time_diff, &inst->end_time, &inst->start_time);
 	fprintf(fl, "%s status %d maxrss %ld\n",
 		inst->fs->device, inst->exit_status, inst->rusage.ru_maxrss);
-	fprintf(fl, "%s user %d.%06d system %d.%06d elapsed %d\n",
+	fprintf(fl, "%s user %d.%06d system %d.%06d elapsed %d.%06d\n",
 		inst->fs->device,
 		(int)inst->rusage.ru_utime.tv_sec,
 		(int)inst->rusage.ru_utime.tv_usec,
 		(int)inst->rusage.ru_stime.tv_sec,
 		(int)inst->rusage.ru_stime.tv_usec,
-		(int)time_diff);
+		(int)time_diff.tv_sec, (int)time_diff.tv_usec);
 	if (fl != stdout)
 		fclose(fl);
 }
@@ -696,7 +804,12 @@ static int execute(const char *type, struct fs_info *fs, int interactive)
 	inst->pid = pid;
 	inst->prog = string_copy(prog);
 	inst->type = string_copy(type);
-	inst->start_time = time(0);
+	gettimeofday(&inst->start_time, NULL);
+	if (force_timeout) {
+		inst->end_time.tv_sec = inst->start_time.tv_sec + timeout_secs;
+		inst->end_time.tv_usec = inst->start_time.tv_usec;
+		start_timeout(inst->end_time.tv_sec);
+	}
 	inst->next = NULL;
 
 	/*
@@ -796,10 +909,17 @@ static struct fsck_instance *wait_one(int flags)
 		if (sig == SIGINT) {
 			status = EXIT_UNCORRECTED;
 		} else {
-			warnx(_("Warning... %s for device %s exited "
-			       "with signal %d."),
-			       inst->prog, inst->fs->device, sig);
-			status = EXIT_ERROR;
+			if (sig == SIGKILL && (inst->flags & FLAG_TIMEOUT)) {
+				warnx(_("Warning... %s for device %s killed "
+				       "due to timeout.\n"),
+				       inst->prog, inst->fs->device);
+				status = EXIT_TIMEOUT;
+			} else {
+				warnx(_("Warning... %s for device %s exited "
+				       "with signal %d."),
+				       inst->prog, inst->fs->device, sig);
+				status = EXIT_ERROR;
+			}
 		}
 	} else {
 		warnx(_("%s %s: status is %x, should never happen."),
@@ -808,7 +928,7 @@ static struct fsck_instance *wait_one(int flags)
 	}
 	inst->exit_status = status;
 	inst->flags |= FLAG_DONE;
-	inst->end_time = time(0);
+	gettimeofday(&inst->end_time, NULL);
 	memcpy(&inst->rusage, &rusage, sizeof(struct rusage));
 	if (progress && (inst->flags & FLAG_PROGRESS) &&
 	    !progress_active()) {
@@ -825,7 +945,7 @@ static struct fsck_instance *wait_one(int flags)
 			 * bit before sending the kill, to give it
 			 * time to set up the signal handler
 			 */
-			if (inst2->start_time < time(0)+2) {
+			if (inst2->start_time.tv_sec < time(0)+2) {
 				if (fork() == 0) {
 					sleep(1);
 					kill(inst2->pid, SIGUSR1);
@@ -1351,6 +1471,7 @@ static void __attribute__((__noreturn__)) usage(void)
 		" -l         lock the device using flock()\n"
 		" -L <path>  log fsck output for each device to file in <path>\n"
 		" -N         do not execute, just show what would be done\n"
+		" -O <secs>  do not run any fsck for longer than <secs> seconds\n"
 		" -T         do not show the title on startup\n"
 		" -C <fd>    display progress bar; file descriptor is for GUIs\n"
 		" -V         explain what is being done\n"
@@ -1504,6 +1625,18 @@ static void PRS(int argc, char *argv[])
 					usage();
 				log_path = string_copy(tmp);
 				goto next_arg;
+			case 'O':
+				if (force_timeout)
+					usage();
+				force_timeout++;
+				if (arg[j+1])
+					tmp = arg + j + 1;
+				else if ((i+1) < argc)
+					tmp = argv[++i];
+				else
+					usage();
+				timeout_secs = string_to_int(tmp);
+				goto next_arg;
 			case 'r':
 				report_stats++;
 				break;
diff --git a/fsck/fsck.h b/fsck/fsck.h
index 6dfb107..6e41f40 100644
--- a/fsck/fsck.h
+++ b/fsck/fsck.h
@@ -30,6 +30,7 @@
 #define EXIT_UNCORRECTED 4
 #define EXIT_ERROR       8
 #define EXIT_USAGE       16
+#define EXIT_TIMEOUT     64
 #define EXIT_LIBRARY     128
 
 /*
@@ -51,6 +52,7 @@ struct fs_info {
 
 #define FLAG_DONE 1
 #define FLAG_PROGRESS 2
+#define FLAG_TIMEOUT 4
 
 /*
  * Structure to allow exit codes to be stored
@@ -60,8 +62,8 @@ struct fsck_instance {
 	int	flags;
 	int	lock;		/* flock()ed whole disk file descriptor or -1 */
 	int	exit_status;
-	time_t	start_time;
-	time_t	end_time;
+	struct timeval	start_time;
+	struct timeval	end_time;
 	char *	prog;
 	char *	type;
 	struct fs_info *fs;

-- 
Frank Mayhar
fmayhar@xxxxxxxxxx

--
To unsubscribe from this list: send the line "unsubscribe util-linux" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux