Re: [C/R] sleepers don't wake up on restart

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Oren Laadan [orenl@xxxxxxxxxxxxxxx] wrote:
| 
| I just posted v14-rc3 which includes the c/r of restart-blocks.
| That should improve the situation.
| 
| However, depending on which syscalls one uses, process may still
| seem "stuck" after restart because the current code still does
| not save signals nor task timers; If a signal was pending (SIGALRM
| for example) after freezing but before checkpoint, it will be lost.
| If a timer was set at checkpoint, it will not be restored.
| 
| So depending on your program, you may still experience issues
| until I add patches to handle that.

OK. Just an FYI: the original program seemed to work fine, but when
I try to restart a small process tree, it gets stuck on restart again.

I am running on the v14-rc3 branch. Could this have anything to do with
a pending SIGCHLD? It seems to be easier to reproduce with larger process
trees (2 children per process, 4 or more levels deep).

Test programs (attached) (they need some cleanup though)

	ptree2.c
	p2.loop

--------- Processes after restart:

$ ps -ef|grep ptree

root     10461 10459  0 22:07 pts/0    00:00:00 ./ptree2 -n 1 -d 2
root     10465 10461  0 22:07 pts/0    00:00:00 ./ptree2 -n 1 -d 2
root     10466 10465  0 22:07 pts/0    00:00:00 [ptree2] <defunct>
root     10479  8220  0 22:09 pts/1    00:00:00 grep ptree

---------- Process stacks

tree2        S f6270a90     0 10461  10459
 f5e59380 00000082 08048a86 f6270a90 f6270bfc c2b32260 00000000 0000d9d3
 f5f423b0 00000000 ffffffff 00000000 00000000 00000001 00000000 f6270a88
 00000000 f6270a90 00000000 c02243aa 00000004 00000003 0000000c 00000006
Call Trace:
 [<c02243aa>] do_wait+0x1dd/0x2f6
 [<c021cd14>] default_wake_function+0x0/0x8
 [<c0224542>] sys_wait4+0x7f/0x92
 [<c0224568>] sys_waitpid+0x13/0x17
 [<c0202ce5>] sysenter_do_call+0x12/0x25
 [<c0510000>] rtl8139_init_one+0x5ae/0x887
ptree2        S f5f423b0     0 10465  10461
 f6002180 00000082 c2b265c8 f5f423b0 f5f4251c c2b29260 f67b1f44 e06d0177
 00000282 c023363c c2b265c8 00000000 00000282 0000c350 00000001 0000c350
 00000001 f67b1f44 0000c350 c051be99 00000000 00000001 0000c350 bf9d0e04
Call Trace:
 [<c023363c>] hrtimer_start_range_ns+0x105/0x111
 [<c051be99>] do_nanosleep+0x54/0x8c
 [<c02336d7>] hrtimer_nanosleep+0x8f/0xee
 [<c02332b8>] hrtimer_wakeup+0x0/0x18
 [<c051be7f>] do_nanosleep+0x3a/0x8c
 [<c0233777>] sys_nanosleep+0x41/0x51
 [<c0202ce5>] sysenter_do_call+0x12/0x25
ptree2        ? f6bee040     0 10466  10465
 f638cb80 00000046 00200200 f6bee040 f6bee1ac c2b17260 f6bee038 0000dd77
 00000000 c022f576 ffffffff 00000303 00000000 00000001 00000000 00000012
 f5a61e84 f6bee040 f6bee038 c0224c29 f6270a90 00000001 f6bee038 f5a61f88
Call Trace:
 [<c022f576>] wakeme_after_rcu+0x0/0x8
 [<c0224c29>] do_exit+0x638/0x63c
 [<c0224c87>] do_group_exit+0x5a/0x83
 [<c0224cbd>] sys_exit_group+0xd/0x10
 [<c0202ce5>] sysenter_do_call+0x12/0x25
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wait.h>

/* Depth of the process tree; default 3, overridden by -d in main() */
int max_depth = 3;
/* Number of children forked per process; default 3, overridden by -n in main() */
int num_children = 3;

/* Marker file created by checkpoint_ready() */
#define CKPT_READY		"checkpoint-ready"
/* Marker file polled by checkpoint_done() */
#define CKPT_DONE		"checkpoint-done"
/* Marker file polled by test_done(); its presence ends the per-process loop */
#define TEST_DONE		"test-done"
/* Main log file name, also used as the prefix of the per-process log files */
#define LOG_FILE		"log-ptree2"

#undef SYS_GETGPID

#ifdef SYS_GETGPID
static inline int sys_getgpid()
{
#define	__NR_getgpid	335
        return syscall(__NR_getgpid);
}
#else
#define	sys_getgpid	getpid
#endif

/* Shared log stream: NULL until main() opens LOG_FILE; closed by do_exit() */
FILE *logfp;

void do_exit(int status)
{
	if (logfp) {
		fflush(logfp);
		fclose(logfp);
	}
	_Exit(status);
}

/*
 * Check whether the TEST_DONE marker file exists.
 *
 * Returns 1 when the file exists and 0 when it does not (ENOENT).  Any
 * other access() failure is reported and the process terminates through
 * do_exit(1).
 */
int test_done()
{
	int err;

	if (access(TEST_DONE, F_OK) == 0)
		return 1;

	/* Save errno before any further library call can overwrite it */
	err = errno;
	if (err == ENOENT)
		return 0;

	/*
	 * main() calls this function before the log file is opened, so fall
	 * back to stderr while logfp is still NULL rather than handing a
	 * NULL stream to fprintf().
	 */
	fprintf(logfp ? logfp : stderr, "access(%s) failed, %s\n", TEST_DONE,
			strerror(err));
	do_exit(1);

	return 0;	/* not reached: do_exit() does not return */
}

/*
 * Check whether the CKPT_DONE marker file exists.
 *
 * Returns 1 when the file exists and 0 when access() fails with ENOENT.
 * Any other access() failure is written to the log and the process
 * terminates through do_exit(1).
 */
int checkpoint_done()
{
	if (access(CKPT_DONE, F_OK) != 0) {
		if (errno != ENOENT) {
			fprintf(logfp, "access(%s) failed, %s\n", CKPT_DONE,
					strerror(errno));
			do_exit(1);
		}
		return 0;
	}

	return 1;
}

/*
 * Create the CKPT_READY marker file to signal that the process tree is set
 * up.  On failure the error is logged and the process terminates through
 * do_exit(1).
 */
void checkpoint_ready()
{
	int fd;

	/* creat() takes exactly two arguments: the path and the mode */
	fd = creat(CKPT_READY, 0666);
	if (fd < 0) {
		fprintf(logfp, "creat(%s) failed, %s\n", CKPT_READY,
				strerror(errno));
		do_exit(1);
	}

	/* Only the existence of the file matters; nothing is written to it */
	close(fd);
}

/*
 * Log how child @pid terminated, decoding the wait status @status:
 * either its exit code, the signal that killed it, or a generic note for
 * any other state.
 *
 * Declared void explicitly: the function never returned a value, and
 * implicit int is not valid since C99.
 */
void print_exit_status(int pid, int status)
{
	fprintf(logfp, "Pid %d unexpected exit - ", pid);
	if (WIFEXITED(status)) {
		fprintf(logfp, "exit status %d\n", WEXITSTATUS(status));
	} else if (WIFSIGNALED(status)) {
		fprintf(logfp, "got signal %d\n", WTERMSIG(status));
	} else {
		fprintf(logfp, "stopped/continued ?\n");
	}
}

/*
 * Reap every child of the calling process, then exit.
 *
 * Children that did not exit with status 0 are reported through
 * print_exit_status().  The loop ends when waitpid() fails; anything other
 * than ECHILD ("no more children") is treated as an error.  This function
 * never returns: every path ends in do_exit().
 */
void do_wait()
{
	int rc;
	int n;
	int status;

	n = 0;
	while (1) {
		rc = waitpid(-1, &status, 0);
		if (rc < 0)
			break;

		n++;
		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
			print_exit_status(rc, status);
	}

	if (errno != ECHILD) {
		fprintf(logfp, "waitpid(%d) failed, error %s\n",
					rc, strerror(errno));
		do_exit(1);
	}

	/*
	 * NOTE(review): the expected total of num_children * max_depth is
	 * kept as written; confirm it matches what pid 1 can actually reap.
	 */
	if (getpid() == 1 && n != num_children * max_depth) {
		/* Report the number actually reaped (n), not the configured count */
		fprintf(logfp, "Only %d of %d children exited ?\n",
			n, num_children * max_depth);
		do_exit(1);
	}

	do_exit(0);
}

static int do_child(int depth, char *suffix);

/*
 * Fork num_children children of the calling process.
 *
 * @depth:         tree depth assigned to the new children
 * @parent_suffix: log-file suffix of the caller; each child gets
 *                 "<parent_suffix>-<index>"
 *
 * Each child runs do_child() and does not come back from it.  A fork()
 * failure, or a suffix that does not fit the local buffer, is logged and
 * terminates the caller through do_exit(1).
 */
void create_children(int depth, char *parent_suffix)
{
	int i;
	int len;
	int child_pid;
	char suffix[1024];

	for (i = 0; i < num_children; i++) {
		/* Bounded formatting: never write past the end of suffix[] */
		len = snprintf(suffix, sizeof(suffix), "%s-%d",
				parent_suffix, i);
		if (len < 0 || (size_t)len >= sizeof(suffix)) {
			fprintf(logfp, "suffix too long, depth %d, child %d\n",
				depth, i);
			do_exit(1);
		}

		child_pid = fork();
		if (child_pid == 0) {
			do_child(depth, suffix);
		} else if (child_pid < 0) {
			fprintf(logfp, "fork() failed, depth %d, "
				"child %d, error %s\n", depth, i,
				strerror(errno));
			do_exit(1);
		}
	}
}

/*
 * Body of every forked process in the tree.
 *
 * @depth:  depth of this process in the tree
 * @suffix: suffix appended to LOG_FILE to name this process's own log file
 *
 * Creates the next level of the tree while @depth is below max_depth, then
 * writes a heartbeat to its log file once a second until the TEST_DONE
 * marker appears, and finally reaps its children.  Never returns to the
 * caller: it ends in do_wait()/do_exit().
 */
static int do_child(int depth, char *suffix)
{
	int i;
	int len;
	FILE *cfp;
	char cfile[256];
	char *mode = "w";

	/*
	 * Recursively calls do_child() and both parent and child
	 * execute the code below
	 */
	if (depth < max_depth)
		create_children(depth+1, suffix);

	/*
	 * The caller's suffix buffer is larger than cfile[], so format with
	 * a bound and refuse to continue with a truncated file name.
	 */
	len = snprintf(cfile, sizeof(cfile), "%s%s", LOG_FILE, suffix);
	if (len < 0 || (size_t)len >= sizeof(cfile)) {
		fprintf(logfp, "log file name too long for suffix %s\n",
				suffix);
		do_exit(1);
	}

	i = 0;
	while (!test_done()) {
		/* truncate the first time, append after that */
		cfp = fopen(cfile, mode);
		mode = "a";
		if (!cfp) {
			fprintf(logfp, "fopen(%s) failed, error %s\n", cfile,
					strerror(errno));
			do_exit(1);
		}
		fprintf(cfp, "gpid %d, pid %d: i %d\n", sys_getgpid(),
				getpid(), i++);
		fflush(cfp);
		sleep(1);
		fprintf(cfp, "gpid %d: woke up from sleep(1)\n", sys_getgpid());
		fflush(cfp);
		fclose(cfp);
	}

	/* Wait for any children that pre-deceased us */
	do_wait();

	do_exit(0);
	return 0;	/* not reached: do_exit() does not return */
}

/*
 * Print the command-line synopsis to stdout and terminate with status 1.
 *
 * @argv: the program's argument vector; argv[0] is used as the program name
 */
static void usage(char *argv[])
{
	/* Option names match the getopt() string and the descriptions below */
	printf("%s [-h] [-d max-depth] [-n num-children]\n", argv[0]);
	printf("\t <max-depth> max depth of process tree, default 3\n");
	printf("\t <num-children> # of children per process, default 3\n");

	/* do_exit() ends in _Exit(), which does not flush stdout for us */
	fflush(stdout);
	do_exit(1);
}

/*
 * Parse a non-negative decimal option argument.
 *
 * Returns the value, or -1 when @arg is not a complete decimal number, is
 * negative, or does not fit in an int.
 */
static int parse_count(const char *arg)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (errno != 0 || end == arg || *end != '\0')
		return -1;
	if (val < 0 || (long)(int)val != val)
		return -1;

	return (int)val;
}

/*
 * Entry point: build the process tree, announce readiness for checkpoint
 * and wait for the children to finish.
 *
 * Options: -d <max-depth>, -n <num-children>, -h for help.
 */
int main(int argc, char *argv[])
{
	int c;

	if (test_done()) {
		printf("Remove %s before running test\n", TEST_DONE);
		/* do_exit() ends in _Exit(), which does not flush stdout */
		fflush(stdout);
		do_exit(1);
	}

	/* getopt() signals the end of the options with -1 */
	while ((c = getopt(argc, argv, "hd:n:")) != -1) {
		switch (c) {
		case 'd':
			max_depth = parse_count(optarg);
			if (max_depth < 0)
				usage(argv);
			break;
		case 'n':
			num_children = parse_count(optarg);
			if (num_children < 0)
				usage(argv);
			break;
		case 'h':
		default:
			usage(argv);
		}
	}

	logfp = fopen(LOG_FILE, "w");
	if (!logfp) {
		fprintf(stderr, "fopen(%s) failed, %s\n", LOG_FILE,
					strerror(errno));
		fflush(stderr);
		do_exit(1);
	}

	/* Drop stdin/stdout/stderr; from here on all output goes to logfp */
	close(0);close(1);close(2);

	create_children(1, "");

	/*
	 * Now that we closed the special files and created process tree
	 * tell any wrapper scripts, we are ready for checkpoint
	 */
	checkpoint_ready();

#if 0
	while(!checkpoint_done())
		sleep(1);
#endif

	/* do_wait() reaps the children and exits; it does not return */
	do_wait();

	return 0;
}
#!/bin/bash

# Directory under which freeze/unfreeze/cleancgroup look for per-container
# cgroup directories
freezermountpoint=/cgroups
# Base directory holding the helper binaries listed below
CHECKPOINT=".."
NS_EXEC="$CHECKPOINT/bin/ns_exec"
CR="$CHECKPOINT/bin/cr"
RSTR="$CHECKPOINT/bin/rstr"
MKTREE="$CHECKPOINT/bin/mktree"
# echo with backslash-escape interpretation, used for all progress output
ECHO="/bin/echo -e"

# Test program and the arguments it is started with
TEST_CMD="./ptree2"
TEST_ARGS="-n 1 -d 2"	# -n: children per process, -d: depth of process tree
# Log file of this script; truncated at the start of every iteration
SCRIPT_LOG="log-p2-loop"
# File passed to ns_exec -P; the main loop reads the container-init pid from it
TEST_PID_FILE="pid.ptree2";

# NOTE(review): not referenced in the visible part of the script
LOG_FILE="loop-ptree2.log"
# Directory into which the test's log files are copied after the checkpoint
SNAPSHOT_DIR="snap1"

# Marker file touched by the main loop to end the test
TEST_DONE="test-done"
# Checkpoint image written by $CR and read back on restart
CHECKPOINT_FILE="checkpoint-ptree2";
# Marker file the main loop waits for before checkpointing
CHECKPOINT_READY="checkpoint-ready"
# Marker file touched by the main loop once the checkpoint has been taken
CHECKPOINT_DONE="checkpoint-done"
# Prefix of the test's log files (snapshotted after the checkpoint)
TEST_LOG_PREFIX="log-ptree2"
# NOTE(review): not referenced in the visible part of the script
TEST_LOG_SNAP="${TEST_LOG_PREFIX}.snap"

# Freeze the cgroup named $1 by writing FROZEN to its freezer.state file.
# Sets the global $ret to the status of the write and prints a FAIL line
# when it is non-zero.
freeze()
{
	# Path of the state file; also used in the failure message below
	local state="${freezermountpoint}/$1/freezer.state"

	$ECHO "\t - Freezing $1"
	$ECHO FROZEN > "$state"
	ret=$?
	if [ $ret -ne 0 ]; then
		$ECHO "***** FAIL: 'echo FROZEN > $state' returned $ret"
	fi
}

# Thaw the cgroup named $1 by writing THAWED to its freezer.state file.
# Sets the global $ret to the status of the write and prints a FAIL line
# when it is non-zero.
unfreeze()
{
	# Path of the state file; also used in the failure message below
	local state="${freezermountpoint}/$1/freezer.state"

	$ECHO "\t - Unfreezing $1"
	$ECHO THAWED > "$state"
	ret=$?
	if [ $ret -ne 0 ]; then
		$ECHO "***** FAIL: 'echo THAWED > $state' returned $ret"
	fi
}

# Remove the cgroup directory named $1 and warn when it is still present
# afterwards.
cleancgroup()
{
	local cgdir="${freezermountpoint}/$1"

	$ECHO "\t - Clean cgroup of $1"
	rmdir "$cgdir"
	if [ -d "$cgdir" ]; then
		# Quoted so the shell does not glob-expand the asterisks
		$ECHO "***** WARNING $cgdir remains"
	fi
}

# Checkpoint the process tree rooted at pid $1 into $CHECKPOINT_FILE using $CR.
# Sets the global $ret to the status of $CR.  On failure a process listing
# is appended to $SCRIPT_LOG and the script exits with status 1.
checkpoint()
{
	local pid=$1

	$ECHO "Checkpoint: $CR $pid $CHECKPOINT_FILE"
	$CR $pid $CHECKPOINT_FILE
	ret=$?

	# Success: nothing more to do
	[ $ret -eq 0 ] && return 0

	$ECHO "***** FAIL: Checkpoint of $pid failed"
	ps aux |grep $TEST_CMD >> $SCRIPT_LOG
	exit 1
}


# Start the test program in a new container and print the global pid of
# container-init on stdout.
#
# The function is meant to be called through command substitution, so only
# the pid is written to stdout; progress and failure messages go to stderr.
function create_container
{
	local pid;

	$ECHO "\t - $NS_EXEC -cpmP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS" >&2
	$NS_EXEC -cpmP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS &

	# Wait for test to finish setup
	while [ ! -f $CHECKPOINT_READY ]; do
		# $ECHO already holds the full "/bin/echo -e" command
		$ECHO "\t - Waiting for $CHECKPOINT_READY" >&2
		sleep 1;
	done;

	# Find global pid of container-init
	pid=`cat $TEST_PID_FILE`;
	if [  "x$pid" == "x" ]; then
		$ECHO "***** FAIL: Invalid container-init pid $pid" >&2
		ps -ef |grep $TEST_CMD >> $SCRIPT_LOG
		exit 1
	fi
	$ECHO "Created container with pid $pid" >> $SCRIPT_LOG
	echo $pid
}

# Restart the checkpointed process tree from $CHECKPOINT_FILE inside a new
# container.  The restart command runs in the background with its output
# appended to $SCRIPT_LOG.
function restart_container
{
	local ret;

	$ECHO "\t - Exec $NS_EXEC -cpuim -- $MKTREE --no-pids < $CHECKPOINT_FILE"

	sleep 1

	$NS_EXEC -cpuim -- $MKTREE --no-pids < $CHECKPOINT_FILE >> $SCRIPT_LOG 2>&1 &
	# NOTE(review): the command above is started in the background, so $?
	# here does not carry the exit status of the restart itself and the
	# failure branch below is unlikely to ever run.  A real check would
	# need to capture $! and wait on it - confirm intended behaviour.
	ret=$?

	if [ $ret -ne 0 ]; then
		# $pid is not set in this function; it is the global assigned in the main loop
		$ECHO "***** FAIL: Restart of $pid failed"
		ps aux |grep $TEST_CMD >> $SCRIPT_LOG
		exit 1;
	fi
}


# Require a mounted cgroup hierarchy whose /proc/mounts entry mentions both
# the freezer and the ns subsystems; otherwise print mount instructions and
# give up.
line=$(grep freezer /proc/mounts)
if ! $ECHO $line | grep "\<ns\>"; then
	$ECHO "please mount freezer and ns cgroups"
	$ECHO "  mkdir /cgroups"
	$ECHO "  mount -t cgroup -o freezer,ns cgroup /cgroups"
	exit 1
fi
#freezermountpoint=`$ECHO $line | awk '{ print $2 '}`

# Make sure no stray test program from another run is still going
killall $TEST_CMD > $SCRIPT_LOG 2>&1

# Main test loop.  Each iteration starts the test in a container, freezes
# and checkpoints it, kills it, restarts it from the checkpoint image and
# then waits for the restarted container to exit.  Runs until a step fails.
cnt=1
while [ 1 ]; do
	> $SCRIPT_LOG;
	dmesg -c > /dev/null

	$ECHO "===== Iteration $cnt"

	# Remove any 'state' files (including the checkpoint-done marker left
	# by the previous iteration), start the app and let it tell us when
	# it is ready
	rm -f $CHECKPOINT_READY $CHECKPOINT_DONE $TEST_DONE $TEST_PID_FILE

	$NS_EXEC -cpumP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS&
	$ECHO "\t - $NS_EXEC -cpumP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS"

	# Wait for test to finish setup
	while [ ! -f $CHECKPOINT_READY ]; do
		$ECHO "\t - Waiting for $CHECKPOINT_READY"
		sleep 1;
	done;

	ps -ef |grep ptree2 >> $SCRIPT_LOG

	# Find global pid of container-init
	pid=`cat $TEST_PID_FILE`;
	if [  "x$pid" == "x" ]; then
		$ECHO "***** FAIL: Invalid container-init pid $pid"
		ps -ef |grep $TEST_CMD
		exit 1
	fi
	$ECHO $pid
	#pid=`create_container`
	$ECHO "\t - Done creating container"

	# Prepare for snapshot: keep the previous snapshot as .prev and make
	# sure the snapshot directory exists, also on the very first run
	if [ -d $SNAPSHOT_DIR ]; then
		rm -rf ${SNAPSHOT_DIR}.prev
		mv $SNAPSHOT_DIR ${SNAPSHOT_DIR}.prev
	fi
	mkdir -p $SNAPSHOT_DIR

	freeze $pid

	num_pids1=`ps -ef |grep $TEST_CMD | wc -l`

	checkpoint $pid

	#$ECHO t > /proc/sysrq-trigger
	#dmesg > dmesg-1.out

	# Snapshot the log files
	cp ${TEST_LOG_PREFIX}* $SNAPSHOT_DIR
	touch $CHECKPOINT_DONE

	killall -9 `basename $TEST_CMD`

	unfreeze $pid

	sleep 3

	cleancgroup $pid

	# Restore the snapshot after the main process has been killed
	/bin/cp ${SNAPSHOT_DIR}/* .

	# Restart.
	restart_container

	sleep 3;
	num_pids2=`ps -ef |grep $TEST_CMD | wc -l`

	ps -ef |grep ptree2 >> $SCRIPT_LOG

	$ECHO "\t - num_pids1 $num_pids1, num_pids2 $num_pids2"

	# Find global-pid of container-init
	nspid=`pidof $NS_EXEC`
	if [ "x$nspid" == "x" ]; then
		$ECHO "***** FAIL: Can't find pid of $NS_EXEC"
		exit 1;
	fi

	# End test gracefully
	touch $TEST_DONE

	$ECHO "\t - Restart: Waiting for container-init (gloabl-pid $nspid) to exit"
	wait $nspid;
	ret=$?

	$ECHO "Container-init (global-pid $nspid) exited, status $ret"

	# Use the configured mount point, as cleancgroup itself does
	if [ -d ${freezermountpoint}/$pid ]; then
		cleancgroup $pid
	fi

	cnt=$((cnt+1))
done
_______________________________________________
Containers mailing list
Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/containers

[Index of Archives]     [Cgroups]     [Netdev]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux