From: Sukadev Bhattiprolu <sukadev@xxxxxxxxxxxxxxxxxx>
Date: Thu, 28 Jan 2010 23:18:50 -0800
Subject: [PATCH 11/11] filelease2: Test C/R during lease-break-interval

When a process, P1 (lease-owner of a file), is notified of an impending
lease-break (due to a conflicting open of the file by process P2, the
lease-breaker), have P1 "flush" some data to the file.

Checkpoint processes P1 and P2 before the kernel forcibly revokes the
lease (i.e., checkpoint within the lease-break-interval).

Upon restart, verify that P2's open() of the file completes properly
and that the data flushed by P1 is visible to P2.

NOTE: This test passes even without support for checkpoint of file-locks.
      The reason for this is that the kernel terminates the lease of P1
      before notifying P1 about the lease-break. Since we checkpoint while
      P1 is in signal handler, neither P1 nor P2 has a lease and so the
      C/R passes.

      Hopefully the data-comparison checks will catch some errors during
      development. If not, we may need to redesign the test or drop it
      later.
Signed-off-by: Sukadev Bhattiprolu <sukadev@xxxxxxxxxxxxxxxxxx>
---
 fileio/Makefile          |    4 +-
 fileio/filelease2.c      |  458 ++++++++++++++++++++++++++++++++++++++++++++++
 fileio/run-filelease2.sh |    3 +
 3 files changed, 463 insertions(+), 2 deletions(-)
 create mode 100644 fileio/filelease2.c
 create mode 100755 fileio/run-filelease2.sh

diff --git a/fileio/Makefile b/fileio/Makefile
index bd28561..4cf401b 100644
--- a/fileio/Makefile
+++ b/fileio/Makefile
@@ -1,4 +1,4 @@
-targets = fileio1 filelock1 filelease1 fsetown1
+targets = fileio1 filelock1 fsetown1 filelease1 filelease2
 
 INCLUDE = ../libcrtest
 LIBCRTEST = ../libcrtest/common.o
@@ -9,4 +9,4 @@ all: $(LIBCRTEST) $(targets)
 
 clean:
 	rm -f $(targets)
-	rm -rf cr_fileio* cr_filelock1* cr_filelease1* cr_fsetown1*
+	rm -rf cr_fileio* cr_filelock1* cr_filelease[12]* cr_fsetown1*
diff --git a/fileio/filelease2.c b/fileio/filelease2.c
new file mode 100644
index 0000000..1a53617
--- /dev/null
+++ b/fileio/filelease2.c
@@ -0,0 +1,458 @@
+/* F_SETLEASE and F_GETLEASE need _GNU_SOURCE, defined before any include */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include "libcrtest.h"
+
+#define TEST_FILE1	"data.d/data.filelease2"
+#define LOG_FILE	"logs.d/log.filelease2"
+
+int event_fd1;
+
+static int test_fd;
+/* Updated from signal handlers and polled from the main flow */
+static volatile sig_atomic_t got_sigio;
+static volatile sig_atomic_t num_children;
+static int pid1, pid2;
+char test_data[256];
+
+/*
+ * Description:
+ *	Ensure that processes checkpointed when they are in the middle
+ *	of a lease-break, are restored correctly.
+ *
+ * Implementation:
+ *	Process P1 takes an F_WRLCK lease on a file.
+ *	Process P2 opens the file for write, which breaks P1's lease.
+ *	Process P1 gets a SIGIO signal about the pending lease-break and
+ *	writes some data to the file from its signal handler.
+ *	Initiate a checkpoint while P1 is still in the signal handler.
+ *	After checkpoint/restart, P1 drops the lease.
+ *	Ensure P2's open() completes and P2 can read the data P1 wrote.
+ */
+
+/* Return a printable name for lease type @type */
+char *get_lease_desc(int type)
+{
+	switch(type) {
+	case F_RDLCK: return "F_RDLCK";
+	case F_WRLCK: return "F_WRLCK";
+	case F_UNLCK: return "F_UNLCK";
+	default: return "Unknown !";
+	}
+}
+
+/*
+ * Set a lease of @type on @fd. On failure, log the error, send SIGUSR1
+ * to the parent and exit.
+ */
+void set_lease(int fd, int type)
+{
+	int rc;
+
+	fprintf(logfp, "%d: set_lease() called for fd %d, type %s\n",
+			getpid(), fd, get_lease_desc(type));
+
+	rc = fcntl(fd, F_SETLEASE, type);
+	if (rc < 0) {
+		int err = errno;	/* fprintf() below may change errno */
+
+		fprintf(logfp, "%d: set_lease(type %d):, ERROR %s\n",
+				getpid(), type, strerror(err));
+		if (err == EINVAL)
+			fprintf(logfp, "%d: Maybe the fs does not support "
+					"F_SETLEASE (eg: NFS)\n", getpid());
+		fflush(logfp);
+		kill(getppid(), SIGUSR1);
+		do_exit(1);
+	}
+}
+
+/*
+ * Check that the lease on @fd is of type @exp_type. Logs PASS or FAIL
+ * and exits with an error if the lease type is not the expected one.
+ */
+void test_lease(int fd, int exp_type)
+{
+	int rc;
+
+	rc = fcntl(fd, F_GETLEASE, 0);
+	if (rc < 0 || rc > 2) {
+		fprintf(logfp, "ERROR: fcntl(F_GETLEASE): expected %s, rc %d, "
+				"error %s\n", get_lease_desc(exp_type), rc,
+				strerror(errno));
+		do_exit(1);
+	}
+
+	if (rc != exp_type) {
+		fprintf(logfp, "%d: FAIL: Expected %s, actual %s\n", getpid(),
+				get_lease_desc(exp_type), get_lease_desc(rc));
+		do_exit(1);
+	}
+
+	fprintf(logfp, "%d: PASS: Expected %s, actual %s\n", getpid(),
+			get_lease_desc(exp_type), get_lease_desc(rc));
+}
+
+/* Install @action as the SA_SIGINFO handler for @sig; exit on failure */
+void set_signal_action(int sig, void (*action)(int, siginfo_t *, void *))
+{
+	int rc;
+	struct sigaction act;
+
+	memset(&act, 0, sizeof(act));
+	act.sa_sigaction = action;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_SIGINFO;
+
+	rc = sigaction(sig, &act, NULL);
+	if (rc < 0) {
+		fprintf(logfp, "%d: sigaction() sig %d failed, error %s\n",
+				getpid(), sig, strerror(errno));
+		do_exit(1);
+	}
+}
+
+/*
+ * SIGIO handler of the lease holder, called when the lease is being
+ * broken. Writes the test data, reports that we are ready for checkpoint,
+ * waits for checkpoint/restart to complete and then drops the lease.
+ *
+ * NOTE: stdio and sleep() are used in the handler on purpose: the test
+ *	 wants the checkpoint to be taken while we are still in here.
+ */
+static void iohandler(int sig, siginfo_t *info, void *arg)
+{
+	int rc;
+
+	got_sigio++;
+	fprintf(logfp, "%d: Got signal %d\n", getpid(), sig);
+	fflush(logfp);
+
+	/*
+	 * Before giving up the lease, write some data to the file
+	 */
+	rc = write(test_fd, test_data, sizeof(test_data));
+	if (rc != (int)sizeof(test_data)) {
+		fprintf(logfp, "%d: write() failed, n %d, error %s\n", getpid(),
+				rc, strerror(errno));
+		do_exit(1);
+	}
+
+	set_checkpoint_ready();
+	fprintf(logfp, "***** %d: Ready for checkpoint\n", getpid());
+	fflush(logfp);
+
+	/*
+	 * Wait for checkpoint/restart
+	 */
+	while(!test_done())
+		sleep(1);
+
+	fprintf(logfp, "%d: Test-done\n", getpid());
+	fflush(logfp);
+
+	/*
+	 * Checkpoint/restart is done, ensure we still have the lease
+	 * and then terminate the lease.
+	 *
+	 * TODO: Looks like the lease is revoked even before the handler
+	 *	 returns and hence the following test_lease() fails. This
+	 *	 behavior is not obvious from the description of F_SETLEASE
+	 *	 in the man page. Disable the test-lease() test for now
+	 *	 (it does not affect C/R).
+	 */
+	/* test_lease(test_fd, F_WRLCK); */
+
+	set_lease(test_fd, F_UNLCK);
+}
+
+/* Lease holder */
+int do_child1(int idx)
+{
+	int type = F_WRLCK;
+
+	fprintf(logfp, "%d: Setting lease to type %s\n", getpid(),
+			get_lease_desc(type));
+	fflush(logfp);
+
+	set_signal_action(SIGIO, iohandler);
+
+	test_fd = open(TEST_FILE1, O_RDWR);
+	if (test_fd < 0) {
+		fprintf(logfp, "%d: open(%s) failed, error %s\n", getpid(),
+				TEST_FILE1, strerror(errno));
+		do_exit(1);
+	}
+
+	set_lease(test_fd, type);
+
+	/*
+	 * Tell parent we are ready for checkpoint.
+	 */
+	notify_one_event(event_fd1);
+
+	while(!got_sigio)
+		sleep(1);
+
+	do_exit(0);
+	return 0;	/* not reached */
+}
+
+/* Lease breaker */
+int do_child2(int idx)
+{
+	int rc;
+	int fd;
+	char buf[sizeof(test_data)];
+
+	fprintf(logfp, "%d: Opening %s for write to break the lease\n",
+			getpid(), TEST_FILE1);
+	fflush(logfp);
+
+	/*
+	 * Tell parent we are (almost) ready for checkpoint.
+	 */
+	notify_one_event(event_fd1);
+
+	/*
+	 * To break the lease, open the file for write. This should block
+	 * until sibling drops the lease (after Checkpoint/restart is done).
+	 */
+	fd = open(TEST_FILE1, O_RDWR);
+	if (fd < 0) {
+		fprintf(logfp, "%d: open(%s) failed, error %s\n", getpid(),
+				TEST_FILE1, strerror(errno));
+		do_exit(1);
+	}
+
+	/*
+	 * If checkpoint is not done yet, then maybe the lease-break-interval
+	 * was not long enough for the wrapper scripts to complete checkpoint.
+	 * So fail the test.
+	 */
+	if (!test_checkpoint_done()) {
+		fprintf(logfp, "%d: Checkpoint not done yet ?\n", getpid());
+		do_exit(1);
+	}
+
+	rc = read(fd, buf, sizeof(buf));
+	if (rc != (int)sizeof(buf)) {
+		fprintf(logfp, "%d: read() failed, rc %d, error %s\n",
+				getpid(), rc, strerror(errno));
+		do_exit(1);
+	}
+
+	if (memcmp(test_data, buf, sizeof(test_data))) {
+		fprintf(logfp, "%d: FAILED: Data miscompare !!!\n", getpid());
+		do_exit(1);
+	}
+
+	do_exit(0);
+	return 0;	/* not reached */
+}
+
+/*
+ * Create the test file filled with zeroes and fill test_data[] with the
+ * non-zero pattern that the lease holder writes during the lease-break.
+ */
+void setup_test_data(void)
+{
+	int rc;
+	int fd;
+	char buf[256];
+
+	rc = unlink(TEST_FILE1);
+	if (rc < 0 && errno != ENOENT) {
+		fprintf(logfp, "ERROR: unlink(%s): %s\n", TEST_FILE1,
+				strerror(errno));
+		do_exit(1);
+	}
+
+	fd = open(TEST_FILE1, O_RDWR|O_CREAT|O_TRUNC, 0666);
+	if (fd < 0) {
+		fprintf(logfp, "ERROR: open(%s): %s\n", TEST_FILE1,
+				strerror(errno));
+		do_exit(1);
+	}
+
+	memset(buf, 0, sizeof(buf));
+	rc = write(fd, buf, sizeof(buf));
+	if (rc != (int)sizeof(buf)) {
+		fprintf(logfp, "ERROR: write(%s): rc %d, error %s\n",
+				TEST_FILE1, rc, strerror(errno));
+		do_exit(1);
+	}
+
+	memset(test_data, 1, sizeof(test_data));
+	close(fd);
+}
+
+/*
+ * Send @sig to the children created so far, then call do_wait(2).
+ * Only pids > 0 are signalled, so a failed fork() (-1) can never turn
+ * into kill(-1, sig).
+ */
+void kill_children(int sig)
+{
+	if (pid1 > 0)
+		kill(pid1, sig);
+	if (pid2 > 0)
+		kill(pid2, sig);
+	do_wait(2);
+}
+
+/*
+ * Fork a child to run @child_func(@idx) and wait for the child's
+ * notification on event_fd1. Returns the pid of the child; exits if
+ * fork() fails.
+ */
+int create_child(int idx, int (*child_func)(int))
+{
+	int rc;
+
+	/*
+	 * Count the child before fork(). Otherwise a SIGCHLD from a child
+	 * that dies early could find num_children == 0 and child_handler()
+	 * would report that the test passed.
+	 */
+	num_children++;
+
+	rc = fork();
+	if (rc == 0) {
+		(*child_func)(idx);
+		do_exit(1);	/* child_func() is not expected to return */
+	}
+
+	if (rc < 0) {
+		fprintf(logfp, "%d: fork() failed, error %s\n", getpid(),
+				strerror(errno));
+		kill_children(SIGKILL);
+		/* No child will notify us, so do not wait for events */
+		do_exit(1);
+	}
+
+	fprintf(logfp, "%d: Created child %d, pid %d\n", getpid(), idx, rc);
+	fflush(logfp);
+
+	wait_for_events(event_fd1, 1);
+
+	return rc;
+}
+
+/*
+ * Parent's handler for SIGUSR1 (sent by a child that failed to set its
+ * lease) and SIGCHLD. Reaps exited children; exits with 0 once all of
+ * them have exited cleanly and fails the test on any error.
+ */
+void child_handler(int sig, siginfo_t *info, void *arg)
+{
+	int rc;
+	int status;
+
+	fprintf(logfp, "%d: Got signal %d\n", getpid(), sig);
+	fflush(logfp);
+
+	if (sig == SIGUSR1)
+		goto failed;
+
+	while(num_children) {
+		rc = waitpid(-1, &status, WNOHANG);
+		if (rc < 0) {
+			fprintf(logfp, "%d: waitpid(): failed, rc %d, "
+					"error %s\n", getpid(), rc,
+					strerror(errno));
+			goto failed;
+		}
+
+		if (!rc)
+			break;
+
+		if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
+			num_children--;
+		else {
+			/*
+			 * Report the pid that waitpid() reaped; it need not
+			 * be the one in info->si_pid.
+			 */
+			print_exit_status(rc, status);
+			goto failed;
+		}
+	}
+
+	if (!num_children) {
+		fprintf(logfp, "Both children exited cleanly, test passed\n");
+		do_exit(0);
+	}
+	return;
+
+failed:
+	kill_children(SIGKILL);
+	fprintf(logfp, "Test FAILED\n");
+	do_exit(1);
+}
+
+int main(int argc, char *argv[])
+{
+	int i;
+
+	if (test_done()) {
+		printf("Remove %s before running test\n", TEST_DONE);
+		do_exit(1);
+	}
+
+	logfp = fopen(LOG_FILE, "w");
+	if (!logfp) {
+		perror("open() logfile");
+		do_exit(1);
+	}
+
+	printf("%s: Closing stdio fds and writing messages to %s\n",
+			argv[0], LOG_FILE);
+	/* Flush before fd 1 is closed below, or the message may be lost */
+	fflush(stdout);
+
+	for (i=0; i<100; i++) {
+		if (fileno(logfp) != i)
+			close(i);
+	}
+
+	setup_test_data();
+	event_fd1 = setup_notification();
+
+	/*
+	 * Before waiting for events below, ensure we will be notified
+	 * if a child encounters an error and/or exits prematurely.
+	 */
+	set_signal_action(SIGUSR1, child_handler);
+	set_signal_action(SIGCHLD, child_handler);
+
+	pid1 = create_child(0, do_child1);
+
+	pid2 = create_child(1, do_child2);
+
+	/*
+	 * NOTE: We have some guessing to do here. The notification from
+	 *	 the second child (in create_child()) just tells us that
+	 *	 the child is _about_ to open the file. Give it extra
+	 *	 time to actually block before enabling checkpoint.
+	 *
+	 *	 And this extra time must be less than the lease-break-window
+	 *	 (set by the test wrapper-script).
+	 */
+	sleep(10);
+
+	/*
+	 * Just wait for children to exit and exit from SIGCHLD handler.
+	 */
+	while(num_children)
+		pause();
+
+	do_exit(9);	/* should not get here */
+	return 9;
+}
diff --git a/fileio/run-filelease2.sh b/fileio/run-filelease2.sh
new file mode 100755
index 0000000..67de611
--- /dev/null
+++ b/fileio/run-filelease2.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+./run-fcntltests.sh filelease2
-- 
1.6.0.4

_______________________________________________
Containers mailing list
Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/containers