/*
 * > However if admins run a command such as sync or fsfreeze along side,
 * > fsync/fdatasync may return success even if writeback has failed.
 * > That could lead to data corruption.
 *
 * For reproducing the problem, compile the attached C program (iogen.c) and
 * run with 'runtest.sh' script in the next mail:
 *
 *   # gcc -o iogen iogen.c
 *   # bash ./runtest.sh
 *
 * "iogen" does write(), fsync() and checks if on-disk data is same as
 * application's buffer after successful fsync.
 * "runtest.sh" injects failure for the file being written by "iogen".
 * (You need to enable CONFIG_HWPOISON_INJECT=m for the memory error
 * injection to work.)
 *
 * Without the patch, fsync returns success even though data is not on disk.
 *
 *   TEST: ext4 / ioerr / sync-command
 *   (iogen): inject
 *   (admin): Injecting I/O error
 *   (admin): Calling sync(2)
 *   (iogen): remove
 *   FAIL: corruption!
 *   DIFF 00000200: de de de de de de de de | 00 00 00 00 00 00 00 00
 *   ...
 *
 * With the patch, fsync detects error correctly.
 *
 *   TEST: ext4 / ioerr / sync-command
 *   (iogen): inject
 *   (admin): Injecting I/O error
 *   (admin): Calling sync(2)
 *   INFO: App fsync: Input/output error
 *   (iogen): remove
 *   PASS: detected error right
 *   (iogen): end
 *
 * -- cut here --
 */
#define _GNU_SOURCE	/* needed for O_DIRECT */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

static unsigned char *app_buf;		/* data the application believes it wrote */
static unsigned char *ondisk_data;	/* aligned buffer filled via the O_DIRECT fd */
static char *testfile;			/* path of the file under test (argv[1]) */
static size_t buflen;			/* number of bytes written/verified (argv[2]) */
static int fd;				/* buffered read/write descriptor */
static int rfd;				/* O_DIRECT read-only descriptor for verification */

/*
 * Print every 8-byte group in which buf1 and buf2 differ, as
 * "DIFF <offset>: <buf1 bytes>  | <buf2 bytes>" on stderr.
 * The last group may be shorter than 8 bytes when len is not a
 * multiple of 8; only bytes inside [0, len) are compared and printed.
 */
static void dumpdiff(unsigned char *buf1, unsigned char *buf2, int len)
{
	int i, j;

	for (i = 0; i < len; i += 8) {
		/* clamp the group so neither memcmp nor the dump runs past len */
		int chunk = (len - i < 8) ? len - i : 8;

		if (!memcmp(&buf1[i], &buf2[i], (size_t)chunk))
			continue;

		fprintf(stderr, "DIFF %08x: ", (unsigned int)i);
		for (j = 0; j < chunk; j++)
			fprintf(stderr, "%02x ", buf1[i + j]);
		fprintf(stderr, " | ");
		for (j = 0; j < chunk; j++)
			fprintf(stderr, "%02x ", buf2[i + j]);
		fprintf(stderr, "\n");
	}
}

/*
 * Announce a test phase: log "(iogen): <str>" on stderr (when str is
 * non-NULL), emit a single newline on stdout, then sleep one second
 * before continuing.
 */
static void notify_injector(const char *str)
{
	if (str)
		fprintf(stderr, "(iogen): %s\n", str);
	/* exactly one byte: a length of 2 would also send the string's NUL */
	if (write(STDOUT_FILENO, "\n", 1) != 1)
		perror("????: App notify");
	sleep(1);
}

/* Open the test file twice: read/write buffered, and read-only O_DIRECT. */
static void open_fds(void)
{
	fd = open(testfile, O_RDWR);
	if (fd < 0) {
		perror("????: App open");
		exit(1);
	}

	rfd = open(testfile, O_RDONLY|O_DIRECT); /* for verification */
	if (rfd < 0) {
		perror("????: App open rfd");
		exit(1);
	}
}

/* Start from a clean state: flush the file, then read it through fd. */
static void init_fd_status(void)
{
	ssize_t n;

	if (fsync(fd)) { /* flush and clean */
		perror("????: App fsync0");
		exit(1);
	}

	n = pread(fd, app_buf, buflen, 0); /* stage onto cache */
	if (n < 0 || (size_t)n != buflen) {
		perror("????: App read1");
		exit(1);
	}
}

/* Close both descriptors, reporting (but not aborting on) close errors. */
static void close_fds(void)
{
	if (close(rfd))
		perror("????: App close read fd");
	if (close(fd))
		perror("????: App close write fd");
}

/* Fill app_buf with the byte value cnt and write it at offset 0 via fd. */
static void write_data(int cnt)
{
	ssize_t n;

	memset(app_buf, cnt, buflen);
	n = pwrite(fd, app_buf, buflen, 0);
	if (n < 0 || (size_t)n != buflen)
		perror("????: App write1");
}

/*
 * fsync the file twice and return the result of the FIRST call.
 * A failure of the first call is logged as "INFO"; a failure of the
 * second call is logged as "????" and does not affect the return value.
 */
static int sync_data(void)
{
	int first, redo;

	first = fsync(fd);
	if (first)
		perror("INFO: App fsync");

	redo = fsync(fd);
	if (redo)
		perror("????: App fsync (redo)");

	return first;
}

/* Read buflen bytes at offset 0 through the O_DIRECT fd, retrying once. */
static void read_data_direct(void)
{
	ssize_t n;

	n = pread(rfd, ondisk_data, buflen, 0);
	if (n >= 0 && (size_t)n == buflen)
		return;

	perror("????: App direct read");
	n = pread(rfd, ondisk_data, buflen, 0);
	if (n < 0 || (size_t)n != buflen)
		perror("FAIL: App direct read (retry)");
}

/*
 * Compare app_buf with ondisk_data and print the verdict:
 *   differ + fsync_result == 0  -> FAIL (plus a dump of the differences)
 *   differ + fsync_result != 0  -> PASS (the error was reported)
 *   same   + fsync_result == 0  -> PASS
 *   same   + fsync_result != 0  -> "????" (fsync failed but data matches)
 */
static void check_data(int fsync_result)
{
	int differs = memcmp(app_buf, ondisk_data, buflen) != 0;

	if (differs) {
		/* data is different */
		if (fsync_result == 0) {
			fprintf(stderr, "FAIL: corruption!\n");
			dumpdiff(app_buf, ondisk_data, (int)buflen);
		} else {
			fprintf(stderr, "PASS: detected error right\n");
		}
	} else {
		/* data is same */
		if (fsync_result == 0)
			fprintf(stderr, "PASS: no error, data is ok\n");
		else
			fprintf(stderr, "????: sync failed, data is ok\n");
	}
}

/*
 * write-fsync-read without error injection: every step is expected to
 * succeed and any failure is reported with a "BUG :" prefix.
 */
static void cleanup_data(int cnt)
{
	ssize_t n;

	memset(app_buf, cnt, buflen);
	n = pwrite(fd, app_buf, buflen, 0);
	if (n < 0 || (size_t)n != buflen)
		perror("BUG : App write (w/o failure)");

	if (fsync(fd))
		perror("BUG : App fsync (w/o failure)");

	n = pread(rfd, ondisk_data, buflen, 0);
	if (n < 0 || (size_t)n != buflen)
		perror("BUG : App read (w/o failure)");

	if (memcmp(app_buf, ondisk_data, buflen))
		fprintf(stderr, "BUG : memcmp failed\n");
}

/*
 * Do this:
 * 1) write
 * 2) inject failure
 * 3) fsync (should return error)
 * 4) remove failure
 * 5) check on-disk data (using direct read)
 */
static void runtest(void)
{
	int fsync_result;

	notify_injector("start");
	open_fds();
	init_fd_status();

	write_data(0xde);
	notify_injector("inject");
	fsync_result = sync_data();
	notify_injector("remove");

	/* re-read and compare */
	read_data_direct();
	check_data(fsync_result);

	cleanup_data(0);
	close_fds();
	notify_injector("end");
}

/*
 * Usage: iogen <testfile> <buflen>
 *
 * <buflen> must be a positive decimal integer no larger than INT_MAX
 * (the helpers pass lengths around as int). Exits with status 1 on bad
 * arguments or allocation failure.
 */
int main(int argc, char **argv)
{
	unsigned long len;
	char *end;

	if (argc < 3) {
		fprintf(stderr, "usage: %s <testfile> <buflen>\n",
			argc > 0 ? argv[0] : "iogen");
		exit(1);
	}

	testfile = argv[1];

	errno = 0;
	len = strtoul(argv[2], &end, 10);
	if (errno || end == argv[2] || *end != '\0' ||
	    len == 0 || len > INT_MAX) {
		fprintf(stderr, "iogen: invalid buffer length '%s'\n", argv[2]);
		exit(1);
	}
	buflen = (size_t)len;

	app_buf = malloc(buflen);
	if (!app_buf)
		exit(1);
	/* the verification buffer is allocated 4096-byte aligned for the O_DIRECT fd */
	if (posix_memalign((void **) &ondisk_data, 4096, buflen))
		exit(1);

	runtest();

	free(app_buf);
	free(ondisk_data);
	return 0;
}
/*
 * --
 * To unsubscribe, send a message with 'unsubscribe linux-mm' in
 * the body to majordomo@xxxxxxxxx. For more info on Linux MM,
 * see: http://www.linux-mm.org/ .
 * Don't email: <a href
 */