On 05/27/2010 11:06 AM, liubo wrote:
> On 05/22/2010 01:03 AM, Josef Bacik wrote:
>
>> In order for AIO to work, we need to implement aio_write. This patch converts
>> our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and
>> nothing broke, and the AIO stuff magically started working. Thanks,
>>
>> Signed-off-by: Josef Bacik <josef@xxxxxxxxxx>
>>
>>
>
> Hi, Josef,
>
> I've tested your patch (May 22) with my tools, and one case triggered a bug
> which made the writev operation hang; more information follows.
>
> - Steps to trigger it:
> # mount /dev/sda8 /home/btrfsdisk -o nodatacow
> # gcc direct-io.c -o direct-io
> # ./direct-io O_DIRECT writev /home/btrfsdisk/testrw 4M
>
> then on another tty, after "dmesg"...
>
> [snip]
> device fsid f44b0879c75c0e99-1d4b28f2d5c503ae devid 1 transid 11177 /dev/sda8
> btrfs: setting nodatacow
> INFO: task direct-io:1399 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> direct-io D 0000000000000003 0 1399 1341 0x00000000
> ffff880137c379c8 0000000000000082 ffff880137c379d8 ffffffff00000000
> ffff880137c37fd8 ffff880139730000 0000000000015440 ffff880137c37fd8
> 0000000000015440 0000000000015440 0000000000015440 0000000000015440
> Call Trace:
> [<ffffffffa0119d4a>] wait_extent_bit+0xe3/0x163 [btrfs]
> [<ffffffff8106651f>] ? autoremove_wake_function+0x0/0x39
> [<ffffffffa0119e47>] lock_extent_bits+0x7d/0xa8 [btrfs]
> [<ffffffffa0119e88>] lock_extent+0x16/0x18 [btrfs]
> [<ffffffffa01025ce>] btrfs_direct_IO+0x8e/0x1be [btrfs]
> [<ffffffff810c7301>] generic_file_direct_write+0xed/0x16d
> [<ffffffffa010bb91>] btrfs_file_aio_write+0x2af/0x8d2 [btrfs]
> [<ffffffff81100eae>] ? try_get_mem_cgroup_from_mm+0x39/0x49
> [<ffffffffa010b8e2>] ? btrfs_file_aio_write+0x0/0x8d2 [btrfs]
> [<ffffffff811063ed>] do_sync_readv_writev+0xc1/0x100
> [<ffffffff81106120>] ? might_fault+0x21/0x23
> [<ffffffff81106151>] ? copy_from_user+0x2f/0x31
> [<ffffffff811c90ab>] ? security_file_permission+0x16/0x18
> [<ffffffff81107145>] do_readv_writev+0xa7/0x127
> [<ffffffff81107208>] vfs_writev+0x43/0x4e
> [<ffffffff811072f8>] sys_writev+0x4a/0x93
> [<ffffffff81009c32>] system_call_fastpath+0x16/0x1b
>
>
> So, can you figure out if anything in your patch leads to the bug?
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
>

Sorry, I forgot the attachment. Please find it below.

Thanks,
- Liubo
/*
 * Copyright (c) 2010 FUJITSU LIMITED
 *
 * This program is free software;  you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY;  without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program;  if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Author: Liu Bo <liubo2009@xxxxxxxxxxxxxx>
 */

/*
 * Feature-test macros; they must precede every #include.
 * _GNU_SOURCE makes glibc's <fcntl.h> expose the real O_DIRECT value for the
 * target architecture; _FILE_OFFSET_BITS=64 lets the 4G / 4G+1 cases work
 * where off_t would otherwise be 32 bits wide.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <string.h>
#include <malloc.h>

#define BUFFERSIZE 8192		/* size of every I/O buffer, in bytes */
#define IOVCNT 2		/* number of buffers per readv/writev call */

/* Last-resort fallback for C libraries that do not provide O_DIRECT. */
#ifndef O_DIRECT
#define O_DIRECT 00040000	/* direct disk access hint */
#endif

static int rw_test(char *type, int fd, int nullfd, int zerofd, char *size);

/*
 * Open 'path' with 'flags'. On failure, report the path and the reason on
 * stderr and terminate the program with exit status 1.
 */
static int open_or_die(const char *path, int flags)
{
	int fd = open(path, flags);

	if (fd < 0) {
		fprintf(stderr, "Cannot open %s: %s\n\n", path,
			strerror(errno));
		exit(1);
	}
	return fd;
}

/*
 * Direct-io Test
 *
 * Return Value:
 *	0: succeed
 *	1: fail
 *
 * Parse command line arguments
 * flag:
 *	O_DIRECT, O_DIRECT | O_SYNC
 *	(any other string leaves the open flags at plain O_RDWR)
 * rwtype:
 *	read, readv, write, writev
 * test file name:
 *	testrw
 * test file size:
 *	0 | 1K | 4K | 4M | 4G | 4G+1
 */
int main(int argc, char *argv[])
{
	int flag = O_RDWR;
	int fd, nullfd, zerofd;
	int ret;

	/* argv[0] plus exactly four user arguments. */
	if (argc != 5) {
		fprintf(stderr, "4 arguments are needed!\n");
		exit(1);
	}

	if (!strcmp(argv[1], "O_DIRECT"))
		flag |= O_DIRECT;
	else if (!strcmp(argv[1], "O_DIRECT | O_SYNC") ||
		 !strcmp(argv[1], "O_DIRECT|O_SYNC"))
		flag |= O_DIRECT | O_SYNC;

	fd = open_or_die(argv[3], flag);
	nullfd = open_or_die("/dev/null", O_WRONLY);
	zerofd = open_or_die("/dev/zero", O_RDONLY);

	ret = rw_test(argv[2], fd, nullfd, zerofd, argv[4]);

	close(fd);
	close(nullfd);
	close(zerofd);

	return ret != 0 ? 1 : 0;
}

/*
 * Translate a size keyword into a byte count.
 * Recognized keywords: 1K, 4K, 4M, 4G, 4G+1. Anything else (including "0")
 * yields 0. long long is used so that 4G does not overflow a 32-bit long.
 */
static long long size_to_bytes(const char *size)
{
	static const struct {
		const char *name;
		long long bytes;
	} sizes[] = {
		{ "1K",   1024LL },
		{ "4K",   4LL * 1024 },
		{ "4M",   4LL * 1024 * 1024 },
		{ "4G",   4LL * 1024 * 1024 * 1024 },
		{ "4G+1", 4LL * 1024 * 1024 * 1024 + 1 },
	};
	size_t i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		if (!strcmp(size, sizes[i].name))
			return sizes[i].bytes;
	}
	return 0;
}

/*
 * Allocate one zero-filled, page-aligned buffer of BUFFERSIZE bytes.
 * Terminates the program with exit status 1 if the allocation fails.
 * The caller releases the buffer with free().
 */
static char *alloc_buffer(void)
{
	char *buf = valloc(BUFFERSIZE);

	if (!buf) {
		perror("valloc error");
		exit(1);
	}
	memset(buf, 0, BUFFERSIZE);
	return buf;
}

/* Point each of the IOVCNT entries of 'iov' at a freshly allocated buffer. */
static void iov_setup(struct iovec *iov)
{
	int i;

	for (i = 0; i < IOVCNT; i++) {
		iov[i].iov_base = alloc_buffer();
		iov[i].iov_len = BUFFERSIZE;
	}
}

/* Release the buffers attached by iov_setup(). */
static void iov_release(struct iovec *iov)
{
	int i;

	for (i = 0; i < IOVCNT; i++)
		free(iov[i].iov_base);
}

/* Copy 'fd' to 'nullfd' with read()/write() until end of file. */
static int test_read(int fd, int nullfd)
{
	char *buf = alloc_buffer();
	ssize_t n;
	int ret = 0;

	while ((n = read(fd, buf, BUFFERSIZE)) > 0) {
		if (write(nullfd, buf, n) != n) {
			fprintf(stderr, "write error! : %s\n",
				strerror(errno));
			ret = 1;
			goto out;
		}
	}

	if (n < 0) {
		fprintf(stderr, "read error : %s\n\n", strerror(errno));
		ret = -1;
	}
out:
	free(buf);
	return ret;
}

/*
 * Copy zeroes from 'zerofd' to 'fd' with read()/write(), one BUFFERSIZE
 * chunk at a time, until at least 'limit' bytes have been written.
 */
static int test_write(int fd, int zerofd, long long limit)
{
	char *buf = alloc_buffer();
	long long count = 0;
	ssize_t n;
	int ret = 0;

	while ((n = read(zerofd, buf, BUFFERSIZE)) > 0) {
		if (write(fd, buf, n) != n) {
			fprintf(stderr, "write error! : %s\n",
				strerror(errno));
			ret = 1;
			goto out;
		}
		count += n;
		if (count >= limit)
			break;
	}

	/* The source ran dry (or failed) before the target size was reached. */
	if (count < limit) {
		fprintf(stderr, "read error : %s\n\n", strerror(errno));
		ret = -1;
	}
out:
	free(buf);
	return ret;
}

/* Copy 'fd' to 'nullfd' with readv()/writev() until end of file. */
static int test_readv(int fd, int nullfd)
{
	struct iovec iov[IOVCNT];
	ssize_t n;
	int ret = 0;

	iov_setup(iov);

	while ((n = readv(fd, iov, IOVCNT)) > 0) {
		/* Full buffers are always passed on, even after a short readv. */
		if (writev(nullfd, iov, IOVCNT) != IOVCNT * BUFFERSIZE) {
			fprintf(stderr, "writev error! : %s\n",
				strerror(errno));
			ret = 1;
			goto out;
		}
	}

	if (n < 0) {
		fprintf(stderr, "readv error : %s\n\n", strerror(errno));
		ret = -1;
	}
out:
	iov_release(iov);
	return ret;
}

/*
 * Copy zeroes from 'zerofd' to 'fd' with readv()/writev(), IOVCNT buffers
 * at a time, until at least 'limit' bytes have been transferred.
 */
static int test_writev(int fd, int zerofd, long long limit)
{
	struct iovec iov[IOVCNT];
	long long count = 0;
	ssize_t n;
	int ret = 0;

	iov_setup(iov);

	while ((n = readv(zerofd, iov, IOVCNT)) > 0) {
		if (writev(fd, iov, IOVCNT) != IOVCNT * BUFFERSIZE) {
			fprintf(stderr, "writev error! : %s\n",
				strerror(errno));
			ret = 1;
			goto out;
		}
		count += n;
		if (count >= limit)
			break;
	}

	/* The source ran dry (or failed) before the target size was reached. */
	if (count < limit) {
		fprintf(stderr, "readv error : %s\n\n", strerror(errno));
		ret = -1;
	}
out:
	iov_release(iov);
	return ret;
}

/*
 * rw_test is used to test read/write/readv/writev.
 *
 * Arguments:
 *	type: read/write/readv/writev
 *	fd: read/write file descriptor
 *	nullfd: write file descriptor bound to /dev/null
 *	zerofd: read file descriptor bound to /dev/zero
 *	size: 0 | 1K | 4K | 4M | 4G | 4G+1
 *
 * 'size' is only used by write/writev and acts as a lower bound: data is
 * written in whole chunks until at least that many bytes have gone out, so
 * an unrecognized size (including "0") results in exactly one chunk.
 *
 * Return value:
 *	0: succeed
 *	1: write error, or 'type' is not one of the four supported tests
 *	-1: read error
 */
static int rw_test(char *type, int fd, int nullfd, int zerofd, char *size)
{
	long long limit = size_to_bytes(size);

	if (!strcmp(type, "read"))
		return test_read(fd, nullfd);
	if (!strcmp(type, "write"))
		return test_write(fd, zerofd, limit);
	if (!strcmp(type, "readv"))
		return test_readv(fd, nullfd);
	if (!strcmp(type, "writev"))
		return test_writev(fd, zerofd, limit);

	/* Do not let a mistyped test name pass as a successful run. */
	fprintf(stderr, "unknown test type: %s\n", type);
	return 1;
}