On Mon, Mar 16, 2015 at 5:07 PM, Andreas Dilger <adilger@xxxxxxxxx> wrote: > >> On Mar 16, 2015, at 12:34 PM, Milosz Tanski <milosz@xxxxxxxxx> wrote: >> >> preadv2 is a new syscall introduced that is like preadv2 but with flag > > Sorry, "preadv2 ... is like preadv2"? I already have a fix for that in my branch. Robert Elliott was the first one to notice that (via private email). > >> argument. The first use case of this is to let us add a flag to perform a >> non-blocking file using the page cache. > > This is also missing a Signed-off-by: line. Good catch. I'm going to fix the above two issues, add a pre-test check for preadv2 (I just noticed it's missing) and I'm going to resend this patch. > > Cheers, Andreas >> --- >> src/Makefile | 2 +- >> src/preadv2-pwritev2.h | 52 +++++++++++++++++ >> src/preadv2.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++ >> tests/generic/067 | 85 ++++++++++++++++++++++++++++ >> tests/generic/067.out | 9 +++ >> tests/generic/group | 1 + >> 6 files changed, 298 insertions(+), 1 deletion(-) >> create mode 100644 src/preadv2-pwritev2.h >> create mode 100644 src/preadv2.c >> create mode 100755 tests/generic/067 >> create mode 100644 tests/generic/067.out >> >> diff --git a/src/Makefile b/src/Makefile >> index 4781736..f7d3681 100644 >> --- a/src/Makefile >> +++ b/src/Makefile >> @@ -19,7 +19,7 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ >> bulkstat_unlink_test_modified t_dir_offset t_futimens t_immutable \ >> stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \ >> seek_copy_test t_readdir_1 t_readdir_2 fsync-tester nsexec cloner \ >> - renameat2 t_getcwd e4compact >> + renameat2 t_getcwd e4compact preadv2 >> >> SUBDIRS = >> >> diff --git a/src/preadv2-pwritev2.h b/src/preadv2-pwritev2.h >> new file mode 100644 >> index 0000000..786e524 >> --- /dev/null >> +++ b/src/preadv2-pwritev2.h >> @@ -0,0 +1,52 @@ >> +#ifndef PREADV2_PWRITEV2_H >> +#define PREADV2_PWRITEV2_H >> + >> +#include 
"global.h" >> + >> +#ifndef HAVE_PREADV2 >> +#include <sys/syscall.h> >> + >> +#if !defined(SYS_preadv2) && defined(__x86_64__) >> +#define SYS_preadv2 323 >> +#define SYS_pwritev2 324 >> +#endif >> + >> +#if !defined (SYS_preadv2) && defined(__i386__) >> +#define SYS_preadv2 359 >> +#define SYS_pwritev2 360 >> +#endif >> + >> +/* LO_HI_LONG taken from glibc */ >> +#define LO_HI_LONG(val) \ >> + (off_t) val, \ >> + (off_t) ((((uint64_t) (val)) >> (sizeof (long) * 4)) >> (sizeof (long) * 4)) >> + >> +static inline ssize_t >> +preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) >> +{ >> +#ifdef SYS_preadv2 >> + return syscall(SYS_preadv2, fd, iov, iovcnt, LO_HI_LONG(offset), >> + flags); >> +#else >> + errno = ENOSYS; >> + return -1; >> +#endif >> +} >> + >> +static inline ssize_t >> +pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) >> +{ >> +#ifdef SYS_pwritev2 >> + return syscall(SYS_pwritev2, fd, iov, iovcnt, LO_HI_LONG(offset), >> + flags); >> +#else >> + errno = ENOSYS; >> + return -1; >> +#endif >> +} >> + >> +#define RWF_NONBLOCK 0x00000001 >> +#define RWF_DSYNC 0x00000002 >> + >> +#endif /* HAVE_PREADV2 */ >> +#endif /* PREADV2_PWRITEV2_H */ >> diff --git a/src/preadv2.c b/src/preadv2.c >> new file mode 100644 >> index 0000000..a4f89b5 >> --- /dev/null >> +++ b/src/preadv2.c >> @@ -0,0 +1,150 @@ >> +/* >> + * Copyright 2014 Red Hat, Inc. All rights reserved. >> + * Copyright 2015 Milosz Tanski >> + * >> + * License: GPLv2 >> + * >> + */ >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <getopt.h> >> +#include <string.h> >> +#include <unistd.h> >> +#include <errno.h> >> +#include <linux/fs.h> /* for RWF_NONBLOCK */ >> + >> +/* >> + * Once preadv2 is part of the upstream kernel and there is glibc support for >> + * it. We'll add support for preadv2 to xfs_io and this will be unnecessary. >> + */ >> +#include "preadv2-pwritev2.h" >> + >> +/* >> + * Test to see if the system call is implemented. 
If -EINVAL or -ENOSYS >> + * are returned, consider the call unimplemented. All other errors are >> + * considered success. >> + * >> + * Returns: 0 if the system call is implemented, 1 if the system call >> + * is not implemented. >> + */ >> +int >> +preadv2_check(int fd) >> +{ >> + int ret; >> + struct iovec iov[] = {}; >> + >> + /* 0 length read; just check iof the syscall is there. >> + * >> + * - 0 length iovec >> + * - Position is -1 (eg. use current position) >> + */ >> + ret = preadv2(fd, iov, 0, -1, 0); >> + >> + if (ret < 0) { >> + if (errno == ENOSYS || errno == EINVAL) >> + return 1; >> + } >> + >> + return 0; >> +} >> + >> +void >> +usage(char *prog) >> +{ >> + fprintf(stderr, "Usage: %s [-v] [-ctdw] [-n] -p POS -l LEN <filename>\n\n", prog); >> + fprintf(stderr, "General arguments:\n"); >> + fprintf(stderr, " -v Verify that the syscall is supported and quit:\n"); >> + fprintf(stderr, "\n"); >> + fprintf(stderr, "Open arguments:\n"); >> + fprintf(stderr, " -c Open file with O_CREAT flag\n"); >> + fprintf(stderr, " -t Open file with O_TRUNC flag\n"); >> + fprintf(stderr, " -d Open file with O_DIRECT flag\n"); >> + fprintf(stderr, " -w Open file with O_RDWR flag vs O_RDONLY (default)\n"); >> + fprintf(stderr, "\n"); >> + fprintf(stderr, "preadv2 arguments:\n"); >> + fprintf(stderr, " -n use RWF_NONBLOCK when performing read\n"); >> + fprintf(stderr, " -p POS offset file to read at\n"); >> + fprintf(stderr, " -l LEN length of file data to read\n"); >> + fprintf(stderr, "\n"); >> + fflush(stderr); >> +} >> + >> +int >> +main(int argc, char **argv) >> +{ >> + int fd; >> + int ret; >> + int opt; >> + off_t pos = -1; >> + struct iovec iov = { NULL, 0 }; >> + int o_flags = 0; >> + int r_flags = 0; >> + char *filename; >> + >> + while ((opt = getopt(argc, argv, "vctdwnp:l:")) != -1) { >> + switch (opt) { >> + case 'v': >> + /* >> + * See if we were called to check for availability of >> + * sys_preadv2. 
STDIN is okay, since we do a zero >> + * length read (see man 2 read). >> + */ >> + ret = preadv2_check(STDIN_FILENO); >> + exit(ret); >> + case 'c': >> + o_flags |= O_CREAT; >> + break; >> + case 't': >> + o_flags |= O_TRUNC; >> + break; >> + case 'd': >> + o_flags |= O_DIRECT; >> + break; >> + case 'w': >> + o_flags |= O_RDWR; >> + break; >> + case 'n': >> + r_flags |= RWF_NONBLOCK; >> + break; >> + case 'p': >> + pos = atoll(optarg); >> + break; >> + case 'l': >> + iov.iov_len = atoll(optarg); >> + break; >> + default: >> + fprintf(stderr, "invalid option: %c\n", opt); >> + usage(argv[0]); >> + exit(1); >> + } >> + } >> + >> + if (optind >= argc) { >> + usage(argv[0]); >> + exit(1); >> + } >> + >> + if ((o_flags & O_RDWR) != O_RDWR) >> + o_flags |= O_RDONLY; >> + >> + if ((iov.iov_base = malloc(iov.iov_len)) == NULL) { >> + perror("malloc"); >> + exit(1); >> + } >> + >> + filename = argv[optind]; >> + fd = open(filename, o_flags); >> + >> + if (fd < 0) { >> + perror("open"); >> + exit(1); >> + } >> + >> + if ((ret = preadv2(fd, &iov, 1, pos, r_flags)) == -1) { >> + perror("preadv2"); >> + exit(ret); >> + } >> + >> + free(iov.iov_base); >> + exit(0); >> +} >> diff --git a/tests/generic/067 b/tests/generic/067 >> new file mode 100755 >> index 0000000..4cc58f8 >> --- /dev/null >> +++ b/tests/generic/067 >> @@ -0,0 +1,85 @@ >> +#! /bin/bash >> +# FS QA Test No. 067 >> +# >> +# Test for the preadv2 syscall >> +# >> +#----------------------------------------------------------------------- >> +# Copyright (c) 2015 Milosz Tanski <mtanski@xxxxxxxxx>. All Rights Reserved. >> +# >> +# This program is free software; you can redistribute it and/or >> +# modify it under the terms of the GNU General Public License as >> +# published by the Free Software Foundation. >> +# >> +# This program is distributed in the hope that it would be useful, >> +# but WITHOUT ANY WARRANTY; without even the implied warranty of >> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the >> +# GNU General Public License for more details. >> +# >> +# You should have received a copy of the GNU General Public License >> +# along with this program; if not, write the Free Software Foundation, >> +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA >> +#----------------------------------------------------------------------- >> +# >> + >> +seq=`basename $0` >> +seqres=$RESULT_DIR/$seq >> +echo "QA output created by $seq" >> + >> +here=`pwd` >> +tmp=/tmp/$$ >> +status=1 # failure is the default! >> +trap "_cleanup; exit \$status" 0 1 2 3 15 >> + >> +_cleanup() >> +{ >> + cd / >> + rm -f $tmp.* >> +} >> + >> +# get standard environment, filters and checks >> +. ./common/rc >> +. ./common/filter >> + >> +# real QA test starts here >> + >> +# Modify as appropriate. >> +_supported_fs generic >> +_supported_os Linux >> +_require_test >> + >> +# test file we'll be using >> +file=$SCRATCH_MNT/067.preadv2.$$ >> + >> +# Create a file: >> +# two regions of data and a hole in the middle >> +# use O_DIRECT so it's not in the page cache >> +echo "create file" >> +$XFS_IO_PROG -t -f -d \ >> + -c "pwrite 0 1024" \ >> + -c "pwrite 2048 1024" \ >> + $file > /dev/null >> + >> +# Make sure it returns EAGAIN on uncached data >> +echo "uncached" >> +$here/src/preadv2 -n -p 0 -l 1024 $file >> + >> +# Make sure we read in the whole file, after that RWF_NONBLOCK should return us all the data >> +echo "cached" >> +$XFS_IO_PROG -f $file -c "pread 0 4096" $file > /dev/null >> +$here/src/preadv2 -n -p 0 -l 1024 $file >> + >> +# O_DIRECT and RWF_NONBLOCK should return EAGAIN always >> +echo "O_DIRECT" >> +$here/src/preadv2 -d -n -p 0 -l 1024 $file >> + >> +# Holes do not block >> +echo "holes" >> +$here/src/preadv2 -n -p 2048 -l 1024 $file >> + >> +# EOF behavior (no EAGAIN) >> +echo "EOF" >> +$here/src/preadv2 -n -p 3072 -l 1 $file >> + >> +# success, all done >> +status=0 >> +exit >> diff --git a/tests/generic/067.out b/tests/generic/067.out >> new file mode 
100644 >> index 0000000..6e3740f >> --- /dev/null >> +++ b/tests/generic/067.out >> @@ -0,0 +1,9 @@ >> +QA output created by 067 >> +create file >> +uncached >> +preadv2: Resource temporarily unavailable >> +cached >> +O_DIRECT >> +preadv2: Resource temporarily unavailable >> +holes >> +EOF >> diff --git a/tests/generic/group b/tests/generic/group >> index e5db772..91c5870 100644 >> --- a/tests/generic/group >> +++ b/tests/generic/group >> @@ -69,6 +69,7 @@ >> 064 auto quick prealloc >> 065 metadata auto quick >> 066 metadata auto quick >> +067 auto quick rw >> 068 other auto freeze dangerous stress >> 069 rw udf auto quick >> 070 attr udf auto quick stress >> -- >> 1.9.1 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in >> the body of a message to majordomo@xxxxxxxxxxxxxxx >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > > Cheers, Andreas > > > > > -- Milosz Tanski CTO 16 East 34th Street, 15th floor New York, NY 10016 p: 646-253-9055 e: milosz@xxxxxxxxx -- To unsubscribe from this list: send the line "unsubscribe linux-api" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html