Anand Avati <avati@xxxxxxxxxx> writes: > Implement ->direct_IO() method in aops. The ->direct_IO() method combines > the existing fuse_direct_read/fuse_direct_write methods to implement > O_DIRECT functionality. > > Reaching ->direct_IO() in the read path via generic_file_aio_read ensures > proper synchronization with page cache with its existing framework. > > Reaching ->direct_IO() in the write path via fuse_file_aio_write is made > to come via generic_file_direct_write() which makes it play nice with > the page cache w.r.t other mmap pages etc. > > On files marked 'direct_io' by the filesystem server, IO always follows > the fuse_direct_read/write path. There is no effect of fcntl(O_DIRECT) > and it always succeeds. > > On files not marked with 'direct_io' by the filesystem server, the IO > path depends on O_DIRECT flag by the application. This can be passed > at the time of open() as well as via fcntl(). > > Note that asynchronous O_DIRECT iocb jobs are completed synchronously > always (this has been the case with FUSE even before this patch) > > Signed-off-by: Anand Avati <avati@xxxxxxxxxx> > --- > > Tested with concurrent read and write DDs with oflag=direct and iflag=direct set > in a few writes and a few reads > > fs/fuse/dir.c | 3 - > fs/fuse/file.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++-------- > 2 files changed, 107 insertions(+), 20 deletions(-) > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > index 2066328..7e5dbd0 100644 > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -387,9 +387,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, > if (fc->no_create) > return -ENOSYS; > > - if (flags & O_DIRECT) > - return -EINVAL; > - > forget = fuse_alloc_forget(); > if (!forget) > return -ENOMEM; > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > index 4a199fd..0f426b5 100644 > --- a/fs/fuse/file.c > +++ b/fs/fuse/file.c > @@ -194,10 +194,6 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) > struct 
fuse_conn *fc = get_fuse_conn(inode); > int err; > > - /* VFS checks this, but only _after_ ->open() */ > - if (file->f_flags & O_DIRECT) > - return -EINVAL; > - > err = generic_file_open(inode, file); > if (err) > return err; > @@ -932,17 +928,23 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, > struct file *file = iocb->ki_filp; > struct address_space *mapping = file->f_mapping; > size_t count = 0; > + size_t ocount = 0; > ssize_t written = 0; > + ssize_t written_buffered = 0; > struct inode *inode = mapping->host; > ssize_t err; > struct iov_iter i; > + loff_t endbyte = 0; > > WARN_ON(iocb->ki_pos != pos); > > - err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); > + ocount = 0; > + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); > if (err) > return err; > > + count = ocount; > + > mutex_lock(&inode->i_mutex); > vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); > > @@ -962,11 +964,36 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, > > file_update_time(file); > > - iov_iter_init(&i, iov, nr_segs, count, 0); > - written = fuse_perform_write(file, mapping, &i, pos); > - if (written >= 0) > - iocb->ki_pos = pos + written; > + if (file->f_flags & O_DIRECT) { > + written = generic_file_direct_write(iocb, iov, &nr_segs, > + pos, &iocb->ki_pos, > + count, ocount); > + if (written < 0 || written == count) > + goto out; > + > + pos += written; > + count -= written; > + > + iov_iter_init(&i, iov, nr_segs, count, 0);

This codepath is still untested (hint: the last argument to iov_iter_init() is wrong). Yes, it's not easy to test, but you can do something like passing only a fraction of count to generic_file_direct_write(), so that one half is written through direct IO and the other half through buffered IO.

Thanks,
Miklos

> + written_buffered = fuse_perform_write(file, mapping, &i, pos); > + if (written_buffered < 0) { > + err = written_buffered; > + goto out; > + } > + endbyte = pos + written_buffered - 1; > > + err = filemap_write_and_wait_range(file->f_mapping, pos, > + endbyte); > + if (err) > + goto out; > + written += written_buffered; > + iocb->ki_pos = pos + written_buffered; > + } else { > + iov_iter_init(&i, iov, nr_segs, count, 0); > + written = fuse_perform_write(file, mapping, &i, pos); > + if (written >= 0) > + iocb->ki_pos = pos + written; > + } > out: > current->backing_dev_info = NULL; > mutex_unlock(&inode->i_mutex); > @@ -1101,30 +1128,41 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, > return res; > } > > -static ssize_t fuse_direct_write(struct file *file, const char __user *buf, > - size_t count, loff_t *ppos) > +static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, > + size_t count, loff_t *ppos) > { > struct inode *inode = file->f_path.dentry->d_inode; > ssize_t res; > > - if (is_bad_inode(inode)) > - return -EIO; > - > - /* Don't allow parallel writes to the same file */ > - mutex_lock(&inode->i_mutex); > res = generic_write_checks(file, ppos, &count, 0); > if (!res) { > res = fuse_direct_io(file, buf, count, ppos, 1); > if (res > 0) > fuse_write_update_size(inode, *ppos); > } > - mutex_unlock(&inode->i_mutex); > > fuse_invalidate_attr(inode); > > return res; > } > > +static ssize_t fuse_direct_write(struct file *file, const char __user *buf, > + size_t count, loff_t *ppos) > +{ > + struct inode *inode = file->f_path.dentry->d_inode; > + ssize_t res; > + > + if (is_bad_inode(inode)) > + return -EIO; > + > + /* Don't allow parallel writes to the same file */ > + mutex_lock(&inode->i_mutex); > + res = __fuse_direct_write(file, buf, count, ppos); > + mutex_unlock(&inode->i_mutex); > + > + return res; > +} > + > static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) > { >
__free_page(req->pages[0]); > @@ -2077,6 +2115,57 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc, > return 0; > } > > +static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov, > + unsigned long nr_segs, loff_t *ppos, int rw) > +{ > + const struct iovec *vector = iov; > + ssize_t ret = 0; > + > + while (nr_segs > 0) { > + void __user *base; > + size_t len; > + ssize_t nr; > + > + base = vector->iov_base; > + len = vector->iov_len; > + vector++; > + nr_segs--; > + > + if (rw == WRITE) > + nr = __fuse_direct_write(filp, base, len, ppos); > + else > + nr = fuse_direct_read(filp, base, len, ppos); > + > + if (nr < 0) { > + if (!ret) > + ret = nr; > + break; > + } > + ret += nr; > + if (nr != len) > + break; > + } > + > + return ret; > +} > + > + > +static ssize_t > +fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, > + loff_t offset, unsigned long nr_segs) > +{ > + ssize_t ret = 0; > + struct file *file = NULL; > + loff_t pos = 0; > + > + file = iocb->ki_filp; > + pos = offset; > + > + ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); > + > + return ret; > +} > + > static const struct file_operations fuse_file_operations = { > .llseek = fuse_file_llseek, > .read = do_sync_read, > @@ -2120,6 +2209,7 @@ static const struct address_space_operations fuse_file_aops = { > .readpages = fuse_readpages, > .set_page_dirty = __set_page_dirty_nobuffers, > .bmap = fuse_bmap, > + .direct_IO = fuse_direct_IO, > }; > > void fuse_init_file_inode(struct inode *inode) -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html