Direct I/O differs from buffered I/O in that it uses bio_iov_iter_get_pages for grabbing page references and for manually faulting in pages instead of triggering actual page faults. To disable these manual page faults, it's not enough to call pagefault_disable(); instead, we use the new ITER_FLAG_FAST_ONLY flag to tell iomap_dio_rw to stop faulting pages in for us. Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx> --- fs/gfs2/file.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 99df7934b4d8..6feb857a8a1c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -763,21 +763,42 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, struct file *file = iocb->ki_filp; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); size_t count = iov_iter_count(to); + size_t written = 0; ssize_t ret; + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we drop the + * inode glock, fault in the pages manually, and then we retry. Unlike + * in gfs2_file_read_iter, iomap_dio_rw can trigger implicit as + * well as manual page faults, and we need to disable both kinds + * separately. + */ + if (!count) return 0; /* skip atime */ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh); +retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; + pagefault_disable(); + to->type |= ITER_FLAG_FAST_ONLY; ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0); + to->type &= ~ITER_FLAG_FAST_ONLY; + pagefault_enable(); + gfs2_glock_dq(gh); + if (ret > 0) + written += ret; + if (unlikely(iov_iter_count(to) && (ret > 0 || ret == -EFAULT)) && + fault_in_iov_iter(to)) + goto retry; out_uninit: gfs2_holder_uninit(gh); - return ret; + return written ? 
written : ret; } static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, @@ -790,6 +811,12 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t offset = iocb->ki_pos; ssize_t ret; + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we drop the + * inode glock, fault in the pages manually, and then we retry. + */ + /* * Deferred lock, even if its a write, since we do no allocation on * this path. All we need to change is the atime, and this lock mode @@ -799,6 +826,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, * VFS does. */ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh); +retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; @@ -807,11 +835,16 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, if (offset + len > i_size_read(&ip->i_inode)) goto out; + from->type |= ITER_FLAG_FAST_ONLY; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0); + from->type &= ~ITER_FLAG_FAST_ONLY; + if (ret == -ENOTBLK) ret = 0; out: gfs2_glock_dq(gh); + if (unlikely(ret == -EFAULT) && fault_in_iov_iter(from)) + goto retry; out_uninit: gfs2_holder_uninit(gh); return ret; -- 2.26.3