Using ->direct_IO and ->readpage for the generic swap file infrastructure requires all sorts of nasty workarounds. ->readpage implementations don't play nicely with swap cache pages, and ->direct_IO implementations have different locking conventions for every filesystem. Instead, use ->read_iter/->write_iter with an ITER_BVEC and let the filesystem take care of it. This will also allow us to easily transition to kernel AIO if that gets merged in the future. Cc: Mel Gorman <mgorman@xxxxxxx> Signed-off-by: Omar Sandoval <osandov@xxxxxxxxxxx> --- mm/page_io.c | 30 +++++++++++++++++++++++------- mm/swapfile.c | 11 ++++++++++- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index 532a39b..61165b0 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -263,7 +263,6 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, if (sis->flags & SWP_FILE) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; - struct address_space *mapping = swap_file->f_mapping; struct iov_iter from; struct bio_vec bv = { .bv_page = page, @@ -279,9 +278,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, set_page_writeback(page); unlock_page(page); - ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE, - &kiocb, &from, - kiocb.ki_pos); + ret = swap_file->f_op->write_iter(&kiocb, &from); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; @@ -344,12 +341,31 @@ int swap_readpage(struct page *page) } if (sis->flags & SWP_FILE) { + struct kiocb kiocb; struct file *swap_file = sis->swap_file; - struct address_space *mapping = swap_file->f_mapping; + struct iov_iter to; + struct bio_vec bv = { + .bv_page = page, + .bv_len = PAGE_SIZE, + .bv_offset = 0, + }; + + iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE); + + init_sync_kiocb(&kiocb, swap_file); + kiocb.ki_pos = page_file_offset(page); + kiocb.ki_nbytes = PAGE_SIZE; - ret = mapping->a_ops->readpage(swap_file, page); - if (!ret) + ret = 
swap_file->f_op->read_iter(&kiocb, &to); + if (ret == PAGE_SIZE) { + SetPageUptodate(page); count_vm_event(PSWPIN); + ret = 0; + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); return ret; } diff --git a/mm/swapfile.c b/mm/swapfile.c index 63f55cc..4e14122 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2379,7 +2379,16 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) name = NULL; goto bad_swap; } - swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0); + swap_file = file_open_name(name, O_RDWR | O_LARGEFILE | O_DIRECT, 0); + if (swap_file == ERR_PTR(-EINVAL)) { + /* + * XXX: there are several filesystems that implement ->bmap but + * not ->direct_IO. It's unlikely that anyone is using a + * swapfile on, e.g., the MINIX fs, but this kludge will keep us + * from getting a complaint from the one person who does. + */ + swap_file = file_open_name(name, O_RDWR | O_LARGEFILE, 0); + } if (IS_ERR(swap_file)) { error = PTR_ERR(swap_file); swap_file = NULL; -- 2.2.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>