The patch titled
     Subject: mm: handle THP in swap_*page_fs()
has been added to the -mm tree.  Its filename is
     mm-handle-thp-in-swap_page_fs.patch

This patch should soon appear at
    https://ozlabs.org/~akpm/mmots/broken-out/mm-handle-thp-in-swap_page_fs.patch
and later at
    https://ozlabs.org/~akpm/mmotm/broken-out/mm-handle-thp-in-swap_page_fs.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: NeilBrown <neilb@xxxxxxx>
Subject: mm: handle THP in swap_*page_fs()

Pages passed to swap_readpage()/swap_writepage() are not necessarily all
the same size - there may be transparent-huge-pages involved.

The BIO paths of swap_*page() handle this correctly, but the SWP_FS_OPS
path does not.

So we need to use thp_size() to find the size, not just assume PAGE_SIZE,
and we need to track the total length of the request, not just assume it
is "page * PAGE_SIZE".

Link: https://lkml.kernel.org/r/165119301488.15698.9457662928942765453.stgit@noble.brown
Signed-off-by: NeilBrown <neilb@xxxxxxx>
Reported-by: Miaohe Lin <linmiaohe@xxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: David Howells <dhowells@xxxxxxxxxx>
Cc: Geert Uytterhoeven <geert+renesas@xxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/page_io.c |   23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

--- a/mm/page_io.c~mm-handle-thp-in-swap_page_fs
+++ a/mm/page_io.c
@@ -239,6 +239,7 @@ struct swap_iocb {
         struct kiocb            iocb;
         struct bio_vec          bvec[SWAP_CLUSTER_MAX];
         int                     pages;
+        int                     len;
 };
 static mempool_t *sio_pool;
 
@@ -261,7 +262,7 @@ static void sio_write_complete(struct ki
         struct page *page = sio->bvec[0].bv_page;
         int p;
 
-        if (ret != PAGE_SIZE * sio->pages) {
+        if (ret != sio->len) {
                 /*
                  * In the case of swap-over-nfs, this can be a
                  * temporary failure if the system has limited
@@ -301,7 +302,7 @@ static int swap_writepage_fs(struct page
         sio = *wbc->swap_plug;
         if (sio) {
                 if (sio->iocb.ki_filp != swap_file ||
-                    sio->iocb.ki_pos + sio->pages * PAGE_SIZE != pos) {
+                    sio->iocb.ki_pos + sio->len != pos) {
                         swap_write_unplug(sio);
                         sio = NULL;
                 }
@@ -312,10 +313,12 @@ static int swap_writepage_fs(struct page
                 sio->iocb.ki_complete = sio_write_complete;
                 sio->iocb.ki_pos = pos;
                 sio->pages = 0;
+                sio->len = 0;
         }
         sio->bvec[sio->pages].bv_page = page;
-        sio->bvec[sio->pages].bv_len = PAGE_SIZE;
+        sio->bvec[sio->pages].bv_len = thp_size(page);
         sio->bvec[sio->pages].bv_offset = 0;
+        sio->len += thp_size(page);
         sio->pages += 1;
         if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) {
                 swap_write_unplug(sio);
@@ -371,8 +374,7 @@ void swap_write_unplug(struct swap_iocb
         struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
         int ret;
 
-        iov_iter_bvec(&from, WRITE, sio->bvec, sio->pages,
-                      PAGE_SIZE * sio->pages);
+        iov_iter_bvec(&from, WRITE, sio->bvec, sio->pages, sio->len);
         ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
         if (ret != -EIOCBQUEUED)
                 sio_write_complete(&sio->iocb, ret);
@@ -383,7 +385,7 @@ static void sio_read_complete(struct kio
         struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
         int p;
 
-        if (ret == PAGE_SIZE * sio->pages) {
+        if (ret == sio->len) {
                 for (p = 0; p < sio->pages; p++) {
                         struct page *page = sio->bvec[p].bv_page;
 
@@ -415,7 +417,7 @@ static void swap_readpage_fs(struct page
         sio = *plug;
         if (sio) {
                 if (sio->iocb.ki_filp != sis->swap_file ||
-                    sio->iocb.ki_pos + sio->pages * PAGE_SIZE != pos) {
+                    sio->iocb.ki_pos + sio->len != pos) {
                         swap_read_unplug(sio);
                         sio = NULL;
                 }
@@ -426,10 +428,12 @@ static void swap_readpage_fs(struct page
                 sio->iocb.ki_pos = pos;
                 sio->iocb.ki_complete = sio_read_complete;
                 sio->pages = 0;
+                sio->len = 0;
         }
         sio->bvec[sio->pages].bv_page = page;
-        sio->bvec[sio->pages].bv_len = PAGE_SIZE;
+        sio->bvec[sio->pages].bv_len = thp_size(page);
         sio->bvec[sio->pages].bv_offset = 0;
+        sio->len += thp_size(page);
         sio->pages += 1;
         if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
                 swap_read_unplug(sio);
@@ -521,8 +525,7 @@ void __swap_read_unplug(struct swap_iocb
         struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
         int ret;
 
-        iov_iter_bvec(&from, READ, sio->bvec, sio->pages,
-                      PAGE_SIZE * sio->pages);
+        iov_iter_bvec(&from, READ, sio->bvec, sio->pages, sio->len);
         ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
         if (ret != -EIOCBQUEUED)
                 sio_read_complete(&sio->iocb, ret);
_

Patches currently in -mm which might be from neilb@xxxxxxx are

mm-create-new-mm-swaph-header-file.patch
mm-drop-swap_dirty_folio.patch
mm-move-responsibility-for-setting-swp_fs_ops-to-swap_activate.patch
mm-reclaim-mustnt-enter-fs-for-swp_fs_ops-swap-space.patch
mm-introduce-swap_rw-and-use-it-for-reads-from-swp_fs_ops-swap-space.patch
nfs-rename-nfs_direct_io-and-use-as-swap_rw.patch
mm-perform-async-writes-to-swp_fs_ops-swap-space-using-swap_rw.patch
doc-update-documentation-for-swap_activate-and-swap_rw.patch
mm-submit-multipage-reads-for-swp_fs_ops-swap-space.patch
mm-submit-multipage-write-for-swp_fs_ops-swap-space.patch
mm-handle-thp-in-swap_page_fs.patch
vfs-add-fmode_can_odirect-file-flag.patch
mm-discard-__gfp_atomic.patch
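
For readers who want to see the length accounting in isolation: below is a
minimal, self-contained userspace sketch of the idea the patch above applies
(the fake_sio/fake_bvec types, the sio_add() helper and the 2MB THP size are
illustrative stand-ins, not kernel code).  It only demonstrates why
"pages * PAGE_SIZE" under-counts once a queued page can be a THP, while a
running byte total does not.

/* Sketch only - mimics the sio->len bookkeeping, not the kernel structures. */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE        ((size_t)4096)
#define SWAP_CLUSTER_MAX 32

struct fake_bvec {
        size_t bv_len;                  /* length of this segment in bytes */
};

struct fake_sio {
        struct fake_bvec bvec[SWAP_CLUSTER_MAX];
        int    pages;                   /* number of segments queued */
        size_t len;                     /* total bytes queued (the new field) */
};

/* Queue one "page" whose size may be PAGE_SIZE or a THP multiple of it. */
static void sio_add(struct fake_sio *sio, size_t page_bytes)
{
        sio->bvec[sio->pages].bv_len = page_bytes;
        sio->len += page_bytes;         /* track real bytes, not pages * PAGE_SIZE */
        sio->pages += 1;
}

int main(void)
{
        struct fake_sio sio = { .pages = 0, .len = 0 };

        sio_add(&sio, PAGE_SIZE);       /* ordinary 4KB page */
        sio_add(&sio, 512 * PAGE_SIZE); /* 2MB THP on x86-64 */
        sio_add(&sio, PAGE_SIZE);

        /* With a THP queued, pages * PAGE_SIZE under-counts; sio.len does not. */
        printf("pages * PAGE_SIZE = %zu, sio.len = %zu\n",
               (size_t)sio.pages * PAGE_SIZE, sio.len);
        return 0;
}

Built with something like "gcc sio_len.c && ./a.out" (the filename is
arbitrary), the two printed totals diverge as soon as one queued entry is
larger than PAGE_SIZE - which is exactly the mismatch the patch removes from
the SWP_FS_OPS read and write paths.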