From: Huang Ying <ying.huang@xxxxxxxxx>

In this patch, swap writing is enhanced to support writing a THP
(Transparent Huge Page) out as a whole.  This is part of the THP swap
optimization and will improve swap write IO performance by issuing
larger, more contiguous IOs.

Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Shaohua Li <shli@xxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxx>
---
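Not part of the patch proper, just a reviewer note: below is a minimal
user-space sketch of the bio construction that get_swap_bio() switches
to.  struct sketch_bio, sketch_add_page() and the hard-coded 512/4096
values are made-up stand-ins for struct bio, bio_add_page(),
hpage_nr_pages() and PAGE_SIZE; the only point illustrated is that a
THP of nr base pages is added as nr PAGE_SIZE segments to a single
bio, so the whole huge page goes out as one write request.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL	/* base page size assumed for the sketch */

struct sketch_bio {
	size_t total_size;	/* stands in for bio->bi_iter.bi_size */
	unsigned int segments;	/* number of PAGE_SIZE segments added */
};

/* stands in for bio_add_page(): append one base page to the request */
static void sketch_add_page(struct sketch_bio *bio)
{
	bio->total_size += PAGE_SIZE;
	bio->segments++;
}

int main(void)
{
	/* 512 base pages: one 2MB THP with 4KB pages, as on x86-64 */
	unsigned int i, nr = 512;
	struct sketch_bio bio = { 0, 0 };

	for (i = 0; i < nr; i++)
		sketch_add_page(&bio);

	/* same invariant as the VM_BUG_ON() added in get_swap_bio() */
	assert(bio.total_size == PAGE_SIZE * nr);
	printf("one bio covering %u subpages, %zu bytes\n",
	       bio.segments, bio.total_size);
	return 0;
}

The sketch builds with any C compiler, e.g. "cc thp_bio_sketch.c" (the
file name is arbitrary), and prints the size of the single bio that
covers a 2MB THP.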
 include/linux/page-flags.h    |  4 ++--
 include/linux/vm_event_item.h |  1 +
 mm/page_io.c                  | 21 ++++++++++++++++-----
 mm/vmstat.c                   |  1 +
 4 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d33e3280c8ad..ba2d470d2d0a 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -303,8 +303,8 @@ PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
  * Only test-and-set exist for PG_writeback. The unconditional operators are
  * risky: they bypass page accounting.
  */
-TESTPAGEFLAG(Writeback, writeback, PF_NO_COMPOUND)
-	TESTSCFLAG(Writeback, writeback, PF_NO_COMPOUND)
+TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL)
+	TESTSCFLAG(Writeback, writeback, PF_NO_TAIL)
 PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
 
 /* PG_readahead is only used for reads; PG_reclaim is only for writes */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index d84ae90ccd5c..5b5b0f094060 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -84,6 +84,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
+		THP_SWPOUT,
 #endif
 #ifdef CONFIG_MEMORY_BALLOON
 		BALLOON_INFLATE,
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d3470f..ec5229fb3607 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -27,16 +27,18 @@
 static struct bio *get_swap_bio(gfp_t gfp_flags,
 				struct page *page, bio_end_io_t end_io)
 {
+	int i, nr = hpage_nr_pages(page);
 	struct bio *bio;
 
-	bio = bio_alloc(gfp_flags, 1);
+	bio = bio_alloc(gfp_flags, nr);
 	if (bio) {
 		bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev);
 		bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
 		bio->bi_end_io = end_io;
 
-		bio_add_page(bio, page, PAGE_SIZE, 0);
-		BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE);
+		for (i = 0; i < nr; i++)
+			bio_add_page(bio, page + i, PAGE_SIZE, 0);
+		VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
 	}
 	return bio;
 }
@@ -257,6 +259,15 @@ static sector_t swap_page_sector(struct page *page)
 	return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
 }
 
+static inline void count_swpout_vm_event(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (unlikely(PageTransHuge(page)))
+		count_vm_event(THP_SWPOUT);
+#endif
+	count_vm_events(PSWPOUT, hpage_nr_pages(page));
+}
+
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		bio_end_io_t end_write_func)
 {
@@ -308,7 +319,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 
 	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
 	if (!ret) {
-		count_vm_event(PSWPOUT);
+		count_swpout_vm_event(page);
 		return 0;
 	}
 
@@ -321,7 +332,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		goto out;
 	}
 	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
-	count_vm_event(PSWPOUT);
+	count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
 	submit_bio(bio);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c432e581f9a9..ebfd79df1008 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1070,6 +1070,7 @@ const char * const vmstat_text[] = {
 #endif
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
+	"thp_swpout",
 #endif
 #ifdef CONFIG_MEMORY_BALLOON
 	"balloon_inflate",
-- 
2.11.0