The patch titled
     Subject: psi: annotate refault stalls from IO submission
has been added to the -mm tree.  Its filename is
     psi-annotate-refault-stalls-from-io-submission.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/psi-annotate-refault-stalls-from-io-submission.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/psi-annotate-refault-stalls-from-io-submission.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included in linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Johannes Weiner <hannes@xxxxxxxxxxx>
Subject: psi: annotate refault stalls from IO submission

psi tracks the time tasks wait for refaulting pages to become uptodate,
but it does not track the time spent submitting the IO.  The submission
part can be significant if backing storage is contended or when cgroup
throttling (io.latency) is in effect - a lot of time is spent in
submit_bio().  In that case, we underreport memory pressure.

Annotate the submit_bio() paths (or the indirection through readpage) for
refaults and swapin to get proper psi coverage of delays there.
Link: http://lkml.kernel.org/r/20190722201337.19180-1-hannes@xxxxxxxxxxx Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx> Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/btrfs/extent_io.c | 14 ++++++++++++-- fs/ext4/readpage.c | 9 +++++++++ fs/f2fs/data.c | 8 ++++++++ fs/mpage.c | 9 +++++++++ mm/filemap.c | 20 ++++++++++++++++++++ mm/page_io.c | 11 ++++++++--- mm/readahead.c | 24 +++++++++++++++++++++++- 7 files changed, 89 insertions(+), 6 deletions(-) --- a/fs/btrfs/extent_io.c~psi-annotate-refault-stalls-from-io-submission +++ a/fs/btrfs/extent_io.c @@ -13,6 +13,7 @@ #include <linux/pagevec.h> #include <linux/prefetch.h> #include <linux/cleancache.h> +#include <linux/psi.h> #include "extent_io.h" #include "extent_map.h" #include "ctree.h" @@ -4265,6 +4266,9 @@ int extent_readpages(struct address_spac struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree; int nr = 0; u64 prev_em_start = (u64)-1; + int ret = 0; + bool refault = false; + unsigned long pflags; while (!list_empty(pages)) { u64 contig_end = 0; @@ -4279,6 +4283,10 @@ int extent_readpages(struct address_spac put_page(page); break; } + if (PageWorkingset(page) && !refault) { + psi_memstall_enter(&pflags); + refault = true; + } pagepool[nr++] = page; contig_end = page_offset(page) + PAGE_SIZE - 1; @@ -4299,8 +4307,10 @@ int extent_readpages(struct address_spac free_extent_map(em_cached); if (bio) - return submit_one_bio(bio, 0, bio_flags); - return 0; + ret = submit_one_bio(bio, 0, bio_flags); + if (refault) + psi_memstall_leave(&pflags); + return ret; } /* --- a/fs/ext4/readpage.c~psi-annotate-refault-stalls-from-io-submission +++ a/fs/ext4/readpage.c @@ -44,6 +44,7 @@ #include <linux/backing-dev.h> #include <linux/pagevec.h> #include <linux/cleancache.h> +#include <linux/psi.h> #include "ext4.h" @@ -116,6 +117,8 @@ int ext4_mpage_readpages(struct address_ int length; unsigned relative_block = 0; struct ext4_map_blocks 
map; + bool refault = false; + unsigned long pflags; map.m_pblk = 0; map.m_lblk = 0; @@ -134,6 +137,10 @@ int ext4_mpage_readpages(struct address_ if (add_to_page_cache_lru(page, mapping, page->index, readahead_gfp_mask(mapping))) goto next_page; + if (PageWorkingset(page) && !refault) { + psi_memstall_enter(&pflags); + refault = true; + } } if (page_has_buffers(page)) @@ -291,5 +298,7 @@ int ext4_mpage_readpages(struct address_ BUG_ON(pages && !list_empty(pages)); if (bio) submit_bio(bio); + if (refault) + psi_memstall_leave(&pflags); return 0; } --- a/fs/f2fs/data.c~psi-annotate-refault-stalls-from-io-submission +++ a/fs/f2fs/data.c @@ -1699,6 +1699,8 @@ static int f2fs_mpage_readpages(struct a sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; struct f2fs_map_blocks map; + bool refault = false; + unsigned long pflags; int ret = 0; map.m_pblk = 0; @@ -1720,6 +1722,10 @@ static int f2fs_mpage_readpages(struct a page_index(page), readahead_gfp_mask(mapping))) goto next_page; + if (PageWorkingset(page) && !refault) { + psi_memstall_enter(&pflags); + refault = true; + } } ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio, @@ -1736,6 +1742,8 @@ next_page: BUG_ON(pages && !list_empty(pages)); if (bio) __submit_bio(F2FS_I_SB(inode), bio, DATA); + if (refault) + psi_memstall_leave(&pflags); return pages ? 
0 : ret; } --- a/fs/mpage.c~psi-annotate-refault-stalls-from-io-submission +++ a/fs/mpage.c @@ -30,6 +30,7 @@ #include <linux/backing-dev.h> #include <linux/pagevec.h> #include <linux/cleancache.h> +#include <linux/psi.h> #include "internal.h" /* @@ -389,6 +390,8 @@ mpage_readpages(struct address_space *ma .get_block = get_block, .is_readahead = true, }; + bool refault = false; + unsigned long pflags; unsigned page_idx; for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -404,10 +407,16 @@ mpage_readpages(struct address_space *ma args.bio = do_mpage_readpage(&args); } put_page(page); + if (PageWorkingset(page) && !refault) { + psi_memstall_enter(&pflags); + refault = true; + } } BUG_ON(!list_empty(pages)); if (args.bio) mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio); + if (refault) + psi_memstall_leave(&pflags); return 0; } EXPORT_SYMBOL(mpage_readpages); --- a/mm/filemap.c~psi-annotate-refault-stalls-from-io-submission +++ a/mm/filemap.c @@ -2056,6 +2056,8 @@ static ssize_t generic_file_buffered_rea pgoff_t end_index; loff_t isize; unsigned long nr, ret; + unsigned long pflags; + bool refault; cond_resched(); find_page: @@ -2204,9 +2206,17 @@ readpage: * PG_error will be set again if readpage fails. */ ClearPageError(page); + + refault = PageWorkingset(page); + if (refault) + psi_memstall_enter(&pflags); + /* Start the actual read. The read will unlock the page. 
*/ error = mapping->a_ops->readpage(filp, page); + if (refault) + psi_memstall_leave(&pflags); + if (unlikely(error)) { if (error == AOP_TRUNCATED_PAGE) { put_page(page); @@ -2805,11 +2815,14 @@ static struct page *do_read_cache_page(s void *data, gfp_t gfp) { + bool refault = false; struct page *page; int err; repeat: page = find_get_page(mapping, index); if (!page) { + unsigned long pflags; + page = __page_cache_alloc(gfp); if (!page) return ERR_PTR(-ENOMEM); @@ -2822,12 +2835,19 @@ repeat: return ERR_PTR(err); } + refault = PageWorkingset(page); filler: + if (refault) + psi_memstall_enter(&pflags); + if (filler) err = filler(data, page); else err = mapping->a_ops->readpage(data, page); + if (refault) + psi_memstall_leave(&pflags); + if (err < 0) { put_page(page); return ERR_PTR(err); --- a/mm/page_io.c~psi-annotate-refault-stalls-from-io-submission +++ a/mm/page_io.c @@ -24,6 +24,7 @@ #include <linux/blkdev.h> #include <linux/uio.h> #include <linux/sched/task.h> +#include <linux/psi.h> #include <asm/pgtable.h> static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -354,10 +355,14 @@ int swap_readpage(struct page *page, boo struct swap_info_struct *sis = page_swap_info(page); blk_qc_t qc; struct gendisk *disk; + unsigned long pflags; VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageUptodate(page), page); + + psi_memstall_enter(&pflags); + if (frontswap_load(page) == 0) { SetPageUptodate(page); unlock_page(page); @@ -371,7 +376,7 @@ int swap_readpage(struct page *page, boo ret = mapping->a_ops->readpage(swap_file, page); if (!ret) count_vm_event(PSWPIN); - return ret; + goto out; } ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); @@ -382,7 +387,7 @@ int swap_readpage(struct page *page, boo } count_vm_event(PSWPIN); - return 0; + goto out; } ret = 0; @@ -416,8 +421,8 @@ int swap_readpage(struct page *page, boo } __set_current_state(TASK_RUNNING); bio_put(bio); - out: + 
psi_memstall_leave(&pflags); return ret; } --- a/mm/readahead.c~psi-annotate-refault-stalls-from-io-submission +++ a/mm/readahead.c @@ -22,6 +22,7 @@ #include <linux/mm_inline.h> #include <linux/blk-cgroup.h> #include <linux/fadvise.h> +#include <linux/psi.h> #include "internal.h" @@ -92,6 +93,9 @@ int read_cache_pages(struct address_spac int ret = 0; while (!list_empty(pages)) { + unsigned long pflags; + bool refault; + page = lru_to_page(pages); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, @@ -101,7 +105,15 @@ int read_cache_pages(struct address_spac } put_page(page); + refault = PageWorkingset(page); + if (refault) + psi_memstall_enter(&pflags); + ret = filler(data, page); + + if (refault) + psi_memstall_leave(&pflags); + if (unlikely(ret)) { read_cache_pages_invalidate_pages(mapping, pages); break; @@ -132,8 +144,18 @@ static int read_pages(struct address_spa for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = lru_to_page(pages); list_del(&page->lru); - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) + if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) { + bool refault = PageWorkingset(page); + unsigned long pflags; + + if (refault) + psi_memstall_enter(&pflags); + mapping->a_ops->readpage(filp, page); + + if (refault) + psi_memstall_leave(&pflags); + } put_page(page); } ret = 0; _ Patches currently in -mm which might be from hannes@xxxxxxxxxxx are psi-annotate-refault-stalls-from-io-submission.patch