[alternative-merged] psi-annotate-refault-stalls-from-io-submission.patch removed from -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: psi: annotate refault stalls from IO submission
has been removed from the -mm tree.  Its filename was
     psi-annotate-refault-stalls-from-io-submission.patch

This patch was dropped because an alternative patch was merged.

------------------------------------------------------
From: Johannes Weiner <hannes@xxxxxxxxxxx>
Subject: psi: annotate refault stalls from IO submission

psi tracks the time tasks wait for refaulting pages to become uptodate,
but it does not track the time spent submitting the IO.  The submission
part can be significant if backing storage is contended or when cgroup
throttling (io.latency) is in effect - a lot of time is spent in
submit_bio().  In that case, we underreport memory pressure.

The error scales with how aggressively IO is throttled compared to the
device's capability.

For example, we have system maintenance software throttled down pretty
hard on IO compared to the workload.  When memory is contended, the
system software starts thrashing cache, but since the backing device is
actually pretty fast, the majority of "io time" is from injected
throttling delays during submit_bio().

As a result we barely see memory pressure, when the reality is that
there is almost no progress due to the thrashing and we should be
killing misbehaving stuff.

Annotate the submit_bio() paths (or the indirection through readpage)
for refaults and swapin to get proper psi coverage of delays there.

Link: http://lkml.kernel.org/r/20190722201337.19180-1-hannes@xxxxxxxxxxx
Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Dave Chinner <david@xxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 fs/btrfs/extent_io.c |   14 ++++++++++++--
 fs/ext4/readpage.c   |    9 +++++++++
 fs/f2fs/data.c       |    8 ++++++++
 fs/mpage.c           |    9 +++++++++
 mm/filemap.c         |   20 ++++++++++++++++++++
 mm/page_io.c         |   11 ++++++++---
 mm/readahead.c       |   24 +++++++++++++++++++++++-
 7 files changed, 89 insertions(+), 6 deletions(-)

--- a/fs/btrfs/extent_io.c~psi-annotate-refault-stalls-from-io-submission
+++ a/fs/btrfs/extent_io.c
@@ -13,6 +13,7 @@
 #include <linux/pagevec.h>
 #include <linux/prefetch.h>
 #include <linux/cleancache.h>
+#include <linux/psi.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "ctree.h"
@@ -4265,6 +4266,9 @@ int extent_readpages(struct address_spac
 	struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
 	int nr = 0;
 	u64 prev_em_start = (u64)-1;
+	int ret = 0;
+	bool refault = false;
+	unsigned long pflags;
 
 	while (!list_empty(pages)) {
 		u64 contig_end = 0;
@@ -4279,6 +4283,10 @@ int extent_readpages(struct address_spac
 				put_page(page);
 				break;
 			}
+			if (PageWorkingset(page) && !refault) {
+				psi_memstall_enter(&pflags);
+				refault = true;
+			}
 
 			pagepool[nr++] = page;
 			contig_end = page_offset(page) + PAGE_SIZE - 1;
@@ -4299,8 +4307,10 @@ int extent_readpages(struct address_spac
 		free_extent_map(em_cached);
 
 	if (bio)
-		return submit_one_bio(bio, 0, bio_flags);
-	return 0;
+		ret = submit_one_bio(bio, 0, bio_flags);
+	if (refault)
+		psi_memstall_leave(&pflags);
+	return ret;
 }
 
 /*
--- a/fs/ext4/readpage.c~psi-annotate-refault-stalls-from-io-submission
+++ a/fs/ext4/readpage.c
@@ -44,6 +44,7 @@
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 #include <linux/cleancache.h>
+#include <linux/psi.h>
 
 #include "ext4.h"
 
@@ -116,6 +117,8 @@ int ext4_mpage_readpages(struct address_
 	int length;
 	unsigned relative_block = 0;
 	struct ext4_map_blocks map;
+	bool refault = false;
+	unsigned long pflags;
 
 	map.m_pblk = 0;
 	map.m_lblk = 0;
@@ -134,6 +137,10 @@ int ext4_mpage_readpages(struct address_
 			if (add_to_page_cache_lru(page, mapping, page->index,
 				  readahead_gfp_mask(mapping)))
 				goto next_page;
+			if (PageWorkingset(page) && !refault) {
+				psi_memstall_enter(&pflags);
+				refault = true;
+			}
 		}
 
 		if (page_has_buffers(page))
@@ -291,5 +298,7 @@ int ext4_mpage_readpages(struct address_
 	BUG_ON(pages && !list_empty(pages));
 	if (bio)
 		submit_bio(bio);
+	if (refault)
+		psi_memstall_leave(&pflags);
 	return 0;
 }
--- a/fs/f2fs/data.c~psi-annotate-refault-stalls-from-io-submission
+++ a/fs/f2fs/data.c
@@ -1699,6 +1699,8 @@ static int f2fs_mpage_readpages(struct a
 	sector_t last_block_in_bio = 0;
 	struct inode *inode = mapping->host;
 	struct f2fs_map_blocks map;
+	bool refault = false;
+	unsigned long pflags;
 	int ret = 0;
 
 	map.m_pblk = 0;
@@ -1720,6 +1722,10 @@ static int f2fs_mpage_readpages(struct a
 						  page_index(page),
 						  readahead_gfp_mask(mapping)))
 				goto next_page;
+			if (PageWorkingset(page) && !refault) {
+				psi_memstall_enter(&pflags);
+				refault = true;
+			}
 		}
 
 		ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio,
@@ -1736,6 +1742,8 @@ next_page:
 	BUG_ON(pages && !list_empty(pages));
 	if (bio)
 		__submit_bio(F2FS_I_SB(inode), bio, DATA);
+	if (refault)
+		psi_memstall_leave(&pflags);
 	return pages ? 0 : ret;
 }
 
--- a/fs/mpage.c~psi-annotate-refault-stalls-from-io-submission
+++ a/fs/mpage.c
@@ -30,6 +30,7 @@
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 #include <linux/cleancache.h>
+#include <linux/psi.h>
 #include "internal.h"
 
 /*
@@ -389,6 +390,8 @@ mpage_readpages(struct address_space *ma
 		.get_block = get_block,
 		.is_readahead = true,
 	};
+	bool refault = false;
+	unsigned long pflags;
 	unsigned page_idx;
 
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -404,10 +407,16 @@ mpage_readpages(struct address_space *ma
 			args.bio = do_mpage_readpage(&args);
 		}
 		put_page(page);
+		if (PageWorkingset(page) && !refault) {
+			psi_memstall_enter(&pflags);
+			refault = true;
+		}
 	}
 	BUG_ON(!list_empty(pages));
 	if (args.bio)
 		mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
+	if (refault)
+		psi_memstall_leave(&pflags);
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpages);
--- a/mm/filemap.c~psi-annotate-refault-stalls-from-io-submission
+++ a/mm/filemap.c
@@ -2035,6 +2035,8 @@ static ssize_t generic_file_buffered_rea
 		pgoff_t end_index;
 		loff_t isize;
 		unsigned long nr, ret;
+		unsigned long pflags;
+		bool refault;
 
 		cond_resched();
 find_page:
@@ -2183,9 +2185,17 @@ readpage:
 		 * PG_error will be set again if readpage fails.
 		 */
 		ClearPageError(page);
+
+		refault = PageWorkingset(page);
+		if (refault)
+			psi_memstall_enter(&pflags);
+
 		/* Start the actual read. The read will unlock the page. */
 		error = mapping->a_ops->readpage(filp, page);
 
+		if (refault)
+			psi_memstall_leave(&pflags);
+
 		if (unlikely(error)) {
 			if (error == AOP_TRUNCATED_PAGE) {
 				put_page(page);
@@ -2779,11 +2789,14 @@ static struct page *do_read_cache_page(s
 				void *data,
 				gfp_t gfp)
 {
+	bool refault = false;
 	struct page *page;
 	int err;
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
+		unsigned long pflags;
+
 		page = __page_cache_alloc(gfp);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
@@ -2796,12 +2809,19 @@ repeat:
 			return ERR_PTR(err);
 		}
 
+		refault = PageWorkingset(page);
 filler:
+		if (refault)
+			psi_memstall_enter(&pflags);
+
 		if (filler)
 			err = filler(data, page);
 		else
 			err = mapping->a_ops->readpage(data, page);
 
+		if (refault)
+			psi_memstall_leave(&pflags);
+
 		if (err < 0) {
 			put_page(page);
 			return ERR_PTR(err);
--- a/mm/page_io.c~psi-annotate-refault-stalls-from-io-submission
+++ a/mm/page_io.c
@@ -24,6 +24,7 @@
 #include <linux/blkdev.h>
 #include <linux/uio.h>
 #include <linux/sched/task.h>
+#include <linux/psi.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -354,10 +355,14 @@ int swap_readpage(struct page *page, boo
 	struct swap_info_struct *sis = page_swap_info(page);
 	blk_qc_t qc;
 	struct gendisk *disk;
+	unsigned long pflags;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageUptodate(page), page);
+
+	psi_memstall_enter(&pflags);
+
 	if (frontswap_load(page) == 0) {
 		SetPageUptodate(page);
 		unlock_page(page);
@@ -371,7 +376,7 @@ int swap_readpage(struct page *page, boo
 		ret = mapping->a_ops->readpage(swap_file, page);
 		if (!ret)
 			count_vm_event(PSWPIN);
-		return ret;
+		goto out;
 	}
 
 	ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
@@ -382,7 +387,7 @@ int swap_readpage(struct page *page, boo
 		}
 
 		count_vm_event(PSWPIN);
-		return 0;
+		goto out;
 	}
 
 	ret = 0;
@@ -416,8 +421,8 @@ int swap_readpage(struct page *page, boo
 	}
 	__set_current_state(TASK_RUNNING);
 	bio_put(bio);
-
 out:
+	psi_memstall_leave(&pflags);
 	return ret;
 }
 
--- a/mm/readahead.c~psi-annotate-refault-stalls-from-io-submission
+++ a/mm/readahead.c
@@ -22,6 +22,7 @@
 #include <linux/mm_inline.h>
 #include <linux/blk-cgroup.h>
 #include <linux/fadvise.h>
+#include <linux/psi.h>
 
 #include "internal.h"
 
@@ -92,6 +93,9 @@ int read_cache_pages(struct address_spac
 	int ret = 0;
 
 	while (!list_empty(pages)) {
+		unsigned long pflags;
+		bool refault;
+
 		page = lru_to_page(pages);
 		list_del(&page->lru);
 		if (add_to_page_cache_lru(page, mapping, page->index,
@@ -101,7 +105,15 @@ int read_cache_pages(struct address_spac
 		}
 		put_page(page);
 
+		refault = PageWorkingset(page);
+		if (refault)
+			psi_memstall_enter(&pflags);
+
 		ret = filler(data, page);
+
+		if (refault)
+			psi_memstall_leave(&pflags);
+
 		if (unlikely(ret)) {
 			read_cache_pages_invalidate_pages(mapping, pages);
 			break;
@@ -132,8 +144,18 @@ static int read_pages(struct address_spa
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = lru_to_page(pages);
 		list_del(&page->lru);
-		if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
+		if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
+			bool refault = PageWorkingset(page);
+			unsigned long pflags;
+
+			if (refault)
+				psi_memstall_enter(&pflags);
+
 			mapping->a_ops->readpage(filp, page);
+
+			if (refault)
+				psi_memstall_leave(&pflags);
+		}
 		put_page(page);
 	}
 	ret = 0;
_

Patches currently in -mm which might be from hannes@xxxxxxxxxxx are

block-annotate-refault-stalls-from-io-submission.patch




[Index of Archives]     [Kernel Archive]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux