From: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> reclaim_stat's rotate is used for controlling the ratio of scanning page between file and anonymous LRU. All new anonymous pages are counted for rotate before the patch, protecting anonymous pages on active LRU, and, it makes that reclaim on anonymous LRU is less happened than file LRU. Now, situation is changed. all new anonymous pages are not added to the active LRU so rotate would be far less than before. It will cause that reclaim on anonymous LRU happens more and it would result in bad effect on some system that is optimized for previous setting. Therefore, this patch counts a new anonymous page as a reclaim_state's rotate. Although it is non-logical to add this count to the reclaim_state's rotate in current algorithm, reducing the regression would be more important. I found this regression on kernel-build test and it is roughly 2~5% performance degradation. With this workaround, performance is completely restored. v2: fix a bug that reuses the rotate value for previous page Reported-by: kernel test robot <lkp@xxxxxxxxx> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> --- mm/swap.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/mm/swap.c b/mm/swap.c index 442d27e..1f19301 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -187,6 +187,9 @@ int get_kernel_page(unsigned long start, int write, struct page **pages) } EXPORT_SYMBOL_GPL(get_kernel_page); +static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, + void *arg); + static void pagevec_lru_move_fn(struct pagevec *pvec, void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg), void *arg) @@ -199,6 +202,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; struct pglist_data *pagepgdat = page_pgdat(page); + void *arg_orig = arg; if (pagepgdat != pgdat) { if (pgdat) @@ -207,8 +211,22 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, spin_lock_irqsave(&pgdat->lru_lock, flags); } + if (move_fn == __pagevec_lru_add_fn) { + struct list_head *entry = &page->lru; + unsigned long next = (unsigned long)entry->next; + unsigned long rotate = next & 2; + + if (rotate) { + VM_BUG_ON(arg); + + next = next & ~2; + entry->next = (struct list_head *)next; + arg = (void *)rotate; + } + } lruvec = mem_cgroup_page_lruvec(page, pgdat); (*move_fn)(page, lruvec, arg); + arg = arg_orig; } if (pgdat) spin_unlock_irqrestore(&pgdat->lru_lock, flags); @@ -475,6 +493,14 @@ void lru_cache_add_inactive_or_unevictable(struct page *page, hpage_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); } + + if (PageSwapBacked(page) && !unevictable) { + struct list_head *entry = &page->lru; + unsigned long next = (unsigned long)entry->next; + + next = next | 2; + entry->next = (struct list_head *)next; + } lru_cache_add(page); } @@ -927,6 +953,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, { enum lru_list lru; int was_unevictable = TestClearPageUnevictable(page); + unsigned long rotate = (unsigned long)arg; VM_BUG_ON_PAGE(PageLRU(page), page); @@ -962,7 +989,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, if (page_evictable(page)) { lru = page_lru(page); update_page_reclaim_stat(lruvec, page_is_file_cache(page), - PageActive(page)); + PageActive(page) | rotate); if (was_unevictable) count_vm_event(UNEVICTABLE_PGRESCUED); } else { -- 2.7.4