On Tue, Oct 29, 2019 at 06:45:12PM +0800, zhong jiang wrote:
> On 2019/10/29 17:40, Michal Hocko wrote:
> > On Tue 29-10-19 17:30:57, zhong jiang wrote:
> >> On 2019/10/29 16:11, Michal Hocko wrote:
> >>> [Cc Minchan]
> > [...]
> >>> Removing a long existing BUG_ON begs for a much better explanation.
> >>> shrink_page_list is not a trivial piece of code, but I _suspect_ that
> >>> removing it should be ok for mapped pages at least (try_to_unmap), but I
> >>> am not so sure how unmapped unevictable pages are handled off the top of
> >>> my head.
> >> As to unmapped unevictable pages, shrink_page_list has taken that into account:
> >>
> >> shrink_page_list
> >>   page_evictable --> will filter the unevictable pages and put them back on their LRU.
> > Ohh, it is right there at the top. Missed it. The check was added
> > by Nick along with the BUG_ON, so it sounds more like a "this
> > shouldn't happen" bugon. I wouldn't mind removing it with that
> > justification.
> As you have said, Minchan fixed the same kind of bug by checking PageUnevictable (I did not notice before).
> Wait for Minchan to see whether he has a better reason. Thanks.

madvise_pageout can work on a shared page, and one of the vmas among the processes sharing it may have mlocked it, so it can pass unevictable LRU pages into shrink_page_list. It is pointless to try to reclaim unevictable pages in the first place, so I want to fix madvise_pageout by introducing an only_evictable flag into the API and having madvise_pageout pass it as "true". If we want to remove the PageUnevictable VM_BUG_ON_PAGE in shrink_page_list, I want to see a stronger reason for why it happens and why the caller couldn't filter those pages out from the beginning.

diff --git a/mm/gup.c b/mm/gup.c
index 8f236a335ae9..d1ad1c3ec596 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1468,7 +1468,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
 				drain_allow = false;
 			}
 
-			if (!isolate_lru_page(head)) {
+			if (!isolate_lru_page(head, false)) {
 				list_add_tail(&head->lru, &cma_page_list);
 				mod_node_page_state(page_pgdat(head),
 						    NR_ISOLATED_ANON +
diff --git a/mm/internal.h b/mm/internal.h
index 0d5f720c75ab..13319612bef0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -85,7 +85,7 @@ extern unsigned long highest_memmap_pfn;
 /*
  * in mm/vmscan.c:
  */
-extern int isolate_lru_page(struct page *page);
+extern int isolate_lru_page(struct page *page, bool only_evictable);
 extern void putback_lru_page(struct page *page);
 
 /*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0a1b4b484ac5..095560f7f8ec 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -609,7 +609,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		 * Isolate the page to avoid collapsing an hugepage
 		 * currently in use by the VM.
 		 */
-		if (isolate_lru_page(page)) {
+		if (isolate_lru_page(page, false)) {
 			unlock_page(page);
 			result = SCAN_DEL_PAGE_LRU;
 			goto out;
@@ -1642,7 +1642,7 @@ static void collapse_file(struct mm_struct *mm,
 			goto out_unlock;
 		}
 
-		if (isolate_lru_page(page)) {
+		if (isolate_lru_page(page, false)) {
 			result = SCAN_DEL_PAGE_LRU;
 			goto out_unlock;
 		}
diff --git a/mm/madvise.c b/mm/madvise.c
index 2be9f3fdb05e..2639de560a0b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -363,7 +363,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 		ClearPageReferenced(page);
 		test_and_clear_page_young(page);
 		if (pageout) {
-			if (!isolate_lru_page(page))
+			if (!isolate_lru_page(page, true))
 				list_add(&page->lru, &page_list);
 		} else
 			deactivate_page(page);
@@ -441,7 +441,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 		ClearPageReferenced(page);
 		test_and_clear_page_young(page);
 		if (pageout) {
-			if (!isolate_lru_page(page))
+			if (!isolate_lru_page(page, true))
 				list_add(&page->lru, &page_list);
 		} else
 			deactivate_page(page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 363106578876..6d913215b074 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5847,7 +5847,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 		target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
 		if (target_type == MC_TARGET_PAGE) {
 			page = target.page;
-			if (!isolate_lru_page(page)) {
+			if (!isolate_lru_page(page, false)) {
 				if (!mem_cgroup_move_account(page, true,
 							     mc.from, mc.to)) {
 					mc.precharge -= HPAGE_PMD_NR;
@@ -5895,7 +5895,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 		 */
 		if (PageTransCompound(page))
 			goto put;
-		if (!device && isolate_lru_page(page))
+		if (!device && isolate_lru_page(page, false))
 			goto put;
 		if (!mem_cgroup_move_account(page, false,
 					     mc.from, mc.to)) {
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 3151c87dff73..ef37c67a7bab 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -567,7 +567,7 @@ static const char * const action_page_types[] = {
  */
 static int delete_from_lru_cache(struct page *p)
 {
-	if (!isolate_lru_page(p)) {
+	if (!isolate_lru_page(p, false)) {
 		/*
 		 * Clear sensible page flags, so that the buddy system won't
 		 * complain when the page is unpoison-and-freed.
@@ -1782,7 +1782,7 @@ static int __soft_offline_page(struct page *page, int flags)
 	 * handles a large number of cases for us.
 	 */
 	if (PageLRU(page))
-		ret = isolate_lru_page(page);
+		ret = isolate_lru_page(page, false);
 	else
 		ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
 	/*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index df570e5c71cc..8ba483d3d8cd 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1314,7 +1314,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 */
 		if (PageHWPoison(page)) {
 			if (WARN_ON(PageLRU(page)))
-				isolate_lru_page(page);
+				isolate_lru_page(page, false);
 			if (page_mapped(page))
 				try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS);
 			continue;
@@ -1327,7 +1327,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * LRU and non-lru movable pages.
 		 */
 		if (PageLRU(page))
-			ret = isolate_lru_page(page);
+			ret = isolate_lru_page(page, false);
 		else
 			ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
 		if (!ret) { /* Success */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4ae967bcf954..585e5845f071 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -974,7 +974,7 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
 	 * Avoid migrating a page that is shared with others.
 	 */
 	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(head) == 1) {
-		if (!isolate_lru_page(head)) {
+		if (!isolate_lru_page(head, false)) {
 			list_add_tail(&head->lru, pagelist);
 			mod_node_page_state(page_pgdat(head),
 				NR_ISOLATED_ANON + page_is_file_cache(head),
diff --git a/mm/migrate.c b/mm/migrate.c
index 4fe45d1428c8..710e00317a8f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1563,7 +1563,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
 		struct page *head;
 
 		head = compound_head(page);
-		err = isolate_lru_page(head);
+		err = isolate_lru_page(head, false);
 		if (err)
 			goto out_putpage;
 
@@ -1895,7 +1895,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 	if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
 		return 0;
 
-	if (isolate_lru_page(page))
+	if (isolate_lru_page(page, false))
 		return 0;
 
 	/*
@@ -2450,7 +2450,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 				allow_drain = false;
 			}
 
-			if (isolate_lru_page(page)) {
+			if (isolate_lru_page(page, false)) {
 				if (remap) {
 					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
 					migrate->cpages--;
diff --git a/mm/mlock.c b/mm/mlock.c
index a72c1eeded77..307e340fe2e0 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -70,7 +70,7 @@ void clear_page_mlock(struct page *page)
 	 *
 	 * See __pagevec_lru_add_fn for more explanation.
 	 */
-	if (!isolate_lru_page(page)) {
+	if (!isolate_lru_page(page, false)) {
 		putback_lru_page(page);
 	} else {
 		/*
@@ -97,7 +97,7 @@ void mlock_vma_page(struct page *page)
 		mod_zone_page_state(page_zone(page), NR_MLOCK,
 				    hpage_nr_pages(page));
 		count_vm_event(UNEVICTABLE_PGMLOCKED);
-		if (!isolate_lru_page(page))
+		if (!isolate_lru_page(page, false))
 			putback_lru_page(page);
 	}
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ee4eecc7e1c2..c44fb52c745f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1793,7 +1793,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
  * (2) the lru_lock must not be held.
  * (3) interrupts must be enabled.
  */
-int isolate_lru_page(struct page *page)
+int isolate_lru_page(struct page *page, bool only_evictable)
 {
 	int ret = -EBUSY;
 
@@ -1805,6 +1805,8 @@ int isolate_lru_page(struct page *page)
 		struct lruvec *lruvec;
 
 		spin_lock_irq(&pgdat->lru_lock);
+		if (only_evictable && PageUnevictable(page))
+			goto out;
 		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 		if (PageLRU(page)) {
 			int lru = page_lru(page);
@@ -1813,6 +1815,7 @@ int isolate_lru_page(struct page *page)
 			del_page_from_lru_list(page, lruvec, lru);
 			ret = 0;
 		}
+out:
 		spin_unlock_irq(&pgdat->lru_lock);
 	}
 	return ret;
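
For reference, the "filter right at the top" that the thread refers to is the page_evictable() check in shrink_page_list. A rough, from-memory sketch of the helper as it looks around this kernel version (paraphrased, not a verbatim quote):

/*
 * Sketch of page_evictable() in mm/vmscan.c (from memory, not verbatim):
 * a page counts as evictable unless its mapping is marked unevictable
 * (e.g. ramfs or SHM_LOCK'ed shmem) or the page itself is mlocked.
 */
int page_evictable(struct page *page)
{
	int ret;

	/* Prevent address_space of inode and swap cache from being freed */
	rcu_read_lock();
	ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
	rcu_read_unlock();
	return ret;
}

shrink_page_list calls this near the top of its loop and skips such pages rather than reclaiming them, so the open question is only whether callers like madvise_pageout should avoid isolating them in the first place, which is what the only_evictable flag above does.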