On Tue, Sep 15, 2020 at 05:22:22PM -0400, Aristeu Rozanski wrote: > Hi Oscar, Naoya, Hi Aristeu, thanks for reporting this. > I've run these tests using mmotm and mmotm with this patchset on top. Could you please re-run the tests with the below patch applied, and then attach the logs here? diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 84a7f228af36..d7b6e7724e47 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -67,6 +67,7 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release) { + dump_page(page, "page_handle_poison"); if (release) { put_page(page); drain_all_pages(page_zone(page)); @@ -77,7 +78,7 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo * Doing this check for free pages is also fine since dissolve_free_huge_page * returns 0 for non-hugetlb pages as well. */ - if (dissolve_free_huge_page(page) || !take_page_off_buddy(page)) + if (dissolve_free_huge_page(page) || !take_page_off_buddy(page)) { /* * We could fail to take off the target page from buddy * for example due to racy page allocaiton, but that's @@ -85,7 +86,9 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo * and if someone really want to use it, they should * take it. 
*/ + pr_info("%s: hugepage_or_freepage failed\n", __func__); return false; + } } SetPageHWPoison(page); @@ -1858,8 +1861,11 @@ static int __soft_offline_page(struct page *page) if (!ret) { bool release = !huge; - if (!page_handle_poison(page, true, release)) + if (!page_handle_poison(page, true, release)) { + pr_info("%s: page_handle_poison -EBUSY\n", __func__); + dump_page(page, "__soft_offline_page after migrate"); ret = -EBUSY; + } } else { if (!list_empty(&pagelist)) putback_movable_pages(&pagelist); @@ -1872,6 +1878,7 @@ static int __soft_offline_page(struct page *page) } else { pr_info("soft offline: %#lx: %s isolation failed: %d, page count %d, type %lx (%pGp)\n", pfn, msg_page[huge], ret, page_count(page), page->flags, &page->flags); + dump_page(page, "__soft_offline_page isolation failed"); ret = -EBUSY; } return ret; @@ -1882,8 +1889,11 @@ static int soft_offline_in_use_page(struct page *page) struct page *hpage = compound_head(page); if (!PageHuge(page) && PageTransHuge(hpage)) - if (try_to_split_thp_page(page, "soft offline") < 0) + if (try_to_split_thp_page(page, "soft offline") < 0) { + pr_info("%s: try_to_split_thp_page -EBUSY\n", __func__); + dump_page(page, "try_to_split_thp_page"); return -EBUSY; + } return __soft_offline_page(page); } @@ -1891,8 +1901,11 @@ static int soft_offline_free_page(struct page *page) { int rc = 0; - if (!page_handle_poison(page, true, false)) + if (!page_handle_poison(page, true, false)) { + pr_info("%s: page_handle_poison -EBUSY\n", __func__); + dump_page(page, "soft_offline_free_page"); rc = -EBUSY; + } return rc; } Thanks -- Oscar Salvador SUSE