On Wed, 19 Aug 2020 19:59:50 +0200 David Hildenbrand <david@xxxxxxxxxx> wrote:

> We make sure that we cannot have any memory holes right at the beginning
> of offline_pages(). We no longer need walk_system_ram_range() and can
> call test_pages_isolated() and __offline_isolated_pages() directly.
>
> offlined_pages always corresponds to nr_pages, so we can simplify that.

This patch ran afoul of Pavel's "mm/memory_hotplug: drain per-cpu pages
again during memory offline", here:

> @@ -1481,7 +1459,7 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
>  int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
>  {
>  	const unsigned long end_pfn = start_pfn + nr_pages;
> -	unsigned long pfn, system_ram_pages = 0, offlined_pages = 0;
> +	unsigned long pfn, system_ram_pages = 0;
>  	int ret, node, nr_isolate_pageblock;
>  	unsigned long flags;
>  	struct zone *zone;
> @@ -1579,16 +1557,12 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
>  			reason = "failure to dissolve huge pages";
>  			goto failed_removal_isolated;
>  		}
> -		/* check again */
> -		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
> -					    NULL, check_pages_isolated_cb);
> -	} while (ret);
> -
> -	/* Ok, all of our target is isolated.
> -	   We cannot do rollback at this point. */
> -	walk_system_ram_range(start_pfn, end_pfn - start_pfn,
> -			      &offlined_pages, offline_isolated_pages_cb);
> -	pr_info("Offlined Pages %ld\n", offlined_pages);
> +	} while (test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE));
> +
> +	/* Mark all sections offline and remove free pages from the buddy. */
> +	__offline_isolated_pages(start_pfn, end_pfn);
> +	pr_info("Offlined Pages %ld\n", nr_pages);
> +
>  	/*
>  	 * Onlining will reset pagetype flags and makes migrate type

I did this. Looks OK?


From: David Hildenbrand <david@xxxxxxxxxx>
Subject: mm/memory_hotplug: simplify page offlining

We make sure that we cannot have any memory holes right at the beginning
of offline_pages(). We no longer need walk_system_ram_range() and can
call test_pages_isolated() and __offline_isolated_pages() directly.

offlined_pages always corresponds to nr_pages, so we can simplify that.

Link: https://lkml.kernel.org/r/20200819175957.28465-4-david@xxxxxxxxxx
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
Acked-by: Michal Hocko <mhocko@xxxxxxxx>
Reviewed-by: Oscar Salvador <osalvador@xxxxxxx>
Cc: Wei Yang <richard.weiyang@xxxxxxxxxxxxxxxxx>
Cc: Baoquan He <bhe@xxxxxxxxxx>
Cc: Pankaj Gupta <pankaj.gupta.linux@xxxxxxxxx>
Cc: Charan Teja Reddy <charante@xxxxxxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Cc: Logan Gunthorpe <logang@xxxxxxxxxxxx>
Cc: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Michel Lespinasse <walken@xxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/memory_hotplug.c |   61 +++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 36 deletions(-)

--- a/mm/memory_hotplug.c~mm-memory_hotplug-simplify-page-offlining
+++ a/mm/memory_hotplug.c
@@ -1383,28 +1383,6 @@ do_migrate_range(unsigned long start_pfn
 	return ret;
 }
 
-/* Mark all sections offline and remove all free pages from the buddy. */
-static int
-offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
-			void *data)
-{
-	unsigned long *offlined_pages = (unsigned long *)data;
-
-	*offlined_pages += __offline_isolated_pages(start, start + nr_pages);
-	return 0;
-}
-
-/*
- * Check all pages in range, recorded as memory resource, are isolated.
- */
-static int
-check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
-			void *data)
-{
-	return test_pages_isolated(start_pfn, start_pfn + nr_pages,
-				   MEMORY_OFFLINE);
-}
-
 static int __init cmdline_parse_movable_node(char *p)
 {
 	movable_node_enabled = true;
@@ -1491,7 +1469,7 @@ static int count_system_ram_pages_cb(uns
 int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 {
 	const unsigned long end_pfn = start_pfn + nr_pages;
-	unsigned long pfn, system_ram_pages = 0, offlined_pages = 0;
+	unsigned long pfn, system_ram_pages = 0;
 	int ret, node, nr_isolate_pageblock;
 	unsigned long flags;
 	struct zone *zone;
@@ -1589,16 +1567,27 @@ int __ref offline_pages(unsigned long st
 			reason = "failure to dissolve huge pages";
 			goto failed_removal_isolated;
 		}
-		/* check again */
-		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
-					    NULL, check_pages_isolated_cb);
-	} while (ret);
-
-	/* Ok, all of our target is isolated.
-	   We cannot do rollback at this point. */
-	walk_system_ram_range(start_pfn, end_pfn - start_pfn,
-			      &offlined_pages, offline_isolated_pages_cb);
-	pr_info("Offlined Pages %ld\n", offlined_pages);
+
+		/*
+		 * per-cpu pages are drained in start_isolate_page_range, but if
+		 * there are still pages that are not free, make sure that we
+		 * drain again, because when we isolated range we might
+		 * have raced with another thread that was adding pages to pcp
+		 * list.
+		 *
+		 * Forward progress should be still guaranteed because
+		 * pages on the pcp list can only belong to MOVABLE_ZONE
+		 * because has_unmovable_pages explicitly checks for
+		 * PageBuddy on freed pages on other zones.
+		 */
+		if (ret)
+			drain_all_pages(zone);
+	} while (test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE));
+
+	/* Mark all sections offline and remove free pages from the buddy. */
+	__offline_isolated_pages(start_pfn, end_pfn);
+	pr_info("Offlined Pages %ld\n", nr_pages);
+
 	/*
 	 * Onlining will reset pagetype flags and makes migrate type
 	 * MOVABLE, so just need to decrease the number of isolated
@@ -1609,11 +1598,11 @@ int __ref offline_pages(unsigned long st
 	spin_unlock_irqrestore(&zone->lock, flags);
 
 	/* removal success */
-	adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
-	zone->present_pages -= offlined_pages;
+	adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
+	zone->present_pages -= nr_pages;
 
 	pgdat_resize_lock(zone->zone_pgdat, &flags);
-	zone->zone_pgdat->node_present_pages -= offlined_pages;
+	zone->zone_pgdat->node_present_pages -= nr_pages;
 	pgdat_resize_unlock(zone->zone_pgdat, &flags);
 
 	init_per_zone_wmark_min();
_
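
For anyone eyeballing the merge, below is a small stand-alone sketch of the
retry-until-isolated loop shape the merged hunk produces. To be clear, this
is a toy model, not kernel code: isolate_pass(), drain_all(),
range_isolated(), NR_PAGES and pcp_pages are made-up stand-ins for the real
helpers (the migrate/dissolve steps, drain_all_pages() and
test_pages_isolated()), but it compiles and runs as plain C99.

/*
 * Toy model (plain C, not kernel code) of the merged offlining loop:
 * keep making passes over the range, drain the per-cpu lists when a
 * pass leaves busy pages behind, and only stop once the whole range
 * tests as isolated.  All identifiers are stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 8

static bool page_isolated[NR_PAGES];
static int pcp_pages = 2;	/* pages "stuck" on a per-cpu list */

/* stand-in for one migrate/dissolve pass; nonzero means pages left over */
static int isolate_pass(void)
{
	for (int i = 0; i < NR_PAGES - pcp_pages; i++)
		page_isolated[i] = true;
	return pcp_pages ? 1 : 0;
}

/* stand-in for drain_all_pages(): flush pcp pages back to the buddy */
static void drain_all(void)
{
	pcp_pages = 0;
	printf("draining pcp lists, retrying\n");
}

/* stand-in for test_pages_isolated() over the whole range */
static bool range_isolated(void)
{
	for (int i = 0; i < NR_PAGES; i++)
		if (!page_isolated[i])
			return false;
	return true;
}

int main(void)
{
	int ret;

	do {
		ret = isolate_pass();
		if (ret)	/* raced with pcp frees: drain and retry */
			drain_all();
	} while (!range_isolated());

	printf("Offlined Pages %d\n", NR_PAGES);
	return 0;
}

Built with e.g. "gcc -Wall model.c", it drains once, retries, and then
reports "Offlined Pages 8", mirroring how the merged loop only exits once
test_pages_isolated() succeeds for the whole range.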