> Am 03.09.2020 um 23:58 schrieb Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>: > > On Wed, 19 Aug 2020 19:59:50 +0200 David Hildenbrand <david@xxxxxxxxxx> wrote: > >> We make sure that we cannot have any memory holes right at the beginning >> of offline_pages(). We no longer need walk_system_ram_range() and can >> call test_pages_isolated() and __offline_isolated_pages() directly. >> >> offlined_pages always corresponds to nr_pages, so we can simplify that. > > This patch ran afoul of Pavel's "mm/memory_hotplug: drain per-cpu pages > again during memory offline", here: > >> @@ -1481,7 +1459,7 @@ static int count_system_ram_pages_cb(unsigned long start_pfn, >> int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) >> { >> const unsigned long end_pfn = start_pfn + nr_pages; >> - unsigned long pfn, system_ram_pages = 0, offlined_pages = 0; >> + unsigned long pfn, system_ram_pages = 0; >> int ret, node, nr_isolate_pageblock; >> unsigned long flags; >> struct zone *zone; >> @@ -1579,16 +1557,12 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) >> reason = "failure to dissolve huge pages"; >> goto failed_removal_isolated; >> } >> - /* check again */ >> - ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, >> - NULL, check_pages_isolated_cb); >> - } while (ret); >> - >> - /* Ok, all of our target is isolated. >> - We cannot do rollback at this point. */ >> - walk_system_ram_range(start_pfn, end_pfn - start_pfn, >> - &offlined_pages, offline_isolated_pages_cb); >> - pr_info("Offlined Pages %ld\n", offlined_pages); >> + } while (test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE)); >> + >> + /* Mark all sections offline and remove free pages from the buddy. */ >> + __offline_isolated_pages(start_pfn, end_pfn); >> + pr_info("Offlined Pages %ld\n", nr_pages); >> + >> /* >> * Onlining will reset pagetype flags and makes migrate type > > I did this. Looks OK? 
> Reading on my smartphone, it looks like you squashed both patches? > From: David Hildenbrand <david@xxxxxxxxxx> > Subject: mm/memory_hotplug: simplify page offlining > > We make sure that we cannot have any memory holes right at the beginning > of offline_pages(). We no longer need walk_system_ram_range() and can > call test_pages_isolated() and __offline_isolated_pages() directly. > > offlined_pages always corresponds to nr_pages, so we can simplify that. > > Link: https://lkml.kernel.org/r/20200819175957.28465-4-david@xxxxxxxxxx > Signed-off-by: David Hildenbrand <david@xxxxxxxxxx> > Acked-by: Michal Hocko <mhocko@xxxxxxxx> > Reviewed-by: Oscar Salvador <osalvador@xxxxxxx> > Cc: Wei Yang <richard.weiyang@xxxxxxxxxxxxxxxxx> > Cc: Baoquan He <bhe@xxxxxxxxxx> > Cc: Pankaj Gupta <pankaj.gupta.linux@xxxxxxxxx> > Cc: Charan Teja Reddy <charante@xxxxxxxxxxxxxx> > Cc: Dan Williams <dan.j.williams@xxxxxxxxx> > Cc: Fenghua Yu <fenghua.yu@xxxxxxxxx> > Cc: Logan Gunthorpe <logang@xxxxxxxxxxxx> > Cc: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx> > Cc: Mel Gorman <mgorman@xxxxxxx> > Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> > Cc: Michel Lespinasse <walken@xxxxxxxxxx> > Cc: Mike Rapoport <rppt@xxxxxxxxxx> > Cc: Tony Luck <tony.luck@xxxxxxxxx> > Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> > --- > > mm/memory_hotplug.c | 61 +++++++++++++++++------------------------- > 1 file changed, 25 insertions(+), 36 deletions(-) > > --- a/mm/memory_hotplug.c~mm-memory_hotplug-simplify-page-offlining > +++ a/mm/memory_hotplug.c > @@ -1383,28 +1383,6 @@ do_migrate_range(unsigned long start_pfn > return ret; > } > > -/* Mark all sections offline and remove all free pages from the buddy. 
*/ > -static int > -offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages, > - void *data) > -{ > - unsigned long *offlined_pages = (unsigned long *)data; > - > - *offlined_pages += __offline_isolated_pages(start, start + nr_pages); > - return 0; > -} > - > -/* > - * Check all pages in range, recorded as memory resource, are isolated. > - */ > -static int > -check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages, > - void *data) > -{ > - return test_pages_isolated(start_pfn, start_pfn + nr_pages, > - MEMORY_OFFLINE); > -} > - > static int __init cmdline_parse_movable_node(char *p) > { > movable_node_enabled = true; > @@ -1491,7 +1469,7 @@ static int count_system_ram_pages_cb(uns > int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) > { > const unsigned long end_pfn = start_pfn + nr_pages; > - unsigned long pfn, system_ram_pages = 0, offlined_pages = 0; > + unsigned long pfn, system_ram_pages = 0; > int ret, node, nr_isolate_pageblock; > unsigned long flags; > struct zone *zone; > @@ -1589,16 +1567,27 @@ int __ref offline_pages(unsigned long st > reason = "failure to dissolve huge pages"; > goto failed_removal_isolated; > } > - /* check again */ > - ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, > - NULL, check_pages_isolated_cb); > - } while (ret); > - > - /* Ok, all of our target is isolated. > - We cannot do rollback at this point. */ > - walk_system_ram_range(start_pfn, end_pfn - start_pfn, > - &offlined_pages, offline_isolated_pages_cb); > - pr_info("Offlined Pages %ld\n", offlined_pages); > + > + /* > + * per-cpu pages are drained in start_isolate_page_range, but if > + * there are still pages that are not free, make sure that we > + * drain again, because when we isolated range we might > + * have raced with another thread that was adding pages to pcp > + * list. 
> + * > + * Forward progress should be still guaranteed because > + * pages on the pcp list can only belong to MOVABLE_ZONE > + * because has_unmovable_pages explicitly checks for > + * PageBuddy on freed pages on other zones. > + */ > + if (ret) > + drain_all_pages(zone); > + } while (test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE)); I think we have to do ret = test_pages_isolated() if (ret) ... } while (ret); So we keep the old code flow. I cannot resend before next Tuesday. > + > + /* Mark all sections offline and remove free pages from the buddy. */ > + __offline_isolated_pages(start_pfn, end_pfn); > + pr_info("Offlined Pages %ld\n", nr_pages); > + > /* > * Onlining will reset pagetype flags and makes migrate type > * MOVABLE, so just need to decrease the number of isolated > @@ -1609,11 +1598,11 @@ int __ref offline_pages(unsigned long st > spin_unlock_irqrestore(&zone->lock, flags); > > /* removal success */ > - adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages); > - zone->present_pages -= offlined_pages; > + adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages); > + zone->present_pages -= nr_pages; > > pgdat_resize_lock(zone->zone_pgdat, &flags); > - zone->zone_pgdat->node_present_pages -= offlined_pages; > + zone->zone_pgdat->node_present_pages -= nr_pages; > pgdat_resize_unlock(zone->zone_pgdat, &flags); > > init_per_zone_wmark_min(); > _ >