Reading a sysfs "memoryN/valid_zones" file leads to the following oops when the first page of a range is not backed by struct page. show_valid_zones() assumes that 'start_pfn' is always valid for page_zone(). BUG: unable to handle kernel paging request at ffffea017a000000 IP: show_valid_zones+0x6f/0x160 This issue may happen on x86-64 systems with 64GiB or more memory since their memory block size is bumped up to 2GiB. [1] An example of such systems is desribed below. 0x3240000000 is only aligned by 1GiB and this memory block starts from 0x3200000000, which is not backed by struct page. BIOS-e820: [mem 0x0000003240000000-0x000000603fffffff] usable Since test_pages_in_a_zone() already checks holes, fix this issue by extending this function to return 'valid_start' and 'valid_end' for a given range. show_valid_zones() then proceeds with the valid range. [1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems")' Link: http://lkml.kernel.org/r/20170127222149.30893-3-toshi.kani@xxxxxxx Signed-off-by: Toshi Kani <toshi.kani@xxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: Zhang Zhen <zhenzhang.zhang@xxxxxxxxxx> Cc: Reza Arbab <arbab@xxxxxxxxxxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: <stable@xxxxxxxxxxxxxxx> [4.4+] Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> --- This patch backports commit a96dfddbcc to 4.4. This is 2/2 of patch-set. The 1/2 patch is commit deb88a2a19e that applies/has applied cleanly to 4.4. Link: http://www.spinics.net/lists/stable/msg158242.html --- drivers/base/memory.c | 11 +++++------ include/linux/memory_hotplug.h | 3 ++- mm/memory_hotplug.c | 20 +++++++++++++++----- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 213456c..3805279 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -388,30 +388,29 @@ static ssize_t show_valid_zones(struct device *dev, { struct memory_block *mem = to_memory_block(dev); unsigned long start_pfn, end_pfn; + unsigned long valid_start, valid_end; unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - struct page *first_page; struct zone *zone; start_pfn = section_nr_to_pfn(mem->start_section_nr); end_pfn = start_pfn + nr_pages; - first_page = pfn_to_page(start_pfn); /* The block contains more than one zone can not be offlined. */ - if (!test_pages_in_a_zone(start_pfn, end_pfn)) + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) return sprintf(buf, "none\n"); - zone = page_zone(first_page); + zone = page_zone(pfn_to_page(valid_start)); if (zone_idx(zone) == ZONE_MOVABLE - 1) { /*The mem block is the last memoryblock of this zone.*/ - if (end_pfn == zone_end_pfn(zone)) + if (valid_end == zone_end_pfn(zone)) return sprintf(buf, "%s %s\n", zone->name, (zone + 1)->name); } if (zone_idx(zone) == ZONE_MOVABLE) { /*The mem block is the first memoryblock of ZONE_MOVABLE.*/ - if (start_pfn == zone->zone_start_pfn) + if (valid_start == zone->zone_start_pfn) return sprintf(buf, "%s %s\n", zone->name, (zone - 1)->name); } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4340599..04f5b7a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -85,7 +85,8 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); /* VM interface that may be used by firmware interface */ extern int online_pages(unsigned long, unsigned long, int); -extern int test_pages_in_a_zone(unsigned long, unsigned long); +extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end); extern void __offline_isolated_pages(unsigned long, unsigned long); typedef void (*online_page_callback_t)(struct page *page); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fb9fed4..dd74825 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1402,10 +1402,13 @@ int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) /* * Confirm all pages in a range [start, end) belong to the same zone. + * When true, return its valid [start, end). */ -int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) +int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end) { unsigned long pfn, sec_end_pfn; + unsigned long start, end; struct zone *zone = NULL; struct page *page; int i; @@ -1427,14 +1430,20 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; + if (!zone) + start = pfn + i; zone = page_zone(page); + end = pfn + MAX_ORDER_NR_PAGES; } } - if (zone) + if (zone) { + *valid_start = start; + *valid_end = end; return 1; - else + } else { return 0; + } } /* @@ -1752,6 +1761,7 @@ static int __ref __offline_pages(unsigned long start_pfn, long offlined_pages; int ret, drain, retry_max, node; unsigned long flags; + unsigned long valid_start, valid_end; struct zone *zone; struct memory_notify arg; @@ -1762,10 +1772,10 @@ static int __ref __offline_pages(unsigned long start_pfn, return -EINVAL; /* This makes hotplug much easier...and readable. we assume this for now. .*/ - if (!test_pages_in_a_zone(start_pfn, end_pfn)) + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) return -EINVAL; - zone = page_zone(pfn_to_page(start_pfn)); + zone = page_zone(pfn_to_page(valid_start)); node = zone_to_nid(zone); nr_pages = end_pfn - start_pfn; -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html