The patch titled
     Subject: fs/proc/page.c: allow inspection of last section and fix end detection
has been added to the -mm tree.  Its filename is
     fs-proc-pagec-allow-inspection-of-last-section-and-fix-end-detection.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/fs-proc-pagec-allow-inspection-of-last-section-and-fix-end-detection.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/fs-proc-pagec-allow-inspection-of-last-section-and-fix-end-detection.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: David Hildenbrand <david@xxxxxxxxxx>
Subject: fs/proc/page.c: allow inspection of last section and fix end detection

If max_pfn does not fall onto a section boundary, it is possible to
inspect PFNs up to max_pfn, and PFNs above max_pfn; max_pfn itself,
however, can't be inspected.  We can have a valid (and online) memmap at
and above max_pfn if max_pfn is not aligned to a section boundary.  The
whole early section has a memmap and is marked online.  Being able to
inspect the state of these PFNs is valuable for debugging, especially
because max_pfn can change on memory hotplug and expose these memmaps.

Also, querying page flags via "./page-types -r -a 0x144001,"
(tools/vm/page-types.c) inside an x86-64 guest with 4160MB under QEMU
results in an (almost) endless loop in user space, because the end is not
detected properly when starting after max_pfn.

Instead, let's allow inspecting all pages in the highest section and
return 0 directly if we try to access pages above that section.

While at it, check the count before adjusting it, to avoid masking user
errors.
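
For reference, here is a minimal, untested user-space sketch (not part of
the patch) of how these files are consumed, in the spirit of
tools/vm/page-types.c: /proc/kpageflags is an array of u64 entries indexed
by PFN, and a read starting at or beyond the end is expected to return 0,
which is the EOF condition this change makes reliable for the last
section.  The PFN is just the example value from the report above, and
reading the file typically requires root.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned long pfn = 0x144001;	/* example PFN from the report */
	uint64_t flags;
	ssize_t ret;
	int fd = open("/proc/kpageflags", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Each entry is 8 bytes; read the entry for this PFN. */
	ret = pread(fd, &flags, sizeof(flags), pfn * sizeof(flags));
	if (ret == (ssize_t)sizeof(flags))
		printf("pfn %#lx flags %#llx\n", pfn,
		       (unsigned long long)flags);
	else if (ret == 0)
		printf("pfn %#lx is past the end of /proc/kpageflags\n", pfn);
	else
		perror("pread");

	close(fd);
	return 0;
}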

Link: http://lkml.kernel.org/r/20191211163201.17179-3-david@xxxxxxxxxx
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
Cc: Alexey Dobriyan <adobriyan@xxxxxxxxx>
Cc: Oscar Salvador <osalvador@xxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxxxx>
Cc: Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx>
Cc: Bob Picco <bob.picco@xxxxxxxxxx>
Cc: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Cc: Steven Sistare <steven.sistare@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 fs/proc/page.c |   30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

--- a/fs/proc/page.c~fs-proc-pagec-allow-inspection-of-last-section-and-fix-end-detection
+++ a/fs/proc/page.c
@@ -21,6 +21,21 @@
 #define KPMMASK (KPMSIZE - 1)
 #define KPMBITS (KPMSIZE * BITS_PER_BYTE)
 
+static inline unsigned long get_max_dump_pfn(void)
+{
+#ifdef CONFIG_SPARSEMEM
+	/*
+	 * The memmap of early sections is completely populated and marked
+	 * online even if max_pfn does not fall on a section boundary -
+	 * pfn_to_online_page() will succeed on all pages. Allow inspecting
+	 * these memmaps.
+	 */
+	return round_up(max_pfn, PAGES_PER_SECTION);
+#else
+	return max_pfn;
+#endif
+}
+
 /* /proc/kpagecount - an array exposing page counts
  *
  * Each entry is a u64 representing the corresponding
@@ -29,6 +44,7 @@
 static ssize_t kpagecount_read(struct file *file, char __user *buf,
 			     size_t count, loff_t *ppos)
 {
+	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
 	struct page *ppage;
 	unsigned long src = *ppos;
@@ -37,9 +53,11 @@ static ssize_t kpagecount_read(struct fi
 	u64 pcount;
 
 	pfn = src / KPMSIZE;
-	count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
 	if (src & KPMMASK || count & KPMMASK)
 		return -EINVAL;
+	if (src >= max_dump_pfn * KPMSIZE)
+		return 0;
+	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
 		/*
@@ -208,6 +226,7 @@ u64 stable_page_flags(struct page *page)
 static ssize_t kpageflags_read(struct file *file, char __user *buf,
 			     size_t count, loff_t *ppos)
 {
+	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
 	struct page *ppage;
 	unsigned long src = *ppos;
@@ -215,9 +234,11 @@ static ssize_t kpageflags_read(struct fi
 	ssize_t ret = 0;
 
 	pfn = src / KPMSIZE;
-	count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
 	if (src & KPMMASK || count & KPMMASK)
 		return -EINVAL;
+	if (src >= max_dump_pfn * KPMSIZE)
+		return 0;
+	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
 		/*
@@ -253,6 +274,7 @@ static const struct file_operations proc
 static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 			     size_t count, loff_t *ppos)
 {
+	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
 	struct page *ppage;
 	unsigned long src = *ppos;
@@ -261,9 +283,11 @@ static ssize_t kpagecgroup_read(struct f
 	u64 ino;
 
 	pfn = src / KPMSIZE;
-	count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
 	if (src & KPMMASK || count & KPMMASK)
 		return -EINVAL;
+	if (src >= max_dump_pfn * KPMSIZE)
+		return 0;
+	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
 		/*
_

Patches currently in -mm which might be from david@xxxxxxxxxx are

mm-fix-uninitialized-memmaps-on-a-partially-populated-last-section.patch
fs-proc-pagec-allow-inspection-of-last-section-and-fix-end-detection.patch
mm-initialize-memmap-of-unavailable-memory-directly.patch
mm-memory_hotplug-shrink-zones-when-offlining-memory.patch
mm-memory_hotplug-poison-memmap-in-remove_pfn_range_from_zone.patch
mm-memory_hotplug-we-always-have-a-zone-in-find_smallestbiggest_section_pfn.patch
mm-memory_hotplug-dont-check-for-all-holes-in-shrink_zone_span.patch
mm-memory_hotplug-drop-local-variables-in-shrink_zone_span.patch
mm-memory_hotplug-cleanup-__remove_pages.patch