The patch titled
     Subject: mm/hwpoison: introduce per-memory_block hwpoison counter
has been added to the -mm mm-unstable branch.  Its filename is
     mm-hwpoison-introduce-per-memory_block-hwpoison-counter.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-hwpoison-introduce-per-memory_block-hwpoison-counter.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
Subject: mm/hwpoison: introduce per-memory_block hwpoison counter
Date: Wed, 21 Sep 2022 18:13:59 +0900

Currently the PageHWPoison flag does not behave well when experiencing
memory hotremove/hotplug.  Any data field in struct page is unreliable
when the associated memory is offlined, and the current mechanism can't
tell whether a memory section is onlined because a new memory device is
installed or because previous failed offline operations are undone.
Especially if there's hwpoisoned memory, it's unclear what the best
option is.

So introduce a new mechanism to make struct memory_block remember that a
memory block has hwpoisoned memory inside it.  And make any online event
fail if the onlined memory block contains hwpoison.  struct memory_block
is freed and reallocated over ACPI-based hotremove/hotplug, but not over
sysfs-based hotremove/hotplug.  So it's desirable to implement the
hwpoison counter on this struct.

Note that clear_hwpoisoned_pages() is relocated to be called earlier than
it is now, just before unregistering struct memory_block.  Otherwise, the
per-memory_block hwpoison counter is freed and we fail to adjust the
global hwpoison counter properly.
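
To make the bookkeeping concrete, here is a minimal, self-contained
userspace C sketch of the scheme described above.  It is illustrative
only: the block size, the four-block array, and the helper names
(block_for_pfn, nr_poison_inc, block_online) are assumptions made for
this sketch; only the nr_hwpoison counter in struct memory_block, the
pfn-to-block mapping, and the -EHWPOISON refusal mirror the patch.

#include <stdatomic.h>
#include <stdio.h>

#define EHWPOISON	133	/* Linux errno: memory page has hardware error */
#define PAGES_PER_BLOCK	32768UL	/* assume 128MiB blocks of 4KiB pages */
#define NR_BLOCKS	4	/* toy "machine" with four memory blocks */

struct memory_block {
	atomic_long nr_hwpoison;	/* persists across offline/online */
};

static struct memory_block blocks[NR_BLOCKS];

static struct memory_block *block_for_pfn(unsigned long pfn)
{
	unsigned long block_id = pfn / PAGES_PER_BLOCK;

	return block_id < NR_BLOCKS ? &blocks[block_id] : NULL;
}

/* counterpart of memblk_nr_poison_inc(): record a poisoned page */
static void nr_poison_inc(unsigned long pfn)
{
	struct memory_block *mem = block_for_pfn(pfn);

	if (mem)
		atomic_fetch_add(&mem->nr_hwpoison, 1);
}

/* counterpart of the new check at the top of memory_block_online() */
static int block_online(struct memory_block *mem)
{
	if (atomic_load(&mem->nr_hwpoison))
		return -EHWPOISON;	/* refuse to online a poisoned block */
	return 0;			/* onlining would proceed from here */
}

int main(void)
{
	nr_poison_inc(40000);	/* pfn 40000 falls in block 1 */

	printf("online block 0: %d\n", block_online(&blocks[0])); /* 0 */
	printf("online block 1: %d\n", block_online(&blocks[1])); /* -133 */
	return 0;
}

Because the counter lives in struct memory_block, it survives sysfs-based
offline/online cycles; on ACPI-based hotremove the structure is freed,
which is why the patch drains the counter into the global one via
clear_hwpoisoned_pages() just before unregistering the block.
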
Link: https://lkml.kernel.org/r/20220921091359.25889-5-naoya.horiguchi@xxxxxxxxx
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Jane Chu <jane.chu@xxxxxxxxxx>
Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx>
Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Muchun Song <songmuchun@xxxxxxxxxxxxx>
Cc: Oscar Salvador <osalvador@xxxxxxx>
Cc: Yang Shi <shy828301@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

--- a/drivers/base/memory.c~mm-hwpoison-introduce-per-memory_block-hwpoison-counter
+++ a/drivers/base/memory.c
@@ -183,6 +183,9 @@ static int memory_block_online(struct me
 	struct zone *zone;
 	int ret;
 
+	if (atomic_long_read(&mem->nr_hwpoison))
+		return -EHWPOISON;
+
 	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
 				  start_pfn, nr_pages);
 
@@ -864,6 +867,7 @@ void remove_memory_block_devices(unsigne
 		mem = find_memory_block_by_id(block_id);
 		if (WARN_ON_ONCE(!mem))
 			continue;
+		clear_hwpoisoned_pages(atomic_long_read(&mem->nr_hwpoison));
 		unregister_memory_block_under_nodes(mem);
 		remove_memory_block(mem);
 	}
@@ -1164,3 +1168,35 @@ int walk_dynamic_memory_groups(int nid,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+
+void memblk_nr_poison_inc(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_inc(&mem->nr_hwpoison);
+}
+
+void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_sub(i, &mem->nr_hwpoison);
+}
+
+unsigned long memblk_nr_poison(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		return atomic_long_read(&mem->nr_hwpoison);
+	return 0;
+}
+
+#endif
--- a/include/linux/memory.h~mm-hwpoison-introduce-per-memory_block-hwpoison-counter
+++ a/include/linux/memory.h
@@ -85,6 +85,9 @@ struct memory_block {
 	unsigned long nr_vmemmap_pages;
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
+#ifdef CONFIG_MEMORY_FAILURE
+	atomic_long_t nr_hwpoison;
+#endif
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
--- a/include/linux/mm.h~mm-hwpoison-introduce-per-memory_block-hwpoison-counter
+++ a/include/linux/mm.h
@@ -3280,6 +3280,10 @@ extern int soft_offline_page(unsigned lo
 #ifdef CONFIG_MEMORY_FAILURE
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
 extern void num_poisoned_pages_inc(unsigned long pfn);
+extern void memblk_nr_poison_inc(unsigned long pfn);
+extern void memblk_nr_poison_sub(unsigned long pfn, long i);
+extern unsigned long memblk_nr_poison(unsigned long pfn);
+extern void clear_hwpoisoned_pages(long nr_poison);
 #else
 static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 {
@@ -3289,6 +3293,10 @@ static inline int __get_huge_page_for_hw
 static inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 }
+
+static inline void clear_hwpoisoned_pages(long nr_poison)
+{
+}
 #endif
 
 #ifndef arch_memory_failure
--- a/mm/internal.h~mm-hwpoison-introduce-per-memory_block-hwpoison-counter
+++ a/mm/internal.h
@@ -708,14 +708,6 @@ extern u64 hwpoison_filter_flags_value;
 extern u64 hwpoison_filter_memcg;
 extern u32 hwpoison_filter_enable;
 
-#ifdef CONFIG_MEMORY_FAILURE
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages);
-#else
-static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
-{
-}
-#endif
-
 extern unsigned long  __must_check vm_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
--- a/mm/memory-failure.c~mm-hwpoison-introduce-per-memory_block-hwpoison-counter
+++ a/mm/memory-failure.c
@@ -74,14 +74,17 @@ atomic_long_t num_poisoned_pages __read_
 
 static bool hw_memory_failure __read_mostly = false;
 
-static inline void num_poisoned_pages_inc(unsigned long pfn)
+void num_poisoned_pages_inc(unsigned long pfn)
 {
 	atomic_long_inc(&num_poisoned_pages);
+	memblk_nr_poison_inc(pfn);
 }
 
 static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
 {
 	atomic_long_sub(i, &num_poisoned_pages);
+	if (pfn != -1UL)
+		memblk_nr_poison_sub(pfn, i);
 }
 
 /*
@@ -2414,6 +2417,10 @@ int unpoison_memory(unsigned long pfn)
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
 	if (!ret || freeit) {
+		/*
+		 * TODO: per-memory_block counter might break when the page
+		 * size to be unpoisoned is larger than a memory_block.
+		 */
 		num_poisoned_pages_sub(pfn, count);
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 page_to_pfn(p), &unpoison_rs);
@@ -2618,25 +2625,7 @@ retry:
 	return ret;
 }
 
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+void clear_hwpoisoned_pages(long nr_poison)
 {
-	int i, total = 0;
-
-	/*
-	 * A further optimization is to have per section refcounted
-	 * num_poisoned_pages.  But that would need more space per memmap, so
-	 * for now just do a quick global check to speed up this routine in the
-	 * absence of bad pages.
-	 */
-	if (atomic_long_read(&num_poisoned_pages) == 0)
-		return;
-
-	for (i = 0; i < nr_pages; i++) {
-		if (PageHWPoison(&memmap[i])) {
-			total++;
-			ClearPageHWPoison(&memmap[i]);
-		}
-	}
-	if (total)
-		num_poisoned_pages_sub(total);
+	num_poisoned_pages_sub(-1UL, nr_poison);
 }
--- a/mm/sparse.c~mm-hwpoison-introduce-per-memory_block-hwpoison-counter
+++ a/mm/sparse.c
@@ -926,8 +926,6 @@ void sparse_remove_section(struct mem_se
 		unsigned long nr_pages, unsigned long map_offset,
 		struct vmem_altmap *altmap)
 {
-	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
-			nr_pages - map_offset);
 	section_deactivate(pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
_

Patches currently in -mm which might be from naoya.horiguchi@xxxxxxx are

mm-huge_memory-use-pfn_to_online_page-in-split_huge_pages_all.patch
mmhwpoisonhugetlbmemory_hotplug-hotremove-memory-section-with-hwpoisoned-hugepage.patch
mm-hwpoison-move-definitions-of-num_poisoned_pages_-to-memory-failurec.patch
mm-hwpoison-pass-pfn-to-num_poisoned_pages_.patch
mm-hwpoison-introduce-per-memory_block-hwpoison-counter.patch