Call ->memory_failure(), which is implemented by the pmem driver, so that the filesystem is finally notified to handle the corrupted data. The old code that collected and killed processes is moved into mf_dax_mapping_kill_procs(), which will be called by the filesystem. Signed-off-by: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxxxxx> --- drivers/nvdimm/pmem.c | 24 +++++++++++++++++++++ mm/memory-failure.c | 50 +++++-------------------------------------- 2 files changed, 29 insertions(+), 45 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 875076b0ea6c..4a114937c43b 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -363,9 +363,33 @@ static void pmem_release_disk(void *__pmem) put_disk(pmem->disk); } +static int pmem_pagemap_memory_failure(struct dev_pagemap *pgmap, + unsigned long pfn, int flags) +{ + struct pmem_device *pdev; + struct gendisk *disk; + loff_t disk_offset; + int rc = 0; + unsigned long size = page_size(pfn_to_page(pfn)); + + pdev = container_of(pgmap, struct pmem_device, pgmap); + disk = pdev->disk; + if (!disk) + return -ENXIO; + + disk_offset = PFN_PHYS(pfn) - pdev->phys_addr - pdev->data_offset; + if (disk->fops->corrupted_range) { + rc = disk->fops->corrupted_range(disk, NULL, disk_offset, size, &flags); + if (rc == -ENODEV) + rc = -ENXIO; + } + return rc; +} + static const struct dev_pagemap_ops fsdax_pagemap_ops = { .kill = pmem_pagemap_kill, .cleanup = pmem_pagemap_cleanup, + .memory_failure = pmem_pagemap_memory_failure, }; static int pmem_attach_disk(struct device *dev, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 37bc6e2a9564..0109ad607fb8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1269,28 +1269,11 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, struct dev_pagemap *pgmap) { struct page *page = pfn_to_page(pfn); - const bool unmap_success = true; - unsigned long size = 0; - struct to_kill *tk; - LIST_HEAD(to_kill); int rc = -EBUSY; - loff_t start; - dax_entry_t 
cookie; - - /* - * Prevent the inode from being freed while we are interrogating - * the address_space, typically this would be handled by - * lock_page(), but dax pages do not use the page lock. This - * also prevents changes to the mapping of this pfn until - * poison signaling is complete. - */ - cookie = dax_lock_page(page); - if (!cookie) - goto out; if (hwpoison_filter(page)) { rc = 0; - goto unlock; + goto out; } if (pgmap->type == MEMORY_DEVICE_PRIVATE) { @@ -1298,7 +1281,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, * TODO: Handle HMM pages which may need coordination * with device-side memory. */ - goto unlock; + goto out; } /* @@ -1307,33 +1290,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, */ SetPageHWPoison(page); - /* - * Unlike System-RAM there is no possibility to swap in a - * different physical page at a given virtual address, so all - * userspace consumption of ZONE_DEVICE memory necessitates - * SIGBUS (i.e. MF_MUST_KILL) - */ - flags |= MF_ACTION_REQUIRED | MF_MUST_KILL; - collect_procs_file(page, page->mapping, page->index, &to_kill, - flags & MF_ACTION_REQUIRED); + /* call driver to handle the memory failure */ + if (pgmap->ops->memory_failure) + rc = pgmap->ops->memory_failure(pgmap, pfn, flags); - list_for_each_entry(tk, &to_kill, nd) - if (tk->size_shift) - size = max(size, 1UL << tk->size_shift); - if (size) { - /* - * Unmap the largest mapping to avoid breaking up - * device-dax mappings which are constant size. The - * actual size of the mapping being torn down is - * communicated in siginfo, see kill_proc() - */ - start = (page->index << PAGE_SHIFT) & ~(size - 1); - unmap_mapping_range(page->mapping, start, start + size, 0); - } - kill_procs(&to_kill, flags & MF_MUST_KILL, !unmap_success, pfn, flags); - rc = 0; -unlock: - dax_unlock_page(page, cookie); out: /* drop pgmap ref acquired in caller */ put_dev_pagemap(pgmap); -- 2.29.2