[PATCH 3/6] mm/memory-failure.c: add code to resolve quasi-hwpoisoned page

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch introduces three ways to resolve quasi-hwpoisoned pages:
 1. unpoison: this is a test feature, but if users accept the data loss (and
    continue by rereading the old data from storage), this could be tolerable.
 2. truncate: if discarding a part of a file which includes a memory error
    is OK for your applications, this could be reasonable too.
 3. full page overwrite: if your application is prepared for dirty pagecache
    errors and has a copy of the data (or can recreate the proper data),
    it can overwrite the page-sized address range containing the error
    and continue to run without caring about the error.

Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
---
 include/linux/pagemap.h | 16 +++++++++++++
 mm/filemap.c            | 14 ++++++++---
 mm/memory-failure.c     | 62 ++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/truncate.c           |  7 ++++++
 4 files changed, 95 insertions(+), 4 deletions(-)

diff --git v3.14-rc6.orig/include/linux/pagemap.h v3.14-rc6/include/linux/pagemap.h
index 5e234d0d0baf..715962f7ea7a 100644
--- v3.14-rc6.orig/include/linux/pagemap.h
+++ v3.14-rc6/include/linux/pagemap.h
@@ -589,12 +589,28 @@ static inline int add_to_page_cache(struct page *page,
 #ifdef CONFIG_MEMORY_FAILURE
 bool mapping_hwpoisoned_range(struct address_space *mapping,
 				loff_t start_byte, loff_t end_byte);
+bool page_quasi_hwpoisoned(struct address_space *mapping, struct page *page);
+void hwpoison_resolve_pagecache_error(struct address_space *mapping,
+				struct page *page, bool free);
+bool hwpoison_partial_overwrite(struct address_space *mapping,
+				loff_t pos, size_t count);
 #else
 static inline bool mapping_hwpoisoned_range(struct address_space *mapping,
 				loff_t start_byte, loff_t end_byte)
 {
 	return false;
 }
+static inline bool page_quasi_hwpoisoned(struct address_space *mapping,
+					struct page *page)
+{
+	return false;
+}
+#define hwpoison_resolve_pagecache_error(mapping, page, free) do {} while (0)
+static inline bool hwpoison_partial_overwrite(struct address_space *mapping,
+				loff_t pos, size_t count)
+{
+	return false;
+}
 #endif /* CONFIG_MEMORY_FAILURE */
 
 #endif /* _LINUX_PAGEMAP_H */
diff --git v3.14-rc6.orig/mm/filemap.c v3.14-rc6/mm/filemap.c
index 887f2dfaf185..f58b36e313ad 100644
--- v3.14-rc6.orig/mm/filemap.c
+++ v3.14-rc6/mm/filemap.c
@@ -2110,8 +2110,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
         if (unlikely(*pos < 0))
                 return -EINVAL;
 
-	if (unlikely(mapping_hwpoisoned_range(file->f_mapping, *pos,
-					      *pos + *count)))
+	if (unlikely(hwpoison_partial_overwrite(file->f_mapping, *pos, *count)))
 		return -EHWPOISON;
 
 	if (!isblk) {
@@ -2222,7 +2221,13 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
 
 	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-	if (written)
+	/*
+	 * When the write range includes a hwpoisoned region (in which case
+	 * written is -EHWPOISON), we already confirmed in
+	 * generic_write_checks() that it's a full page overwrite, so we can
+	 * safely invalidate the error and the write doesn't have to fail.
+	 */
+	if (written && written != -EHWPOISON)
 		goto out;
 
 	/*
@@ -2362,6 +2367,9 @@ static ssize_t generic_perform_write(struct file *file,
 		if (mapping_writably_mapped(mapping))
 			flush_dcache_page(page);
 
+		if (page_quasi_hwpoisoned(mapping, page))
+			hwpoison_resolve_pagecache_error(mapping, page, false);
+
 		pagefault_disable();
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 		pagefault_enable();
diff --git v3.14-rc6.orig/mm/memory-failure.c v3.14-rc6/mm/memory-failure.c
index 34f2c046af22..0eca5449d251 100644
--- v3.14-rc6.orig/mm/memory-failure.c
+++ v3.14-rc6/mm/memory-failure.c
@@ -665,6 +665,57 @@ static void clear_pagecache_tag_hwpoison(struct address_space *mapping,
 	spin_unlock_irq(&mapping->tree_lock);
 }
 
+inline bool page_quasi_hwpoisoned(struct address_space *mapping,
+					struct page *page)
+{
+	if (!sysctl_memory_failure_recovery)
+		return false;
+	return unlikely(get_pagecache_tag_hwpoison(mapping, page_index(page)));
+}
+
+/*
+ * This function clears a quasi-hwpoisoned page and turns it into a normal
+ * LRU page. Callers should check that @page is really quasi-hwpoisoned,
+ * and must not call this for real error pages.
+ */
+void hwpoison_resolve_pagecache_error(struct address_space *mapping,
+				      struct page *page, bool free)
+{
+	VM_BUG_ON(PageLRU(page));
+	VM_BUG_ON(!PageLocked(page));
+
+	ClearPageHWPoison(page);
+	clear_pagecache_tag_hwpoison(mapping, page_index(page));
+	dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page));
+	putback_lru_page(page);
+	if (free) {
+		lru_add_drain_all();
+		delete_from_page_cache(page);
+	}
+	iput(mapping->host);
+}
+
+/*
+ * Return true if a given range [pos, pos+count) *partially* overlaps a
+ * hwpoisoned page. Only the two boundary pages of the range are checked.
+ */
+bool hwpoison_partial_overwrite(struct address_space *mapping,
+				loff_t pos, size_t count)
+{
+	if (!sysctl_memory_failure_recovery)
+		return false;
+	if (!mapping_hwpoisoned_range(mapping, pos, pos + count))
+		return false;
+
+	if (!PAGE_ALIGNED(pos) &&
+	    get_pagecache_tag_hwpoison(mapping, pos >> PAGE_SHIFT))
+		return true;
+	if (!PAGE_ALIGNED(pos + count) &&
+	    get_pagecache_tag_hwpoison(mapping, (pos + count) >> PAGE_SHIFT))
+		return true;
+	return false;
+}
+
 /*
  * Dirty pagecache page
  *
@@ -691,7 +742,10 @@ static void clear_pagecache_tag_hwpoison(struct address_space *mapping,
  *
  * This quasi-hwpoisoned page works to keep reporting the error for all
  * processes which try to access to the error address until it is resolved
- * or the system reboots.
+ * or the system reboots. Quasi-hwpoisoned pages can be resolved by unpoison,
+ * truncate, and full page overwrite. With a full page overwrite, the quasi-
+ * hwpoisoned page safely turns into a normal LRU page, so we expect
+ * userspace to do this, if possible, when it receives the error report.
  *
  * Issues: when the error hit a hole page the error is not properly
  * propagated.
@@ -1496,12 +1550,18 @@ int unpoison_memory(unsigned long pfn)
 	 * the free buddy page pool.
 	 */
 	if (TestClearPageHWPoison(page)) {
+		struct address_space *mapping = page_mapping(page);
+		if (mapping && page_quasi_hwpoisoned(mapping, page)) {
+			hwpoison_resolve_pagecache_error(mapping, page, true);
+			goto unlock;
+		}
 		pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
 		atomic_long_sub(nr_pages, &num_poisoned_pages);
 		freeit = 1;
 		if (PageHuge(page))
 			clear_page_hwpoison_huge_page(page);
 	}
+unlock:
 	unlock_page(page);
 
 	put_page(page);
diff --git v3.14-rc6.orig/mm/truncate.c v3.14-rc6/mm/truncate.c
index 353b683afd6e..92d7097dfc6d 100644
--- v3.14-rc6.orig/mm/truncate.c
+++ v3.14-rc6/mm/truncate.c
@@ -103,6 +103,10 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 	cancel_dirty_page(page, PAGE_CACHE_SIZE);
 
 	ClearPageMappedToDisk(page);
+
+	if (page_quasi_hwpoisoned(mapping, page))
+		hwpoison_resolve_pagecache_error(mapping, page, false);
+
 	delete_from_page_cache(page);
 	return 0;
 }
@@ -439,6 +443,9 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
 		return 0;
 
+	if (page_quasi_hwpoisoned(mapping, page))
+		hwpoison_resolve_pagecache_error(mapping, page, false);
+
 	spin_lock_irq(&mapping->tree_lock);
 	if (PageDirty(page))
 		goto failed;
-- 
1.8.5.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]