Re: [v2 UPDATE PATCH] mm: shmem: don't truncate page if memory failure happens

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Linus,

Does this version look good to you? Would you please consider to apply
this to -rc2? Or you prefer to get it from Andrew?

Thanks,
Yang

On Tue, Nov 16, 2021 at 11:32 AM Yang Shi <shy828301@xxxxxxxxx> wrote:
>
> The current behavior of memory failure is to truncate the page cache
> regardless of dirty or clean.  If the page is dirty the later access
> will get the obsolete data from disk without any notification to the
> users.  This may cause silent data loss.  It is even worse for shmem
> since shmem is in-memory filesystem, truncating page cache means
> discarding data blocks.  The later read would return all zero.
>
> The right approach is to keep the corrupted page in page cache, any
> later access would return error for syscalls or SIGBUS for page fault,
> until the file is truncated, hole punched or removed.  The regular
> storage backed filesystems would be more complicated so this patch is
> focused on shmem.  This also unblock the support for soft offlining
> shmem THP.
>
> [arnd@xxxxxxxx: fix uninitialized variable use in me_pagecache_clean()]
>   Link: https://lkml.kernel.org/r/20211022064748.4173718-1-arnd@xxxxxxxxxx
> [Fix invalid pointer dereference in shmem_read_mapping_page_gfp() with a
>  slight different implementation from what Ajay Garg <ajaygargnsit@xxxxxxxxx>
>  and Muchun Song <songmuchun@xxxxxxxxxxxxx> proposed and reworked the
>  error handling of shmem_write_begin() suggested by Linus]
>   Link: https://lore.kernel.org/linux-mm/20211111084617.6746-1-ajaygargnsit@xxxxxxxxx/
>
> Link: https://lkml.kernel.org/r/20211020210755.23964-6-shy828301@xxxxxxxxx
> Signed-off-by: Yang Shi <shy828301@xxxxxxxxx>
> Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx>
> Cc: Hugh Dickins <hughd@xxxxxxxxxx>
> Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
> Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
> Cc: Oscar Salvador <osalvador@xxxxxxx>
> Cc: Peter Xu <peterx@xxxxxxxxxx>
> Cc: Ajay Garg <ajaygargnsit@xxxxxxxxx>
> Cc: Muchun Song <songmuchun@xxxxxxxxxxxxx>
> ---
> v2: Incorporated the suggestions from Willy.
> v1: Fixed pointer dereference oops reported by Ajay Garg and reworked the
>     error handling of shmem_write_begin() suggested by Linus.
>
>  mm/memory-failure.c | 14 ++++++++++---
>  mm/shmem.c          | 51 +++++++++++++++++++++++++++++++++++++++------
>  mm/userfaultfd.c    |  5 +++++
>  3 files changed, 61 insertions(+), 9 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 07c875fdeaf0..f64ebb6226cb 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -58,6 +58,7 @@
>  #include <linux/ratelimit.h>
>  #include <linux/page-isolation.h>
>  #include <linux/pagewalk.h>
> +#include <linux/shmem_fs.h>
>  #include "internal.h"
>  #include "ras/ras_event.h"
>
> @@ -867,6 +868,7 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)
>  {
>         int ret;
>         struct address_space *mapping;
> +       bool extra_pins;
>
>         delete_from_lru_cache(p);
>
> @@ -895,18 +897,24 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)
>                 goto out;
>         }
>
> +       /*
> +        * The shmem page is kept in page cache instead of truncating
> +        * so is expected to have an extra refcount after error-handling.
> +        */
> +       extra_pins = shmem_mapping(mapping);
> +
>         /*
>          * Truncation is a bit tricky. Enable it per file system for now.
>          *
>          * Open: to take i_rwsem or not for this? Right now we don't.
>          */
>         ret = truncate_error_page(p, page_to_pfn(p), mapping);
> +       if (has_extra_refcount(ps, p, extra_pins))
> +               ret = MF_FAILED;
> +
>  out:
>         unlock_page(p);
>
> -       if (has_extra_refcount(ps, p, false))
> -               ret = MF_FAILED;
> -
>         return ret;
>  }
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index dc038ce78700..2ba64918132b 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -2456,6 +2456,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
>         struct inode *inode = mapping->host;
>         struct shmem_inode_info *info = SHMEM_I(inode);
>         pgoff_t index = pos >> PAGE_SHIFT;
> +       int ret = 0;
>
>         /* i_rwsem is held by caller */
>         if (unlikely(info->seals & (F_SEAL_GROW |
> @@ -2466,7 +2467,19 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
>                         return -EPERM;
>         }
>
> -       return shmem_getpage(inode, index, pagep, SGP_WRITE);
> +       ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
> +
> +       if (ret)
> +               return ret;
> +
> +       if (PageHWPoison(*pagep)) {
> +               unlock_page(*pagep);
> +               put_page(*pagep);
> +               *pagep = NULL;
> +               return -EIO;
> +       }
> +
> +       return 0;
>  }
>
>  static int
> @@ -2553,6 +2566,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>                         if (sgp == SGP_CACHE)
>                                 set_page_dirty(page);
>                         unlock_page(page);
> +
> +                       if (PageHWPoison(page)) {
> +                               put_page(page);
> +                               error = -EIO;
> +                               break;
> +                       }
>                 }
>
>                 /*
> @@ -3092,7 +3111,8 @@ static const char *shmem_get_link(struct dentry *dentry,
>                 page = find_get_page(inode->i_mapping, 0);
>                 if (!page)
>                         return ERR_PTR(-ECHILD);
> -               if (!PageUptodate(page)) {
> +               if (PageHWPoison(page) ||
> +                   !PageUptodate(page)) {
>                         put_page(page);
>                         return ERR_PTR(-ECHILD);
>                 }
> @@ -3100,6 +3120,13 @@ static const char *shmem_get_link(struct dentry *dentry,
>                 error = shmem_getpage(inode, 0, &page, SGP_READ);
>                 if (error)
>                         return ERR_PTR(error);
> +               if (!page)
> +                      return ERR_PTR(-ECHILD);
> +               if (PageHWPoison(page)) {
> +                       unlock_page(page);
> +                       put_page(page);
> +                       return ERR_PTR(-ECHILD);
> +               }
>                 unlock_page(page);
>         }
>         set_delayed_call(done, shmem_put_link, page);
> @@ -3750,6 +3777,13 @@ static void shmem_destroy_inodecache(void)
>         kmem_cache_destroy(shmem_inode_cachep);
>  }
>
> +/* Keep the page in page cache instead of truncating it */
> +static int shmem_error_remove_page(struct address_space *mapping,
> +                                  struct page *page)
> +{
> +       return 0;
> +}
> +
>  const struct address_space_operations shmem_aops = {
>         .writepage      = shmem_writepage,
>         .set_page_dirty = __set_page_dirty_no_writeback,
> @@ -3760,7 +3794,7 @@ const struct address_space_operations shmem_aops = {
>  #ifdef CONFIG_MIGRATION
>         .migratepage    = migrate_page,
>  #endif
> -       .error_remove_page = generic_error_remove_page,
> +       .error_remove_page = shmem_error_remove_page,
>  };
>  EXPORT_SYMBOL(shmem_aops);
>
> @@ -4168,9 +4202,14 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
>         error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
>                                   gfp, NULL, NULL, NULL);
>         if (error)
> -               page = ERR_PTR(error);
> -       else
> -               unlock_page(page);
> +               return ERR_PTR(error);
> +
> +       unlock_page(page);
> +       if (PageHWPoison(page)) {
> +               put_page(page);
> +               return ERR_PTR(-EIO);
> +       }
> +
>         return page;
>  #else
>         /*
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index ac6f036298cd..0780c2a57ff1 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -232,6 +232,11 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
>                 goto out;
>         }
>
> +       if (PageHWPoison(page)) {
> +               ret = -EIO;
> +               goto out_release;
> +       }
> +
>         ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
>                                        page, false, wp_copy);
>         if (ret)
> --
> 2.26.2
>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux