Re: [RFC v1 2/2] mm: khugepaged: recover from poisoned file-backed memory

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Mar 28, 2022 at 9:02 PM Tong Tiangen <tongtiangen@xxxxxxxxxx> wrote:
>
>
>
> 在 2022/3/24 7:29, Jiaqi Yan 写道:
> > Make collapse_file roll back when copying pages failed.
> > More concretely:
> > * extract copy operations into a separate loop
> > * postpone the updates for nr_none until both scan and copy succeeded
> > * postpone joining small xarray entries until both scan and copy
> >    succeeded
> > * as for update operations to NR_XXX_THPS
> >      * for SHMEM file, postpone until both scan and copy succeeded
> >      * for other file, roll back if scan succeeded but copy failed
> >
> > Signed-off-by: Jiaqi Yan <jiaqiyan@xxxxxxxxxx>
> > ---
> >   include/linux/highmem.h | 18 ++++++++++
> >   mm/khugepaged.c         | 75 +++++++++++++++++++++++++++--------------
> >   2 files changed, 67 insertions(+), 26 deletions(-)
> >
> > diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> > index 15d0aa4d349c..fc5aa221bdb5 100644
> > --- a/include/linux/highmem.h
> > +++ b/include/linux/highmem.h
> > @@ -315,6 +315,24 @@ static inline void copy_highpage(struct page *to, struct page *from)
> >       kunmap_local(vfrom);
> >   }
> >
> > +/*
> > + * Machine check exception handled version of copy_highpage.
> > + * Return true if copying page content failed; otherwise false.
> > + */
> > +static inline bool copy_highpage_mc(struct page *to, struct page *from)
> > +{
> > +     char *vfrom, *vto;
> > +     unsigned long ret;
> > +
> > +     vfrom = kmap_local_page(from);
> > +     vto = kmap_local_page(to);
> > +     ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE);
> > +     kunmap_local(vto);
> > +     kunmap_local(vfrom);
> > +
> > +     return ret > 0;
> > +}
> > +
> >   #endif
> >
> >   static inline void memcpy_page(struct page *dst_page, size_t dst_off,
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index 84ed177f56ff..ed2b1cd4bbc6 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -1708,12 +1708,13 @@ static void collapse_file(struct mm_struct *mm,
> >   {
> >       struct address_space *mapping = file->f_mapping;
> >       gfp_t gfp;
> > -     struct page *new_page;
> > +     struct page *new_page, *page, *tmp;
> >       pgoff_t index, end = start + HPAGE_PMD_NR;
> >       LIST_HEAD(pagelist);
> >       XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
> >       int nr_none = 0, result = SCAN_SUCCEED;
> >       bool is_shmem = shmem_file(file);
> > +     bool copy_failed = false;
> >       int nr;
> >
> >       VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
> > @@ -1936,9 +1937,7 @@ static void collapse_file(struct mm_struct *mm,
> >       }
> >       nr = thp_nr_pages(new_page);
> >
> > -     if (is_shmem)
> > -             __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr);
> > -     else {
> > +     if (!is_shmem) {
> >               __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr);
> >               filemap_nr_thps_inc(mapping);
> >               /*
> > @@ -1956,34 +1955,39 @@ static void collapse_file(struct mm_struct *mm,
> >               }
> >       }
> >
> > -     if (nr_none) {
> > -             __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none);
> > -             if (is_shmem)
> > -                     __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
> > -     }
> > -
> > -     /* Join all the small entries into a single multi-index entry */
> > -     xas_set_order(&xas, start, HPAGE_PMD_ORDER);
> > -     xas_store(&xas, new_page);
> >   xa_locked:
> >       xas_unlock_irq(&xas);
> >   xa_unlocked:
> >
> >       if (result == SCAN_SUCCEED) {
> > -             struct page *page, *tmp;
> > -
> >               /*
> >                * Replacing old pages with new one has succeeded, now we
> > -              * need to copy the content and free the old pages.
> > +              * attempt to copy the contents.
> >                */
> >               index = start;
> > -             list_for_each_entry_safe(page, tmp, &pagelist, lru) {
> > +             list_for_each_entry(page, &pagelist, lru) {
> >                       while (index < page->index) {
> >                               clear_highpage(new_page + (index % HPAGE_PMD_NR));
> >                               index++;
> >                       }
> > -                     copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
> > -                                     page);
> > +                     if (copy_highpage_mc(new_page + (page->index % HPAGE_PMD_NR), page)) {
> > +                             copy_failed = true;
>
> The 1st patch here used "copy_succeeded = false". It would be best
> if the logic in these two places were unified.

copy_failed here will be eliminated once we have SCAN_COPY_MC
defined in version 2. Version 2 also renames "copy_succeeded"
in collapse_huge_page() to "copied", mimicking the "isolated" variable
for __collapse_huge_page_isolate().


>
> > +                             break;
> > +                     }
> > +                     index++;
> > +             }
> > +             while (!copy_failed && index < end) {
> > +                     clear_highpage(new_page + (page->index % HPAGE_PMD_NR));
> > +                     index++;
> > +             }
> > +     }
> > +
> > +     if (result == SCAN_SUCCEED && !copy_failed) {
> > +             /*
> > +              * Copying old pages to huge one has succeeded, now we
> > +              * need to free the old pages.
> > +              */
> > +             list_for_each_entry_safe(page, tmp, &pagelist, lru) {
> >                       list_del(&page->lru);
> >                       page->mapping = NULL;
> >                       page_ref_unfreeze(page, 1);
> > @@ -1991,12 +1995,20 @@ static void collapse_file(struct mm_struct *mm,
> >                       ClearPageUnevictable(page);
> >                       unlock_page(page);
> >                       put_page(page);
> > -                     index++;
> >               }
> > -             while (index < end) {
> > -                     clear_highpage(new_page + (index % HPAGE_PMD_NR));
> > -                     index++;
> > +
> > +             xas_lock_irq(&xas);
> > +             if (is_shmem)
> > +                     __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr);
> > +             if (nr_none) {
> > +                     __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none);
> > +                     if (is_shmem)
> > +                             __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
> >               }
> > +             /* Join all the small entries into a single multi-index entry. */
> > +             xas_set_order(&xas, start, HPAGE_PMD_ORDER);
> > +             xas_store(&xas, new_page);
> > +             xas_unlock_irq(&xas);
> >
> >               SetPageUptodate(new_page);
> >               page_ref_add(new_page, HPAGE_PMD_NR - 1);
> > @@ -2012,9 +2024,11 @@ static void collapse_file(struct mm_struct *mm,
> >
> >               khugepaged_pages_collapsed++;
> >       } else {
> > -             struct page *page;
> > -
> > -             /* Something went wrong: roll back page cache changes */
> > +             /*
> > +              * Something went wrong:
> > +              * either result != SCAN_SUCCEED or copy_failed,
> > +              * roll back page cache changes
> > +              */
> >               xas_lock_irq(&xas);
> >               mapping->nrpages -= nr_none;
> >
> > @@ -2047,6 +2061,15 @@ static void collapse_file(struct mm_struct *mm,
> >                       xas_lock_irq(&xas);
> >               }
> >               VM_BUG_ON(nr_none);
> > +             /*
> > +              * Undo the updates of thp_nr_pages(new_page) for non-SHMEM file,
> > +              * which is not updated yet for SHMEM file.
> > +              * These undos are not needed if result is not SCAN_SUCCEED.
> > +              */
> > +             if (!is_shmem && result == SCAN_SUCCEED) {
> > +                     __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr);
> > +                     filemap_nr_thps_dec(mapping);
> > +             }
> >               xas_unlock_irq(&xas);
> >
> >               new_page->mapping = NULL;





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux