Re: [PATCH v2 2/4] mm/hwpoison: check if a subpage of a hugetlb folio is raw HWPOISON

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@xxxxxxxxxx> wrote:
>
> On 06/23/23 16:40, Jiaqi Yan wrote:
> > Adds the functionality to tell if a subpage of a hugetlb folio is a
> > raw HWPOISON page. This functionality relies on RawHwpUnreliable to
> > be not set; otherwise hugepage's HWPOISON list becomes meaningless.
> >
> > Exports this functionality to be immediately used in the read operation
> > for hugetlbfs.
> >
> > Signed-off-by: Jiaqi Yan <jiaqiyan@xxxxxxxxxx>
> > ---
> >  include/linux/hugetlb.h | 19 +++++++++++++++++++
> >  include/linux/mm.h      |  7 +++++++
> >  mm/hugetlb.c            | 10 ++++++++++
> >  mm/memory-failure.c     | 34 ++++++++++++++++++++++++----------
> >  4 files changed, 60 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index 21f942025fec..8b73a12b7b38 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
> >  void hugetlb_unregister_node(struct node *node);
> >  #endif
> >
> > +/*
> > + * Struct raw_hwp_page represents information about "raw error page",
> > + * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> > + */
> > +struct raw_hwp_page {
> > +     struct llist_node node;
> > +     struct page *page;
> > +};
> > +
> > +static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> > +{
> > +     return (struct llist_head *)&folio->_hugetlb_hwpoison;
> > +}
> > +
> > +/*
> > + * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
> > + */
> > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> > +
> >  #else        /* CONFIG_HUGETLB_PAGE */
> >  struct hstate {};
> >
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 66032f0d515c..41a283bd41a7 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
> >  extern void memory_failure_queue(unsigned long pfn, int flags);
> >  extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >                                       bool *migratable_cleared);
> > +extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> >  void num_poisoned_pages_inc(unsigned long pfn);
> >  void num_poisoned_pages_sub(unsigned long pfn, long i);
> >  struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
> > @@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >       return 0;
> >  }
> >
> > +static inline bool __is_raw_hwp_subpage(struct folio *folio,
> > +                                     struct page *subpage)
> > +{
> > +     return false;
> > +}
> > +
> >  static inline void num_poisoned_pages_inc(unsigned long pfn)
> >  {
> >  }
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index ea24718db4af..6b860de87590 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >       return ret;
> >  }
> >
> > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > +{
> > +     bool ret;
> > +
> > +     spin_lock_irq(&hugetlb_lock);
> > +     ret = __is_raw_hwp_subpage(folio, subpage);
> > +     spin_unlock_irq(&hugetlb_lock);
>
> Can you describe what races the hugetlb_lock prevents here?

I think we should sync here with __get_huge_page_for_hwpoison, who
iterates and inserts an entry to raw_hwp_list. llist itself doesn't
ensure insertion is synchronized with iterating from
__is_raw_hwp_subpage.

> --
> Mike Kravetz
>
> > +     return ret;
> > +}
> > +
> >  void folio_putback_active_hugetlb(struct folio *folio)
> >  {
> >       spin_lock_irq(&hugetlb_lock);
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index c415c3c462a3..891248e2930e 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -1809,18 +1809,32 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
> >  #endif /* CONFIG_FS_DAX */
> >
> >  #ifdef CONFIG_HUGETLB_PAGE
> > -/*
> > - * Struct raw_hwp_page represents information about "raw error page",
> > - * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> > - */
> > -struct raw_hwp_page {
> > -     struct llist_node node;
> > -     struct page *page;
> > -};
> >
> > -static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> > +bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> >  {
> > -     return (struct llist_head *)&folio->_hugetlb_hwpoison;
> > +     struct llist_head *raw_hwp_head;
> > +     struct raw_hwp_page *p, *tmp;
> > +     bool ret = false;
> > +
> > +     if (!folio_test_hwpoison(folio))
> > +             return false;
> > +
> > +     /*
> > +      * When RawHwpUnreliable is set, kernel lost track of which subpages
> > +      * are HWPOISON. So return as if ALL subpages are HWPOISONed.
> > +      */
> > +     if (folio_test_hugetlb_raw_hwp_unreliable(folio))
> > +             return true;
> > +
> > +     raw_hwp_head = raw_hwp_list_head(folio);
> > +     llist_for_each_entry_safe(p, tmp, raw_hwp_head->first, node) {
> > +             if (subpage == p->page) {
> > +                     ret = true;
> > +                     break;
> > +             }
> > +     }
> > +
> > +     return ret;
> >  }
> >
> >  static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
> > --
> > 2.41.0.162.gfafddb0af9-goog
> >





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux