Re: [mm-unstable PATCH v5 4/8] mm, hwpoison: make unpoison aware of raw error info in hwpoisoned hugepage

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2022/7/8 13:36, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
> 
> Raw error info list needs to be removed when hwpoisoned hugetlb is
> unpoisoned.  And unpoison handler needs to know how many errors there
> are in the target hugepage. So add them.
> 
> HPageVmemmapOptimized(hpage) and HPageRawHwpUnreliable(hpage) can't be
> unpoisoned, so let's skip them.
> 
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
> Reported-by: kernel test robot <lkp@xxxxxxxxx>
> ---
> v4 -> v5:
> - fix type of return value of free_raw_hwp_pages()
>   (found by kernel test robot),
> - prevent unpoison for HPageVmemmapOptimized and HPageRawHwpUnreliable.
> ---
>  include/linux/swapops.h |  9 ++++++++
>  mm/memory-failure.c     | 50 ++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 53 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index a01aeb3fcc0b..ddc98f96ad2c 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -498,6 +498,11 @@ static inline void num_poisoned_pages_dec(void)
>  	atomic_long_dec(&num_poisoned_pages);
>  }
>  
> +static inline void num_poisoned_pages_sub(long i)
> +{
> +	atomic_long_sub(i, &num_poisoned_pages);
> +}
> +
>  #else
>  
>  static inline swp_entry_t make_hwpoison_entry(struct page *page)
> @@ -518,6 +523,10 @@ static inline struct page *hwpoison_entry_to_page(swp_entry_t entry)
>  static inline void num_poisoned_pages_inc(void)
>  {
>  }
> +
> +static inline void num_poisoned_pages_sub(long i)
> +{
> +}
>  #endif
>  
>  static inline int non_swap_entry(swp_entry_t entry)
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 6833c5e4b410..89e74ec8a95f 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1720,22 +1720,41 @@ static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page)
>  	return ret;
>  }
>  
> -int hugetlb_clear_page_hwpoison(struct page *hpage)
> +static long free_raw_hwp_pages(struct page *hpage, bool move_flag)

No strong opinion, but maybe the return type should be "unsigned", as it is always >= 0?

>  {
>  	struct llist_head *head;
>  	struct llist_node *t, *tnode;
> +	long count = 0;
>  
> -	if (!HPageRawHwpUnreliable(hpage))
> -		ClearPageHWPoison(hpage);
> +	/*
> +	 * HPageVmemmapOptimized hugepages can't be unpoisoned because
> +	 * struct pages for tail pages are required to free hwpoisoned
> +	 * hugepages.  HPageRawHwpUnreliable hugepages shouldn't be
> +	 * unpoisoned by definition.
> +	 */
> +	if (HPageVmemmapOptimized(hpage) || HPageRawHwpUnreliable(hpage))
> +		return 0;
>  	head = raw_hwp_list_head(hpage);
>  	llist_for_each_safe(tnode, t, head->first) {
>  		struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
>  
> -		SetPageHWPoison(p->page);
> +		if (move_flag)
> +			SetPageHWPoison(p->page);
>  		kfree(p);
> +		count++;
>  	}
>  	llist_del_all(head);
> -	return 0;
> +	return count;
> +}
> +
> +int hugetlb_clear_page_hwpoison(struct page *hpage)

It seems the return value is unused?

> +{
> +	int ret = -EBUSY;
> +
> +	if (!HPageRawHwpUnreliable(hpage))
> +		ret = !TestClearPageHWPoison(hpage);
> +	free_raw_hwp_pages(hpage, true);
> +	return ret;
>  }
>  
>  /*
> @@ -1879,6 +1898,10 @@ static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *
>  	return 0;
>  }
>  
> +static inline long free_raw_hwp_pages(struct page *hpage, bool move_flag)

If the return type is changed, remember to change it here too.

> +{
> +	return 0;
> +}
>  #endif	/* CONFIG_HUGETLB_PAGE */
>  
>  static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
> @@ -2284,6 +2307,7 @@ int unpoison_memory(unsigned long pfn)
>  	struct page *p;
>  	int ret = -EBUSY;
>  	int freeit = 0;
> +	long count = 1;
>  	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
>  					DEFAULT_RATELIMIT_BURST);
>  
> @@ -2331,6 +2355,13 @@ int unpoison_memory(unsigned long pfn)
>  
>  	ret = get_hwpoison_page(p, MF_UNPOISON);
>  	if (!ret) {
> +		if (PageHuge(p)) {
> +			count = free_raw_hwp_pages(page, false);

It seems the current behavior is: if any subpage of a hugetlb page is unpoisoned, then all of the
hwpoisoned subpages will be unpoisoned. I'm not sure whether this is what we want.

Thanks.

> +			if (count == 0) {
> +				ret = -EBUSY;
> +				goto unlock_mutex;
> +			}
> +		}
>  		ret = TestClearPageHWPoison(page) ? 0 : -EBUSY;
>  	} else if (ret < 0) {
>  		if (ret == -EHWPOISON) {
> @@ -2339,6 +2370,13 @@ int unpoison_memory(unsigned long pfn)
>  			unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
>  					 pfn, &unpoison_rs);
>  	} else {
> +		if (PageHuge(p)) {
> +			count = free_raw_hwp_pages(page, false);
> +			if (count == 0) {
> +				ret = -EBUSY;
> +				goto unlock_mutex;
> +			}
> +		}
>  		freeit = !!TestClearPageHWPoison(p);
>  
>  		put_page(page);
> @@ -2351,7 +2389,7 @@ int unpoison_memory(unsigned long pfn)
>  unlock_mutex:
>  	mutex_unlock(&mf_mutex);
>  	if (!ret || freeit) {
> -		num_poisoned_pages_dec();
> +		num_poisoned_pages_sub(count);
>  		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
>  				 page_to_pfn(p), &unpoison_rs);
>  	}
> 





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux