Re: [PATCH v1 2/5] s390/uv: convert gmap_make_secure() to work on folios

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu,  4 Apr 2024 18:36:39 +0200
David Hildenbrand <david@xxxxxxxxxx> wrote:

> We have various goals that require gmap_make_secure() to only work on
> folios. We want to limit the use of page_mapcount() to the places where it
> is absolutely necessary, we want to avoid using page flags of tail
> pages, and we want to remove page_has_private().
> 
> So, let's convert gmap_make_secure() to folios. While s390x makes sure
> to never have PMD-mapped THP in processes that use KVM -- by remapping
> them using PTEs in thp_split_walk_pmd_entry()->split_huge_pmd() -- we might
> still find PTE-mapped THPs and could end up working on tail pages of
> such large folios for now.
> 
> To handle that cleanly, let's simply split any PTE-mapped large folio,
> so we can be sure that we are always working with small folios and never
> on tail pages.
> 
> There is no real change: splitting will similarly fail on unexpected folio
> references, just like it would already when we try to freeze the folio
> refcount.
> 
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> ---
>  arch/s390/include/asm/page.h |  1 +
>  arch/s390/kernel/uv.c        | 66 ++++++++++++++++++++++--------------
>  2 files changed, 42 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
> index 9381879f7ecf..54d015bcd8e3 100644
> --- a/arch/s390/include/asm/page.h
> +++ b/arch/s390/include/asm/page.h
> @@ -215,6 +215,7 @@ static inline unsigned long __phys_addr(unsigned long x, bool is_31bit)
>  
>  #define phys_to_page(phys)	pfn_to_page(phys_to_pfn(phys))
>  #define page_to_phys(page)	pfn_to_phys(page_to_pfn(page))
> +#define folio_to_phys(page)	pfn_to_phys(folio_pfn(folio))
>  
>  static inline void *pfn_to_virt(unsigned long pfn)
>  {
> diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
> index 7401838b960b..adcbd4b13035 100644
> --- a/arch/s390/kernel/uv.c
> +++ b/arch/s390/kernel/uv.c
> @@ -181,36 +181,36 @@ int uv_convert_owned_from_secure(unsigned long paddr)
>  }
>  
>  /*
> - * Calculate the expected ref_count for a page that would otherwise have no
> + * Calculate the expected ref_count for a folio that would otherwise have no
>   * further pins. This was cribbed from similar functions in other places in
>   * the kernel, but with some slight modifications. We know that a secure
> - * page can not be a huge page for example.
> + * folio can only be a small folio for example.
>   */
> -static int expected_page_refs(struct page *page)
> +static int expected_folio_refs(struct folio *folio)
>  {
>  	int res;
>  
> -	res = page_mapcount(page);
> -	if (PageSwapCache(page)) {
> +	res = folio_mapcount(folio);
> +	if (folio_test_swapcache(folio)) {
>  		res++;
> -	} else if (page_mapping(page)) {
> +	} else if (folio_mapping(folio)) {
>  		res++;
> -		if (page_has_private(page))
> +		if (folio_has_private(folio))
>  			res++;
>  	}
>  	return res;
>  }
>  
> -static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
> +static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
>  {
>  	int expected, cc = 0;
>  
> -	if (PageWriteback(page))
> +	if (folio_test_writeback(folio))
>  		return -EAGAIN;
> -	expected = expected_page_refs(page);
> -	if (!page_ref_freeze(page, expected))
> +	expected = expected_folio_refs(folio);
> +	if (!folio_ref_freeze(folio, expected))
>  		return -EBUSY;
> -	set_bit(PG_arch_1, &page->flags);
> +	set_bit(PG_arch_1, &folio->flags);
>  	/*
>  	 * If the UVC does not succeed or fail immediately, we don't want to
>  	 * loop for long, or we might get stall notifications.
> @@ -220,9 +220,9 @@ static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
>  	 * -EAGAIN and we let the callers deal with it.
>  	 */
>  	cc = __uv_call(0, (u64)uvcb);
> -	page_ref_unfreeze(page, expected);
> +	folio_ref_unfreeze(folio, expected);
>  	/*
> -	 * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
> +	 * Return -ENXIO if the folio was not mapped, -EINVAL for other errors.
>  	 * If busy or partially completed, return -EAGAIN.
>  	 */
>  	if (cc == UVC_CC_OK)
> @@ -277,7 +277,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
>  	bool local_drain = false;
>  	spinlock_t *ptelock;
>  	unsigned long uaddr;
> -	struct page *page;
> +	struct folio *folio;
>  	pte_t *ptep;
>  	int rc;
>  
> @@ -306,33 +306,49 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
>  	if (!ptep)
>  		goto out;
>  	if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
> -		page = pte_page(*ptep);
> +		folio = page_folio(pte_page(*ptep));
>  		rc = -EAGAIN;
> -		if (trylock_page(page)) {
> +
> +		/* We might get PTE-mapped large folios; split them first. */
> +		if (folio_test_large(folio)) {
> +			rc = -E2BIG;
> +		} else if (folio_trylock(folio)) {
>  			if (should_export_before_import(uvcb, gmap->mm))
> -				uv_convert_from_secure(page_to_phys(page));
> -			rc = make_page_secure(page, uvcb);
> -			unlock_page(page);
> +				uv_convert_from_secure(folio_to_phys(folio));
> +			rc = make_folio_secure(folio, uvcb);
> +			folio_unlock(folio);
>  		}
>  
>  		/*
> -		 * Once we drop the PTL, the page may get unmapped and
> +		 * Once we drop the PTL, the folio may get unmapped and
>  		 * freed immediately. We need a temporary reference.
>  		 */
> -		if (rc == -EAGAIN)
> -			get_page(page);
> +		if (rc == -EAGAIN || rc == -E2BIG)
> +			folio_get(folio);
>  	}
>  	pte_unmap_unlock(ptep, ptelock);
>  out:
>  	mmap_read_unlock(gmap->mm);
>  
> +	if (rc == -E2BIG) {
> +		/*
> +		 * Splitting might fail with -EBUSY due to unexpected folio
> +		 * references, just like make_folio_secure(). So handle it
> +		 * ahead of time without the PTL being held.
> +		 */
> +		folio_lock(folio);
> +		rc = split_folio(folio);

if split_folio returns -EAGAIN...

> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
> +
>  	if (rc == -EAGAIN) {

... we will not skip this ...

>  		/*
>  		 * If we are here because the UVC returned busy or partial
>  		 * completion, this is just a useless check, but it is safe.
>  		 */
> -		wait_on_page_writeback(page);
> -		put_page(page);
> +		folio_wait_writeback(folio);
> +		folio_put(folio);

... and we will do one folio_put() too many

>  	} else if (rc == -EBUSY) {
>  		/*
>  		 * If we have tried a local drain and the page refcount

are we sure that split_folio() can never return -EAGAIN now and in the
future too?

maybe just change it to  } else if (...   ?





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux