Re: [PATCH] mm: make faultaround produce old ptes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue 17-05-16 15:32:46, Kirill A. Shutemov wrote:
> Currently, faultaround code produces young ptes. This can screw up vmscan
> behaviour[1], as it makes vmscan think that these pages are hot, so it does
> not push them out on the first round.
> 
> Let's modify faultaround to produce old ptes, so they can easily be
> reclaimed under memory pressure.

Could you be more specific about what the original issue was that led to
this patch? I can understand that marking all those pages young might be
too optimistic, but when does it actually matter? A sparsely accessed file
mmap?

> This can to some extent defeat the purpose of faultaround on machines
> without a hardware accessed bit, as it will not help us with reducing
> the number of minor page faults.
> 
> We may want to disable faultaround on such machines altogether, but
> that's a subject for a separate patchset.
> 
> [1] https://lkml.kernel.org/r/1460992636-711-1-git-send-email-vinmenon@xxxxxxxxxxxxxx
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Cc: Mel Gorman <mgorman@xxxxxxx>
> Cc: Rik van Riel <riel@xxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxxxx>
> Cc: Vinayak Menon <vinmenon@xxxxxxxxxxxxxx>
> Cc: Minchan Kim <minchan@xxxxxxxxxx>
> ---
>  include/linux/mm.h |  2 +-
>  mm/filemap.c       |  2 +-
>  mm/memory.c        | 23 ++++++++++++++++++-----
>  3 files changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 573dfebddcca..a0e773204be0 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -591,7 +591,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>  }
>  
>  void do_set_pte(struct vm_area_struct *vma, unsigned long address,
> -		struct page *page, pte_t *pte, bool write, bool anon);
> +		struct page *page, pte_t *pte, bool write, bool anon, bool old);
>  #endif
>  
>  /*
> diff --git a/mm/filemap.c b/mm/filemap.c
> index b366a9902f1c..dd789e159a77 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2196,7 +2196,7 @@ repeat:
>  		if (file->f_ra.mmap_miss > 0)
>  			file->f_ra.mmap_miss--;
>  		addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
> -		do_set_pte(vma, addr, page, pte, false, false);
> +		do_set_pte(vma, addr, page, pte, false, false, true);
>  		unlock_page(page);
>  		goto next;
>  unlock:
> diff --git a/mm/memory.c b/mm/memory.c
> index d79c6db41502..67c03b2fe20c 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2855,7 +2855,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
>   * vm_ops->map_pages.
>   */
>  void do_set_pte(struct vm_area_struct *vma, unsigned long address,
> -		struct page *page, pte_t *pte, bool write, bool anon)
> +		struct page *page, pte_t *pte, bool write, bool anon, bool old)
>  {
>  	pte_t entry;
>  
> @@ -2863,6 +2863,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
>  	entry = mk_pte(page, vma->vm_page_prot);
>  	if (write)
>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +	if (old)
> +		entry = pte_mkold(entry);
>  	if (anon) {
>  		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
>  		page_add_new_anon_rmap(page, vma, address, false);
> @@ -3000,9 +3002,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>  	 */
>  	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
>  		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
> -		do_fault_around(vma, address, pte, pgoff, flags);
>  		if (!pte_same(*pte, orig_pte))
>  			goto unlock_out;
> +		do_fault_around(vma, address, pte, pgoff, flags);
> +		/* Check if the fault is handled by faultaround */
> +		if (!pte_same(*pte, orig_pte)) {
> +			/*
> +			 * Faultaround produces old ptes, but the pte we've
> +			 * handled the fault for should be young.
> +			 */
> +			pte_t entry = pte_mkyoung(*pte);
> +			if (ptep_set_access_flags(vma, address, pte, entry, 0))
> +				update_mmu_cache(vma, address, pte);
> +			goto unlock_out;
> +		}
>  		pte_unmap_unlock(pte, ptl);
>  	}
>  
> @@ -3017,7 +3030,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>  		put_page(fault_page);
>  		return ret;
>  	}
> -	do_set_pte(vma, address, fault_page, pte, false, false);
> +	do_set_pte(vma, address, fault_page, pte, false, false, false);
>  	unlock_page(fault_page);
>  unlock_out:
>  	pte_unmap_unlock(pte, ptl);
> @@ -3069,7 +3082,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>  		}
>  		goto uncharge_out;
>  	}
> -	do_set_pte(vma, address, new_page, pte, true, true);
> +	do_set_pte(vma, address, new_page, pte, true, true, false);
>  	mem_cgroup_commit_charge(new_page, memcg, false, false);
>  	lru_cache_add_active_or_unevictable(new_page, vma);
>  	pte_unmap_unlock(pte, ptl);
> @@ -3126,7 +3139,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>  		put_page(fault_page);
>  		return ret;
>  	}
> -	do_set_pte(vma, address, fault_page, pte, true, false);
> +	do_set_pte(vma, address, fault_page, pte, true, false, false);
>  	pte_unmap_unlock(pte, ptl);
>  
>  	if (set_page_dirty(fault_page))
> -- 
> 2.8.1

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]