On 05/15/2020 09:50 PM, David Hildenbrand wrote: > On 14.05.20 08:50, Bibo Mao wrote: >> If there are two threads hitting page fault at the address, one >> thread updates pte entry and local tlb, the other thread can update >> local tlb also, rather than give up and let page fault happening >> again. > > Let me suggest > > "mm/memory: optimize concurrent page faults at same address > > If two threads concurrently fault at the same address, the thread that > won the race updates the PTE and its local TLB. For now, the other > thread gives up, simply does nothing, and continues. > > It could happen that this second thread triggers another fault, whereby > it only updates its local TLB while handling the fault. Instead of > triggering another fault, let's directly update the local TLB of the > second thread. > " > > If I got the intention of this patch correctly. > > Are there any performance numbers to support this patch? > > (I can't say too much about the correctness and/or usefulness of this patch) Yes, that is the situation. On the MIPS platform, software can update the TLB, so update_mmu_cache is used here. This does not happen frequently. With the three patches of this series (sent in a later mail) applied, I tested lat_pagefault in lmbench; here is the result: With these three series patches: # ./lat_pagefault -N 10 /tmp/1 Pagefaults on /tmp/1: 1.4973 microseconds # ./lat_pagefault -P 4 -N 10 /tmp/1 Pagefaults on /tmp/1: 1.5716 microseconds Original version, without these three series patches: # ./lat_pagefault -N 10 /tmp/1 Pagefaults on /tmp/1: 1.6489 microseconds # ./lat_pagefault -P 4 -N 10 /tmp/1 Pagefaults on /tmp/1: 1.7214 microseconds >> >> modified: mm/memory.c > > This does not belong into a patch description. Well, I will modify the patch description. 
regards bibo,mao > > >> Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx> >> --- >> mm/memory.c | 30 ++++++++++++++++++++++-------- >> 1 file changed, 22 insertions(+), 8 deletions(-) >> >> diff --git a/mm/memory.c b/mm/memory.c >> index f703fe8..3a741ce 100644 >> --- a/mm/memory.c >> +++ b/mm/memory.c >> @@ -2436,11 +2436,10 @@ static inline bool cow_user_page(struct page *dst, struct page *src, >> if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { >> /* >> * Other thread has already handled the fault >> - * and we don't need to do anything. If it's >> - * not the case, the fault will be triggered >> - * again on the same address. >> + * and update local tlb only >> */ >> ret = false; >> + update_mmu_cache(vma, addr, vmf->pte); >> goto pte_unlock; >> } >> >> @@ -2463,8 +2462,9 @@ static inline bool cow_user_page(struct page *dst, struct page *src, >> vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); >> locked = true; >> if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { >> - /* The PTE changed under us. Retry page fault. */ >> + /* The PTE changed under us. update local tlb */ >> ret = false; >> + update_mmu_cache(vma, addr, vmf->pte); >> goto pte_unlock; >> } >> >> @@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) >> } >> flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); >> entry = mk_pte(new_page, vma->vm_page_prot); >> + entry = pte_mkyoung(entry); >> entry = maybe_mkwrite(pte_mkdirty(entry), vma); >> /* >> * Clear the pte entry and flush it first, before updating the >> @@ -2752,6 +2753,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) >> new_page = old_page; >> page_copied = 1; >> } else { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> mem_cgroup_cancel_charge(new_page, memcg, false); >> } >> >> @@ -2812,6 +2814,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) >> * pte_offset_map_lock. 
>> */ >> if (!pte_same(*vmf->pte, vmf->orig_pte)) { >> + update_mmu_cache(vmf->vma, vmf->address, vmf->pte); >> pte_unmap_unlock(vmf->pte, vmf->ptl); >> return VM_FAULT_NOPAGE; >> } >> @@ -2936,6 +2939,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) >> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, >> vmf->address, &vmf->ptl); >> if (!pte_same(*vmf->pte, vmf->orig_pte)) { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> unlock_page(vmf->page); >> pte_unmap_unlock(vmf->pte, vmf->ptl); >> put_page(vmf->page); >> @@ -3341,8 +3345,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) >> vma->vm_page_prot)); >> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, >> vmf->address, &vmf->ptl); >> - if (!pte_none(*vmf->pte)) >> + if (!pte_none(*vmf->pte)) { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> goto unlock; >> + } >> ret = check_stable_address_space(vma->vm_mm); >> if (ret) >> goto unlock; >> @@ -3373,13 +3379,16 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) >> __SetPageUptodate(page); >> >> entry = mk_pte(page, vma->vm_page_prot); >> + entry = pte_mkyoung(entry); >> if (vma->vm_flags & VM_WRITE) >> entry = pte_mkwrite(pte_mkdirty(entry)); >> >> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, >> &vmf->ptl); >> - if (!pte_none(*vmf->pte)) >> + if (!pte_none(*vmf->pte)) { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> goto release; >> + } >> >> ret = check_stable_address_space(vma->vm_mm); >> if (ret) >> @@ -3646,11 +3655,14 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, >> } >> >> /* Re-check under ptl */ >> - if (unlikely(!pte_none(*vmf->pte))) >> + if (unlikely(!pte_none(*vmf->pte))) { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> return VM_FAULT_NOPAGE; >> + } >> >> flush_icache_page(vma, page); >> entry = mk_pte(page, vma->vm_page_prot); >> + entry = pte_mkyoung(entry); >> if (write) >> entry = maybe_mkwrite(pte_mkdirty(entry), vma); >> /* 
copy-on-write page */ >> @@ -4224,8 +4236,10 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) >> vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); >> spin_lock(vmf->ptl); >> entry = vmf->orig_pte; >> - if (unlikely(!pte_same(*vmf->pte, entry))) >> + if (unlikely(!pte_same(*vmf->pte, entry))) { >> + update_mmu_cache(vmf->vma, vmf->address, vmf->pte); >> goto unlock; >> + } >> if (vmf->flags & FAULT_FLAG_WRITE) { >> if (!pte_write(entry)) >> return do_wp_page(vmf); >> > >