On 05/15/2020 09:50 PM, David Hildenbrand wrote: > On 14.05.20 08:50, Bibo Mao wrote: >> If there are two threads hitting page fault at the address, one >> thread updates pte entry and local tlb, the other thread can update >> local tlb also, rather than give up and let page fault happening >> again. > > Let me suggest > > "mm/memory: optimize concurrent page faults at same address > > If two threads concurrently fault at the same address, the thread that > won the race updates the PTE and its local TLB. For now, the other > thread gives up, simply does nothing, and continues. > > It could happen that this second thread triggers another fault, whereby > it only updates its local TLB while handling the fault. Instead of > triggering another fault, let's directly update the local TLB of the > second thread. > " > > If I got the intention of this patch correctly. > > Are there any performance numbers to support this patch? > > (I can't say too much about the correctness and/or usefulness of this patch) Yes, that is the situation. On the MIPS platform, software can update the TLB, so update_mmu_cache is used here. This does not happen frequently. With the three patches of this series (sent in a later mail) applied, I tested lat_pagefault in lmbench; here is the result: With these three series patches: # ./lat_pagefault -N 10 /tmp/1 Pagefaults on /tmp/1: 1.4973 microseconds # ./lat_pagefault -P 4 -N 10 /tmp/1 Pagefaults on /tmp/1: 1.5716 microseconds Original version, without these three series patches: # ./lat_pagefault -N 10 /tmp/1 Pagefaults on /tmp/1: 1.6489 microseconds # ./lat_pagefault -P 4 -N 10 /tmp/1 Pagefaults on /tmp/1: 1.7214 microseconds >> >> modified: mm/memory.c > > This does not belong into a patch description. Well, I will modify the patch description. 
regards bibo,mao > > >> Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx> >> --- >> mm/memory.c | 30 ++++++++++++++++++++++-------- >> 1 file changed, 22 insertions(+), 8 deletions(-) >> >> diff --git a/mm/memory.c b/mm/memory.c >> index f703fe8..3a741ce 100644 >> --- a/mm/memory.c >> +++ b/mm/memory.c >> @@ -2436,11 +2436,10 @@ static inline bool cow_user_page(struct page *dst, struct page *src, >> if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { >> /* >> * Other thread has already handled the fault >> - * and we don't need to do anything. If it's >> - * not the case, the fault will be triggered >> - * again on the same address. >> + * and update local tlb only >> */ >> ret = false; >> + update_mmu_cache(vma, addr, vmf->pte); >> goto pte_unlock; >> } >> >> @@ -2463,8 +2462,9 @@ static inline bool cow_user_page(struct page *dst, struct page *src, >> vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); >> locked = true; >> if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { >> - /* The PTE changed under us. Retry page fault. */ >> + /* The PTE changed under us. update local tlb */ >> ret = false; >> + update_mmu_cache(vma, addr, vmf->pte); >> goto pte_unlock; >> } >> >> @@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) >> } >> flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); >> entry = mk_pte(new_page, vma->vm_page_prot); >> + entry = pte_mkyoung(entry); >> entry = maybe_mkwrite(pte_mkdirty(entry), vma); >> /* >> * Clear the pte entry and flush it first, before updating the >> @@ -2752,6 +2753,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) >> new_page = old_page; >> page_copied = 1; >> } else { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> mem_cgroup_cancel_charge(new_page, memcg, false); >> } >> >> @@ -2812,6 +2814,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) >> * pte_offset_map_lock. 
>> */ >> if (!pte_same(*vmf->pte, vmf->orig_pte)) { >> + update_mmu_cache(vmf->vma, vmf->address, vmf->pte); >> pte_unmap_unlock(vmf->pte, vmf->ptl); >> return VM_FAULT_NOPAGE; >> } >> @@ -2936,6 +2939,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) >> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, >> vmf->address, &vmf->ptl); >> if (!pte_same(*vmf->pte, vmf->orig_pte)) { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> unlock_page(vmf->page); >> pte_unmap_unlock(vmf->pte, vmf->ptl); >> put_page(vmf->page); >> @@ -3341,8 +3345,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) >> vma->vm_page_prot)); >> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, >> vmf->address, &vmf->ptl); >> - if (!pte_none(*vmf->pte)) >> + if (!pte_none(*vmf->pte)) { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> goto unlock; >> + } >> ret = check_stable_address_space(vma->vm_mm); >> if (ret) >> goto unlock; >> @@ -3373,13 +3379,16 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) >> __SetPageUptodate(page); >> >> entry = mk_pte(page, vma->vm_page_prot); >> + entry = pte_mkyoung(entry); >> if (vma->vm_flags & VM_WRITE) >> entry = pte_mkwrite(pte_mkdirty(entry)); >> >> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, >> &vmf->ptl); >> - if (!pte_none(*vmf->pte)) >> + if (!pte_none(*vmf->pte)) { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> goto release; >> + } >> >> ret = check_stable_address_space(vma->vm_mm); >> if (ret) >> @@ -3646,11 +3655,14 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, >> } >> >> /* Re-check under ptl */ >> - if (unlikely(!pte_none(*vmf->pte))) >> + if (unlikely(!pte_none(*vmf->pte))) { >> + update_mmu_cache(vma, vmf->address, vmf->pte); >> return VM_FAULT_NOPAGE; >> + } >> >> flush_icache_page(vma, page); >> entry = mk_pte(page, vma->vm_page_prot); >> + entry = pte_mkyoung(entry); >> if (write) >> entry = maybe_mkwrite(pte_mkdirty(entry), vma); >> /* 
copy-on-write page */ >> @@ -4224,8 +4236,10 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) >> vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); >> spin_lock(vmf->ptl); >> entry = vmf->orig_pte; >> - if (unlikely(!pte_same(*vmf->pte, entry))) >> + if (unlikely(!pte_same(*vmf->pte, entry))) { >> + update_mmu_cache(vmf->vma, vmf->address, vmf->pte); >> goto unlock; >> + } >> if (vmf->flags & FAULT_FLAG_WRITE) { >> if (!pte_write(entry)) >> return do_wp_page(vmf); >> > >