On 03/04/2018 23:57, David Rientjes wrote:
> On Wed, 28 Mar 2018, Laurent Dufour wrote:
> 
>>>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>>>> index 4d02524a7998..2f3e98edc94a 100644
>>>> --- a/include/linux/mm.h
>>>> +++ b/include/linux/mm.h
>>>> @@ -300,6 +300,7 @@ extern pgprot_t protection_map[16];
>>>>  #define FAULT_FLAG_USER		0x40	/* The fault originated in userspace */
>>>>  #define FAULT_FLAG_REMOTE	0x80	/* faulting for non current tsk/mm */
>>>>  #define FAULT_FLAG_INSTRUCTION  0x100	/* The fault was during an instruction fetch */
>>>> +#define FAULT_FLAG_SPECULATIVE	0x200	/* Speculative fault, not holding mmap_sem */
>>>>  
>>>>  #define FAULT_FLAG_TRACE \
>>>>  	{ FAULT_FLAG_WRITE,		"WRITE" }, \
>>>
>>> I think FAULT_FLAG_SPECULATIVE should be introduced in the patch that
>>> actually uses it.
>>
>> I think you're right, I'll move this define down in the series.
>>
>>>> diff --git a/mm/memory.c b/mm/memory.c
>>>> index e0ae4999c824..8ac241b9f370 100644
>>>> --- a/mm/memory.c
>>>> +++ b/mm/memory.c
>>>> @@ -2288,6 +2288,13 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(apply_to_page_range);
>>>>  
>>>> +static bool pte_map_lock(struct vm_fault *vmf)
>>>
>>> inline?
>>
>> Agreed.
>>
> 
> Ignore this, the final form of the function after the full patchset
> shouldn't be inline.

Indeed, I only kept inline the small pte_map_lock() and, later in the
series, pte_spinlock(), which are defined when
CONFIG_SPECULATIVE_PAGE_FAULT is not set (both stubs are sketched further
down in this mail).

>>>> +{
>>>> +	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
>>>> +				       vmf->address, &vmf->ptl);
>>>> +	return true;
>>>> +}
>>>> +
>>>>  /*
>>>>   * handle_pte_fault chooses page fault handler according to an entry which was
>>>>   * read non-atomically. Before making any commitment, on those architectures
>>>> @@ -2477,6 +2484,7 @@ static int wp_page_copy(struct vm_fault *vmf)
>>>>  	const unsigned long mmun_start = vmf->address & PAGE_MASK;
>>>>  	const unsigned long mmun_end = mmun_start + PAGE_SIZE;
>>>>  	struct mem_cgroup *memcg;
>>>> +	int ret = VM_FAULT_OOM;
>>>>  
>>>>  	if (unlikely(anon_vma_prepare(vma)))
>>>>  		goto oom;
>>>> @@ -2504,7 +2512,11 @@ static int wp_page_copy(struct vm_fault *vmf)
>>>>  	/*
>>>>  	 * Re-check the pte - we dropped the lock
>>>>  	 */
>>>> -	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
>>>> +	if (!pte_map_lock(vmf)) {
>>>> +		mem_cgroup_cancel_charge(new_page, memcg, false);
>>>> +		ret = VM_FAULT_RETRY;
>>>> +		goto oom_free_new;
>>>> +	}
>>>
>>> Ugh, but we aren't oom here, so maybe rename oom_free_new so that it makes
>>> sense for return values other than VM_FAULT_OOM?
>>
>> You're right, this label name is no longer correct. I'll rename it to
>> "out_free_new" and also rename the "oom" label to "out", since it is now
>> generic too.
>>
> 
> I think it would just be better to introduce a out_uncharge that handles
> the mem_cgroup_cancel_charge() in the exit path.

Yes, adding an out_uncharge label sounds good too. I'll add it and also
rename the oom_* labels to out_*.
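To make the inline point above concrete, here is roughly what the two
!CONFIG_SPECULATIVE_PAGE_FAULT stubs look like. pte_map_lock() has the same
body as the hunk quoted above, just kept inline; the pte_spinlock() body is
written from memory here, so treat it as a sketch rather than the exact
patch:

/* Plain !CONFIG_SPECULATIVE_PAGE_FAULT versions: nothing can fail here. */
static inline bool pte_spinlock(struct vm_fault *vmf)
{
	/* Take the PTE lock the usual way, no speculative check. */
	vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
	spin_lock(vmf->ptl);
	return true;
}

static inline bool pte_map_lock(struct vm_fault *vmf)
{
	/* Map and lock the PTE; same body as in the patch above. */
	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
				       vmf->address, &vmf->ptl);
	return true;
}

Both always return true since there is nothing speculative to invalidate
in that configuration; only the speculative variants can return false and
force the VM_FAULT_RETRY path discussed below.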
> 
> diff --git a/mm/memory.c b/mm/memory.c
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2645,9 +2645,8 @@ static int wp_page_copy(struct vm_fault *vmf)
>  	 * Re-check the pte - we dropped the lock
>  	 */
>  	if (!pte_map_lock(vmf)) {
> -		mem_cgroup_cancel_charge(new_page, memcg, false);
>  		ret = VM_FAULT_RETRY;
> -		goto oom_free_new;
> +		goto out_uncharge;
>  	}
>  	if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
>  		if (old_page) {
> @@ -2735,6 +2734,8 @@ static int wp_page_copy(struct vm_fault *vmf)
>  		put_page(old_page);
>  	}
>  	return page_copied ? VM_FAULT_WRITE : 0;
> +out_uncharge:
> +	mem_cgroup_cancel_charge(new_page, memcg, false);
>  oom_free_new:
>  	put_page(new_page);
>  oom:
> 
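Just to make sure we end up with the same thing: combining your
out_uncharge label with the oom_* -> out_* rename, I would expect the tail
of wp_page_copy() to look roughly like this (a sketch of the plan, not the
final patch):

	/* ...normal success path ends here... */
	return page_copied ? VM_FAULT_WRITE : 0;

out_uncharge:
	/* reached with ret == VM_FAULT_RETRY when pte_map_lock() failed */
	mem_cgroup_cancel_charge(new_page, memcg, false);
out_free_new:				/* was oom_free_new */
	put_page(new_page);
out:					/* was oom */
	if (old_page)
		put_page(old_page);
	return ret;			/* VM_FAULT_OOM unless set otherwise */

So the callers would use "goto out_uncharge" after setting ret to
VM_FAULT_RETRY when pte_map_lock() fails, "goto out_free_new" when the
memcg charge fails, and "goto out" for the early failures like
anon_vma_prepare() or the page allocation.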