The patch titled Subject: mm/mmu_notifier: contextual information for event triggering invalidation has been added to the -mm tree. Its filename is mm-mmu_notifier-contextual-information-for-event-triggering-invalidation.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-mmu_notifier-contextual-information-for-event-triggering-invalidation.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-mmu_notifier-contextual-information-for-event-triggering-invalidation.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ Binary file patches/mm-mmu_notifier-contextual-information-for-event-triggering-invalidation.patch matches Subject: mm/mmu_notifier: contextual information for event triggering invalidation CPU page table updates can happen for many reasons, not only as a result of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also as a result of kernel activities (memory compression, reclaim, migration, ...). Users of the mmu notifier API track changes to the CPU page table and take specific actions for them. However, the current API only provides the range of virtual addresses affected by the change, not why the change is happening. This patchset adds event information so that users of mmu notifier can differentiate among broad categories: - UNMAP: munmap() or mremap() - CLEAR: page table is cleared (migration, compaction, reclaim, ...) 
- PROTECTION_VMA: change in access protections for the range - PROTECTION_PAGE: change in access protections for page in the range - SOFT_DIRTY: soft dirtiness tracking Being able to distinguish munmap() and mremap() from other reasons why the page table is cleared is important to allow users of mmu notifier to update their own internal tracking structures accordingly (on munmap or mremap it is no longer necessary to track the range of virtual addresses as it becomes invalid). Link: http://lkml.kernel.org/r/20181203201817.10759-4-jglisse@xxxxxxxxxx Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx> Cc: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx> Cc: Ross Zwisler <zwisler@xxxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Radim Krcmar <rkrcmar@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Christian Koenig <christian.koenig@xxxxxxx> Cc: Felix Kuehling <felix.kuehling@xxxxxxx> Cc: Ralph Campbell <rcampbell@xxxxxxxxxx> Cc: John Hubbard <jhubbard@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/dax.c | 1 + fs/proc/task_mmu.c | 1 + include/linux/mmu_notifier.h | 33 +++++++++++++++++++++++++++++++++ kernel/events/uprobes.c | 1 + mm/huge_memory.c | 4 ++++ mm/hugetlb.c | 4 ++++ mm/khugepaged.c | 1 + mm/ksm.c | 2 ++ mm/madvise.c | 1 + mm/memory.c | 5 +++++ mm/migrate.c | 2 ++ mm/mprotect.c | 1 + mm/mremap.c | 1 + mm/oom_kill.c | 1 + mm/rmap.c | 2 ++ 15 files changed, 60 insertions(+) --- a/fs/dax.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/fs/dax.c @@ -761,6 +761,7 @@ static void dax_entry_mkclean(struct add struct mmu_notifier_range range; unsigned long address; + range.event = MMU_NOTIFY_PROTECTION_PAGE; range.mm = vma->vm_mm; cond_resched(); --- a/fs/proc/task_mmu.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/fs/proc/task_mmu.c @@ -1156,6 +1156,7 @@ static ssize_t clear_refs_write(struct f range.start = 0; 
range.end = -1UL; range.mm = mm; + range.event = MMU_NOTIFY_SOFT_DIRTY; mmu_notifier_invalidate_range_start(&range); } walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); --- a/include/linux/mmu_notifier.h~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/include/linux/mmu_notifier.h @@ -25,10 +25,43 @@ struct mmu_notifier_mm { spinlock_t lock; }; +/* + * What event is triggering the invalidation: + * + * MMU_NOTIFY_UNMAP + * either munmap() that unmap the range or a mremap() that move the range + * + * MMU_NOTIFY_CLEAR + * clear page table entry (many reasons for this like madvise() or replacing + * a page by another one, ...). + * + * MMU_NOTIFY_PROTECTION_VMA + * update is due to protection change for the range ie using the vma access + * permission (vm_page_prot) to update the whole range is enough no need to + * inspect changes to the CPU page table (mprotect() syscall) + * + * MMU_NOTIFY_PROTECTION_PAGE + * update is due to change in read/write flag for pages in the range so to + * mirror those changes the user must inspect the CPU page table (from the + * end callback). 
+ * + * + * MMU_NOTIFY_SOFT_DIRTY + * soft dirty accounting (still same page and same access flags) + */ +enum mmu_notifier_event { + MMU_NOTIFY_UNMAP = 0, + MMU_NOTIFY_CLEAR, + MMU_NOTIFY_PROTECTION_VMA, + MMU_NOTIFY_PROTECTION_PAGE, + MMU_NOTIFY_SOFT_DIRTY, +}; + struct mmu_notifier_range { struct mm_struct *mm; unsigned long start; unsigned long end; + enum mmu_notifier_event event; bool blockable; }; --- a/kernel/events/uprobes.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/kernel/events/uprobes.c @@ -174,6 +174,7 @@ static int __replace_page(struct vm_area struct mmu_notifier_range range; struct mem_cgroup *memcg; + range.event = MMU_NOTIFY_CLEAR; range.start = addr; range.end = addr + PAGE_SIZE; range.mm = mm; --- a/mm/huge_memory.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/huge_memory.c @@ -1182,6 +1182,7 @@ static vm_fault_t do_huge_pmd_wp_page_fa cond_resched(); } + range.event = MMU_NOTIFY_CLEAR; range.start = haddr; range.end = range.start + HPAGE_PMD_SIZE; range.mm = vma->vm_mm; @@ -1347,6 +1348,7 @@ alloc: vma, HPAGE_PMD_NR); __SetPageUptodate(new_page); + range.event = MMU_NOTIFY_CLEAR; range.start = haddr; range.end = range.start + HPAGE_PMD_SIZE; range.mm = vma->vm_mm; @@ -2027,6 +2029,7 @@ void __split_huge_pud(struct vm_area_str struct mm_struct *mm = vma->vm_mm; struct mmu_notifier_range range; + range.event = MMU_NOTIFY_CLEAR; range.start = address & HPAGE_PUD_MASK; range.end = range.start + HPAGE_PUD_SIZE; range.mm = mm; @@ -2246,6 +2249,7 @@ void __split_huge_pmd(struct vm_area_str struct mm_struct *mm = vma->vm_mm; struct mmu_notifier_range range; + range.event = MMU_NOTIFY_CLEAR; range.start = address & HPAGE_PMD_MASK; range.end = range.start + HPAGE_PMD_SIZE; range.mm = mm; --- a/mm/hugetlb.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/hugetlb.c @@ -3244,6 +3244,7 @@ int copy_hugetlb_page_range(struct mm_st cow = (vma->vm_flags & 
(VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + range.event = MMU_NOTIFY_CLEAR; range.start = vma->vm_start; range.end = vma->vm_end; range.mm = src; @@ -3344,6 +3345,7 @@ void __unmap_hugepage_range(struct mmu_g unsigned long sz = huge_page_size(h); struct mmu_notifier_range range; + range.event = MMU_NOTIFY_CLEAR; range.start = start; range.end = end; range.mm = mm; @@ -3629,6 +3631,7 @@ retry_avoidcopy: __SetPageUptodate(new_page); set_page_huge_active(new_page); + range.event = MMU_NOTIFY_CLEAR; range.start = haddr; range.end = range.start + huge_page_size(h); range.mm = mm; @@ -4346,6 +4349,7 @@ unsigned long hugetlb_change_protection( bool shared_pmd = false; struct mmu_notifier_range range; + range.event = MMU_NOTIFY_PROTECTION_VMA; range.start = start; range.end = end; range.mm = mm; --- a/mm/khugepaged.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/khugepaged.c @@ -1016,6 +1016,7 @@ static void collapse_huge_page(struct mm pte = pte_offset_map(pmd, address); pte_ptl = pte_lockptr(mm, pmd); + range.event = MMU_NOTIFY_CLEAR; range.start = address; range.end = range.start + HPAGE_PMD_SIZE; range.mm = mm; --- a/mm/ksm.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/ksm.c @@ -1064,6 +1064,7 @@ static int write_protect_page(struct vm_ BUG_ON(PageTransCompound(page)); + range.event = MMU_NOTIFY_CLEAR; range.start = pvmw.address; range.end = range.start + PAGE_SIZE; range.mm = mm; @@ -1153,6 +1154,7 @@ static int replace_page(struct vm_area_s if (!pmd) goto out; + range.event = MMU_NOTIFY_CLEAR; range.start = addr; range.end = addr + PAGE_SIZE; range.mm = mm; --- a/mm/madvise.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/madvise.c @@ -466,6 +466,7 @@ static int madvise_free_single_vma(struc if (!vma_is_anonymous(vma)) return -EINVAL; + range.event = MMU_NOTIFY_CLEAR; range.start = max(vma->vm_start, start_addr); if (range.start >= vma->vm_end) return 
-EINVAL; --- a/mm/memory.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/memory.c @@ -1007,6 +1007,7 @@ int copy_page_range(struct mm_struct *ds * is_cow_mapping() returns true. */ is_cow = is_cow_mapping(vma->vm_flags); + range.event = MMU_NOTIFY_PROTECTION_PAGE; range.start = addr; range.end = end; range.mm = src_mm; @@ -1334,6 +1335,7 @@ void unmap_vmas(struct mmu_gather *tlb, { struct mmu_notifier_range range; + range.event = MMU_NOTIFY_UNMAP; range.start = start_addr; range.end = end_addr; range.mm = vma->vm_mm; @@ -1358,6 +1360,7 @@ void zap_page_range(struct vm_area_struc struct mmu_notifier_range range; struct mmu_gather tlb; + range.event = MMU_NOTIFY_CLEAR; range.start = start; range.end = range.start + size; range.mm = vma->vm_mm; @@ -1387,6 +1390,7 @@ static void zap_page_range_single(struct struct mmu_notifier_range range; struct mmu_gather tlb; + range.event = MMU_NOTIFY_CLEAR; range.start = address; range.end = range.start + size; range.mm = vma->vm_mm; @@ -2260,6 +2264,7 @@ static vm_fault_t wp_page_copy(struct vm struct mem_cgroup *memcg; struct mmu_notifier_range range; + range.event = MMU_NOTIFY_CLEAR; range.start = vmf->address & PAGE_MASK; range.end = range.start + PAGE_SIZE; range.mm = mm; --- a/mm/migrate.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/migrate.c @@ -2302,6 +2302,7 @@ static void migrate_vma_collect(struct m struct mmu_notifier_range range; struct mm_walk mm_walk; + range.event = MMU_NOTIFY_CLEAR; range.start = migrate->start; range.end = migrate->end; range.mm = mm_walk.mm; @@ -2722,6 +2723,7 @@ static void migrate_vma_pages(struct mig if (!notified) { notified = true; + range.event = MMU_NOTIFY_CLEAR; range.start = addr; range.end = migrate->end; range.mm = mm; --- a/mm/mprotect.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/mprotect.c @@ -186,6 +186,7 @@ static inline unsigned long change_pmd_r /* invoke the mmu 
notifier if the pmd is populated */ if (!range.start) { + range.event = MMU_NOTIFY_PROTECTION_VMA; range.start = addr; range.end = end; range.mm = mm; --- a/mm/mremap.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/mremap.c @@ -249,6 +249,7 @@ unsigned long move_page_tables(struct vm old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); + range.event = MMU_NOTIFY_UNMAP; range.start = old_addr; range.end = old_end; range.mm = vma->vm_mm; --- a/mm/oom_kill.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/oom_kill.c @@ -531,6 +531,7 @@ bool __oom_reap_task_mm(struct mm_struct struct mmu_notifier_range range; struct mmu_gather tlb; + range.event = MMU_NOTIFY_CLEAR; range.start = vma->vm_start; range.end = vma->vm_end; range.mm = mm; --- a/mm/rmap.c~mm-mmu_notifier-contextual-information-for-event-triggering-invalidation +++ a/mm/rmap.c @@ -896,6 +896,7 @@ static bool page_mkclean_one(struct page * We have to assume the worse case ie pmd for invalidation. Note that * the page can not be free from this function. */ + range.event = MMU_NOTIFY_PROTECTION_PAGE; range.mm = vma->vm_mm; range.start = address; range.end = min(vma->vm_end, range.start + @@ -1372,6 +1373,7 @@ static bool try_to_unmap_one(struct page * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ + range.event = MMU_NOTIFY_CLEAR; range.mm = vma->vm_mm; range.start = vma->vm_start; range.end = min(vma->vm_end, range.start + _ Patches currently in -mm which might be from jglisse@xxxxxxxxxx are