From: Dave Jiang <dave.jiang@xxxxxxxxx> Subject: dax: remove VM_MIXEDMAP for fsdax and device dax This patch is reworked from an earlier patch that Dan has posted: https://patchwork.kernel.org/patch/10131727/ VM_MIXEDMAP is used by dax to tell mm paths like vm_normal_page() that the memory page they are dealing with is not typical memory from the linear map. The get_user_pages_fast() path, since it does not resolve the vma, is already using {pte,pmd}_devmap() as a stand-in for VM_MIXEDMAP, so we use that as a VM_MIXEDMAP replacement in some locations. In the cases where there is no pte to consult, we fall back to using vma_is_dax() to detect the VM_MIXEDMAP special case. Now that we have explicit driver pfn_t-flag opt-in/opt-out for get_user_pages() support for DAX, we can stop setting VM_MIXEDMAP. This also means we no longer need to worry about safely manipulating vm_flags in a future where we support dynamically changing the dax mode of a file. DAX should also now be supported with madvise_behavior(), vma_merge(), and copy_page_range(). This patch has been tested against the ndctl unit tests. It has also been tested against xfstests commit 625515d using fake pmem created by memmap, and no additional issues have been observed. 
Link: http://lkml.kernel.org/r/152847720311.55924.16999195879201817653.stgit@xxxxxxxxxxxxxxxxxxxxxxxxxx Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx> Acked-by: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- drivers/dax/device.c | 2 +- fs/ext2/file.c | 1 - fs/ext4/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- mm/hmm.c | 6 ++++-- mm/huge_memory.c | 4 ++-- mm/ksm.c | 3 +++ mm/memory.c | 6 ++++++ mm/migrate.c | 3 ++- mm/mlock.c | 3 ++- mm/mmap.c | 9 +++++---- 11 files changed, 27 insertions(+), 14 deletions(-) --- a/drivers/dax/device.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/drivers/dax/device.c @@ -474,7 +474,7 @@ static int dax_mmap(struct file *filp, s return rc; vma->vm_ops = &dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + vma->vm_flags |= VM_HUGEPAGE; return 0; } --- a/fs/ext2/file.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/fs/ext2/file.c @@ -126,7 +126,6 @@ static int ext2_file_mmap(struct file *f file_accessed(file); vma->vm_ops = &ext2_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP; return 0; } #else --- a/fs/ext4/file.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/fs/ext4/file.c @@ -374,7 +374,7 @@ static int ext4_file_mmap(struct file *f file_accessed(file); if (IS_DAX(file_inode(file))) { vma->vm_ops = &ext4_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + vma->vm_flags |= VM_HUGEPAGE; } else { vma->vm_ops = &ext4_file_vm_ops; } --- a/fs/xfs/xfs_file.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/fs/xfs/xfs_file.c @@ -1169,7 +1169,7 @@ xfs_file_mmap( file_accessed(filp); vma->vm_ops = &xfs_file_vm_ops; if (IS_DAX(file_inode(filp))) - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + vma->vm_flags |= VM_HUGEPAGE; return 0; } --- a/mm/hmm.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/mm/hmm.c @@ -676,7 +676,8 @@ int hmm_vma_get_pfns(struct hmm_range *r return -EINVAL; /* FIXME support hugetlb fs */ - if 
(is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) { + if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || + vma_is_dax(vma)) { hmm_pfns_special(range); return -EINVAL; } @@ -849,7 +850,8 @@ int hmm_vma_fault(struct hmm_range *rang return -EINVAL; /* FIXME support hugetlb fs */ - if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) { + if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || + vma_is_dax(vma)) { hmm_pfns_special(range); return -EINVAL; } --- a/mm/huge_memory.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/mm/huge_memory.c @@ -762,11 +762,11 @@ int vmf_insert_pfn_pmd(struct vm_area_st * but we need to be consistent with PTEs and architectures that * can't support a 'special' bit. */ - BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); + BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && + !pfn_t_devmap(pfn)); BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == (VM_PFNMAP|VM_MIXEDMAP)); BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); - BUG_ON(!pfn_t_devmap(pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return VM_FAULT_SIGBUS; --- a/mm/ksm.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/mm/ksm.c @@ -2430,6 +2430,9 @@ int ksm_madvise(struct vm_area_struct *v VM_HUGETLB | VM_MIXEDMAP)) return 0; /* just ignore the advice */ + if (vma_is_dax(vma)) + return 0; + #ifdef VM_SAO if (*vm_flags & VM_SAO) return 0; --- a/mm/memory.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/mm/memory.c @@ -859,6 +859,10 @@ struct page *_vm_normal_page(struct vm_a return NULL; } } + + if (pte_devmap(pte)) + return NULL; + print_bad_pte(vma, addr, pte, NULL); return NULL; } @@ -923,6 +927,8 @@ struct page *vm_normal_page_pmd(struct v } } + if (pmd_devmap(pmd)) + return NULL; if (is_zero_pfn(pfn)) return NULL; if (unlikely(pfn > highest_memmap_pfn)) --- a/mm/migrate.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/mm/migrate.c @@ -2951,7 +2951,8 @@ int migrate_vma(const struct 
migrate_vma /* Sanity check the arguments */ start &= PAGE_MASK; end &= PAGE_MASK; - if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) + if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || + vma_is_dax(vma)) return -EINVAL; if (start < vma->vm_start || start >= vma->vm_end) return -EINVAL; --- a/mm/mlock.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/mm/mlock.c @@ -527,7 +527,8 @@ static int mlock_fixup(struct vm_area_st vm_flags_t old_flags = vma->vm_flags; if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || - is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) + is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || + vma_is_dax(vma)) /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ goto out; --- a/mm/mmap.c~dax-remove-vm_mixedmap-for-fsdax-and-device-dax +++ a/mm/mmap.c @@ -1796,11 +1796,12 @@ out: vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current->mm))) - mm->locked_vm += (len >> PAGE_SHIFT); - else + if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || + is_vm_hugetlb_page(vma) || + vma == get_gate_vma(current->mm)) vma->vm_flags &= VM_LOCKED_CLEAR_MASK; + else + mm->locked_vm += (len >> PAGE_SHIFT); } if (file) _