The patch titled Subject: mm, THP, swap: support PMD swap mapping when splitting huge PMD has been added to the -mm tree. Its filename is mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Huang Ying <ying.huang@xxxxxxxxx> Subject: mm, THP, swap: support PMD swap mapping when splitting huge PMD A huge PMD needs to be split when zapping a part of the PMD mapping, etc. If the PMD mapping is a swap mapping, we need to split it too. This patch implements support for this. This is similar to splitting the PMD page mapping, except that we need to decrease the PMD swap mapping count for the huge swap cluster too. If the PMD swap mapping count becomes 0, the huge swap cluster will be split. Note: is_huge_zero_pmd() and pmd_page() don't work well with a swap PMD, so pmd_present() is checked before calling them. Link: http://lkml.kernel.org/r/20180622035151.6676-7-ying.huang@xxxxxxxxx Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx> Cc: "Kirill A. 
Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Shaohua Li <shli@xxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Minchan Kim <minchan@xxxxxxxxxx> Cc: Rik van Riel <riel@xxxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx> Cc: Zi Yan <zi.yan@xxxxxxxxxxxxxx> Cc: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- diff -puN include/linux/swap.h~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd include/linux/swap.h --- a/include/linux/swap.h~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd +++ a/include/linux/swap.h @@ -618,11 +618,17 @@ static inline swp_entry_t get_swap_page( #ifdef CONFIG_THP_SWAP extern int split_swap_cluster(swp_entry_t entry); +extern int split_swap_cluster_map(swp_entry_t entry); #else static inline int split_swap_cluster(swp_entry_t entry) { return 0; } + +static inline int split_swap_cluster_map(swp_entry_t entry) +{ + return 0; +} #endif #ifdef CONFIG_MEMCG diff -puN mm/huge_memory.c~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd mm/huge_memory.c --- a/mm/huge_memory.c~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd +++ a/mm/huge_memory.c @@ -1603,6 +1603,47 @@ out: return 0; } +#ifdef CONFIG_THP_SWAP +static void __split_huge_swap_pmd(struct vm_area_struct *vma, + unsigned long haddr, + pmd_t *pmd) +{ + struct mm_struct *mm = vma->vm_mm; + pgtable_t pgtable; + pmd_t _pmd; + swp_entry_t entry; + int i, soft_dirty; + + entry = pmd_to_swp_entry(*pmd); + soft_dirty = pmd_soft_dirty(*pmd); + + split_swap_cluster_map(entry); + + pgtable = pgtable_trans_huge_withdraw(mm, pmd); + pmd_populate(mm, &_pmd, pgtable); + + for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE, entry.val++) { + pte_t *pte, ptent; + + pte = pte_offset_map(&_pmd, haddr); + VM_BUG_ON(!pte_none(*pte)); + 
ptent = swp_entry_to_pte(entry); + if (soft_dirty) + ptent = pte_swp_mksoft_dirty(ptent); + set_pte_at(mm, haddr, pte, ptent); + pte_unmap(pte); + } + smp_wmb(); /* make pte visible before pmd */ + pmd_populate(mm, pmd, pgtable); +} +#else +static inline void __split_huge_swap_pmd(struct vm_area_struct *vma, + unsigned long haddr, + pmd_t *pmd) +{ +} +#endif + /* * Return true if we do MADV_FREE successfully on entire pmd page. * Otherwise, return false. @@ -2069,7 +2110,7 @@ static void __split_huge_pmd_locked(stru VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); VM_BUG_ON_VMA(vma->vm_start > haddr, vma); VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); - VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd) + VM_BUG_ON(!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)); count_vm_event(THP_SPLIT_PMD); @@ -2091,8 +2132,11 @@ static void __split_huge_pmd_locked(stru put_page(page); add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); return; - } else if (is_huge_zero_pmd(*pmd)) { + } else if (pmd_present(*pmd) && is_huge_zero_pmd(*pmd)) { /* + * is_huge_zero_pmd() may return true for PMD swap + * entry, so checking pmd_present() firstly. + * * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside * __split_huge_pmd() ? @@ -2135,6 +2179,9 @@ static void __split_huge_pmd_locked(stru page = pfn_to_page(swp_offset(entry)); } else #endif + if (thp_swap_supported() && is_swap_pmd(old_pmd)) + return __split_huge_swap_pmd(vma, haddr, pmd); + else page = pmd_page(old_pmd); VM_BUG_ON_PAGE(!page_count(page), page); page_ref_add(page, HPAGE_PMD_NR - 1); @@ -2226,14 +2273,15 @@ void __split_huge_pmd(struct vm_area_str * pmd against. Otherwise we can end up replacing wrong page. 
*/ VM_BUG_ON(freeze && !page); - if (page && page != pmd_page(*pmd)) - goto out; + /* pmd_page() should be called only if pmd_present() */ + if (page && (!pmd_present(*pmd) || page != pmd_page(*pmd))) + goto out; if (pmd_trans_huge(*pmd)) { page = pmd_page(*pmd); if (PageMlocked(page)) clear_page_mlock(page); - } else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd))) + } else if (!(pmd_devmap(*pmd) || is_swap_pmd(*pmd))) goto out; __split_huge_pmd_locked(vma, pmd, haddr, freeze); out: diff -puN mm/swapfile.c~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd mm/swapfile.c --- a/mm/swapfile.c~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd +++ a/mm/swapfile.c @@ -4043,6 +4043,34 @@ static void free_swap_count_continuation } } +#ifdef CONFIG_THP_SWAP +/* The corresponding page table shouldn't be changed under us */ +int split_swap_cluster_map(swp_entry_t entry) +{ + struct swap_info_struct *si; + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + + VM_BUG_ON(!is_cluster_offset(offset)); + si = _swap_info_get(entry); + if (!si) + return -EBUSY; + ci = lock_cluster(si, offset); + /* The swap cluster has been split by someone else */ + if (!cluster_is_huge(ci)) + goto out; + cluster_set_count(ci, cluster_count(ci) - 1); + VM_BUG_ON(cluster_count(ci) < SWAPFILE_CLUSTER); + if (cluster_count(ci) == SWAPFILE_CLUSTER && + !(si->swap_map[offset] & SWAP_HAS_CACHE)) + cluster_clear_huge(ci); + +out: + unlock_cluster(ci); + return 0; +} +#endif + static int __init swapfile_init(void) { int nid; _ Patches currently in -mm which might be from ying.huang@xxxxxxxxx are mm-clear_huge_page-move-order-algorithm-into-a-separate-function.patch mm-huge-page-copy-target-sub-page-last-when-copy-huge-page.patch mm-hugetlbfs-rename-address-to-haddr-in-hugetlb_cow.patch mm-hugetlbfs-pass-fault-address-to-cow-handler.patch mm-swap-fix-race-between-swapoff-and-some-swap-operations.patch 
mm-swap-fix-race-between-swapoff-and-some-swap-operations-v6.patch mm-fix-race-between-swapoff-and-mincore.patch mm-thp-swap-enable-pmd-swap-operations-for-config_thp_swap.patch mm-thp-swap-make-config_thp_swap-depends-on-config_swap.patch mm-thp-swap-support-pmd-swap-mapping-in-swap_duplicate.patch mm-thp-swap-support-pmd-swap-mapping-in-swapcache_free_cluster.patch mm-thp-swap-support-pmd-swap-mapping-in-free_swap_and_cache-swap_free.patch mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch mm-thp-swap-support-pmd-swap-mapping-in-split_swap_cluster.patch mm-thp-swap-support-to-read-a-huge-swap-cluster-for-swapin-a-thp.patch mm-thp-swap-swapin-a-thp-as-a-whole.patch mm-thp-swap-support-to-count-thp-swapin-and-its-fallback.patch mm-thp-swap-add-sysfs-interface-to-configure-thp-swapin.patch mm-thp-swap-support-pmd-swap-mapping-in-swapoff.patch mm-thp-swap-support-pmd-swap-mapping-in-madvise_free.patch mm-cgroup-thp-swap-support-to-move-swap-account-for-pmd-swap-mapping.patch mm-thp-swap-support-to-copy-pmd-swap-mapping-when-fork.patch mm-thp-swap-free-pmd-swap-mapping-when-zap_huge_pmd.patch mm-thp-swap-support-pmd-swap-mapping-for-madv_willneed.patch mm-thp-swap-support-pmd-swap-mapping-in-mincore.patch mm-thp-swap-support-pmd-swap-mapping-in-common-path.patch mm-thp-swap-create-pmd-swap-mapping-when-unmap-the-thp.patch mm-thp-avoid-to-split-thp-when-reclaim-madv_free-thp.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html