+ mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: mm, THP, swap: support PMD swap mapping when splitting huge PMD
has been added to the -mm tree.  Its filename is
     mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Huang Ying <ying.huang@xxxxxxxxx>
Subject: mm, THP, swap: support PMD swap mapping when splitting huge PMD

A huge PMD need to be split when zap a part of the PMD mapping etc.  If
the PMD mapping is a swap mapping, we need to split it too.  This patch
implemented the support for this.  This is similar as splitting the PMD
page mapping, except we need to decrease the PMD swap mapping count for
the huge swap cluster too.  If the PMD swap mapping count becomes 0, the
huge swap cluster will be split.

Notice: is_huge_zero_pmd() and pmd_page() doesn't work well with swap PMD,
so pmd_present() check is called before them.

Link: http://lkml.kernel.org/r/20180622035151.6676-7-ying.huang@xxxxxxxxx
Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
Cc: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Shaohua Li <shli@xxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
Cc: Zi Yan <zi.yan@xxxxxxxxxxxxxx>
Cc: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---


diff -puN include/linux/swap.h~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd include/linux/swap.h
--- a/include/linux/swap.h~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd
+++ a/include/linux/swap.h
@@ -618,11 +618,17 @@ static inline swp_entry_t get_swap_page(
 
 #ifdef CONFIG_THP_SWAP
 extern int split_swap_cluster(swp_entry_t entry);
+extern int split_swap_cluster_map(swp_entry_t entry);
 #else
 static inline int split_swap_cluster(swp_entry_t entry)
 {
 	return 0;
 }
+
+static inline int split_swap_cluster_map(swp_entry_t entry)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_MEMCG
diff -puN mm/huge_memory.c~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd mm/huge_memory.c
--- a/mm/huge_memory.c~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd
+++ a/mm/huge_memory.c
@@ -1603,6 +1603,47 @@ out:
 	return 0;
 }
 
+#ifdef CONFIG_THP_SWAP
+static void __split_huge_swap_pmd(struct vm_area_struct *vma,
+				  unsigned long haddr,
+				  pmd_t *pmd)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgtable_t pgtable;
+	pmd_t _pmd;
+	swp_entry_t entry;
+	int i, soft_dirty;
+
+	entry = pmd_to_swp_entry(*pmd);
+	soft_dirty = pmd_soft_dirty(*pmd);
+
+	split_swap_cluster_map(entry);
+
+	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+	pmd_populate(mm, &_pmd, pgtable);
+
+	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE, entry.val++) {
+		pte_t *pte, ptent;
+
+		pte = pte_offset_map(&_pmd, haddr);
+		VM_BUG_ON(!pte_none(*pte));
+		ptent = swp_entry_to_pte(entry);
+		if (soft_dirty)
+			ptent = pte_swp_mksoft_dirty(ptent);
+		set_pte_at(mm, haddr, pte, ptent);
+		pte_unmap(pte);
+	}
+	smp_wmb(); /* make pte visible before pmd */
+	pmd_populate(mm, pmd, pgtable);
+}
+#else
+static inline void __split_huge_swap_pmd(struct vm_area_struct *vma,
+					 unsigned long haddr,
+					 pmd_t *pmd)
+{
+}
+#endif
+
 /*
  * Return true if we do MADV_FREE successfully on entire pmd page.
  * Otherwise, return false.
@@ -2069,7 +2110,7 @@ static void __split_huge_pmd_locked(stru
 	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
 	VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
 	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
-	VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
+	VM_BUG_ON(!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd)
 				&& !pmd_devmap(*pmd));
 
 	count_vm_event(THP_SPLIT_PMD);
@@ -2091,8 +2132,11 @@ static void __split_huge_pmd_locked(stru
 		put_page(page);
 		add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
 		return;
-	} else if (is_huge_zero_pmd(*pmd)) {
+	} else if (pmd_present(*pmd) && is_huge_zero_pmd(*pmd)) {
 		/*
+		 * is_huge_zero_pmd() may return true for PMD swap
+		 * entry, so checking pmd_present() firstly.
+		 *
 		 * FIXME: Do we want to invalidate secondary mmu by calling
 		 * mmu_notifier_invalidate_range() see comments below inside
 		 * __split_huge_pmd() ?
@@ -2135,6 +2179,9 @@ static void __split_huge_pmd_locked(stru
 		page = pfn_to_page(swp_offset(entry));
 	} else
 #endif
+	if (thp_swap_supported() && is_swap_pmd(old_pmd))
+		return __split_huge_swap_pmd(vma, haddr, pmd);
+	else
 		page = pmd_page(old_pmd);
 	VM_BUG_ON_PAGE(!page_count(page), page);
 	page_ref_add(page, HPAGE_PMD_NR - 1);
@@ -2226,14 +2273,15 @@ void __split_huge_pmd(struct vm_area_str
 	 * pmd against. Otherwise we can end up replacing wrong page.
 	 */
 	VM_BUG_ON(freeze && !page);
-	if (page && page != pmd_page(*pmd))
-	        goto out;
+	/* pmd_page() should be called only if pmd_present() */
+	if (page && (!pmd_present(*pmd) || page != pmd_page(*pmd)))
+		goto out;
 
 	if (pmd_trans_huge(*pmd)) {
 		page = pmd_page(*pmd);
 		if (PageMlocked(page))
 			clear_page_mlock(page);
-	} else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
+	} else if (!(pmd_devmap(*pmd) || is_swap_pmd(*pmd)))
 		goto out;
 	__split_huge_pmd_locked(vma, pmd, haddr, freeze);
 out:
diff -puN mm/swapfile.c~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd mm/swapfile.c
--- a/mm/swapfile.c~mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd
+++ a/mm/swapfile.c
@@ -4043,6 +4043,34 @@ static void free_swap_count_continuation
 	}
 }
 
+#ifdef CONFIG_THP_SWAP
+/* The corresponding page table shouldn't be changed under us */
+int split_swap_cluster_map(swp_entry_t entry)
+{
+	struct swap_info_struct *si;
+	struct swap_cluster_info *ci;
+	unsigned long offset = swp_offset(entry);
+
+	VM_BUG_ON(!is_cluster_offset(offset));
+	si = _swap_info_get(entry);
+	if (!si)
+		return -EBUSY;
+	ci = lock_cluster(si, offset);
+	/* The swap cluster has been split by someone else */
+	if (!cluster_is_huge(ci))
+		goto out;
+	cluster_set_count(ci, cluster_count(ci) - 1);
+	VM_BUG_ON(cluster_count(ci) < SWAPFILE_CLUSTER);
+	if (cluster_count(ci) == SWAPFILE_CLUSTER &&
+	    !(si->swap_map[offset] & SWAP_HAS_CACHE))
+		cluster_clear_huge(ci);
+
+out:
+	unlock_cluster(ci);
+	return 0;
+}
+#endif
+
 static int __init swapfile_init(void)
 {
 	int nid;
_

Patches currently in -mm which might be from ying.huang@xxxxxxxxx are

mm-clear_huge_page-move-order-algorithm-into-a-separate-function.patch
mm-huge-page-copy-target-sub-page-last-when-copy-huge-page.patch
mm-hugetlbfs-rename-address-to-haddr-in-hugetlb_cow.patch
mm-hugetlbfs-pass-fault-address-to-cow-handler.patch
mm-swap-fix-race-between-swapoff-and-some-swap-operations.patch
mm-swap-fix-race-between-swapoff-and-some-swap-operations-v6.patch
mm-fix-race-between-swapoff-and-mincore.patch
mm-thp-swap-enable-pmd-swap-operations-for-config_thp_swap.patch
mm-thp-swap-make-config_thp_swap-depends-on-config_swap.patch
mm-thp-swap-support-pmd-swap-mapping-in-swap_duplicate.patch
mm-thp-swap-support-pmd-swap-mapping-in-swapcache_free_cluster.patch
mm-thp-swap-support-pmd-swap-mapping-in-free_swap_and_cache-swap_free.patch
mm-thp-swap-support-pmd-swap-mapping-when-splitting-huge-pmd.patch
mm-thp-swap-support-pmd-swap-mapping-in-split_swap_cluster.patch
mm-thp-swap-support-to-read-a-huge-swap-cluster-for-swapin-a-thp.patch
mm-thp-swap-swapin-a-thp-as-a-whole.patch
mm-thp-swap-support-to-count-thp-swapin-and-its-fallback.patch
mm-thp-swap-add-sysfs-interface-to-configure-thp-swapin.patch
mm-thp-swap-support-pmd-swap-mapping-in-swapoff.patch
mm-thp-swap-support-pmd-swap-mapping-in-madvise_free.patch
mm-cgroup-thp-swap-support-to-move-swap-account-for-pmd-swap-mapping.patch
mm-thp-swap-support-to-copy-pmd-swap-mapping-when-fork.patch
mm-thp-swap-free-pmd-swap-mapping-when-zap_huge_pmd.patch
mm-thp-swap-support-pmd-swap-mapping-for-madv_willneed.patch
mm-thp-swap-support-pmd-swap-mapping-in-mincore.patch
mm-thp-swap-support-pmd-swap-mapping-in-common-path.patch
mm-thp-swap-create-pmd-swap-mapping-when-unmap-the-thp.patch
mm-thp-avoid-to-split-thp-when-reclaim-madv_free-thp.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux