[PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Instead of splitting the compound page with FOLL_SPLIT, this patch allows
uprobe to split only the PMD mapping of a huge page.

A helper function, mm_address_trans_huge(mm, address), is introduced to
test whether the address in mm is mapped by a transparent huge page (THP).

Signed-off-by: Song Liu <songliubraving@xxxxxx>
---
 include/linux/huge_mm.h |  8 ++++++++
 kernel/events/uprobes.c | 38 ++++++++++++++++++++++++++++++++------
 mm/huge_memory.c        | 24 ++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2d8a40fd06e4..4832d6580969 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -163,6 +163,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 		bool freeze, struct page *page, pgtable_t prealloc_pgtable);
 
+bool mm_address_trans_huge(struct mm_struct *mm, unsigned long address);
+
 void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long address);
 
@@ -302,6 +304,12 @@ static inline void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address, bool freeze, struct page *page,
 		pgtable_t prealloc_pgtable) {}
 
+static inline bool mm_address_trans_huge(struct mm_struct *mm,
+					 unsigned long address)
+{
+	return false;
+}
+
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ba49da99d2a2..56eeccc2f7a2 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
 #include <linux/shmem_fs.h>
+#include <asm/pgalloc.h>
 
 #include <linux/uprobes.h>
 
@@ -153,7 +154,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct page_vma_mapped_walk pvmw = {
-		.page = old_page,
+		.page = compound_head(old_page),
 		.vma = vma,
 		.address = addr,
 	};
@@ -165,8 +166,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
 				addr + PAGE_SIZE);
 
-	VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
 	if (!orig) {
 		err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
 					    &memcg, false);
@@ -188,7 +187,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	get_page(new_page);
 	if (orig) {
-		page_add_file_rmap(new_page, false);
+		page_add_file_rmap(compound_head(new_page),
+				   PageTransHuge(compound_head(new_page)));
 		inc_mm_counter(mm, mm_counter_file(new_page));
 		dec_mm_counter(mm, MM_ANONPAGES);
 	} else {
@@ -207,7 +207,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	set_pte_at_notify(mm, addr, pvmw.pte,
 			mk_pte(new_page, vma->vm_page_prot));
 
-	page_remove_rmap(old_page, false);
+	page_remove_rmap(compound_head(old_page),
+			 PageTransHuge(compound_head(old_page)));
 	if (!page_mapped(old_page))
 		try_to_free_swap(old_page);
 	page_vma_mapped_walk_done(&pvmw);
@@ -475,17 +476,42 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	struct vm_area_struct *vma;
 	int ret, is_register, ref_ctr_updated = 0;
 	pgoff_t index;
+	pgtable_t prealloc_pgtable = NULL;
+	unsigned long foll_flags = FOLL_FORCE;
 
 	is_register = is_swbp_insn(&opcode);
 	uprobe = container_of(auprobe, struct uprobe, arch);
 
+	/* do not FOLL_SPLIT yet */
+	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+			foll_flags, &old_page, &vma, NULL);
+
+	if (ret <= 0)
+		return ret;
+
+	if (mm_address_trans_huge(mm, vaddr)) {
+		prealloc_pgtable = pte_alloc_one(mm);
+		if (likely(prealloc_pgtable)) {
+			split_huge_pmd_address(vma, vaddr, false, NULL,
+					       prealloc_pgtable);
+			goto verify;
+		} else {
+			/* fallback to FOLL_SPLIT */
+			foll_flags |= FOLL_SPLIT;
+			put_page(old_page);
+		}
+	} else {
+		goto verify;
+	}
+
 retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
-			FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+			foll_flags, &old_page, &vma, NULL);
 	if (ret <= 0)
 		return ret;
 
+verify:
 	ret = verify_opcode(old_page, vaddr, &opcode);
 	if (ret <= 0)
 		goto put_old;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index dcb0e30213af..4714871353c0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2360,6 +2360,30 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 	____split_huge_pmd(vma, pmd, address, freeze, page, prealloc_pgtable);
 }
 
+bool mm_address_trans_huge(struct mm_struct *mm, unsigned long address)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return false;
+
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		return false;
+
+	pud = pud_offset(p4d, address);
+	if (!pud_present(*pud))
+		return false;
+
+	pmd = pmd_offset(pud, address);
+
+	return pmd_trans_huge(*pmd);
+}
+
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
 			     unsigned long start,
 			     unsigned long end,
-- 
2.17.1




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux