Re: [PATCH v4 6/8] hugetlb: batch PMD split for bulk vmemmap dedup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2023/9/19 07:01, Mike Kravetz wrote:
From: Joao Martins <joao.m.martins@xxxxxxxxxx>

In an effort to minimize the number of TLB flushes, batch all PMD splits
belonging to a range of pages in order to perform only one (global) TLB
flush.

Add a flags field to the walker and use it to pass whether this is a bulk
allocation or just a single page, in order to decide whether to remap. The
first flag (VMEMMAP_SPLIT_NO_TLB_FLUSH) requests that the TLB flush be
skipped when we split the PMD.

Rebased and updated by Mike Kravetz

Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
---
  mm/hugetlb_vmemmap.c | 79 +++++++++++++++++++++++++++++++++++++++++---
  1 file changed, 75 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 147ed15bcae4..e8bc2f7567db 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -27,6 +27,7 @@
   * @reuse_addr:		the virtual address of the @reuse_page page.
   * @vmemmap_pages:	the list head of the vmemmap pages that can be freed
   *			or is mapped from.
+ * @flags:		used to modify behavior in bulk operations
   */
  struct vmemmap_remap_walk {
  	void			(*remap_pte)(pte_t *pte, unsigned long addr,
@@ -35,9 +36,11 @@ struct vmemmap_remap_walk {
  	struct page		*reuse_page;
  	unsigned long		reuse_addr;
  	struct list_head	*vmemmap_pages;
+#define VMEMMAP_SPLIT_NO_TLB_FLUSH	BIT(0)

Please add a brief comment following this macro to explain the behavior
it controls.

+	unsigned long		flags;
  };
-static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
+static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, bool flush)
  {
  	pmd_t __pmd;
  	int i;
@@ -80,7 +83,8 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
  		/* Make pte visible before pmd. See comment in pmd_install(). */
  		smp_wmb();
  		pmd_populate_kernel(&init_mm, pmd, pgtable);
-		flush_tlb_kernel_range(start, start + PMD_SIZE);
+		if (flush)
+			flush_tlb_kernel_range(start, start + PMD_SIZE);
  	} else {
  		pte_free_kernel(&init_mm, pgtable);
  	}
@@ -127,11 +131,20 @@ static int vmemmap_pmd_range(pud_t *pud, unsigned long addr,
  	do {
  		int ret;
- ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK);
+		ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK,
+				walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH);

Shouldn't this be !(walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH)? As written, the "flush" argument is true exactly when the no-flush flag is set.

Thanks.

  		if (ret)
  			return ret;
next = pmd_addr_end(addr, end);
+
+		/*
+		 * We are only splitting, not remapping the hugetlb vmemmap
+		 * pages.
+		 */
+		if (!walk->remap_pte)
+			continue;
+
  		vmemmap_pte_range(pmd, addr, next, walk);
  	} while (pmd++, addr = next, addr != end);
@@ -198,7 +211,8 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
  			return ret;
  	} while (pgd++, addr = next, addr != end);
- flush_tlb_kernel_range(start, end);
+	if (walk->remap_pte)
+		flush_tlb_kernel_range(start, end);
return 0;
  }
@@ -300,6 +314,36 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
  	set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
  }
+/**
+ * vmemmap_remap_split - split the vmemmap virtual address range [@start, @end)
+ *                      backing PMDs of the directmap into PTEs
+ * @start:     start address of the vmemmap virtual address range that we want
+ *             to remap.
+ * @end:       end address of the vmemmap virtual address range that we want to
+ *             remap.
+ * @reuse:     reuse address.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+static int vmemmap_remap_split(unsigned long start, unsigned long end,
+				unsigned long reuse)
+{
+	int ret;
+	struct vmemmap_remap_walk walk = {
+		.remap_pte	= NULL,
+		.flags		= VMEMMAP_SPLIT_NO_TLB_FLUSH,
+	};
+
+	/* See the comment in the vmemmap_remap_free(). */
+	BUG_ON(start - reuse != PAGE_SIZE);
+
+	mmap_read_lock(&init_mm);
+	ret = vmemmap_remap_range(reuse, end, &walk);
+	mmap_read_unlock(&init_mm);
+
+	return ret;
+}
+
  /**
   * vmemmap_remap_free - remap the vmemmap virtual address range [@start, @end)
   *			to the page which @reuse is mapped to, then free vmemmap
@@ -323,6 +367,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
  		.remap_pte	= vmemmap_remap_pte,
  		.reuse_addr	= reuse,
  		.vmemmap_pages	= vmemmap_pages,
+		.flags		= 0,
  	};
  	int nid = page_to_nid((struct page *)reuse);
  	gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
@@ -371,6 +416,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
  			.remap_pte	= vmemmap_restore_pte,
  			.reuse_addr	= reuse,
  			.vmemmap_pages	= vmemmap_pages,
+			.flags		= 0,
  		};
vmemmap_remap_range(reuse, end, &walk);
@@ -422,6 +468,7 @@ static int vmemmap_remap_alloc(unsigned long start, unsigned long end,
  		.remap_pte	= vmemmap_restore_pte,
  		.reuse_addr	= reuse,
  		.vmemmap_pages	= &vmemmap_pages,
+		.flags		= 0,
  	};
/* See the comment in the vmemmap_remap_free(). */
@@ -630,11 +677,35 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
  	free_vmemmap_page_list(&vmemmap_pages);
  }
+static void hugetlb_vmemmap_split(const struct hstate *h, struct page *head)
+{
+	unsigned long vmemmap_start = (unsigned long)head, vmemmap_end;
+	unsigned long vmemmap_reuse;
+
+	if (!vmemmap_should_optimize(h, head))
+		return;
+
+	vmemmap_end	= vmemmap_start + hugetlb_vmemmap_size(h);
+	vmemmap_reuse	= vmemmap_start;
+	vmemmap_start	+= HUGETLB_VMEMMAP_RESERVE_SIZE;
+
+	/*
+	 * Split PMDs on the vmemmap virtual address range [@vmemmap_start,
+	 * @vmemmap_end]
+	 */
+	vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse);
+}
+
  void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
  {
  	struct folio *folio;
  	LIST_HEAD(vmemmap_pages);
+ list_for_each_entry(folio, folio_list, lru)
+		hugetlb_vmemmap_split(h, &folio->page);
+
+	flush_tlb_all();
+
  	list_for_each_entry(folio, folio_list, lru) {
  		int ret = __hugetlb_vmemmap_optimize(h, &folio->page,
  								&vmemmap_pages);





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux