[RFC 04/10] mm: allocate virtual space for process-local memory

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Implement first half of kmalloc and kfree for process-local memory,
which deals with allocating virtual address ranges in the process-local
memory area.

While the process-local mappings will be visible only in a
single address space, the address of each allocation is still unique to
aid in debugging (e.g., to see page faults instead of silent invalid
accesses). For that purpose, use a global allocator for virtual address
ranges of process-local mappings. Note that the single centralized lock
is good enough for our use case.

Signed-off-by: Marius Hillenbrand <mhillenb@xxxxxxxxx>
Cc: Alexander Graf <graf@xxxxxxxxx>
Cc: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
 arch/x86/mm/proclocal.c   |   7 +-
 include/linux/mm_types.h  |   4 +
 include/linux/proclocal.h |  19 +++++
 mm/proclocal.c            | 150 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/proclocal.c b/arch/x86/mm/proclocal.c
index c64a8ea6360d..dd641995cc9f 100644
--- a/arch/x86/mm/proclocal.c
+++ b/arch/x86/mm/proclocal.c
@@ -10,6 +10,8 @@
 #include <linux/set_memory.h>
 #include <asm/tlb.h>
 
+extern void handle_proclocal_page(struct mm_struct *mm, struct page *page,
+				  unsigned long addr);
 
 static void unmap_leftover_pages_pte(struct mm_struct *mm, pmd_t *pmd,
 				     unsigned long addr, unsigned long end,
@@ -27,6 +29,8 @@ static void unmap_leftover_pages_pte(struct mm_struct *mm, pmd_t *pmd,
 		pte_clear(mm, addr, pte);
 		set_direct_map_default_noflush(page);
 
+		/* callback to non-arch allocator */
+		handle_proclocal_page(mm, page, addr);
 		/*
 		 * scrub page contents. since mm teardown happens from a
 		 * different mm, we cannot just use the process-local virtual
@@ -126,6 +130,7 @@ static void arch_proclocal_teardown_pt(struct mm_struct *mm)
 
 void arch_proclocal_teardown_pages_and_pt(struct mm_struct *mm)
 {
-	unmap_free_leftover_proclocal_pages(mm);
+	if (mm->proclocal_nr_pages)
+		unmap_free_leftover_proclocal_pages(mm);
 	arch_proclocal_teardown_pt(mm);
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1cb9243dd299..4bd8737cc7a6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -510,6 +510,10 @@ struct mm_struct {
 		/* HMM needs to track a few things per mm */
 		struct hmm *hmm;
 #endif
+
+#ifdef CONFIG_PROCLOCAL
+		size_t proclocal_nr_pages;
+#endif
 	} __randomize_layout;
 
 	/*
diff --git a/include/linux/proclocal.h b/include/linux/proclocal.h
index 9dae140c0796..c408e0d1104c 100644
--- a/include/linux/proclocal.h
+++ b/include/linux/proclocal.h
@@ -8,8 +8,27 @@
 
 struct mm_struct;
 
+void *kmalloc_proclocal(size_t size);
+void *kzalloc_proclocal(size_t size);
+void kfree_proclocal(void *vaddr);
+
 void proclocal_mm_exit(struct mm_struct *mm);
 #else  /* !CONFIG_PROCLOCAL */
+static inline void *kmalloc_proclocal(size_t size)
+{
+	return kmalloc(size, GFP_KERNEL);
+}
+
+static inline void * kzalloc_proclocal(size_t size)
+{
+	return kzalloc(size, GFP_KERNEL);
+}
+
+static inline void kfree_proclocal(void *vaddr)
+{
+	kfree(vaddr);
+}
+
 static inline void proclocal_mm_exit(struct mm_struct *mm) { }
 #endif
 
diff --git a/mm/proclocal.c b/mm/proclocal.c
index 72c485c450bf..7a6217faf765 100644
--- a/mm/proclocal.c
+++ b/mm/proclocal.c
@@ -8,6 +8,7 @@
  *
  * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  */
+#include <linux/genalloc.h>
 #include <linux/mm.h>
 #include <linux/proclocal.h>
 #include <linux/set_memory.h>
@@ -18,6 +19,43 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
+static pte_t *pte_lookup_map(struct mm_struct *mm, unsigned long kvaddr)
+{
+	pgd_t *pgd = pgd_offset(mm, kvaddr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (IS_ERR_OR_NULL(pgd) || !pgd_present(*pgd))
+		return ERR_PTR(-1);
+
+	p4d = p4d_offset(pgd, kvaddr);
+	if (IS_ERR_OR_NULL(p4d) || !p4d_present(*p4d))
+		return ERR_PTR(-1);
+
+	pud = pud_offset(p4d, kvaddr);
+	if (IS_ERR_OR_NULL(pud) || !pud_present(*pud))
+		return ERR_PTR(-1);
+
+	pmd = pmd_offset(pud, kvaddr);
+	if (IS_ERR_OR_NULL(pmd) || !pmd_present(*pmd))
+		return ERR_PTR(-1);
+
+	return pte_offset_map(pmd, kvaddr);
+}
+
+static struct page *proclocal_find_first_page(struct mm_struct *mm, const void *kvaddr)
+{
+	pte_t *ptep = pte_lookup_map(mm, (unsigned long) kvaddr);
+
+	if(IS_ERR_OR_NULL(ptep))
+		return NULL;
+	if (!pte_present(*ptep))
+		return NULL;
+
+	return pfn_to_page(pte_pfn(*ptep));
+}
+
 void proclocal_release_pages(struct list_head *pages)
 {
 	struct page *pos, *n;
@@ -27,9 +65,121 @@ void proclocal_release_pages(struct list_head *pages)
 	}
 }
 
+static DEFINE_SPINLOCK(proclocal_lock);
+static struct gen_pool *allocator;
+
+static int proclocal_allocator_init(void)
+{
+	int rc;
+
+	allocator = gen_pool_create(PAGE_SHIFT, -1);
+	if (unlikely(IS_ERR(allocator)))
+		return PTR_ERR(allocator);
+	if (!allocator)
+		return -1;
+
+	rc = gen_pool_add(allocator, PROCLOCAL_START, PROCLOCAL_SIZE, -1);
+
+	if (rc)
+		gen_pool_destroy(allocator);
+
+	return rc;
+}
+late_initcall(proclocal_allocator_init);
+
+static void *alloc_virtual(size_t nr_pages)
+{
+	void *kvaddr;
+	spin_lock(&proclocal_lock);
+	kvaddr = (void *)gen_pool_alloc(allocator, nr_pages * PAGE_SIZE);
+	spin_unlock(&proclocal_lock);
+	return kvaddr;
+}
+
+static void free_virtual(const void *kvaddr, size_t nr_pages)
+{
+	spin_lock(&proclocal_lock);
+	gen_pool_free(allocator, (unsigned long)kvaddr,
+			     nr_pages * PAGE_SIZE);
+	spin_unlock(&proclocal_lock);
+}
+
+void *kmalloc_proclocal(size_t size)
+{
+	void *kvaddr = NULL;
+	size_t nr_pages = round_up(size, PAGE_SIZE) / PAGE_SIZE;
+	size_t nr_pages_virtual = nr_pages + 1; /* + guard page */
+
+	BUG_ON(!current);
+	if (!size)
+		return ZERO_SIZE_PTR;
+	might_sleep();
+
+	kvaddr = alloc_virtual(nr_pages_virtual);
+
+	if (IS_ERR_OR_NULL(kvaddr))
+		return kvaddr;
+
+	/* tbd: subsequent patch will allocate and map physical pages */
+
+	return kvaddr;
+}
+EXPORT_SYMBOL(kmalloc_proclocal);
+
+void * kzalloc_proclocal(size_t size)
+{
+	void * kvaddr = kmalloc_proclocal(size);
+
+	if (!IS_ERR_OR_NULL(kvaddr))
+		memset(kvaddr, 0, size);
+	return kvaddr;
+}
+EXPORT_SYMBOL(kzalloc_proclocal);
+
+void kfree_proclocal(void *kvaddr)
+{
+	struct page *first_page;
+	int nr_pages;
+	struct mm_struct *mm;
+
+	if (!kvaddr || kvaddr == ZERO_SIZE_PTR)
+		return;
+
+	pr_debug("kfree for %p (current %p mm %p)\n", kvaddr,
+		 current, current ? current->mm : 0);
+
+	BUG_ON((unsigned long)kvaddr < PROCLOCAL_START);
+	BUG_ON((unsigned long)kvaddr >= (PROCLOCAL_START + PROCLOCAL_SIZE));
+	BUG_ON(!current);
+
+	mm = current->mm;
+	down_write(&mm->mmap_sem);
+	first_page = proclocal_find_first_page(mm, kvaddr);
+	if (IS_ERR_OR_NULL(first_page)) {
+		pr_err("double-free?!\n");
+		BUG();
+	} /* double-free? */
+	nr_pages = first_page->proclocal_nr_pages;
+	BUG_ON(!nr_pages);
+	mm->proclocal_nr_pages -= nr_pages;
+	/* subsequent patch will unmap and release physical pages */
+	up_write(&mm->mmap_sem);
+
+	free_virtual(kvaddr, nr_pages + 1);
+}
+EXPORT_SYMBOL(kfree_proclocal);
+
 void proclocal_mm_exit(struct mm_struct *mm)
 {
 	pr_debug("proclocal_mm_exit for mm %p pgd %p (current is %p)\n", mm, mm->pgd, current);
 
 	arch_proclocal_teardown_pages_and_pt(mm);
 }
+
+void handle_proclocal_page(struct mm_struct *mm, struct page *page,
+				  unsigned long addr)
+{
+	if (page->proclocal_nr_pages) {
+		free_virtual((void *)addr, page->proclocal_nr_pages + 1);
+	}
+}
-- 
2.21.0




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux