Re: [kvm-unit-tests PATCH v1 05/12] lib/alloc_page: fix and improve the page allocator

On 1/4/21 5:11 AM, Claudio Imbrenda wrote:
On Thu, 24 Dec 2020 10:17:06 -0800
Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx> wrote:

On 12/16/20 12:11 PM, Claudio Imbrenda wrote:
This patch introduces some improvements to the code, mostly
readability improvements, but also some semantic details, and
improvements in the documentation.

* introduce and use pfn_t to semantically tag parameters as PFNs
* remove the PFN macro, use virt_to_pfn instead
* rename area_or_metadata_contains and area_contains to
area_contains_pfn and usable_area_contains_pfn respectively
* fix/improve comments in lib/alloc_page.h
* move some wrapper functions to the header

Fixes: 8131e91a4b61 ("lib/alloc_page: complete rewrite of the page allocator")
Fixes: 34c950651861 ("lib/alloc_page: allow reserving arbitrary memory ranges")

Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
---
   lib/alloc_page.h |  49 +++++++++-----
   lib/alloc_page.c | 165 +++++++++++++++++++++++------------------------
   2 files changed, 116 insertions(+), 98 deletions(-)

diff --git a/lib/alloc_page.h b/lib/alloc_page.h
index b6aace5..d8550c6 100644
--- a/lib/alloc_page.h
+++ b/lib/alloc_page.h
@@ -8,6 +8,7 @@
   #ifndef ALLOC_PAGE_H
   #define ALLOC_PAGE_H 1
+#include <stdbool.h>
   #include <asm/memory_areas.h>
#define AREA_ANY -1
@@ -23,7 +24,7 @@ bool page_alloc_initialized(void);
   * top_pfn is the physical frame number of the first page immediately after
   * the end of the area to initialize
   */
-void page_alloc_init_area(u8 n, uintptr_t base_pfn, uintptr_t top_pfn);
+void page_alloc_init_area(u8 n, phys_addr_t base_pfn, phys_addr_t top_pfn);

   /* Enables the page allocator. At least one area must have been initialized */
   void page_alloc_ops_enable(void);
@@ -37,9 +38,12 @@ void *memalign_pages_area(unsigned int areas, size_t alignment, size_t size);
   /*
    * Allocate aligned memory from any area.
- * Equivalent to memalign_pages_area(~0, alignment, size).
+ * Equivalent to memalign_pages_area(AREA_ANY, alignment, size).
    */
-void *memalign_pages(size_t alignment, size_t size);
+static inline void *memalign_pages(size_t alignment, size_t size)
+{
+	return memalign_pages_area(AREA_ANY, alignment, size);
+}
/*
    * Allocate naturally aligned memory from the specified areas.
@@ -48,16 +52,22 @@ void *memalign_pages(size_t alignment, size_t size);
   void *alloc_pages_area(unsigned int areas, unsigned int order);
   /*
- * Allocate one page from any area.
- * Equivalent to alloc_pages(0);
+ * Allocate naturally aligned memory from any area.

This one allocates page-size memory and the comment should reflect that.

I'll fix the comment.
+ * Equivalent to alloc_pages_area(AREA_ANY, order);
    */
-void *alloc_page(void);
+static inline void *alloc_pages(unsigned int order)
+{
+	return alloc_pages_area(AREA_ANY, order);
+}
/*
- * Allocate naturally aligned memory from any area.
- * Equivalent to alloc_pages_area(~0, order);
+ * Allocate one page from any area.
+ * Equivalent to alloc_pages(0);
    */
-void *alloc_pages(unsigned int order);
+static inline void *alloc_page(void)
+{
+	return alloc_pages(0);
+}
/*
   * Frees a memory block allocated with any of the memalign_pages* or
@@ -66,23 +76,32 @@ void *alloc_pages(unsigned int order);
    */
   void free_pages(void *mem);
-/* For backwards compatibility */
+/*
+ * Free one page.
+ * Equivalent to free_pages(mem).
+ */
   static inline void free_page(void *mem)
   {
   	return free_pages(mem);
   }
-/* For backwards compatibility */
+/*
+ * Free pages by order.
+ * Equivalent to free_pages(mem).
+ */
   static inline void free_pages_by_order(void *mem, unsigned int order)
   {
   	free_pages(mem);
   }
/*
- * Allocates and reserves the specified memory range if possible.
- * Returns NULL in case of failure.
+ * Allocates and reserves the specified physical memory range if possible.
+ * If the specified range cannot be reserved in its entirety, no action is
+ * performed and false is returned.
+ *
+ * Returns true in case of success, false otherwise.
    */
-void *alloc_pages_special(uintptr_t addr, size_t npages);
+bool alloc_pages_special(phys_addr_t addr, size_t npages);
/*
    * Frees a reserved memory range that had been reserved with
@@ -91,6 +110,6 @@ void *alloc_pages_special(uintptr_t addr, size_t npages);
   * exactly, it can also be a subset, in which case only the specified
    * pages will be freed and unreserved.
    */
-void free_pages_special(uintptr_t addr, size_t npages);
+void free_pages_special(phys_addr_t addr, size_t npages);
#endif
diff --git a/lib/alloc_page.c b/lib/alloc_page.c
index ed0ff02..8d2700d 100644
--- a/lib/alloc_page.c
+++ b/lib/alloc_page.c
@@ -17,25 +17,29 @@
#define IS_ALIGNED_ORDER(x,order) IS_ALIGNED((x),BIT_ULL(order))
   #define NLISTS ((BITS_PER_LONG) - (PAGE_SHIFT))
-#define PFN(x) ((uintptr_t)(x) >> PAGE_SHIFT)
#define ORDER_MASK 0x3f
   #define ALLOC_MASK	0x40
   #define SPECIAL_MASK	0x80
+typedef phys_addr_t pfn_t;
+
   struct mem_area {
   	/* Physical frame number of the first usable frame in the area */
-	uintptr_t base;
+	pfn_t base;
   	/* Physical frame number of the first frame outside the area */
-	uintptr_t top;
-	/* Combination of SPECIAL_MASK, ALLOC_MASK, and order */
+	pfn_t top;
+	/* Per page metadata, each entry is a combination *_MASK and order */
   	u8 *page_states;
   	/* One freelist for each possible block size, up to NLISTS */
   	struct linked_list freelists[NLISTS];
   };
+/* Descriptors for each possible area */
   static struct mem_area areas[MAX_AREAS];
+/* Mask of initialized areas */
   static unsigned int areas_mask;
+/* Protects areas and areas mask */
   static struct spinlock lock;
bool page_alloc_initialized(void)
@@ -43,12 +47,24 @@ bool page_alloc_initialized(void)
   	return areas_mask != 0;
   }
-static inline bool area_or_metadata_contains(struct mem_area *a, uintptr_t pfn)
+/*
+ * Each memory area contains an array of metadata entries at the very
+ * beginning. The usable memory follows immediately afterwards.
+ * This function returns true if the given pfn falls anywhere within the
+ * memory area, including the metadata area.
+ */
+static inline bool area_contains_pfn(struct mem_area *a, pfn_t pfn)
   {
-	return (pfn >= PFN(a->page_states)) && (pfn < a->top);
+	return (pfn >= virt_to_pfn(a->page_states)) && (pfn < a->top);
   }

-static inline bool area_contains(struct mem_area *a, uintptr_t pfn)
+/*
+ * Each memory area contains an array of metadata entries at the very
+ * beginning. The usable memory follows immediately afterwards.
+ * This function returns true if the given pfn falls in the usable range of
+ * the given memory area.
+ */
+static inline bool usable_area_contains_pfn(struct mem_area *a, pfn_t pfn)
   {
   	return (pfn >= a->base) && (pfn < a->top);
   }
@@ -69,21 +85,19 @@ static inline bool area_contains(struct mem_area *a, uintptr_t pfn)
    */
   static void split(struct mem_area *a, void *addr)
   {
-	uintptr_t pfn = PFN(addr);
-	struct linked_list *p;
-	uintptr_t i, idx;
+	pfn_t pfn = virt_to_pfn(addr);
+	pfn_t i, idx;
   	u8 order;
- assert(a && area_contains(a, pfn));
+	assert(a && usable_area_contains_pfn(a, pfn));
   	idx = pfn - a->base;
   	order = a->page_states[idx];
   	assert(!(order & ~ORDER_MASK) && order && (order < NLISTS));
   	assert(IS_ALIGNED_ORDER(pfn, order));
-	assert(area_contains(a, pfn + BIT(order) - 1));
+	assert(usable_area_contains_pfn(a, pfn + BIT(order) - 1));
/* Remove the block from its free list */
-	p = list_remove(addr);
-	assert(p);
+	list_remove(addr);
/* update the block size for each page in the block */
   	for (i = 0; i < BIT(order); i++) {
@@ -92,9 +106,9 @@ static void split(struct mem_area *a, void *addr)
   	}
   	order--;
   	/* add the first half block to the appropriate free list */
-	list_add(a->freelists + order, p);
+	list_add(a->freelists + order, addr);
   	/* add the second half block to the appropriate free list */
-	list_add(a->freelists + order, (void *)((pfn + BIT(order)) * PAGE_SIZE));
+	list_add(a->freelists + order, pfn_to_virt(pfn + BIT(order)));
   }
/*
@@ -105,7 +119,7 @@ static void split(struct mem_area *a, void *addr)
    */
   static void *page_memalign_order(struct mem_area *a, u8 al, u8 sz)
   {
-	struct linked_list *p, *res = NULL;
+	struct linked_list *p;
   	u8 order;
assert((al < NLISTS) && (sz < NLISTS));
@@ -130,17 +144,17 @@ static void *page_memalign_order(struct mem_area *a, u8 al, u8 sz)
   	for (; order > sz; order--)
   		split(a, p);
-	res = list_remove(p);
-	memset(a->page_states + (PFN(res) - a->base), ALLOC_MASK | order, BIT(order));
-	return res;
+	list_remove(p);
+	memset(a->page_states + (virt_to_pfn(p) - a->base), ALLOC_MASK | order, BIT(order));
+	return p;
   }
-static struct mem_area *get_area(uintptr_t pfn)
+static struct mem_area *get_area(pfn_t pfn)
   {
   	uintptr_t i;

   	for (i = 0; i < MAX_AREAS; i++)
-		if ((areas_mask & BIT(i)) && area_contains(areas + i, pfn))
+		if ((areas_mask & BIT(i)) && usable_area_contains_pfn(areas + i, pfn))
   			return areas + i;
   	return NULL;
   }
@@ -160,17 +174,16 @@ static struct mem_area *get_area(uintptr_t pfn)
    * - all of the pages of the two blocks must have the same block size
    * - the function is called with the lock held
    */
-static bool coalesce(struct mem_area *a, u8 order, uintptr_t pfn, uintptr_t pfn2)
+static bool coalesce(struct mem_area *a, u8 order, pfn_t pfn, pfn_t pfn2)
   {
-	uintptr_t first, second, i;
-	struct linked_list *li;
+	pfn_t first, second, i;

   	assert(IS_ALIGNED_ORDER(pfn, order) && IS_ALIGNED_ORDER(pfn2, order));
   	assert(pfn2 == pfn + BIT(order));
   	assert(a);

   	/* attempting to coalesce two blocks that belong to different areas */
-	if (!area_contains(a, pfn) || !area_contains(a, pfn2 + BIT(order) - 1))
+	if (!usable_area_contains_pfn(a, pfn) || !usable_area_contains_pfn(a, pfn2 + BIT(order) - 1))
   		return false;
   	first = pfn - a->base;
   	second = pfn2 - a->base;
@@ -179,17 +192,15 @@ static bool coalesce(struct mem_area *a, u8 order, uintptr_t pfn, uintptr_t pfn2
   		return false;

   	/* we can coalesce, remove both blocks from their freelists */
-	li = list_remove((void *)(pfn2 << PAGE_SHIFT));
-	assert(li);
-	li = list_remove((void *)(pfn << PAGE_SHIFT));
-	assert(li);
+	list_remove(pfn_to_virt(pfn2));
+	list_remove(pfn_to_virt(pfn));
   	/* check the metadata entries and update with the new size */
   	for (i = 0; i < (2ull << order); i++) {
   		assert(a->page_states[first + i] == order);
   		a->page_states[first + i] = order + 1;
   	}
   	/* finally add the newly coalesced block to the appropriate freelist */
-	list_add(a->freelists + order + 1, li);
+	list_add(a->freelists + order + 1, pfn_to_virt(pfn));
   	return true;
   }
@@ -209,7 +220,7 @@ static bool coalesce(struct mem_area *a, u8 order, uintptr_t pfn, uintptr_t pfn2
    */
   static void _free_pages(void *mem)
   {
-	uintptr_t pfn2, pfn = PFN(mem);
+	pfn_t pfn2, pfn = virt_to_pfn(mem);
   	struct mem_area *a = NULL;
   	uintptr_t i, p;
   	u8 order;
@@ -232,7 +243,7 @@ static void _free_pages(void *mem)
   	/* ensure that the block is aligned properly for its size */
   	assert(IS_ALIGNED_ORDER(pfn, order));
   	/* ensure that the area can contain the whole block */
-	assert(area_contains(a, pfn + BIT(order) - 1));
+	assert(usable_area_contains_pfn(a, pfn + BIT(order) - 1));

   	for (i = 0; i < BIT(order); i++) {
   		/* check that all pages of the block have consistent metadata */
@@ -268,63 +279,68 @@ void free_pages(void *mem)
   	spin_unlock(&lock);
   }
-static void *_alloc_page_special(uintptr_t addr)
+static bool _alloc_page_special(pfn_t pfn)
   {
   	struct mem_area *a;
-	uintptr_t mask, i;
+	pfn_t mask, i;
- a = get_area(PFN(addr));
-	assert(a);
-	i = PFN(addr) - a->base;
+	a = get_area(pfn);
+	if (!a)
+		return false;
+	i = pfn - a->base;
   	if (a->page_states[i] & (ALLOC_MASK | SPECIAL_MASK))
-		return NULL;
+		return false;
   	while (a->page_states[i]) {
-		mask = GENMASK_ULL(63, PAGE_SHIFT + a->page_states[i]);
-		split(a, (void *)(addr & mask));
+		mask = GENMASK_ULL(63, a->page_states[i]);
+		split(a, pfn_to_virt(pfn & mask));
   	}
   	a->page_states[i] = SPECIAL_MASK;
-	return (void *)addr;
+	return true;
   }
-static void _free_page_special(uintptr_t addr)
+static void _free_page_special(pfn_t pfn)
   {
   	struct mem_area *a;
-	uintptr_t i;
+	pfn_t i;
- a = get_area(PFN(addr));
+	a = get_area(pfn);
   	assert(a);
-	i = PFN(addr) - a->base;
+	i = pfn - a->base;
   	assert(a->page_states[i] == SPECIAL_MASK);
   	a->page_states[i] = ALLOC_MASK;
-	_free_pages((void *)addr);
+	_free_pages(pfn_to_virt(pfn));
   }
-void *alloc_pages_special(uintptr_t addr, size_t n)
+bool alloc_pages_special(phys_addr_t addr, size_t n)

The convention for these alloc functions seems to be that of
returning void *. For example, alloc_pages_area(), alloc_pages(), etc.
Probably we should maintain the convention or change all of their
return types.
what if you try to allocate memory that is not directly addressable?
(e.g. on x86_32)

you pass a phys_addr_t and it succeeds, but you can't get a pointer to
it; how should I indicate success/failure?


The function can perhaps return an error code via a parameter to indicate why NULL was returned.
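
To make the suggestion concrete (this is only a hypothetical sketch, not
something taken from the patch; the enum name and error values below are
made up for the example), such an interface could look roughly like:

	/* hypothetical sketch only, not the interface proposed in this patch */
	enum alloc_special_err {
		ALLOC_SPECIAL_OK = 0,	/* range reserved and directly addressable */
		ALLOC_SPECIAL_EBUSY,	/* part of the range is already allocated or reserved */
		ALLOC_SPECIAL_ENOMAP,	/* range cannot be mapped to a pointer (e.g. x86_32) */
	};

	/*
	 * Would return a pointer to the reserved range when it is addressable,
	 * and NULL otherwise; *err would tell the caller why NULL was returned.
	 */
	void *alloc_pages_special(phys_addr_t addr, size_t npages,
				  enum alloc_special_err *err);

That would keep the void * convention of the other alloc functions while
still letting the caller distinguish "not addressable" from "already
reserved".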


   {
-	uintptr_t i;
+	pfn_t pfn;
+	size_t i;
assert(IS_ALIGNED(addr, PAGE_SIZE));
+	pfn = addr >> PAGE_SHIFT;
   	spin_lock(&lock);
   	for (i = 0; i < n; i++)
-		if (!_alloc_page_special(addr + i * PAGE_SIZE))
+		if (!_alloc_page_special(pfn + i))

Can the PFN macro be used here instead of the 'pfn' variable?

I removed the PFN macro in this patch; also, addr is not a virtual
address.

   			break;
   	if (i < n) {
   		for (n = 0 ; n < i; n++)
-			_free_page_special(addr + n * PAGE_SIZE);
-		addr = 0;
+			_free_page_special(pfn + n);
+		n = 0;
   	}
   	spin_unlock(&lock);
-	return (void *)addr;
+	return n;
   }
-void free_pages_special(uintptr_t addr, size_t n)
+void free_pages_special(phys_addr_t addr, size_t n)
   {
-	uintptr_t i;
+	pfn_t pfn;
+	size_t i;
assert(IS_ALIGNED(addr, PAGE_SIZE));
+	pfn = addr >> PAGE_SHIFT;
   	spin_lock(&lock);
   	for (i = 0; i < n; i++)
-		_free_page_special(addr + i * PAGE_SIZE);
+		_free_page_special(pfn + i);

Can the PFN macro be used here instead of the 'pfn' variable?

Same as above.

   	spin_unlock(&lock);
   }
@@ -351,11 +367,6 @@ void *alloc_pages_area(unsigned int area, unsigned int order)
   	return page_memalign_order_area(area, order, order);
   }
-void *alloc_pages(unsigned int order)
-{
-	return alloc_pages_area(AREA_ANY, order);
-}
-
   /*
    * Allocates (1 << order) physically contiguous aligned pages.
    * Returns NULL if the allocation was not possible.
@@ -370,18 +381,6 @@ void *memalign_pages_area(unsigned int area, size_t alignment, size_t size)
   	return page_memalign_order_area(area, size, alignment);
   }
-void *memalign_pages(size_t alignment, size_t size)
-{
-	return memalign_pages_area(AREA_ANY, alignment, size);
-}
-
-/*
- * Allocates one page
- */
-void *alloc_page()
-{
-	return alloc_pages(0);
-}
static struct alloc_ops page_alloc_ops = {
   	.memalign = memalign_pages,
@@ -416,7 +415,7 @@ void page_alloc_ops_enable(void)
    * - the memory area to add does not overlap with existing areas
    * - the memory area to add has at least 5 pages available
    */
-static void _page_alloc_init_area(u8 n, uintptr_t start_pfn, uintptr_t top_pfn)
+static void _page_alloc_init_area(u8 n, pfn_t start_pfn, pfn_t top_pfn)
   {
   	size_t table_size, npages, i;
   	struct mem_area *a;
@@ -437,7 +436,7 @@ static void _page_alloc_init_area(u8 n, uintptr_t start_pfn, uintptr_t top_pfn)
   	/* fill in the values of the new area */
   	a = areas + n;
-	a->page_states = (void *)(start_pfn << PAGE_SHIFT);
+	a->page_states = pfn_to_virt(start_pfn);
   	a->base = start_pfn + table_size;
   	a->top = top_pfn;
   	npages = top_pfn - a->base;
@@ -447,14 +446,14 @@ static void _page_alloc_init_area(u8 n, uintptr_t start_pfn, uintptr_t top_pfn)
   	for (i = 0; i < MAX_AREAS; i++) {
   		if (!(areas_mask & BIT(i)))
   			continue;
-		assert(!area_or_metadata_contains(areas + i, start_pfn));
-		assert(!area_or_metadata_contains(areas + i, top_pfn - 1));
-		assert(!area_or_metadata_contains(a, PFN(areas[i].page_states)));
-		assert(!area_or_metadata_contains(a, areas[i].top - 1));
+		assert(!area_contains_pfn(areas + i, start_pfn));
+		assert(!area_contains_pfn(areas + i, top_pfn - 1));
+		assert(!area_contains_pfn(a, virt_to_pfn(areas[i].page_states)));
+		assert(!area_contains_pfn(a, areas[i].top - 1));
   	}
   	/* initialize all freelists for the new area */
   	for (i = 0; i < NLISTS; i++)
-		a->freelists[i].next = a->freelists[i].prev = a->freelists + i;
+		a->freelists[i].prev = a->freelists[i].next = a->freelists + i;
   	/* initialize the metadata for the available memory */
   	for (i = a->base; i < a->top; i += 1ull << order) {
@@ -473,13 +472,13 @@ static void _page_alloc_init_area(u8 n, uintptr_t start_pfn, uintptr_t top_pfn)
   		assert(order < NLISTS);
   		/* initialize the metadata and add to the freelist */
   		memset(a->page_states + (i - a->base), order, BIT(order));
-		list_add(a->freelists + order, (void *)(i << PAGE_SHIFT));
+		list_add(a->freelists + order, pfn_to_virt(i));
   	}
   	/* finally mark the area as present */
   	areas_mask |= BIT(n);
   }
-static void __page_alloc_init_area(u8 n, uintptr_t cutoff, uintptr_t base_pfn, uintptr_t *top_pfn)
+static void __page_alloc_init_area(u8 n, pfn_t cutoff, pfn_t base_pfn, pfn_t *top_pfn)
   {
   	if (*top_pfn > cutoff) {
   		spin_lock(&lock);
@@ -500,7 +499,7 @@ static void __page_alloc_init_area(u8 n, uintptr_t cutoff, uintptr_t base_pfn, uintptr_t *top_pfn)
    * Prerequisites:
    * see _page_alloc_init_area
    */
-void page_alloc_init_area(u8 n, uintptr_t base_pfn, uintptr_t top_pfn)
+void page_alloc_init_area(u8 n, phys_addr_t base_pfn, phys_addr_t top_pfn)
   {
   	if (n != AREA_ANY_NUMBER) {
   		__page_alloc_init_area(n, 0, base_pfn, &top_pfn);


