[RFC PATCH 5/6] zsmalloc: introduce handle mapping API

Introduce a new API to map/unmap a zsmalloc handle/object.  The key
difference is that this API does not impose atomicity restrictions
on its users: unlike zs_map_object(), which returns with page-faults
and preemption disabled, the handle mapping API does not need a
per-CPU vm-area, because users are required to provide an aux buffer
for objects that span several physical pages.
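
The intended calling convention looks roughly like this (an
illustrative sketch, not part of this patch; 'buf', 'dst' and
'obj_size' are hypothetical caller-side names, and a PAGE_SIZE aux
buffer should always be large enough, since zsmalloc objects are
capped at PAGE_SIZE):

	/* Illustrative sketch only */
	struct zs_handle_mapping map = {
		.handle = handle,
		.mode = ZS_MM_RO,
		/* aux buffer, used only if the object spans two pages */
		.local_copy = buf,
	};
	int err;

	err = zs_map_handle(pool, &map);
	if (err)
		return err;

	/* object data is accessed through map.handle_mem */
	memcpy(dst, map.handle_mem, obj_size);
	zs_unmap_handle(pool, &map);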

Keep zs_map_object()/zs_unmap_object() for the time being, as there
are still users of them, but eventually the old API will be removed.

Signed-off-by: Sergey Senozhatsky <senozhatsky@xxxxxxxxxxxx>
---
 include/linux/zsmalloc.h |  29 ++++++++
 mm/zsmalloc.c            | 148 ++++++++++++++++++++++++++++-----------
 2 files changed, 138 insertions(+), 39 deletions(-)
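
A quick note on the fast-path test used throughout the diff: an
object is kmap'ed directly only when it fits entirely within one
page, i.e. when off + class->size <= PAGE_SIZE. An illustrative
helper (not in the patch) spelling out the check:

	/*
	 * Illustrative only: true if the object does not cross a page
	 * boundary and can therefore be mapped without a copy.
	 */
	static inline bool zs_obj_is_contained(unsigned long off, size_t size)
	{
		return off + size <= PAGE_SIZE;
	}

For example, with 4 KiB pages, off = 1024 and size = 2048 takes the
direct kmap_local path, while off = 3072 and size = 2048 crosses into
the next page and falls back to the caller-provided local_copy buffer.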

diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index a48cd0ffe57d..72d84537dd38 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -58,4 +58,33 @@ unsigned long zs_compact(struct zs_pool *pool);
 unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
 
 void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
+
+struct zs_handle_mapping {
+	unsigned long handle;
+	/* Points to the start of the object data, either within local_copy
+	 * or within local_mapping. This is what callers should use to
+	 * access or modify handle data.
+	 */
+	void *handle_mem;
+
+	enum zs_mapmode mode;
+	union {
+		/*
+		 * A copy of the handle's object data, used when the object
+		 * spans several (non-contiguous) physical pages. This buffer
+		 * must be set by the zs_map_handle() caller beforehand and
+		 * should never be accessed directly.
+		 */
+		void *local_copy;
+		/*
+		 * A direct mapping of the handle's object. Should never
+		 * be accessed directly by callers.
+		 */
+		void *local_mapping;
+	};
+};
+
+int zs_map_handle(struct zs_pool *pool, struct zs_handle_mapping *map);
+void zs_unmap_handle(struct zs_pool *pool, struct zs_handle_mapping *map);
+
 #endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a5c1f9852072..281bba4a3277 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1132,18 +1132,14 @@ static inline void __zs_cpu_down(struct mapping_area *area)
 	area->vm_buf = NULL;
 }
 
-static void *__zs_map_object(struct mapping_area *area,
-			struct zpdesc *zpdescs[2], int off, int size)
+static void zs_obj_copyin(void *buf, struct zpdesc *zpdesc, int off, int size)
 {
+	struct zpdesc *zpdescs[2];
 	size_t sizes[2];
-	char *buf = area->vm_buf;
-
-	/* disable page faults to match kmap_local_page() return conditions */
-	pagefault_disable();
 
-	/* no read fastpath */
-	if (area->vm_mm == ZS_MM_WO)
-		goto out;
+	zpdescs[0] = zpdesc;
+	zpdescs[1] = get_next_zpdesc(zpdesc);
+	BUG_ON(!zpdescs[1]);
 
 	sizes[0] = PAGE_SIZE - off;
 	sizes[1] = size - sizes[0];
@@ -1151,21 +1147,17 @@ static void *__zs_map_object(struct mapping_area *area,
 	/* copy object to per-cpu buffer */
 	memcpy_from_page(buf, zpdesc_page(zpdescs[0]), off, sizes[0]);
 	memcpy_from_page(buf + sizes[0], zpdesc_page(zpdescs[1]), 0, sizes[1]);
-out:
-	return area->vm_buf;
 }
 
-static void __zs_unmap_object(struct mapping_area *area,
-			struct zpdesc *zpdescs[2], int off, int size)
+static void zs_obj_copyout(void *buf, struct zpdesc *zpdesc, int off, int size)
 {
+	struct zpdesc *zpdescs[2];
 	size_t sizes[2];
-	char *buf;
 
-	/* no write fastpath */
-	if (area->vm_mm == ZS_MM_RO)
-		goto out;
+	zpdescs[0] = zpdesc;
+	zpdescs[1] = get_next_zpdesc(zpdesc);
+	BUG_ON(!zpdescs[1]);
 
-	buf = area->vm_buf;
 	buf = buf + ZS_HANDLE_SIZE;
 	size -= ZS_HANDLE_SIZE;
 	off += ZS_HANDLE_SIZE;
@@ -1176,10 +1168,6 @@ static void __zs_unmap_object(struct mapping_area *area,
 	/* copy per-cpu buffer to object */
 	memcpy_to_page(zpdesc_page(zpdescs[0]), off, buf, sizes[0]);
 	memcpy_to_page(zpdesc_page(zpdescs[1]), 0, buf + sizes[0], sizes[1]);
-
-out:
-	/* enable page faults to match kunmap_local() return conditions */
-	pagefault_enable();
 }
 
 static int zs_cpu_prepare(unsigned int cpu)
@@ -1260,6 +1248,8 @@ EXPORT_SYMBOL_GPL(zs_get_total_pages);
  * against nested mappings.
  *
  * This function returns with preemption and page faults disabled.
+ *
+ * NOTE: this function is deprecated and will be removed.
  */
 void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 			enum zs_mapmode mm)
@@ -1268,10 +1258,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	struct zpdesc *zpdesc;
 	unsigned long obj, off;
 	unsigned int obj_idx;
-
 	struct size_class *class;
 	struct mapping_area *area;
-	struct zpdesc *zpdescs[2];
 	void *ret;
 
 	/*
@@ -1309,12 +1297,14 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 		goto out;
 	}
 
-	/* this object spans two pages */
-	zpdescs[0] = zpdesc;
-	zpdescs[1] = get_next_zpdesc(zpdesc);
-	BUG_ON(!zpdescs[1]);
+	ret = area->vm_buf;
+	/* disable page faults to match kmap_local_page() return conditions */
+	pagefault_disable();
+	if (mm != ZS_MM_WO) {
+		/* this object spans two pages */
+		zs_obj_copyin(area->vm_buf, zpdesc, off, class->size);
+	}
 
-	ret = __zs_map_object(area, zpdescs, off, class->size);
 out:
 	if (likely(!ZsHugePage(zspage)))
 		ret += ZS_HANDLE_SIZE;
@@ -1323,13 +1313,13 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 }
 EXPORT_SYMBOL_GPL(zs_map_object);
 
+/* NOTE: this function is deprecated and will be removed. */
 void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 {
 	struct zspage *zspage;
 	struct zpdesc *zpdesc;
 	unsigned long obj, off;
 	unsigned int obj_idx;
-
 	struct size_class *class;
 	struct mapping_area *area;
 
@@ -1340,23 +1330,103 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 	off = offset_in_page(class->size * obj_idx);
 
 	area = this_cpu_ptr(&zs_map_area);
-	if (off + class->size <= PAGE_SIZE)
+	if (off + class->size <= PAGE_SIZE) {
 		kunmap_local(area->vm_addr);
-	else {
-		struct zpdesc *zpdescs[2];
+		goto out;
+	}
 
-		zpdescs[0] = zpdesc;
-		zpdescs[1] = get_next_zpdesc(zpdesc);
-		BUG_ON(!zpdescs[1]);
+	if (area->vm_mm != ZS_MM_RO)
+		zs_obj_copyout(area->vm_buf, zpdesc, off, class->size);
+	/* enable page faults to match kunmap_local() return conditions */
+	pagefault_enable();
 
-		__zs_unmap_object(area, zpdescs, off, class->size);
-	}
+out:
 	local_unlock(&zs_map_area.lock);
-
 	zspage_read_unlock(zspage);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
+void zs_unmap_handle(struct zs_pool *pool, struct zs_handle_mapping *map)
+{
+	struct zspage *zspage;
+	struct zpdesc *zpdesc;
+	unsigned long obj, off;
+	unsigned int obj_idx;
+	struct size_class *class;
+
+	obj = handle_to_obj(map->handle);
+	obj_to_location(obj, &zpdesc, &obj_idx);
+	zspage = get_zspage(zpdesc);
+	class = zspage_class(pool, zspage);
+	off = offset_in_page(class->size * obj_idx);
+
+	if (off + class->size <= PAGE_SIZE) {
+		kunmap_local(map->local_mapping);
+		goto out;
+	}
+
+	if (map->mode != ZS_MM_RO)
+		zs_obj_copyout(map->local_copy, zpdesc, off, class->size);
+
+out:
+	zspage_read_unlock(zspage);
+}
+EXPORT_SYMBOL_GPL(zs_unmap_handle);
+
+int zs_map_handle(struct zs_pool *pool, struct zs_handle_mapping *map)
+{
+	struct zspage *zspage;
+	struct zpdesc *zpdesc;
+	unsigned long obj, off;
+	unsigned int obj_idx;
+	struct size_class *class;
+
+	WARN_ON(in_interrupt());
+
+	/* The pool lock makes it safe to look up the zspage from the handle */
+	pool_read_lock(pool);
+	obj = handle_to_obj(map->handle);
+	obj_to_location(obj, &zpdesc, &obj_idx);
+	zspage = get_zspage(zpdesc);
+
+	/*
+	 * Migration cannot move any zpages in this zspage. Here, class->lock
+	 * is too heavy, since callers may hold the mapping for a while before
+	 * they call zs_unmap_handle(), so delegate the locking from class to
+	 * zspage, which has smaller granularity.
+	 */
+	zspage_read_lock(zspage);
+	pool_read_unlock(pool);
+
+	class = zspage_class(pool, zspage);
+	off = offset_in_page(class->size * obj_idx);
+
+	if (off + class->size <= PAGE_SIZE) {
+		/* this object is contained entirely within a page */
+		map->local_mapping = kmap_local_zpdesc(zpdesc);
+		map->handle_mem = map->local_mapping + off;
+		goto out;
+	}
+
+	if (WARN_ON_ONCE(!map->local_copy)) {
+		zspage_read_unlock(zspage);
+		return -EINVAL;
+	}
+
+	map->handle_mem = map->local_copy;
+	if (map->mode != ZS_MM_WO) {
+		/* this object spans two pages */
+		zs_obj_copyin(map->local_copy, zpdesc, off, class->size);
+	}
+
+out:
+	if (likely(!ZsHugePage(zspage)))
+		map->handle_mem += ZS_HANDLE_SIZE;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zs_map_handle);
+
 /**
  * zs_huge_class_size() - Returns the size (in bytes) of the first huge
  *                        zsmalloc &size_class.
-- 
2.48.1.262.g85cc9f2d1e-goog