Re: [patch v2 ] mm/zs_malloc: Fix bit spinlock replacement

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2016-10-19 17:50:38 [+0200], Mike Galbraith wrote:
> 
> Do not alter HANDLE_SIZE, memory corruption ensues.  The handle is
> a pointer, allocate space for the struct it points to and align it
> ZS_ALIGN.  Also, when accessing the struct, mask HANDLE_PIN_BIT.
> 
> v2: mutex is only needed for PREEMPT_RT_FULL, with PREEMPT_RT_RTB,
> preemption is disabled when we take it...
> 
> Signed-off-by: Mike Galbraith <umgwanakikbuti@xxxxxxxxx>

The folded version:


From: Mike Galbraith <umgwanakikbuti@xxxxxxxxx>
Date: Tue, 22 Mar 2016 11:16:09 +0100
Subject: [PATCH] mm/zsmalloc: copy with get_cpu_var() and locking

get_cpu_var() disables preemption and triggers a might_sleep() splat later.
This is replaced with get_locked_var().
This bitspinlocks are replaced with a proper mutex which requires a slightly
larger struct to allocate.

Signed-off-by: Mike Galbraith <umgwanakikbuti@xxxxxxxxx>
[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then
fixed the size magic]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
 mm/zsmalloc.c |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 74 insertions(+), 6 deletions(-)

--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -53,6 +53,7 @@
 #include <linux/mount.h>
 #include <linux/migrate.h>
 #include <linux/pagemap.h>
+#include <linux/locallock.h>
 
 #define ZSPAGE_MAGIC	0x58
 
@@ -70,9 +71,22 @@
  */
 #define ZS_MAX_ZSPAGE_ORDER 2
 #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
-
 #define ZS_HANDLE_SIZE (sizeof(unsigned long))
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+struct zsmalloc_handle {
+	unsigned long addr;
+	struct mutex lock;
+};
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
+
+#else
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
+#endif
+
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
  * as single (unsigned long) handle value.
@@ -327,7 +341,7 @@ static void SetZsPageMovable(struct zs_p
 
 static int create_cache(struct zs_pool *pool)
 {
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
+	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
 					0, 0, NULL);
 	if (!pool->handle_cachep)
 		return 1;
@@ -351,10 +365,27 @@ static void destroy_cache(struct zs_pool
 
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
-	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+	void *p;
+
+	p = kmem_cache_alloc(pool->handle_cachep,
+			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (p) {
+		struct zsmalloc_handle *zh = p;
+
+		mutex_init(&zh->lock);
+	}
+#endif
+	return (unsigned long)p;
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
+{
+	return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
+}
+#endif
+
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
 {
 	kmem_cache_free(pool->handle_cachep, (void *)handle);
@@ -373,12 +404,18 @@ static void cache_free_zspage(struct zs_
 
 static void record_obj(unsigned long handle, unsigned long obj)
 {
+#ifdef CONFIG_PREEMPT_RT_FULL
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	WRITE_ONCE(zh->addr, obj);
+#else
 	/*
 	 * lsb of @obj represents handle lock while other bits
 	 * represent object value the handle is pointing so
 	 * updating shouldn't do store tearing.
 	 */
 	WRITE_ONCE(*(unsigned long *)handle, obj);
+#endif
 }
 
 /* zpool driver */
@@ -467,6 +504,7 @@ MODULE_ALIAS("zpool-zsmalloc");
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
 
 static bool is_zspage_isolated(struct zspage *zspage)
 {
@@ -902,7 +940,13 @@ static unsigned long location_to_obj(str
 
 static unsigned long handle_to_obj(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT_FULL
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return zh->addr;
+#else
 	return *(unsigned long *)handle;
+#endif
 }
 
 static unsigned long obj_to_head(struct page *page, void *obj)
@@ -916,22 +960,46 @@ static unsigned long obj_to_head(struct
 
 static inline int testpin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT_FULL
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return mutex_is_locked(&zh->lock);
+#else
 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static inline int trypin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT_FULL
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return mutex_trylock(&zh->lock);
+#else
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void pin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT_FULL
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return mutex_lock(&zh->lock);
+#else
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void unpin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT_FULL
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return mutex_unlock(&zh->lock);
+#else
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void reset_page(struct page *page)
@@ -1423,7 +1491,7 @@ void *zs_map_object(struct zs_pool *pool
 	class = pool->size_class[class_idx];
 	off = (class->size * obj_idx) & ~PAGE_MASK;
 
-	area = &get_cpu_var(zs_map_area);
+	area = &get_locked_var(zs_map_area_lock, zs_map_area);
 	area->vm_mm = mm;
 	if (off + class->size <= PAGE_SIZE) {
 		/* this object is contained entirely within a page */
@@ -1477,7 +1545,7 @@ void zs_unmap_object(struct zs_pool *poo
 
 		__zs_unmap_object(area, pages, off, class->size);
 	}
-	put_cpu_var(zs_map_area);
+	put_locked_var(zs_map_area_lock, zs_map_area);
 
 	migrate_read_unlock(zspage);
 	unpin_tag(handle);

Sebastian
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [RT Stable]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]

  Powered by Linux