Introduce a new API to map/unmap zsmalloc handles/objects. The key
difference is that this API does not impose atomicity restrictions on
its users, unlike zs_map_object(), which returns with page faults and
preemption disabled. The handle mapping API does not need a per-CPU
vm-area, because its users are required to provide an aux buffer for
objects that span several physical pages.

Keep zs_map_object()/zs_unmap_object() for the time being, as there
are still users of them, but the old API will eventually be removed.

Signed-off-by: Sergey Senozhatsky <senozhatsky@xxxxxxxxxxxx>
---
 include/linux/zsmalloc.h |  29 ++++++++
 mm/zsmalloc.c            | 148 ++++++++++++++++++++++++++++-----------
 2 files changed, 138 insertions(+), 39 deletions(-)

diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index a48cd0ffe57d..72d84537dd38 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -58,4 +58,33 @@ unsigned long zs_compact(struct zs_pool *pool);
 unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
 
 void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
+
+struct zs_handle_mapping {
+	unsigned long handle;
+	/* Points to the start of the object data, either within local_copy
+	 * or within local_mapping. This is what callers should use to
+	 * access or modify handle data.
+	 */
+	void *handle_mem;
+
+	enum zs_mapmode mode;
+	union {
+		/*
+		 * Handle object data is copied, because it spans several
+		 * (non-contiguous) physical pages. This pointer should be
+		 * set by the zs_map_handle() caller beforehand and should
+		 * never be accessed directly.
+		 */
+		void *local_copy;
+		/*
+		 * Handle object mapped directly. Should never be used
+		 * directly.
+		 */
+		void *local_mapping;
+	};
+};
+
+int zs_map_handle(struct zs_pool *pool, struct zs_handle_mapping *map);
+void zs_unmap_handle(struct zs_pool *pool, struct zs_handle_mapping *map);
+
 #endif

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a5c1f9852072..281bba4a3277 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1132,18 +1132,14 @@ static inline void __zs_cpu_down(struct mapping_area *area)
 	area->vm_buf = NULL;
 }
 
-static void *__zs_map_object(struct mapping_area *area,
-			struct zpdesc *zpdescs[2], int off, int size)
+static void zs_obj_copyin(void *buf, struct zpdesc *zpdesc, int off, int size)
 {
+	struct zpdesc *zpdescs[2];
 	size_t sizes[2];
-	char *buf = area->vm_buf;
-
-	/* disable page faults to match kmap_local_page() return conditions */
-	pagefault_disable();
 
-	/* no read fastpath */
-	if (area->vm_mm == ZS_MM_WO)
-		goto out;
+	zpdescs[0] = zpdesc;
+	zpdescs[1] = get_next_zpdesc(zpdesc);
+	BUG_ON(!zpdescs[1]);
 
 	sizes[0] = PAGE_SIZE - off;
 	sizes[1] = size - sizes[0];
@@ -1151,21 +1147,17 @@ static void *__zs_map_object(struct mapping_area *area,
 	/* copy object to per-cpu buffer */
 	memcpy_from_page(buf, zpdesc_page(zpdescs[0]), off, sizes[0]);
 	memcpy_from_page(buf + sizes[0], zpdesc_page(zpdescs[1]), 0, sizes[1]);
-out:
-	return area->vm_buf;
 }
 
-static void __zs_unmap_object(struct mapping_area *area,
-			struct zpdesc *zpdescs[2], int off, int size)
+static void zs_obj_copyout(void *buf, struct zpdesc *zpdesc, int off, int size)
 {
+	struct zpdesc *zpdescs[2];
 	size_t sizes[2];
-	char *buf;
 
-	/* no write fastpath */
-	if (area->vm_mm == ZS_MM_RO)
-		goto out;
+	zpdescs[0] = zpdesc;
+	zpdescs[1] = get_next_zpdesc(zpdesc);
+	BUG_ON(!zpdescs[1]);
 
-	buf = area->vm_buf;
 	buf = buf + ZS_HANDLE_SIZE;
 	size -= ZS_HANDLE_SIZE;
 	off += ZS_HANDLE_SIZE;
@@ -1176,10 +1168,6 @@ static void __zs_unmap_object(struct mapping_area *area,
 	/* copy per-cpu buffer to object */
 	memcpy_to_page(zpdesc_page(zpdescs[0]), off, buf, sizes[0]);
 	memcpy_to_page(zpdesc_page(zpdescs[1]), 0, buf + sizes[0], sizes[1]);
-
-out:
-	/* enable page faults to match kunmap_local() return conditions */
-	pagefault_enable();
 }
 
 static int zs_cpu_prepare(unsigned int cpu)
@@ -1260,6 +1248,8 @@ EXPORT_SYMBOL_GPL(zs_get_total_pages);
  * against nested mappings.
  *
  * This function returns with preemption and page faults disabled.
+ *
+ * NOTE: this function is deprecated and will be removed.
  */
 void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 			enum zs_mapmode mm)
@@ -1268,10 +1258,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	struct zpdesc *zpdesc;
 	unsigned long obj, off;
 	unsigned int obj_idx;
-
 	struct size_class *class;
 	struct mapping_area *area;
-	struct zpdesc *zpdescs[2];
 	void *ret;
 
 	/*
@@ -1309,12 +1297,14 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 		goto out;
 	}
 
-	/* this object spans two pages */
-	zpdescs[0] = zpdesc;
-	zpdescs[1] = get_next_zpdesc(zpdesc);
-	BUG_ON(!zpdescs[1]);
+	ret = area->vm_buf;
+	/* disable page faults to match kmap_local_page() return conditions */
+	pagefault_disable();
+	if (mm != ZS_MM_WO) {
+		/* this object spans two pages */
+		zs_obj_copyin(area->vm_buf, zpdesc, off, class->size);
+	}
 
-	ret = __zs_map_object(area, zpdescs, off, class->size);
 out:
 	if (likely(!ZsHugePage(zspage)))
 		ret += ZS_HANDLE_SIZE;
@@ -1323,13 +1313,13 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 }
 EXPORT_SYMBOL_GPL(zs_map_object);
 
+/* NOTE: this function is deprecated and will be removed. */
 void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 {
 	struct zspage *zspage;
 	struct zpdesc *zpdesc;
 	unsigned long obj, off;
 	unsigned int obj_idx;
-
 	struct size_class *class;
 	struct mapping_area *area;
 
@@ -1340,23 +1330,103 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 	off = offset_in_page(class->size * obj_idx);
 
 	area = this_cpu_ptr(&zs_map_area);
-	if (off + class->size <= PAGE_SIZE)
+	if (off + class->size <= PAGE_SIZE) {
 		kunmap_local(area->vm_addr);
-	else {
-		struct zpdesc *zpdescs[2];
+		goto out;
+	}
 
-		zpdescs[0] = zpdesc;
-		zpdescs[1] = get_next_zpdesc(zpdesc);
-		BUG_ON(!zpdescs[1]);
+	if (area->vm_mm != ZS_MM_RO)
+		zs_obj_copyout(area->vm_buf, zpdesc, off, class->size);
+	/* enable page faults to match kunmap_local() return conditions */
+	pagefault_enable();
 
-		__zs_unmap_object(area, zpdescs, off, class->size);
-	}
+out:
 	local_unlock(&zs_map_area.lock);
-
 	zspage_read_unlock(zspage);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
+void zs_unmap_handle(struct zs_pool *pool, struct zs_handle_mapping *map)
+{
+	struct zspage *zspage;
+	struct zpdesc *zpdesc;
+	unsigned long obj, off;
+	unsigned int obj_idx;
+	struct size_class *class;
+
+	obj = handle_to_obj(map->handle);
+	obj_to_location(obj, &zpdesc, &obj_idx);
+	zspage = get_zspage(zpdesc);
+	class = zspage_class(pool, zspage);
+	off = offset_in_page(class->size * obj_idx);
+
+	if (off + class->size <= PAGE_SIZE) {
+		kunmap_local(map->local_mapping);
+		goto out;
+	}
+
+	if (map->mode != ZS_MM_RO)
+		zs_obj_copyout(map->local_copy, zpdesc, off, class->size);
+
+out:
+	zspage_read_unlock(zspage);
+}
+EXPORT_SYMBOL_GPL(zs_unmap_handle);
+
+int zs_map_handle(struct zs_pool *pool, struct zs_handle_mapping *map)
+{
+	struct zspage *zspage;
+	struct zpdesc *zpdesc;
+	unsigned long obj, off;
+	unsigned int obj_idx;
+	struct size_class *class;
+
+	WARN_ON(in_interrupt());
+
+	/* Guarantees that we can safely get the zspage from the handle */
+	pool_read_lock(pool);
+	obj = handle_to_obj(map->handle);
+	obj_to_location(obj, &zpdesc, &obj_idx);
+	zspage = get_zspage(zpdesc);
+
+	/*
+	 * Migration cannot move any zpages in this zspage. Here, class->lock
+	 * is too heavy, since callers would take some time until they call
+	 * the zs_unmap_handle() API, so delegate the locking from class to
+	 * zspage, which is of smaller granularity.
+	 */
+	zspage_read_lock(zspage);
+	pool_read_unlock(pool);
+
+	class = zspage_class(pool, zspage);
+	off = offset_in_page(class->size * obj_idx);
+
+	if (off + class->size <= PAGE_SIZE) {
+		/* this object is contained entirely within a page */
+		map->local_mapping = kmap_local_zpdesc(zpdesc);
+		map->handle_mem = map->local_mapping + off;
+		goto out;
+	}
+
+	if (WARN_ON_ONCE(!map->local_copy)) {
+		zspage_read_unlock(zspage);
+		return -EINVAL;
+	}
+
+	map->handle_mem = map->local_copy;
+	if (map->mode != ZS_MM_WO) {
+		/* this object spans two pages */
+		zs_obj_copyin(map->local_copy, zpdesc, off, class->size);
+	}
+
+out:
+	if (likely(!ZsHugePage(zspage)))
+		map->handle_mem += ZS_HANDLE_SIZE;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zs_map_handle);
+
 /**
  * zs_huge_class_size() - Returns the size (in bytes) of the first huge
  * zsmalloc &size_class.
-- 
2.48.1.262.g85cc9f2d1e-goog
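
For illustration, a minimal caller sketch of the intended usage of the
new API, assuming a caller-provided aux buffer large enough for the
biggest object the caller works with; the function my_read_object and
its parameters are hypothetical and not part of this patch:

/*
 * Hypothetical usage sketch (not part of the patch). The caller owns
 * the aux buffer; zs_map_handle() falls back to it (local_copy) only
 * when the object spans two non-contiguous physical pages, otherwise
 * the object is kmap'ed directly (local_mapping). Unlike with
 * zs_map_object(), the caller is not required to stay atomic between
 * map and unmap.
 */
static int my_read_object(struct zs_pool *pool, unsigned long handle,
			  void *dst, size_t len, void *aux_buf)
{
	struct zs_handle_mapping map = {
		.handle = handle,
		.mode = ZS_MM_RO,
		.local_copy = aux_buf,	/* must hold up to class size bytes */
	};
	int err;

	err = zs_map_handle(pool, &map);
	if (err)
		return err;

	/* handle_mem points at the object data in either layout */
	memcpy(dst, map.handle_mem, len);

	zs_unmap_handle(pool, &map);
	return 0;
}

Note that for ZS_MM_RO mappings of page-spanning objects the data is
copied into the aux buffer, and zs_unmap_handle() writes the buffer
back only for non-RO modes, so writes through handle_mem in RO mode
are not propagated to the object.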