The zspage consists of multiple objects, each containing a "link struct" to connect to the next object. The "link struct" is placed at the beginning of each object. On a 32-bit system, the size of the "link struct" is 4 bytes, which means the address returned by `zs_map_object` is always 4-byte aligned. For better performance, zram compression/decompression is offloaded to hardware designed by the IC vendor. For example, Realtek's hardware requires 16-byte alignment. However, due to the 4-byte alignment, a `memcpy` operation is needed to move data from the 4-byte aligned address to the 16-byte aligned address, which negatively impacts zram performance. This patch places "link struct" in the tail of the object that ensures the address returned by `zs_map_object` is aligned to the class size and DELTA, thereby improving zram performance by eliminating unnecessary memory copying. Signed-off-by: Kenny Cheng <chao.shun.cheng.tw@xxxxxxxxx> --- mm/zsmalloc.c | 69 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 817626a351f8..0d14bc8a8a6a 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -800,7 +800,7 @@ static unsigned long handle_to_obj(unsigned long handle) return *(unsigned long *)handle; } -static inline bool obj_allocated(struct zpdesc *zpdesc, void *obj, +static inline bool obj_allocated(struct zpdesc *zpdesc, void *link, unsigned long *phandle) { unsigned long handle; @@ -810,7 +810,7 @@ static inline bool obj_allocated(struct zpdesc *zpdesc, void *obj, VM_BUG_ON_PAGE(!is_first_zpdesc(zpdesc), zpdesc_page(zpdesc)); handle = zpdesc->handle; } else - handle = *(unsigned long *)obj; + handle = *(unsigned long *)link; if (!(handle & OBJ_ALLOCATED_TAG)) return false; @@ -911,7 +911,12 @@ static void init_zspage(struct size_class *class, struct zspage *zspage) struct link_free *link; void *vaddr; - set_first_obj_offset(zpdesc, off); + if (is_first_zpdesc(zpdesc)) { + set_first_obj_offset(zpdesc, off); + off = class->size - ZS_HANDLE_SIZE; + } else { + set_first_obj_offset(zpdesc, off + ZS_HANDLE_SIZE); + } vaddr = kmap_local_zpdesc(zpdesc); link = (struct link_free *)vaddr + off / sizeof(*link); @@ -1095,7 +1100,7 @@ static void __zs_unmap_object(struct mapping_area *area, off += ZS_HANDLE_SIZE; sizes[0] = PAGE_SIZE - off; - sizes[1] = size - sizes[0]; + sizes[1] = size - sizes[0] - ZS_HANDLE_SIZE; /* copy per-cpu buffer to object */ memcpy_to_page(zpdesc_page(zpdescs[0]), off, buf, sizes[0]); @@ -1240,9 +1245,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, ret = __zs_map_object(area, zpdescs, off, class->size); out: - if (likely(!ZsHugePage(zspage))) - ret += ZS_HANDLE_SIZE; - return ret; } EXPORT_SYMBOL_GPL(zs_map_object); @@ -1303,28 +1305,35 @@ EXPORT_SYMBOL_GPL(zs_huge_class_size); static unsigned long obj_malloc(struct zs_pool *pool, struct zspage *zspage, unsigned long handle) { - int i, nr_zpdesc, offset; + int i, nr_obj_zpdesc, nr_link_zpdesc; unsigned long obj; struct link_free *link; struct size_class *class; struct zpdesc *m_zpdesc; - unsigned long m_offset; + unsigned long obj_off; + unsigned long link_off; void *vaddr; class = pool->size_class[zspage->class]; obj = get_freeobj(zspage); - offset = obj * class->size; - nr_zpdesc = offset >> PAGE_SHIFT; - m_offset = offset_in_page(offset); + obj_off = obj * class->size; + link_off = obj_off + class->size - ZS_HANDLE_SIZE; + nr_obj_zpdesc = obj_off >> PAGE_SHIFT; + nr_link_zpdesc = link_off >> PAGE_SHIFT; + link_off = offset_in_page(offset); m_zpdesc = get_first_zpdesc(zspage); - for (i = 0; i < nr_zpdesc; i++) + for (i = 0; i < nr_obj_zpdesc; i++) m_zpdesc = get_next_zpdesc(m_zpdesc); - vaddr = kmap_local_zpdesc(m_zpdesc); - link = (struct link_free *)vaddr + m_offset / sizeof(*link); + if (nr_obj_zpdesc == nr_link_zpdesc) + vaddr = kmap_local_zpdesc(m_zpdesc); + else + vaddr = kmap_local_zpdesc(get_next_zpdesc(m_zpdesc)); + + link = (struct link_free *)vaddr + link_off / sizeof(*link); set_freeobj(zspage, link->next >> OBJ_TAG_BITS); if (likely(!ZsHugePage(zspage))) /* record handle in the header of allocated chunk */ @@ -1422,6 +1431,11 @@ static void obj_free(int class_size, unsigned long obj) obj_to_location(obj, &f_zpdesc, &f_objidx); f_offset = offset_in_page(class_size * f_objidx); + f_offset += class->size - ZS_HANDLE_SIZE; + if (f_offset >= PAGE_SIZE) { + f_zpdesc = get_next_zpdesc(f_zpdesc); + f_offset = offset_in_page(f_offset); + } zspage = get_zspage(f_zpdesc); vaddr = kmap_local_zpdesc(f_zpdesc); @@ -1556,6 +1570,12 @@ static unsigned long find_alloced_obj(struct size_class *class, void *addr = kmap_local_zpdesc(zpdesc); offset = get_first_obj_offset(zpdesc); + + if (is_first_zpdesc(zpdesc)) + offset += class->size - ZS_HANDLE_SIZE; + else + offset -= ZS_HANDLE_SIZE; + offset += class->size * index; while (offset < PAGE_SIZE) { @@ -1806,7 +1826,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page, /* the migrate_write_lock protects zpage access via zs_map_object */ migrate_write_lock(zspage); - offset = get_first_obj_offset(zpdesc); + offset = get_first_obj_offset(zpdesc) + class->size - ZS_HANDLE_SIZE; s_addr = kmap_local_zpdesc(zpdesc); /* @@ -1828,6 +1848,23 @@ static int zs_page_migrate(struct page *newpage, struct page *page, } kunmap_local(s_addr); + /* + * Don't forget to check if there is an obj's link on the next page. + * If there is, it also needs to be handled. + */ + if (offset_in_page(addr) && get_next_zpdesc(zpdesc)) { + s_addr = kmap_local_zpdesc(get_next_zpdesc(zpdesc)); + addr = s_addr + offset_in_page(addr); + if (obj_allocated(zpdesc, addr, &handle)) { + + old_obj = handle_to_obj(handle); + obj_to_location(old_obj, &dummy, &obj_idx); + new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx); + record_obj(handle, new_obj); + } + kunmap_local(s_addr); + } + replace_sub_page(class, zspage, newzpdesc, zpdesc); /* * Since we complete the data copy and set up new zspage structure, -- 2.34.1