This patch allows zswap to accept incompressible pages and store them
in the zpool if possible. The change is required to achieve zero
rejection in zswap_store(): with a proper amount of proactive
shrinking, swapout can then be buffered by zswap without IO latency.

Storing incompressible pages may seem costly, but it can reduce
latency. A rare incompressible page in a large batch of compressible
pages can delay the entire batch during swapping.

The memory overhead is negligible because the underlying zsmalloc
already accepts nearly incompressible pages: data close to PAGE_SIZE
is stored in a dedicated page, so storing a page as-is saves
decompression cycles without adding allocation overhead. zswap itself
has not rejected pages in these cases.

To store a page as-is, the compressed data size field `length` in
struct `zswap_entry` is reused: length == PAGE_SIZE indicates
incompressible data.

If a zpool backend does not support allocating PAGE_SIZE (zbud), the
behavior remains unchanged: the allocation failure reported by the
zpool still blocks accepting the page, as before.

Signed-off-by: Takero Funaki <flintglass@xxxxxxxxx>
---
 mm/zswap.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 76691ca7b6a7..def0f948a4ab 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -186,6 +186,8 @@ static struct shrinker *zswap_shrinker;
  * length - the length in bytes of the compressed page data. Needed during
  *          decompression. For a same value filled page length is 0, and both
  *          pool and lru are invalid and must be ignored.
+ *          If length is equal to PAGE_SIZE, the data stored in handle is
+ *          not compressed. The data must be copied to page as-is.
  * pool - the zswap_pool the entry's data is in
  * handle - zpool allocation handle that stores the compressed page data
  * value - value of the same-value filled pages which have same content
@@ -969,9 +971,23 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
 	 */
 	comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
 	dlen = acomp_ctx->req->dlen;
-	if (comp_ret)
+
+	/* coa_compress returns -EINVAL for errors including insufficient dlen */
+	if (comp_ret && comp_ret != -EINVAL)
 		goto unlock;
 
+	/*
+	 * If the data cannot be compressed well, store the data as-is.
+	 * Switching by a threshold at
+	 * PAGE_SIZE - (allocation granularity)
+	 * zbud and z3fold use 64B granularity.
+	 * zsmalloc stores >3632B in one page for 4K page arch.
+	 */
+	if (comp_ret || dlen > PAGE_SIZE - 64) {
+		/* we do not use compressed result anymore */
+		comp_ret = 0;
+		dlen = PAGE_SIZE;
+	}
 	zpool = zswap_find_zpool(entry);
 	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
 	if (zpool_malloc_support_movable(zpool))
@@ -981,14 +997,20 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
 		goto unlock;
 
 	buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
-	memcpy(buf, dst, dlen);
+
+	/* PAGE_SIZE indicates not compressed. */
+	if (dlen == PAGE_SIZE)
+		memcpy_from_folio(buf, folio, 0, PAGE_SIZE);
+	else
+		memcpy(buf, dst, dlen);
+
 	zpool_unmap_handle(zpool, handle);
 
 	entry->handle = handle;
 	entry->length = dlen;
 
 unlock:
-	if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC)
+	if (alloc_ret == -ENOSPC)
 		zswap_reject_compress_poor++;
 	else if (comp_ret)
 		zswap_reject_compress_fail++;
@@ -1006,6 +1028,14 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page)
 	struct crypto_acomp_ctx *acomp_ctx;
 	u8 *src;
 
+	if (entry->length == PAGE_SIZE) {
+		/* the content is not compressed. copy back as-is. */
+		src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
+		memcpy_to_page(page, 0, src, entry->length);
+		zpool_unmap_handle(zpool, entry->handle);
+		return;
+	}
+
 	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
 	mutex_lock(&acomp_ctx->mutex);
 
-- 
2.43.0
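Note for reviewers, not part of the patch: below is a minimal userspace
sketch of the store/load decision described above. It assumes 4K pages
and the 64-byte allocation granularity from the in-diff comment; the
constant and helper names (ILLUSTRATIVE_PAGE_SIZE, effective_store_len,
needs_decompression) are hypothetical, chosen for illustration only.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define ILLUSTRATIVE_PAGE_SIZE 4096  /* assumed 4K page */
#define ALLOC_GRANULARITY 64         /* zbud/z3fold granule per the comment */

/*
 * Mirror of the zswap_compress() decision: on a compressor error (such
 * as -EINVAL from insufficient dlen) or a result within one allocation
 * granule of a full page, fall back to storing the page uncompressed.
 * Returning ILLUSTRATIVE_PAGE_SIZE doubles as the "stored as-is" sentinel.
 */
static size_t effective_store_len(int comp_ret, size_t dlen)
{
	if (comp_ret || dlen > ILLUSTRATIVE_PAGE_SIZE - ALLOC_GRANULARITY)
		return ILLUSTRATIVE_PAGE_SIZE;
	return dlen;
}

/* On load, length == PAGE_SIZE means "copy back without decompression". */
static bool needs_decompression(size_t stored_len)
{
	return stored_len != ILLUSTRATIVE_PAGE_SIZE;
}

int main(void)
{
	/* Compressed to 3000B: stored compressed, decompressed on load. */
	assert(effective_store_len(0, 3000) == 3000);
	assert(needs_decompression(3000));

	/* Compressed to 4090B (> 4096 - 64): stored as-is. */
	assert(effective_store_len(0, 4090) == ILLUSTRATIVE_PAGE_SIZE);
	assert(!needs_decompression(ILLUSTRATIVE_PAGE_SIZE));

	/* Compressor error (e.g. -EINVAL == -22): also stored as-is. */
	assert(effective_store_len(-22, 0) == ILLUSTRATIVE_PAGE_SIZE);
	return 0;
}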