On Mon, Jan 29, 2024 at 5:42 PM Johannes Weiner <hannes@xxxxxxxxxxx> wrote: > > zswap_store() is long and mixes work at the zswap layer with work at > the backend and compression layer. Move compression & backend work to > zswap_compress(), mirroring zswap_decompress(). > > Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx> > --- > mm/zswap.c | 145 ++++++++++++++++++++++++++++------------------------- > 1 file changed, 77 insertions(+), 68 deletions(-) > > diff --git a/mm/zswap.c b/mm/zswap.c > index bdc9f82fe4b9..f9b9494156ba 100644 > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -1316,6 +1316,79 @@ static int zswap_enabled_param_set(const char *val, > return ret; > } > > +static bool zswap_compress(struct folio *folio, struct zswap_entry *entry) > +{ > + struct crypto_acomp_ctx *acomp_ctx; > + struct scatterlist input, output; > + unsigned int dlen = PAGE_SIZE; > + unsigned long handle; > + struct zpool *zpool; > + char *buf; > + gfp_t gfp; > + int ret; > + u8 *dst; > + > + acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); > + > + mutex_lock(&acomp_ctx->mutex); > + > + dst = acomp_ctx->buffer; > + sg_init_table(&input, 1); > + sg_set_page(&input, &folio->page, PAGE_SIZE, 0); > + > + /* > + * We need PAGE_SIZE * 2 here since there maybe over-compression case, > + * and hardware-accelerators may won't check the dst buffer size, so > + * giving the dst buffer with enough length to avoid buffer overflow. > + */ > + sg_init_one(&output, dst, PAGE_SIZE * 2); > + acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); > + > + /* > + * it maybe looks a little bit silly that we send an asynchronous request, > + * then wait for its completion synchronously. This makes the process look > + * synchronous in fact. > + * Theoretically, acomp supports users send multiple acomp requests in one > + * acomp instance, then get those requests done simultaneously. but in this > + * case, zswap actually does store and load page by page, there is no > + * existing method to send the second page before the first page is done > + * in one thread doing zwap. > + * but in different threads running on different cpu, we have different > + * acomp instance, so multiple threads can do (de)compression in parallel. > + */ > + ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); > + dlen = acomp_ctx->req->dlen; > + if (ret) { > + zswap_reject_compress_fail++; > + goto unlock; > + } > + > + zpool = zswap_find_zpool(entry); > + gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; > + if (zpool_malloc_support_movable(zpool)) > + gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; > + ret = zpool_malloc(zpool, dlen, gfp, &handle); > + if (ret == -ENOSPC) { > + zswap_reject_compress_poor++; > + goto unlock; > + } > + if (ret) { > + zswap_reject_alloc_fail++; > + goto unlock; > + } > + > + buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO); > + memcpy(buf, dst, dlen); > + zpool_unmap_handle(zpool, handle); > + > + entry->handle = handle; > + entry->length = dlen; > + > +unlock: > + mutex_unlock(&acomp_ctx->mutex); > + return ret == 0; > +} > + > static void zswap_decompress(struct zswap_entry *entry, struct page *page) > { > struct zpool *zpool = zswap_find_zpool(entry); > @@ -1472,18 +1545,11 @@ bool zswap_store(struct folio *folio) > struct page *page = &folio->page; > struct zswap_tree *tree = swap_zswap_tree(swp); > struct zswap_entry *entry, *dupentry; > - struct scatterlist input, output; > - struct crypto_acomp_ctx *acomp_ctx; > struct obj_cgroup *objcg = NULL; > struct mem_cgroup *memcg = NULL; > struct zswap_pool *pool; > - struct zpool *zpool; > - unsigned int dlen = PAGE_SIZE; > - unsigned long handle, value; > - char *buf; > - u8 *src, *dst; > - gfp_t gfp; > - int ret; > + unsigned long value; > + u8 *src; > > VM_WARN_ON_ONCE(!folio_test_locked(folio)); > VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); > @@ -1568,65 +1634,10 @@ bool zswap_store(struct folio *folio) > mem_cgroup_put(memcg); > } > > - /* compress */ > - acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); > - > - mutex_lock(&acomp_ctx->mutex); > - > - dst = acomp_ctx->buffer; > - sg_init_table(&input, 1); > - sg_set_page(&input, &folio->page, PAGE_SIZE, 0); > + if (!zswap_compress(folio, entry)) > + goto put_pool; > > - /* > - * We need PAGE_SIZE * 2 here since there maybe over-compression case, > - * and hardware-accelerators may won't check the dst buffer size, so > - * giving the dst buffer with enough length to avoid buffer overflow. > - */ > - sg_init_one(&output, dst, PAGE_SIZE * 2); > - acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); > - /* > - * it maybe looks a little bit silly that we send an asynchronous request, > - * then wait for its completion synchronously. This makes the process look > - * synchronous in fact. > - * Theoretically, acomp supports users send multiple acomp requests in one > - * acomp instance, then get those requests done simultaneously. but in this > - * case, zswap actually does store and load page by page, there is no > - * existing method to send the second page before the first page is done > - * in one thread doing zwap. > - * but in different threads running on different cpu, we have different > - * acomp instance, so multiple threads can do (de)compression in parallel. > - */ > - ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); > - dlen = acomp_ctx->req->dlen; > - > - if (ret) { > - zswap_reject_compress_fail++; > - goto put_dstmem; > - } > - > - /* store */ > - zpool = zswap_find_zpool(entry); > - gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; > - if (zpool_malloc_support_movable(zpool)) > - gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; > - ret = zpool_malloc(zpool, dlen, gfp, &handle); > - if (ret == -ENOSPC) { > - zswap_reject_compress_poor++; > - goto put_dstmem; > - } > - if (ret) { > - zswap_reject_alloc_fail++; > - goto put_dstmem; > - } > - buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO); > - memcpy(buf, dst, dlen); > - zpool_unmap_handle(zpool, handle); > - mutex_unlock(&acomp_ctx->mutex); > - > - /* populate entry */ > entry->swpentry = swp; > - entry->handle = handle; > - entry->length = dlen; > > insert_entry: > entry->objcg = objcg; > @@ -1663,8 +1674,6 @@ bool zswap_store(struct folio *folio) > > return true; > > -put_dstmem: > - mutex_unlock(&acomp_ctx->mutex); > put_pool: > zswap_pool_put(entry->pool); > freepage: > -- > 2.43.0 > LGTM :) Reviewed-by: Nhat Pham <nphamcs@xxxxxxxxx>