Optimize the performance of `dma_pool_free()` by using an xarray to map
a `vaddr` to its corresponding `block`. This eliminates the need to
iterate through the entire `page_list` for vaddr-to-block translation.

Results from the `DMAPOOL_TEST` benchmark show the improvement.

Before the patch:

```
dmapool test: size:16 align:16 blocks:8192 time:34432
dmapool test: size:64 align:64 blocks:8192 time:62262
dmapool test: size:256 align:256 blocks:8192 time:238137
dmapool test: size:1024 align:1024 blocks:2048 time:61386
dmapool test: size:4096 align:4096 blocks:1024 time:75342
dmapool test: size:68 align:32 blocks:8192 time:88243
```

After the patch:

```
dmapool test: size:16 align:16 blocks:8192 time:37954
dmapool test: size:64 align:64 blocks:8192 time:40036
dmapool test: size:256 align:256 blocks:8192 time:41942
dmapool test: size:1024 align:1024 blocks:2048 time:10964
dmapool test: size:4096 align:4096 blocks:1024 time:6101
dmapool test: size:68 align:32 blocks:8192 time:41307
```

This change cuts lookup overhead for every size except the smallest
(16-byte) blocks, which see a slight regression, with the largest gains
at the bigger block sizes.

Co-developed-by: Raphael Isemann <teemperor@xxxxxxxxx>
Signed-off-by: Raphael Isemann <teemperor@xxxxxxxxx>
Signed-off-by: Brian Johannesmeyer <bjohannesmeyer@xxxxxxxxx>
---
 mm/dmapool.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index f2b96be25412..1cc2cc87ab93 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -35,6 +35,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/wait.h>
+#include <linux/xarray.h>
 
 #ifdef CONFIG_SLUB_DEBUG_ON
 #define DMAPOOL_DEBUG 1
@@ -59,6 +60,7 @@ struct dma_pool {	/* the pool */
 	unsigned int boundary;
 	char name[32];
 	struct list_head pools;
+	struct xarray block_map;
 };
 
 struct dma_page {	/* cacheable header for 'allocation' bytes */
@@ -96,23 +98,7 @@ static DEVICE_ATTR_RO(pools);
 
 static struct dma_block *pool_find_block(struct dma_pool *pool, void *vaddr)
 {
-	struct dma_page *page;
-	size_t offset, index;
-
-	list_for_each_entry(page, &pool->page_list, page_list) {
-		if (vaddr < page->vaddr)
-			continue;
-		offset = vaddr - page->vaddr;
-		if (offset >= pool->allocation)
-			continue;
-
-		index = offset / pool->size;
-		if (index >= page->blocks_per_page)
-			return NULL;
-
-		return &page->blocks[index];
-	}
-	return NULL;
+	return xa_load(&pool->block_map, (unsigned long)vaddr);
 }
 
 #ifdef DMAPOOL_DEBUG
@@ -273,6 +259,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
 	retval->boundary = boundary;
 	retval->allocation = allocation;
 	INIT_LIST_HEAD(&retval->pools);
+	xa_init(&retval->block_map);
 
 	/*
 	 * pools_lock ensures that the ->dma_pools list does not get corrupted.
@@ -324,6 +311,12 @@ static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
 		block->dma = page->dma + offset;
 		block->next_block = NULL;
 
+		if (xa_err(xa_store(&pool->block_map, (unsigned long)block->vaddr,
+				    block, GFP_KERNEL))) {
+			pr_err("dma_pool: Failed to store block in xarray\n");
+			return;
+		}
+
 		if (last)
 			last->next_block = block;
 		else
@@ -385,6 +378,7 @@ void dma_pool_destroy(struct dma_pool *pool)
 	if (unlikely(!pool))
 		return;
 
+	xa_destroy(&pool->block_map);
 	mutex_lock(&pools_reg_lock);
 	mutex_lock(&pools_lock);
 	list_del(&pool->pools);
--
2.34.1
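
For context on the pattern the patch relies on: the xarray turns the old
O(pages) list walk into a single keyed lookup on the block's virtual
address. The sketch below illustrates that idea in plain userspace C,
standing in a toy chaining hash table for the kernel's xarray; every name
in it (`demo_block`, `block_map_store`, `block_map_find`) is illustrative
only and not part of the patch.

```c
/* Userspace sketch of the vaddr-to-block mapping idea. The real patch
 * uses the kernel xarray (xa_init/xa_store/xa_load/xa_destroy); this toy
 * hash table only demonstrates the keyed-lookup pattern.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct demo_block {
	void *vaddr;              /* key: block's virtual address */
	struct demo_block *next;  /* chain for hash collisions */
};

#define MAP_BUCKETS 1024

static struct demo_block *block_map[MAP_BUCKETS];

static size_t hash_ptr(const void *p)
{
	/* Mix the pointer bits; the low bits are zero due to alignment. */
	return ((uintptr_t)p >> 4) % MAP_BUCKETS;
}

/* Analogous to xa_store(&pool->block_map, (unsigned long)vaddr, block, ...). */
static void block_map_store(struct demo_block *block)
{
	size_t i = hash_ptr(block->vaddr);

	block->next = block_map[i];
	block_map[i] = block;
}

/* Analogous to xa_load(&pool->block_map, (unsigned long)vaddr):
 * O(1) expected time, versus walking every page in the pool.
 */
static struct demo_block *block_map_find(void *vaddr)
{
	struct demo_block *b;

	for (b = block_map[hash_ptr(vaddr)]; b; b = b->next)
		if (b->vaddr == vaddr)
			return b;
	return NULL;
}

int main(void)
{
	struct demo_block b1 = { .vaddr = malloc(64) };
	struct demo_block b2 = { .vaddr = malloc(64) };

	block_map_store(&b1);
	block_map_store(&b2);

	printf("lookup b2: %s\n",
	       block_map_find(b2.vaddr) == &b2 ? "hit" : "miss");

	free(b1.vaddr);
	free(b2.vaddr);
	return 0;
}
```

The kernel's xarray exposes the same store/load shape (`xa_store()` /
`xa_load()` keyed by an unsigned long) with RCU-safe lookups, which is
what lets `pool_find_block()` drop the page-list iteration entirely.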