Move the allocation of requests from compound pages into a common function so
that it can be used by both blk-mq and dmaengine. Since the routine has more
to do with memory allocation and management, it is moved to the mm subsystem
and exported via mempool.h.

Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx>
---
 block/Kconfig           |    1 
 block/blk-mq.c          |   94 +++++++++-----------------------
 include/linux/mempool.h |   10 +++
 mm/Kconfig              |    6 ++
 mm/Makefile             |    1 
 mm/context_alloc.c      |  137 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 181 insertions(+), 68 deletions(-)
 create mode 100644 mm/context_alloc.c

diff --git a/block/Kconfig b/block/Kconfig
index c23094a14a2b..d2b3aba69f5e 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -7,6 +7,7 @@ menuconfig BLOCK
 	default y
 	select SBITMAP
 	select SRCU
+	select CONTEXT_ALLOC
 	help
 	 Provide block layer support for the kernel.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 323c9cb28066..4bcdf50712a7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -10,7 +10,6 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/kmemleak.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -26,6 +25,7 @@
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/prefetch.h>
+#include <linux/mempool.h>
 
 #include <trace/events/block.h>
@@ -2015,8 +2015,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		     unsigned int hctx_idx)
 {
-	struct page *page;
-
 	if (tags->rqs && set->ops->exit_request) {
 		int i;
 
@@ -2030,16 +2028,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		}
 	}
 
-	while (!list_empty(&tags->page_list)) {
-		page = list_first_entry(&tags->page_list, struct page, lru);
-		list_del_init(&page->lru);
-		/*
-		 * Remove kmemleak object previously allocated in
-		 * blk_mq_alloc_rqs().
-		 */
-		kmemleak_free(page_address(page));
-		__free_pages(page, page->private);
-	}
+	context_free_from_pages(&tags->page_list);
 }
 
 void blk_mq_free_rq_map(struct blk_mq_tags *tags)
@@ -2089,11 +2078,6 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 	return tags;
 }
 
-static size_t order_to_size(unsigned int order)
-{
-	return (size_t)PAGE_SIZE << order;
-}
-
 static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			       unsigned int hctx_idx, int node)
 {
@@ -2109,12 +2093,20 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	return 0;
 }
 
+static void blk_mq_assign_request(void *data, void *ctx, int idx)
+{
+	struct blk_mq_tags *tags = (struct blk_mq_tags *)data;
+	struct request *rq = ctx;
+
+	tags->static_rqs[idx] = rq;
+}
+
 int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		     unsigned int hctx_idx, unsigned int depth)
 {
-	unsigned int i, j, entries_per_page, max_order = 4;
-	size_t rq_size, left;
-	int node;
+	unsigned int i;
+	size_t rq_size;
+	int node, rc;
 
 	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
 	if (node == NUMA_NO_NODE)
@@ -2128,62 +2120,28 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	 */
 	rq_size = round_up(sizeof(struct request) + set->cmd_size,
				cache_line_size());
-	left = rq_size * depth;
-
-	for (i = 0; i < depth; ) {
-		int this_order = max_order;
-		struct page *page;
-		int to_do;
-		void *p;
-
-		while (this_order && left < order_to_size(this_order - 1))
-			this_order--;
-
-		do {
-			page = alloc_pages_node(node,
-				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
-				this_order);
-			if (page)
-				break;
-			if (!this_order--)
-				break;
-			if (order_to_size(this_order) < rq_size)
-				break;
-		} while (1);
-
-		if (!page)
-			goto fail;
+	rc = context_alloc_from_pages((void *)tags, depth, rq_size,
+				      &tags->page_list, 4, node,
+				      blk_mq_assign_request);
+	if (rc < 0)
+		goto fail;
 
-		page->private = this_order;
-		list_add_tail(&page->lru, &tags->page_list);
+	for (i = 0; i < rc; i++) {
+		struct request *rq = tags->static_rqs[i];
 
-		p = page_address(page);
-		/*
-		 * Allow kmemleak to scan these pages as they contain pointers
-		 * to additional allocations like via ops->init_request().
-		 */
-		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
-		entries_per_page = order_to_size(this_order) / rq_size;
-		to_do = min(entries_per_page, depth - i);
-		left -= to_do * rq_size;
-		for (j = 0; j < to_do; j++) {
-			struct request *rq = p;
-
-			tags->static_rqs[i] = rq;
-			if (blk_mq_init_request(set, rq, hctx_idx, node)) {
-				tags->static_rqs[i] = NULL;
-				goto fail;
-			}
-
-			p += rq_size;
-			i++;
+		if (blk_mq_init_request(set, rq, hctx_idx, node)) {
+			tags->static_rqs[i] = NULL;
+			rc = -ENOMEM;
+			goto fail;
 		}
 	}
+
 	return 0;
 
 fail:
 	blk_mq_free_rqs(set, tags, hctx_idx);
-	return -ENOMEM;
+	return rc;
 }
 
 /*
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 0c964ac107c2..f96c5d6b39fd 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -108,4 +108,14 @@ static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
 			      (void *)(long)order);
 }
 
+/*
+ * Management functions to allocate and free fixed-size context memory from
+ * compound pages.
+ */
+int context_alloc_from_pages(void *data, unsigned int ctx_num, size_t ctx_size,
+			     struct list_head *page_list, int max_order,
+			     int node,
+			     void (*init_ctx)(void *data, void *ctx, int idx));
+void context_free_from_pages(struct list_head *page_list);
+
 #endif /* _LINUX_MEMPOOL_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index ab80933be65f..680d0a437832 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -739,4 +739,10 @@ config ARCH_HAS_HUGEPD
 config MAPPING_DIRTY_HELPERS
 	bool
 
+#
+# Provide context allocation from compound pages
+#
+config CONTEXT_ALLOC
+	bool
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index 1937cc251883..c2110d161c7c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -108,3 +108,4 @@ obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 obj-$(CONFIG_HMM_MIRROR) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
 obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
+obj-$(CONFIG_CONTEXT_ALLOC) += context_alloc.o
diff --git a/mm/context_alloc.c b/mm/context_alloc.c
new file mode 100644
index 000000000000..e7f3b6454156
--- /dev/null
+++ b/mm/context_alloc.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common helpers for allocating fixed-size context blocks from compound
+ * pages. Moved from the blk-mq code.
+ *
+ * Copyright (C) 2013-2014 Jens Axboe
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm_types.h>
+#include <linux/list.h>
+#include <linux/kmemleak.h>
+#include <linux/mm.h>
+
+/**
+ * context_free_from_pages() - free allocated pages
+ * @page_list: list of pages to be freed
+ *
+ * Release all the pages on @page_list back to the kernel.
+ */
+void context_free_from_pages(struct list_head *page_list)
+{
+	struct page *page, *n;
+
+	list_for_each_entry_safe(page, n, page_list, lru) {
+		list_del_init(&page->lru);
+		/*
+		 * Remove the kmemleak object previously allocated to track
+		 * the pages.
+		 */
+		kmemleak_free(page_address(page));
+		__free_pages(page, page->private);
+	}
+}
+EXPORT_SYMBOL_GPL(context_free_from_pages);
+
+static size_t order_to_size(unsigned int order)
+{
+	return (size_t)PAGE_SIZE << order;
+}
+
+/**
+ * context_alloc_from_pages() - allocate @ctx_num blocks of @ctx_size bytes
+ *				from compound pages, starting at the highest
+ *				allocation order @max_order
+ * @data: caller pointer passed through to @init_ctx
+ * @ctx_num: total number of context blocks needed
+ * @ctx_size: size of each context block
+ * @page_list: list to keep track of the pages allocated
+ * @max_order: maximum page allocation order to attempt
+ * @node: NUMA node to allocate the pages from
+ * @init_ctx: caller-provided function to initialize each context block
+ *
+ * This function initially came from the blk-mq request allocation code. It
+ * allows the caller to pre-allocate a large number of fixed-size memory
+ * blocks for fast I/O processing without having to go through the allocation
+ * path during I/O. The function starts by allocating pages at @max_order and,
+ * on failure, retries with decreasing order.
+ *
+ * Return: the number of context blocks allocated on success, or -ENOMEM on
+ * failure.
+ */
+int context_alloc_from_pages(void *data, unsigned int ctx_num,
+			     size_t ctx_size, struct list_head *page_list,
+			     int max_order, int node,
+			     void (*init_ctx)(void *data, void *ctx, int idx))
+{
+	size_t left;
+	unsigned int i, j, entries_per_page;
+
+	left = ctx_size * ctx_num;
+
+	for (i = 0; i < ctx_num; ) {
+		int this_order = max_order;
+		struct page *page;
+		int to_do;
+		void *p;
+
+		/* Find an allocation order that fits what is left to allocate. */
+		while (this_order && left < order_to_size(this_order - 1))
+			this_order--;
+
+		/*
+		 * Try to allocate a compound page at this_order. On failure,
+		 * keep retrying with a decreasing order until the order
+		 * reaches 0 or the order size drops below the context size.
+		 */
+		do {
+			page = alloc_pages_node(node,
+						GFP_NOIO | __GFP_NOWARN |
+						__GFP_NORETRY | __GFP_ZERO,
+						this_order);
+			if (page)
+				break;
+			if (!this_order--)
+				break;
+			if (order_to_size(this_order) < ctx_size)
+				break;
+		} while (1);
+
+		if (!page)
+			goto fail;
+
+		page->private = this_order;
+		list_add_tail(&page->lru, page_list);
+
+		p = page_address(page);
+
+		/* Allow kmemleak to track the allocation. */
+		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
+
+		/*
+		 * Calculate how many context blocks fit in this allocation
+		 * and initialize them with the caller-provided function.
+		 */
+		entries_per_page = order_to_size(this_order) / ctx_size;
+		to_do = min(entries_per_page, ctx_num - i);
+		left -= to_do * ctx_size;
+
+		if (!init_ctx) {
+			i += to_do;
+			continue;
+		}
+
+		for (j = 0; j < to_do; j++) {
+			init_ctx(data, p, i);
+			p += ctx_size;
+			i++;
+		}
+	}
+
+	return i;
+
+fail:
+	context_free_from_pages(page_list);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(context_alloc_from_pages);
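
As an illustration for reviewers (particularly of the dmaengine use mentioned
in the changelog), the sketch below shows how a driver could pre-allocate its
descriptors with the new helpers. It is only an example and is not part of the
patch: the my_desc, my_chan, my_chan_assign_desc, my_chan_alloc_descs and
my_chan_free_descs names are hypothetical; only context_alloc_from_pages(),
context_free_from_pages() and their signatures above come from this series.

#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>

/* Hypothetical fixed-size context block carved out of the compound pages. */
struct my_desc {
	u64 cookie;
	/* ... hardware descriptor fields ... */
};

/* Hypothetical owner of the pre-allocated blocks. */
struct my_chan {
	struct list_head desc_pages;	/* pages backing the descriptors */
	struct my_desc **descs;		/* per-index descriptor pointers */
	unsigned int num_descs;
};

/* init_ctx callback: record each carved-out block by index. */
static void my_chan_assign_desc(void *data, void *ctx, int idx)
{
	struct my_chan *chan = data;

	chan->descs[idx] = ctx;
}

static int my_chan_alloc_descs(struct my_chan *chan, unsigned int num, int node)
{
	int rc;

	chan->descs = kcalloc(num, sizeof(*chan->descs), GFP_KERNEL);
	if (!chan->descs)
		return -ENOMEM;

	INIT_LIST_HEAD(&chan->desc_pages);
	rc = context_alloc_from_pages(chan, num, sizeof(struct my_desc),
				      &chan->desc_pages, 4, node,
				      my_chan_assign_desc);
	if (rc < 0) {
		kfree(chan->descs);
		chan->descs = NULL;
		return rc;
	}

	chan->num_descs = rc;
	return 0;
}

static void my_chan_free_descs(struct my_chan *chan)
{
	context_free_from_pages(&chan->desc_pages);
	kfree(chan->descs);
	chan->descs = NULL;
	chan->num_descs = 0;
}

The callback-per-block shape mirrors what blk_mq_assign_request() does above;
passing a NULL init_ctx is also accepted, in which case the blocks are
allocated and zeroed but not handed back individually.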