Generally drivers have a specific idea of what their HW structure size
should be. In a lot of cases this is related to PAGE_SIZE, but not
always. ARM64, for example, allows a 4K IO page table size on a 64K CPU
page table system.

Currently we don't have any good support for sub page allocations; make
the API accommodate this by accepting a sub page size from the caller
and rounding up internally.

This is done by moving away from order as the size input and using
size:

  size == 1 << (order + PAGE_SHIFT)

Following patches convert drivers away from using order and have them
specify allocation sizes independent of PAGE_SIZE.
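As an illustration (an editor's sketch, not part of this patch; the
caller-side names MY_HW_TABLE_SZ and my_alloc_table are hypothetical),
a conversion from order to size looks like this:

	#include <linux/sizes.h>
	#include "iommu-pages.h"	/* drivers/iommu internal header */

	/* HW-defined table size, independent of PAGE_SIZE */
	#define MY_HW_TABLE_SZ SZ_4K

	static void *my_alloc_table(int nid, gfp_t gfp)
	{
		/* Old: order 0 meant one CPU page, i.e. 1 << (0 + PAGE_SHIFT) */
		/* return iommu_alloc_pages_node(nid, gfp, 0); */

		/* New: state the HW size; the allocator rounds up internally */
		return iommu_alloc_pages_node_sz(nid, gfp, MY_HW_TABLE_SZ);
	}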
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
---
 drivers/iommu/iommu-pages.c | 29 +++++++++++++++---------
 drivers/iommu/iommu-pages.h | 44 ++++++++++++++++++++++++++++++++-----
 include/linux/iommu.h       |  6 ++---
 3 files changed, 61 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/iommu-pages.c b/drivers/iommu/iommu-pages.c
index 0369f0d51c3412..4637eeb80254c7 100644
--- a/drivers/iommu/iommu-pages.c
+++ b/drivers/iommu/iommu-pages.c
@@ -23,24 +23,32 @@ IOPTDESC_MATCH(memcg_data, memcg_data);
 static_assert(sizeof(struct ioptdesc) <= sizeof(struct page));
 
 /**
- * iommu_alloc_pages_node - Allocate a zeroed page of a given order from
- * specific NUMA node
+ * iommu_alloc_pages_node_sz - Allocate a zeroed page of a given size from
+ * specific NUMA node
  * @nid: memory NUMA node id
  * @gfp: buddy allocator flags
- * @order: page order
+ * @size: Memory size to allocate, rounded up to a power of 2
  *
- * Returns the virtual address of the allocated page. The page must be
- * freed either by calling iommu_free_page() or via iommu_put_pages_list().
+ * Returns the virtual address of the allocated page. The page must be freed
+ * either by calling iommu_free_pages() or via iommu_put_pages_list(). The
+ * returned allocation is roundup_pow_of_two(size) big, and is physically
+ * aligned to its size.
  */
-void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order)
+void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size)
 {
-	const unsigned long pgcnt = 1UL << order;
+	unsigned long pgcnt;
 	struct folio *folio;
+	unsigned int order;
 
 	/* This uses page_address() on the memory. */
 	if (WARN_ON(gfp & __GFP_HIGHMEM))
 		return NULL;
 
+	/*
+	 * Currently sub page allocations result in a full page being returned.
+	 */
+	order = get_order(size);
+
 	/*
 	 * __folio_alloc_node() does not handle NUMA_NO_NODE like
 	 * alloc_pages_node() did.
@@ -61,12 +69,13 @@ void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order)
 	 * This is necessary for the proper accounting as IOMMU state can be
 	 * rather large, i.e. multiple gigabytes in size.
 	 */
+	pgcnt = 1UL << order;
 	mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, pgcnt);
 	lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, pgcnt);
 
 	return folio_address(folio);
 }
-EXPORT_SYMBOL_GPL(iommu_alloc_pages_node);
+EXPORT_SYMBOL_GPL(iommu_alloc_pages_node_sz);
 
 static void __iommu_free_page(struct ioptdesc *iopt)
 {
@@ -82,7 +91,7 @@ static void __iommu_free_page(struct ioptdesc *iopt)
  * iommu_free_pages - free pages
  * @virt: virtual address of the page to be freed.
  *
- * The page must have have been allocated by iommu_alloc_pages_node()
+ * The page must have been allocated by iommu_alloc_pages_node_sz()
  */
 void iommu_free_pages(void *virt)
 {
@@ -96,7 +105,7 @@ EXPORT_SYMBOL_GPL(iommu_free_pages);
  * iommu_put_pages_list - free a list of pages.
  * @list: The list of pages to be freed
  *
- * Frees a list of pages allocated by iommu_alloc_pages_node().
+ * Frees a list of pages allocated by iommu_alloc_pages_node_sz().
  */
 void iommu_put_pages_list(struct iommu_pages_list *list)
 {
diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h
index f4578f252e2580..3c4575d637da6d 100644
--- a/drivers/iommu/iommu-pages.h
+++ b/drivers/iommu/iommu-pages.h
@@ -46,14 +46,14 @@ static inline struct ioptdesc *virt_to_ioptdesc(void *virt)
 	return folio_ioptdesc(virt_to_folio(virt));
 }
 
-void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order);
+void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size);
 void iommu_free_pages(void *virt);
 void iommu_put_pages_list(struct iommu_pages_list *list);
 
 /**
  * iommu_pages_list_add - add the page to a iommu_pages_list
  * @list: List to add the page to
- * @virt: Address returned from iommu_alloc_pages_node()
+ * @virt: Address returned from iommu_alloc_pages_node_sz()
  */
 static inline void iommu_pages_list_add(struct iommu_pages_list *list,
 					void *virt)
@@ -84,16 +84,48 @@ static inline bool iommu_pages_list_empty(struct iommu_pages_list *list)
 	return list_empty(&list->pages);
 }
 
+/**
+ * iommu_alloc_pages_node - Allocate a zeroed page of a given order from
+ * specific NUMA node
+ * @nid: memory NUMA node id
+ * @gfp: buddy allocator flags
+ * @order: page order
+ *
+ * Returns the virtual address of the allocated page.
+ * Prefer to use iommu_alloc_pages_node_sz()
+ */
+static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp,
+					   unsigned int order)
+{
+	return iommu_alloc_pages_node_sz(nid, gfp, 1 << (order + PAGE_SHIFT));
+}
+
 /**
  * iommu_alloc_pages - allocate a zeroed page of a given order
  * @gfp: buddy allocator flags
  * @order: page order
  *
  * returns the virtual address of the allocated page
+ * Prefer to use iommu_alloc_pages_sz()
  */
 static inline void *iommu_alloc_pages(gfp_t gfp, int order)
 {
-	return iommu_alloc_pages_node(NUMA_NO_NODE, gfp, order);
+	return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp,
+					 1 << (order + PAGE_SHIFT));
+}
+
+/**
+ * iommu_alloc_pages_sz - Allocate a zeroed page of a given size
+ * @gfp: buddy allocator flags
+ * @size: Memory size to allocate, this is rounded up to a power of 2
+ *
+ * Returns the virtual address of the allocated page. The returned
+ * allocation is rounded up to a power of 2 and is physically aligned
+ * to its size.
+ */
+static inline void *iommu_alloc_pages_sz(gfp_t gfp, size_t size)
+{
+	return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, size);
 }
 
 /**
@@ -102,10 +134,11 @@ static inline void *iommu_alloc_pages(gfp_t gfp, int order)
  * @gfp: buddy allocator flags
  *
  * returns the virtual address of the allocated page
+ * Prefer to use iommu_alloc_pages_node_sz()
  */
 static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
 {
-	return iommu_alloc_pages_node(nid, gfp, 0);
+	return iommu_alloc_pages_node_sz(nid, gfp, PAGE_SIZE);
 }
 
 /**
@@ -113,10 +146,11 @@ static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
  * @gfp: buddy allocator flags
  *
  * returns the virtual address of the allocated page
+ * Prefer to use iommu_alloc_pages_sz()
  */
 static inline void *iommu_alloc_page(gfp_t gfp)
 {
-	return iommu_alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+	return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, PAGE_SIZE);
 }
 
 #endif /* __IOMMU_PAGES_H */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 166d8e1bcb100d..b74c9f3dbcce1d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -327,9 +327,9 @@ typedef unsigned int ioasid_t;
 #define IOMMU_DIRTY_NO_CLEAR (1 << 0)
 
 /*
- * Pages allocated through iommu_alloc_pages_node() can be placed on this list
- * using iommu_pages_list_add(). Note: ONLY pages from iommu_alloc_pages_node()
- * can be used this way!
+ * Pages allocated through iommu_alloc_pages_node_sz() can be placed on this
+ * list using iommu_pages_list_add(). Note: ONLY pages from
+ * iommu_alloc_pages_node_sz() can be used this way!
  */
 struct iommu_pages_list {
 	struct list_head pages;
-- 
2.43.0
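
As a usage illustration (an editor's sketch against the new API, not
part of this series; alloc_4k_io_pgtable is a hypothetical helper), the
rounding and alignment guarantee is what lets a 64K PAGE_SIZE ARM64
kernel obtain a naturally aligned 4K IO page table:

	#include <linux/align.h>
	#include <linux/io.h>
	#include <linux/sizes.h>
	#include "iommu-pages.h"	/* drivers/iommu internal header */

	static void *alloc_4k_io_pgtable(int nid, gfp_t gfp)
	{
		/* Sub page request; currently this returns a full page */
		void *tbl = iommu_alloc_pages_node_sz(nid, gfp, SZ_4K);

		if (!tbl)
			return NULL;
		/* The allocation is physically aligned to its rounded-up size */
		WARN_ON(!IS_ALIGNED(virt_to_phys(tbl), SZ_4K));
		return tbl;
	}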