Every set_pages_array_wb call results in cross-core interrupts and TLB flushes. Merge the per-page calls in ttm_dma_page_put into a single range call for less overhead. This reduces the time needed to free a 1.6 GiB GTT WC buffer as part of Vulkan CTS from ~2 sec to < 0.25 sec. (Allocation still takes more than 2 sec, though.) Signed-off-by: Bas Nieuwenhuizen <basni at chromium.org> --- drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 31 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index 4c659405a008a..9440ba0a55116 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -299,6 +299,25 @@ static int set_pages_array_uc(struct page **pages, int addrinarray) #endif return 0; } + +static int ttm_set_page_range_wb(struct page *p, unsigned long numpages) +{ +#if IS_ENABLED(CONFIG_AGP) + unsigned long i; + + for (i = 0; i < numpages; i++) + unmap_page_from_agp(p + i); +#endif + return 0; +} + +#else /* for !CONFIG_X86 */ + +static int ttm_set_page_range_wb(struct page *p, unsigned long numpages) +{ + return set_memory_wb((unsigned long)page_address(p), numpages); +} + #endif /* for !CONFIG_X86 */ static int ttm_set_pages_caching(struct dma_pool *pool, @@ -387,18 +406,16 @@ static void ttm_pool_update_free_locked(struct dma_pool *pool, static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page) { struct page *page = d_page->p; - unsigned i, num_pages; + unsigned num_pages; int ret; /* Don't set WB on WB page pool. */ if (!(pool->type & IS_CACHED)) { num_pages = pool->size / PAGE_SIZE; - for (i = 0; i < num_pages; ++i, ++page) { - ret = set_pages_array_wb(&page, 1); - if (ret) { - pr_err("%s: Failed to set %d pages to wb!\n", - pool->dev_name, 1); - } + ret = ttm_set_page_range_wb(page, num_pages); + if (ret) { + pr_err("%s: Failed to set %d pages to wb!\n", + pool->dev_name, num_pages); } } -- 2.18.0.233.g985f88cf7e-goog