From: Bas Nieuwenhuizen <basni@xxxxxxxxxxxx>

Every set_pages_array_wb() call resulted in cross-core interrupts and
TLB flushes. Merge more of them for less overhead.

This reduces the time needed to free a 1.6 GiB GTT WC buffer as part
of the Vulkan CTS from ~2 sec to < 0.25 sec. (Allocation still takes
more than 2 sec, though.)

Signed-off-by: Bas Nieuwenhuizen <basni at chromium.org>
Signed-off-by: Huang Rui <ray.huang at amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 3f14c1c..27fb543 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -301,6 +301,25 @@ static int set_pages_array_uc(struct page **pages, int addrinarray)
 #endif
         return 0;
 }
+
+static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
+{
+#if IS_ENABLED(CONFIG_AGP)
+        unsigned long i;
+
+        for (i = 0; i < numpages; i++)
+                unmap_page_from_agp(p + i);
+#endif
+        return 0;
+}
+
+#else /* for !CONFIG_X86 */
+
+static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
+{
+        return set_memory_wb((unsigned long)page_address(p), numpages);
+}
+
 #endif /* for !CONFIG_X86 */
 
 static int ttm_set_pages_caching(struct dma_pool *pool,
@@ -389,17 +408,14 @@ static void ttm_pool_update_free_locked(struct dma_pool *pool,
 static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page)
 {
         struct page *page = d_page->p;
-        unsigned i, num_pages;
+        unsigned num_pages;
 
         /* Don't set WB on WB page pool. */
         if (!(pool->type & IS_CACHED)) {
                 num_pages = pool->size / PAGE_SIZE;
-                for (i = 0; i < num_pages; ++i, ++page) {
-                        if (set_pages_array_wb(&page, 1)) {
-                                pr_err("%s: Failed to set %d pages to wb!\n",
-                                       pool->dev_name, 1);
-                        }
-                }
+                if (ttm_set_page_range_wb(page, num_pages))
+                        pr_err("%s: Failed to set %u pages to wb!\n",
+                               pool->dev_name, num_pages);
         }
 
         list_del(&d_page->page_list);
-- 
2.7.4
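
The gist of the change: the pages backing a dma_page come from one
contiguous allocation, so the per-page loop in ttm_dma_page_put() can
collapse into a single ranged call, and the page-attribute code only
has to flush once. A minimal before/after sketch of the call pattern
(kernel-style C; the names are taken from the patch above, and the
surrounding ttm_dma_page_put() context is elided):

        /* Before: one set_pages_array_wb() call, and thus potentially
         * one cross-core interrupt plus TLB flush, per 4 KiB page.
         * For the 1.6 GiB buffer quoted above that is roughly
         * 1.6 GiB / 4 KiB ~= 420,000 calls.
         */
        for (i = 0; i < num_pages; ++i, ++page)
                set_pages_array_wb(&page, 1);

        /* After: the pool pages are physically contiguous, so one
         * ranged call changes the caching attributes of the whole
         * chunk, and the flush happens once.
         */
        ttm_set_page_range_wb(page, num_pages);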