The previous allocation mechanism would get 2 contiguous allocations, one
for the page directories, and one for the page tables. As each page table
is 1 page, and there are 512 of these per page directory, this comes to
2MB per page directory. An unfriendly request at best. Worse still, our
HW now supports 4 page directories, which pushes the contiguous page
table allocation to 8MB, more than the page allocator will hand out.

In order to fix this, this patch splits the page table allocation so that
each page table gets its own discrete, page-sized allocation. There is
nothing really fancy about the patch itself; it just has to manage an
extra pointer indirection, and a fancier bit of logic to free up the
pages. To accommodate some of the added complexity, two new helpers are
introduced to allocate and free the page table pages.

NOTE: I really wanted to split the allocation changes from the changes to
how we identify the page table/page directory being used, but I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather than
the diff.

Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx>
---
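As a reference for review, here is a small standalone user-space sketch
(illustrative only, not part of the patch; the sample address is made up)
of how the new shift/mask macros decompose a GEN8 PPGTT address into its
three indices. The macro values mirror the ones added to i915_gem_gtt.c
below.

#include <stdint.h>
#include <stdio.h>

#define GEN8_PDPE_SHIFT		30
#define GEN8_PDPE_MASK		0x3
#define GEN8_PDE_SHIFT		21
#define GEN8_PDE_MASK		0x1ff
#define GEN8_PTE_SHIFT		12
#define GEN8_PTE_MASK		0x1ff

int main(void)
{
	uint32_t addr = 0x12345000; /* arbitrary example GPU virtual address */

	/* Each index selects one entry at its level of the 3 level walk;
	 * the low 12 bits are the byte offset within the 4KB page.
	 */
	unsigned pdpe = (addr >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; /* 0   */
	unsigned pde = (addr >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;    /* 145 */
	unsigned pte = (addr >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;    /* 325 */

	printf("pdpe=%u pde=%u pte=%u\n", pdpe, pde, pte);
	return 0;
}

A PTE maps 4KB, a PDE covers 512 PTEs (2MB), and a PDPE covers 512 PDEs
(1GB), so the 4 PDPEs span the full 32-bit address space. The pdpe/pde
pair is also exactly the index into the new two-level gen8_pt_pages
array, which replaces the old flat page array.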
 drivers/gpu/drm/i915/i915_drv.h     |   5 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 138 ++++++++++++++++++++++++++----------
 2 files changed, 110 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 43c2ee7..6b23b13 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -653,6 +653,7 @@ struct i915_gtt {
 };
 #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
+#define GEN8_LEGACY_PDPS 4
 struct i915_hw_ppgtt {
 	struct i915_address_space base;
 	struct kref ref;
@@ -660,14 +661,14 @@ struct i915_hw_ppgtt {
 	unsigned num_pd_entries;
 	union {
 		struct page **pt_pages;
-		struct page *gen8_pt_pages;
+		struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
 	};
 	struct page *pd_pages;
 	int num_pd_pages;
 	int num_pt_pages;
 	union {
 		uint32_t pd_offset;
-		dma_addr_t pd_dma_addr[4];
+		dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
 	};
 	union {
 		dma_addr_t *pt_dma_addr;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index af07371..8946b1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -63,7 +63,19 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 
 #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
 #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
-#define GEN8_LEGACY_PDPS		4
+
+/* GEN8 legacy style address is defined as a 3 level page table:
+ * 31:30 | 29:21 | 20:12 |  11:0
+ * PDPE  |  PDE  |  PTE  | offset
+ * The difference as compared to normal x86 3 level page table is the PDPEs are
+ * programmed via register.
+ */
+#define GEN8_PDPE_SHIFT			30
+#define GEN8_PDPE_MASK			0x3
+#define GEN8_PDE_SHIFT			21
+#define GEN8_PDE_MASK			0x1ff
+#define GEN8_PTE_SHIFT			12
+#define GEN8_PTE_MASK			0x1ff
 
 #define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
 #define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
@@ -260,32 +272,36 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
-	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned which_pdpe = (start >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+	unsigned which_pde = (start >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
+	unsigned which_pte = (start >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
 	unsigned num_entries = length >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
-	unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
 	unsigned last_pte, i;
 
 	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
 				      I915_CACHE_LLC, use_scratch);
 
 	while (num_entries) {
-		struct page *page_table = &ppgtt->gen8_pt_pages[act_pt];
+		struct page *page_table = ppgtt->gen8_pt_pages[which_pdpe][which_pde];
 
-		last_pte = first_pte + num_entries;
+		last_pte = which_pte + num_entries;
 		if (last_pte > GEN8_PTES_PER_PAGE)
 			last_pte = GEN8_PTES_PER_PAGE;
 
 		pt_vaddr = kmap_atomic(page_table);
 
-		for (i = first_pte; i < last_pte; i++)
+		for (i = which_pte; i < last_pte; i++) {
 			pt_vaddr[i] = scratch_pte;
+			BUG_ON(num_entries == 0);
+			num_entries--;
+		}
 
 		kunmap_atomic(pt_vaddr);
 
-		num_entries -= last_pte - first_pte;
-		first_pte = 0;
-		act_pt++;
+		which_pte = 0;
+		if (which_pde + 1 == GEN8_PDES_PER_PAGE)
+			which_pdpe++;
+		which_pde = (which_pde + 1) & GEN8_PDE_MASK;
 	}
 }
@@ -297,39 +313,58 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr;
-	unsigned first_entry = start >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
-	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
+	unsigned which_pdpe = (start >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+	unsigned which_pde = (start >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
+	unsigned which_pte = (start >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
 	struct sg_page_iter sg_iter;
 
-	pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);
+	pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[which_pdpe][which_pde]);
+
 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
 		dma_addr_t page_addr;
 
 		page_addr = sg_dma_address(sg_iter.sg) +
-				(sg_iter.sg_pgoffset << PAGE_SHIFT);
-		pt_vaddr[act_pte] = gen8_pte_encode(page_addr, cache_level,
-						    true);
-		if (++act_pte == GEN8_PTES_PER_PAGE) {
-			kunmap_atomic(pt_vaddr);
-			act_pt++;
-			pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);
-			act_pte = 0;
+			    (sg_iter.sg_pgoffset << PAGE_SHIFT);
+		pt_vaddr[which_pte] =
+			gen8_pte_encode(page_addr, cache_level, true);
+
+		which_pte = (which_pte + 1) & GEN8_PTE_MASK;
+		if (which_pte == 0) {
+			kunmap_atomic(pt_vaddr);
+			if (which_pde + 1 == GEN8_PDES_PER_PAGE)
+				which_pdpe++;
+			which_pde = (which_pde + 1) & GEN8_PDE_MASK;
+			pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[which_pdpe][which_pde]);
 		}
 	}
+
 	kunmap_atomic(pt_vaddr);
 }
 
-static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
+static void gen8_free_page_tables(struct page **pt_pages)
 {
 	int i;
 
-	for (i = 0; i < ppgtt->num_pd_pages ; i++)
+	if (pt_pages == NULL)
+		return;
+
+	for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
+		if (pt_pages[i])
+			__free_pages(pt_pages[i], 0);
+}
+
+static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
+{
+	int i;
+
+	for (i = 0; i < ppgtt->num_pd_pages; i++) {
+		gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
+		kfree(ppgtt->gen8_pt_pages[i]);
 		kfree(ppgtt->gen8_pt_dma_addr[i]);
+	}
 
 	kfree(ppgtt->gen8_pt_dma_addr);
-	__free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
 	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
 }
@@ -367,20 +402,61 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 
 	gen8_ppgtt_free(ppgtt);
 }
 
+static struct page **__gen8_alloc_page_tables(void)
+{
+	struct page **pt_pages;
+	int i;
+
+	pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
+	if (!pt_pages)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
+		pt_pages[i] = alloc_page(GFP_KERNEL);
+		if (!pt_pages[i])
+			goto bail;
+	}
+
+	return pt_pages;
+
+bail:
+	gen8_free_page_tables(pt_pages);
+	kfree(pt_pages);
+	return ERR_PTR(-ENOMEM);
+}
+
 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
 					   const int max_pdp)
 {
-	struct page *pt_pages;
+	struct page **pt_pages[GEN8_LEGACY_PDPS];
 	const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
+	int i, ret;
 
-	pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT));
-	if (!pt_pages)
-		return -ENOMEM;
+	for (i = 0; i < max_pdp; i++) {
+		pt_pages[i] = __gen8_alloc_page_tables();
+		if (IS_ERR(pt_pages[i])) {
+			ret = PTR_ERR(pt_pages[i]);
+			goto unwind_out;
+		}
+	}
+
+	/* NB: Avoid touching gen8_pt_pages until last to keep the allocation
+	 * "atomic", for cleanup purposes.
+	 */
+	for (i = 0; i < max_pdp; i++)
+		ppgtt->gen8_pt_pages[i] = pt_pages[i];
 
-	ppgtt->gen8_pt_pages = pt_pages;
 	ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT);
 
 	return 0;
+
+unwind_out:
+	while (i--) {
+		gen8_free_page_tables(pt_pages[i]);
+		kfree(pt_pages[i]);
+	}
+
+	return ret;
 }
 
 static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
@@ -473,7 +549,7 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
 	struct page *p;
 	int ret;
 
-	p = &ppgtt->gen8_pt_pages[pd * GEN8_PDES_PER_PAGE + pt];
+	p = ppgtt->gen8_pt_pages[pd][pt];
 	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
 			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
-- 
1.8.5.2