As we move toward dynamic page table allocation, it becomes much easier to manage our data structures if we do things less coarsely by breaking up all of our actions into individual tasks. This makes the code easier to write, read, and verify. Aside from the dissection of the allocation functions, the patch statically allocates the page table structures within a page directory. This remains the same for all platforms. The patch itself should not have much functional difference. The primary noticeable difference is the fact that page tables are no longer allocated, but rather statically declared as part of the page directory. This has non-zero overhead, but things become non-trivially simpler as a result. Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 226 +++++++++++++++++++++++------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 4 +- 2 files changed, 147 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d91a545..5c08cf9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -183,6 +183,102 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, return pte; } +static void free_pt_single(struct i915_pagetab *pt) +{ + if (WARN_ON(!pt->page)) + return; + __free_page(pt->page); + kfree(pt); +} + +static struct i915_pagetab *alloc_pt_single(void) +{ + struct i915_pagetab *pt; + + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + if (!pt) + return ERR_PTR(-ENOMEM); + + pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!pt->page) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + + return pt; +} + +/** + * alloc_pt_range() - Allocate a multiple page tables + * @pd: The page directory which will have at least @count entries + * available to point to the allocated page tables. + * @pde: First page directory entry for which we are allocating. + * @count: Number of pages to allocate. 
+ * + * Allocates multiple page table pages and sets the appropriate entries in the + * page table structure within the page directory. Function cleans up after + * itself on any failures. + * + * Return: 0 if allocation succeeded. + */ +static int alloc_pt_range(struct i915_pagedir *pd, uint16_t pde, size_t count) +{ + int i, ret; + + /* 512 is the max page tables per pagedir on any platform. + * TODO: make WARN after patch series is done + */ + BUG_ON(pde + count > I915_PDES_PER_PD); + + for (i = pde; i < pde + count; i++) { + struct i915_pagetab *pt = alloc_pt_single(); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto err_out; + } + WARN(pd->page_tables[i], + "Leaking page directory entry %d (%pa)\n", + i, pd->page_tables[i]); + pd->page_tables[i] = pt; + } + + return 0; + +err_out: + while (i--) + free_pt_single(pd->page_tables[i]); + return ret; +} + +static void __free_pd_single(struct i915_pagedir *pd) +{ + __free_page(pd->page); + kfree(pd); +} + +#define free_pd_single(pd) do { \ + if ((pd)->page) { \ + __free_pd_single(pd); \ + } \ +} while (0) + +static struct i915_pagedir *alloc_pd_single(void) +{ + struct i915_pagedir *pd; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!pd->page) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + return pd; +} + /* Broadwell Page Directory Pointer Descriptors */ static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry, uint64_t val, bool synchronous) @@ -223,7 +319,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, int used_pd = ppgtt->num_pd_entries / I915_PDES_PER_PD; for (i = used_pd - 1; i >= 0; i--) { - dma_addr_t addr = ppgtt->pdp.pagedir[i].daddr; + dma_addr_t addr = ppgtt->pdp.pagedir[i]->daddr; ret = gen8_write_pdp(ring, i, addr, synchronous); if (ret) return ret; @@ -250,8 +346,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, I915_CACHE_LLC, use_scratch); while (num_entries) 
{ - struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe]; - struct page *page_table = pd->page_tables[pde].page; + struct i915_pagedir *pd = ppgtt->pdp.pagedir[pdpe]; + struct i915_pagetab *pt = pd->page_tables[pde]; + struct page *page_table = pt->page; last_pte = pte + num_entries; if (last_pte > GEN8_PTES_PER_PT) @@ -294,8 +391,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, break; if (pt_vaddr == NULL) { - struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe]; - struct page *page_table = pd->page_tables[pde].page; + struct i915_pagedir *pd = ppgtt->pdp.pagedir[pdpe]; + struct i915_pagetab *pt = pd->page_tables[pde]; + struct page *page_table = pt->page; pt_vaddr = kmap_atomic(page_table); } @@ -320,18 +418,13 @@ static void gen8_free_page_tables(struct i915_pagedir *pd) { int i; - if (pd->page_tables == NULL) + if (!pd->page) return; - for (i = 0; i < I915_PDES_PER_PD; i++) - if (pd->page_tables[i].page) - __free_page(pd->page_tables[i].page); -} - -static void gen8_free_page_directories(struct i915_pagedir *pd) -{ - kfree(pd->page_tables); - __free_page(pd->page); + for (i = 0; i < I915_PDES_PER_PD; i++) { + free_pt_single(pd->page_tables[i]); + pd->page_tables[i] = NULL; + } } static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) @@ -339,8 +432,8 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) int i; for (i = 0; i < ppgtt->num_pd_pages; i++) { - gen8_free_page_tables(&ppgtt->pdp.pagedir[i]); - gen8_free_page_directories(&ppgtt->pdp.pagedir[i]); + gen8_free_page_tables(ppgtt->pdp.pagedir[i]); + free_pd_single(ppgtt->pdp.pagedir[i]); } } @@ -352,14 +445,16 @@ static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt) for (i = 0; i < ppgtt->num_pd_pages; i++) { /* TODO: In the future we'll support sparse mappings, so this * will have to change. 
*/ - if (!ppgtt->pdp.pagedir[i].daddr) + if (!ppgtt->pdp.pagedir[i]->daddr) continue; - pci_unmap_page(hwdev, ppgtt->pdp.pagedir[i].daddr, PAGE_SIZE, + pci_unmap_page(hwdev, ppgtt->pdp.pagedir[i]->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); for (j = 0; j < I915_PDES_PER_PD; j++) { - dma_addr_t addr = ppgtt->pdp.pagedir[i].page_tables[j].daddr; + struct i915_pagedir *pd = ppgtt->pdp.pagedir[i]; + struct i915_pagetab *pt = pd->page_tables[j]; + dma_addr_t addr = pt->daddr; if (addr) pci_unmap_page(hwdev, addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); @@ -381,24 +476,20 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) { - int i, j; + int i, ret; for (i = 0; i < ppgtt->num_pd_pages; i++) { - struct i915_pagedir *pd = &ppgtt->pdp.pagedir[i]; - for (j = 0; j < I915_PDES_PER_PD; j++) { - struct i915_pagetab *pt = &pd->page_tables[j]; - pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!pt->page) - goto unwind_out; - - } + ret = alloc_pt_range(ppgtt->pdp.pagedir[i], + 0, I915_PDES_PER_PD); + if (ret) + goto unwind_out; } return 0; unwind_out: while (i--) - gen8_free_page_tables(&ppgtt->pdp.pagedir[i]); + gen8_free_page_tables(ppgtt->pdp.pagedir[i]); return -ENOMEM; } @@ -409,16 +500,9 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, int i; for (i = 0; i < max_pdp; i++) { - struct i915_pagetab *pt; - pt = kcalloc(I915_PDES_PER_PD, sizeof(*pt), GFP_KERNEL); - if (!pt) + ppgtt->pdp.pagedir[i] = alloc_pd_single(); + if (IS_ERR(ppgtt->pdp.pagedir[i])) goto unwind_out; - - ppgtt->pdp.pagedir[i].page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!ppgtt->pdp.pagedir[i].page) - goto unwind_out; - - ppgtt->pdp.pagedir[i].page_tables = pt; } ppgtt->num_pd_pages = max_pdp; @@ -427,10 +511,8 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, return 0; unwind_out: - while (i--) { - kfree(ppgtt->pdp.pagedir[i].page_tables); - 
__free_page(ppgtt->pdp.pagedir[i].page); - } + while (i--) + free_pd_single(ppgtt->pdp.pagedir[i]); return -ENOMEM; } @@ -465,14 +547,14 @@ static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt, int ret; pd_addr = pci_map_page(ppgtt->base.dev->pdev, - ppgtt->pdp.pagedir[pdpe].page, 0, + ppgtt->pdp.pagedir[pdpe]->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); if (ret) return ret; - ppgtt->pdp.pagedir[pdpe].daddr = pd_addr; + ppgtt->pdp.pagedir[pdpe]->daddr = pd_addr; return 0; } @@ -482,8 +564,8 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt, const int pde) { dma_addr_t pt_addr; - struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe]; - struct i915_pagetab *pt = &pd->page_tables[pde]; + struct i915_pagedir *pd = ppgtt->pdp.pagedir[pdpe]; + struct i915_pagetab *pt = pd->page_tables[pde]; struct page *p = pt->page; int ret; @@ -546,10 +628,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) * will never need to touch the PDEs again. 
*/ for (i = 0; i < max_pdp; i++) { + struct i915_pagedir *pd = ppgtt->pdp.pagedir[i]; gen8_ppgtt_pde_t *pd_vaddr; - pd_vaddr = kmap_atomic(ppgtt->pdp.pagedir[i].page); + pd_vaddr = kmap_atomic(ppgtt->pdp.pagedir[i]->page); for (j = 0; j < I915_PDES_PER_PD; j++) { - dma_addr_t addr = ppgtt->pdp.pagedir[i].page_tables[j].daddr; + struct i915_pagetab *pt = pd->page_tables[j]; + dma_addr_t addr = pt->daddr; pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, I915_CACHE_LLC); } @@ -597,7 +681,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) for (pde = 0; pde < ppgtt->num_pd_entries; pde++) { u32 expected; gen6_gtt_pte_t *pt_vaddr; - dma_addr_t pt_addr = ppgtt->pd.page_tables[pde].daddr; + dma_addr_t pt_addr = ppgtt->pd.page_tables[pde]->daddr; pd_entry = readl(pd_addr + pde); expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); @@ -608,7 +692,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) expected); seq_printf(m, "\tPDE: %x\n", pd_entry); - pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde].page); + pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde]->page); for (pte = 0; pte < GEN6_PTES_PER_PT; pte+=4) { unsigned long va = (pde * PAGE_SIZE * GEN6_PTES_PER_PT) + @@ -658,7 +742,7 @@ static void gen6_map_page_tables(struct i915_hw_ppgtt *ppgtt) WARN_ON(ppgtt->pd.pd_offset & 0x3f); for (i = 0; i < ppgtt->num_pd_entries; i++) - gen6_map_single(ppgtt, i, ppgtt->pd.page_tables[i].daddr); + gen6_map_single(ppgtt, i, ppgtt->pd.page_tables[i]->daddr); readl(dev_priv->gtt.gsm); } @@ -902,7 +986,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, if (last_pte > GEN6_PTES_PER_PT) last_pte = GEN6_PTES_PER_PT; - pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde].page); + pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde]->page); for (i = pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; @@ -930,7 +1014,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, pt_vaddr = NULL; 
for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { if (pt_vaddr == NULL) - pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde].page); + pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde]->page); pt_vaddr[pte] = vm->pte_encode(sg_page_iter_dma_address(&sg_iter), @@ -952,7 +1036,7 @@ static void gen6_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt) for (i = 0; i < ppgtt->num_pd_entries; i++) pci_unmap_page(ppgtt->base.dev->pdev, - ppgtt->pd.page_tables[i].daddr, + ppgtt->pd.page_tables[i]->daddr, 4096, PCI_DMA_BIDIRECTIONAL); } @@ -961,8 +1045,9 @@ static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) int i; for (i = 0; i < ppgtt->num_pd_entries; i++) - __free_page(ppgtt->pd.page_tables[i].page); - kfree(ppgtt->pd.page_tables); + free_pt_single(ppgtt->pd.page_tables[i]); + + free_pd_single(&ppgtt->pd); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) @@ -1019,27 +1104,6 @@ alloc: return 0; } -static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) -{ - struct i915_pagetab *pt; - int i; - - pt = kcalloc(ppgtt->num_pd_entries, sizeof(*pt), GFP_KERNEL); - if (!pt) - return -ENOMEM; - - for (i = 0; i < ppgtt->num_pd_entries; i++) { - pt[i].page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!pt->page) { - gen6_ppgtt_free(ppgtt); - return -ENOMEM; - } - } - - ppgtt->pd.page_tables = pt; - return 0; -} - static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) { int ret; @@ -1048,7 +1112,7 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ret = gen6_ppgtt_allocate_page_tables(ppgtt); + ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries); if (ret) { drm_mm_remove_node(&ppgtt->node); return ret; @@ -1066,7 +1130,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) struct page *page; dma_addr_t pt_addr; - page = ppgtt->pd.page_tables[i].page; + page = ppgtt->pd.page_tables[i]->page; pt_addr = pci_map_page(dev->pdev, page, 0, 4096, PCI_DMA_BIDIRECTIONAL); @@ -1075,7 +1139,7 @@ 
static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) return -EIO; } - ppgtt->pd.page_tables[i].daddr = pt_addr; + ppgtt->pd.page_tables[i]->daddr = pt_addr; } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index d30f6de..9cec163 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -318,12 +318,12 @@ struct i915_pagedir { dma_addr_t daddr; }; - struct i915_pagetab *page_tables; + struct i915_pagetab *page_tables[I915_PDES_PER_PD]; /* PDEs */ }; struct i915_pagedirpo { /* struct page *page; */ - struct i915_pagedir pagedir[GEN8_LEGACY_PDPS]; + struct i915_pagedir *pagedir[GEN8_LEGACY_PDPS]; }; struct i915_hw_ppgtt { -- 1.9.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx