These page table helpers make the code much cleaner. There is some room to use the arch/x86 header files. The reason I've opted not to is in several cases, the definitions are dictated by the CONFIG_ options which do not always indicate the restrictions in the GPU. While here, clean up the defines to have more concise names, and consolidate between gen6 and gen8 where appropriate. v2: Use I915_PAGE_SIZE to remove PAGE_SIZE dep in the new code (Jesse) Fix bugged I915_PTE_MASK define, which was unused (Chris) BUG_ON bad length/size - taking directly from Chris (Chris) define NUM_PTE (Chris) I've made a lot of tiny errors in these helpers. Often I'd correct an error only to introduce another one. While IGT was capable of catching them, the tests often took a while to catch, and where hard/slow to debug in the kernel. As a result, to test this, I compiled i915_gem_gtt.h in userspace, and ran tests from userspace. What follows isn't by any means complete, but it was able to catch lot of bugs. Gen8 is also untested, but since the current code is almost identical, I feel pretty comfortable with that. void test_pte(uint32_t base) { uint32_t ret; assert_pte_index((base + 0), 0); assert_pte_index((base + 1), 0); assert_pte_index((base + 0x1000), 1); assert_pte_index((base + (1<<22)), 0); assert_pte_index((base + ((1<<22) - 1)), 1023); assert_pte_index((base + (1<<21)), 512); assert_pte_count(base + 0, 0, 0); assert_pte_count(base + 0, 1, 1); assert_pte_count(base + 0, 0x1000, 1); assert_pte_count(base + 0, 0x1001, 2); assert_pte_count(base + 0, 1<<21, 512); assert_pte_count(base + 0, 1<<22, 1024); assert_pte_count(base + 0, (1<<22) - 1, 1024); assert_pte_count(base + (1<<21), 1<<22, 512); assert_pte_count(base + (1<<21), (1<<22)+1, 512); assert_pte_count(base + (1<<21), 10<<22, 512); } void test_pde(uint32_t base) { assert(gen6_pde_index(base + 0) == 0); assert(gen6_pde_index(base + 1) == 0); assert(gen6_pde_index(base + (1<<21)) == 0); assert(gen6_pde_index(base + (1<<22)) == 1); assert(gen6_pde_index(base + ((256<<22)))== 256); assert(gen6_pde_index(base + ((512<<22))) == 0); assert(gen6_pde_index(base + ((513<<22))) == 1); /* This is actually not possible on gen6 */ assert(gen6_pde_count(base + 0, 0) == 0); assert(gen6_pde_count(base + 0, 1) == 1); assert(gen6_pde_count(base + 0, 1<<21) == 1); assert(gen6_pde_count(base + 0, 1<<22) == 1); assert(gen6_pde_count(base + 0, (1<<22) + 0x1000) == 2); assert(gen6_pde_count(base + 0x1000, 1<<22) == 2); assert(gen6_pde_count(base + 0, 511<<22) == 511); assert(gen6_pde_count(base + 0, 512<<22) == 512); assert(gen6_pde_count(base + 0x1000, 512<<22) == 512); assert(gen6_pde_count(base + (1<<22), 512<<22) == 511); } int main() { test_pde(0); while (1) test_pte(rand() & ~((1<<22) - 1)); return 0; } v3: Some small rebase conflicts resolved Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 89 +++++++++++++------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 123 +++++++++++++++++++++++++++++++++--- 2 files changed, 156 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 886c9c3..5a62ef1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -240,7 +240,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, int i, ret; /* bit of a hack to find the actual last used pd */ - int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE; + int used_pd = ppgtt->num_pd_entries / I915_PDES_PER_PD; for (i = used_pd - 1; i >= 0; i--) { dma_addr_t addr = ppgtt->pd_dma_addr[i]; @@ -260,9 +260,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_gtt_pte_t *pt_vaddr, scratch_pte; - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; + unsigned pdpe = gen8_pdpe_index(start); + unsigned pde = gen8_pde_index(start); + unsigned pte = gen8_pte_index(start); unsigned num_entries = length >> PAGE_SHIFT; unsigned last_pte, i; @@ -273,8 +273,8 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde]; last_pte = pte + num_entries; - if (last_pte > GEN8_PTES_PER_PAGE) - last_pte = GEN8_PTES_PER_PAGE; + if (last_pte > GEN8_PTES_PER_PT) + last_pte = GEN8_PTES_PER_PT; pt_vaddr = kmap_atomic(page_table); @@ -288,7 +288,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, kunmap_atomic(pt_vaddr); pte = 0; - if (++pde == GEN8_PDES_PER_PAGE) { + if (++pde == I915_PDES_PER_PD) { pdpe++; pde = 0; } @@ -303,9 +303,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_gtt_pte_t *pt_vaddr; - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; + unsigned pdpe = gen8_pdpe_index(start); + unsigned pde = gen8_pde_index(start); + unsigned pte = gen8_pte_index(start); struct sg_page_iter sg_iter; pt_vaddr = NULL; @@ -320,12 +320,12 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, pt_vaddr[pte] = gen8_pte_encode(sg_page_iter_dma_address(&sg_iter), cache_level, true); - if (++pte == GEN8_PTES_PER_PAGE) { + if (++pte == GEN8_PTES_PER_PT) { if (!HAS_LLC(ppgtt->base.dev)) drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); kunmap_atomic(pt_vaddr); pt_vaddr = NULL; - if (++pde == GEN8_PDES_PER_PAGE) { + if (++pde == I915_PDES_PER_PD) { pdpe++; pde = 0; } @@ -346,7 +346,7 @@ static void gen8_free_page_tables(struct page **pt_pages) if (pt_pages == NULL) return; - for (i = 0; i < GEN8_PDES_PER_PAGE; i++) + for (i = 0; i < I915_PDES_PER_PD; i++) if (pt_pages[i]) __free_pages(pt_pages[i], 0); } @@ -378,7 +378,7 @@ static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt) pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { + for (j = 0; j < I915_PDES_PER_PD; j++) { dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; if (addr) pci_unmap_page(hwdev, addr, PAGE_SIZE, @@ -404,11 +404,11 @@ static struct page **__gen8_alloc_page_tables(void) struct page **pt_pages; int i; - pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL); + pt_pages = kcalloc(I915_PDES_PER_PD, sizeof(struct page *), GFP_KERNEL); if (!pt_pages) return ERR_PTR(-ENOMEM); - for (i = 0; i < GEN8_PDES_PER_PAGE; i++) { + for (i = 0; i < I915_PDES_PER_PD; i++) { pt_pages[i] = alloc_page(GFP_KERNEL); if (!pt_pages[i]) goto bail; @@ -458,7 +458,7 @@ static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt) int i; for (i = 0; i < ppgtt->num_pd_pages; i++) { - ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, + ppgtt->gen8_pt_dma_addr[i] = kcalloc(I915_PDES_PER_PD, sizeof(dma_addr_t), GFP_KERNEL); if (!ppgtt->gen8_pt_dma_addr[i]) @@ -496,7 +496,7 @@ static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, return ret; } - ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; + ppgtt->num_pd_entries = max_pdp * I915_PDES_PER_PD; ret = gen8_ppgtt_allocate_dma(ppgtt); if (ret) @@ -557,7 +557,7 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt, static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) { const int max_pdp = DIV_ROUND_UP(size, 1 << 30); - const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; + const int min_pt_pages = I915_PDES_PER_PD * max_pdp; int i, j, ret; if (size % (1<<30)) @@ -576,7 +576,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) if (ret) goto bail; - for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { + for (j = 0; j < I915_PDES_PER_PD; j++) { ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j); if (ret) goto bail; @@ -594,7 +594,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) for (i = 0; i < max_pdp; i++) { gen8_ppgtt_pde_t *pd_vaddr; pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); - for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { + for (j = 0; j < I915_PDES_PER_PD; j++) { dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, I915_CACHE_LLC); @@ -610,7 +610,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.start = 0; - ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE; + ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PT * PAGE_SIZE; DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n", ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp); @@ -656,9 +656,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) seq_printf(m, "\tPDE: %x\n", pd_entry); pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); - for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { + for (pte = 0; pte < GEN6_PTES_PER_PT; pte+=4) { unsigned long va = - (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + + (pde * PAGE_SIZE * GEN6_PTES_PER_PT) + (pte * PAGE_SIZE); int i; bool found = false; @@ -937,29 +937,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen6_gtt_pte_t *pt_vaddr, scratch_pte; - unsigned first_entry = start >> PAGE_SHIFT; + unsigned pde = gen6_pde_index(start); unsigned num_entries = length >> PAGE_SHIFT; - unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; - unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; + unsigned pte = gen6_pte_index(start); unsigned last_pte, i; scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); while (num_entries) { - last_pte = first_pte + num_entries; - if (last_pte > I915_PPGTT_PT_ENTRIES) - last_pte = I915_PPGTT_PT_ENTRIES; + last_pte = pte + num_entries; + if (last_pte > GEN6_PTES_PER_PT) + last_pte = GEN6_PTES_PER_PT; - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); - for (i = first_pte; i < last_pte; i++) + for (i = pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; kunmap_atomic(pt_vaddr); - num_entries -= last_pte - first_pte; - first_pte = 0; - act_pt++; + num_entries -= last_pte - pte; + pte = 0; + pde++; } } @@ -971,25 +970,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen6_gtt_pte_t *pt_vaddr; - unsigned first_entry = start >> PAGE_SHIFT; - unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; - unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; + unsigned pde = gen6_pde_index(start); + unsigned pte = gen6_pte_index(start); struct sg_page_iter sg_iter; pt_vaddr = NULL; for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { if (pt_vaddr == NULL) - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); - pt_vaddr[act_pte] = + pt_vaddr[pte] = vm->pte_encode(sg_page_iter_dma_address(&sg_iter), cache_level, true, flags); - - if (++act_pte == I915_PPGTT_PT_ENTRIES) { + if (++pte == GEN6_PTES_PER_PT) { kunmap_atomic(pt_vaddr); pt_vaddr = NULL; - act_pt++; - act_pte = 0; + pde++; + pte = 0; } } if (pt_vaddr) @@ -1068,7 +1065,7 @@ alloc: if (ppgtt->node.start < dev_priv->gtt.mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); - ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; + ppgtt->num_pd_entries = I915_PDES_PER_PD; return 0; } @@ -1173,7 +1170,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->base.start = 0; - ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; + ppgtt->base.total = ppgtt->num_pd_entries * GEN6_PTES_PER_PT * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; ppgtt->pd_offset = diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0199c5a..465549f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,8 +38,16 @@ typedef uint32_t gen6_gtt_pte_t; typedef uint64_t gen8_gtt_pte_t; typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) -/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ +/* GEN Agnostic defines */ +#define I915_PAGE_SIZE 4096 +#define I915_PDES_PER_PD 512 +#define I915_PTE_MASK (I915_PAGE_SIZE-1) +#define I915_PDE_MASK (I915_PDES_PER_PD-1) + +/* GEN6 PPGTT resembles a 2 level page table: + * 31:22 | 21:12 | 11:0 + * PDE | PTE | offset + */ #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) #define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) @@ -47,13 +55,16 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define GEN6_PTE_UNCACHED (1 << 1) #define GEN6_PTE_VALID (1 << 0) -#define GEN6_PPGTT_PD_ENTRIES 512 -#define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE) +#define GEN6_PD_SIZE (I915_PDES_PER_PD * PAGE_SIZE) #define GEN6_PD_ALIGN (PAGE_SIZE * 16) #define GEN6_PDE_VALID (1 << 0) #define GEN7_PTE_CACHE_L3_LLC (3 << 1) +#define GEN6_PDE_SHIFT 22 +#define GEN6_PTES_PER_PT (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) +#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT)) + #define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2) #define BYT_PTE_WRITEABLE (1 << 1) @@ -72,6 +83,14 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) +#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) +#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ +#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ +#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ + +#define GEN8_LEGACY_PDPES 4 +#define GEN8_PTES_PER_PT (PAGE_SIZE / sizeof(gen8_gtt_pte_t)) + /* GEN8 legacy style address is defined as a 3 level page table: * 31:30 | 29:21 | 20:12 | 11:0 * PDPE | PDE | PTE | offset @@ -81,12 +100,6 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define GEN8_PDPE_SHIFT 30 #define GEN8_PDPE_MASK 0x3 #define GEN8_PDE_SHIFT 21 -#define GEN8_PDE_MASK 0x1ff -#define GEN8_PTE_SHIFT 12 -#define GEN8_PTE_MASK 0x1ff -#define GEN8_LEGACY_PDPES 4 -#define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t)) -#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t)) #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ @@ -273,6 +286,96 @@ struct i915_hw_ppgtt { void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; +static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) +{ + const uint32_t mask = NUM_PTE(pde_shift) - 1; + return (address >> PAGE_SHIFT) & mask; +} + +/* Helper to counts the number of PTEs within the given length. This count does + * not cross a page table boundary, so the max value would be + * GEN6_PTES_PER_PT for GEN6, and GEN8_PTES_PER_PT for GEN8. + */ +static inline size_t i915_pte_count(uint64_t addr, size_t length, + uint32_t pde_shift) +{ + const uint64_t mask = ~((1 << pde_shift) - 1); + uint64_t end; + + BUG_ON(length == 0); + BUG_ON(offset_in_page(addr|length)); + + end = addr + length; + + if ((addr & mask) != (end & mask)) + return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift); + + return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); +} + +static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift) +{ + return (addr >> shift) & I915_PDE_MASK; +} + +static inline size_t i915_pde_count(uint64_t addr, uint64_t length, + uint32_t pde_shift) +{ + const uint32_t pdp_shift = pde_shift + 9; + const uint64_t mask = ~((1 << pdp_shift) - 1); + uint64_t end; + + BUG_ON(length == 0); + BUG_ON(offset_in_page(addr|length)); + + end = addr + length; + + if ((addr & mask) != (end & mask)) + return I915_PDES_PER_PD - i915_pde_index(addr, pde_shift); + + return i915_pde_index(end, pde_shift) - i915_pde_index(addr, pde_shift); +} + +static inline uint32_t gen6_pte_index(uint32_t addr) +{ + return i915_pte_index(addr, GEN6_PDE_SHIFT); +} + +static inline size_t gen6_pte_count(uint32_t addr, uint32_t length) +{ + return i915_pte_count(addr, length, GEN6_PDE_SHIFT); +} + +static inline uint32_t gen6_pde_index(uint32_t addr) +{ + return i915_pde_index(addr, GEN6_PDE_SHIFT); +} + +static inline size_t gen6_pde_count(uint32_t addr, uint32_t length) +{ + return i915_pde_count(addr, length, GEN6_PDE_SHIFT); +} + +static inline uint32_t gen8_pte_index(uint64_t address) +{ + return i915_pte_index(address, GEN8_PDE_SHIFT); +} + +static inline uint32_t gen8_pde_index(uint64_t address) +{ + return i915_pde_index(address, GEN8_PDE_SHIFT); +} + +static inline uint32_t gen8_pdpe_index(uint64_t address) +{ + return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; +} + +static inline uint32_t gen8_pml4e_index(uint64_t address) +{ + BUG(); +} + int i915_gem_gtt_init(struct drm_device *dev); void i915_gem_init_global_gtt(struct drm_device *dev); void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, -- 2.0.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx