The GTT and PPGTT can be thought of more generally as GPU address spaces. Many of their actions (insert entries), state (LRU lists) and many of their characteristics (size), can be shared. Do that. Created a i915_gtt_vm helper macro since for now we always want the regular GTT address space. Eventually we'll ween ourselves off of using this except in cases where we obviously want the GGTT (like display). Signed-off-by: Ben Widawsky <ben at bwidawsk.net> --- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 43 +++++++---- drivers/gpu/drm/i915/i915_gem.c | 8 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 142 +++++++++++++++++++----------------- 4 files changed, 109 insertions(+), 88 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9e99ccd..7504c69 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -260,8 +260,8 @@ static int i915_gem_object_info(struct seq_file *m, void* data) count, size); seq_printf(m, "%zu [%lu] gtt total\n", - dev_priv->gtt.total, - dev_priv->gtt.mappable_end - dev_priv->gtt.start); + i915_gtt_vm->total, + dev_priv->gtt.mappable_end - i915_gtt_vm->start); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7c5e3da..4261258 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -400,6 +400,27 @@ enum i915_cache_level { typedef uint32_t gen6_gtt_pte_t; +struct i915_address_space { + struct drm_device *dev; + unsigned long start; /* Start offset always 0 for dri2 */ + size_t total; /* size addr space maps (ex. 2GB for ggtt) */ + + struct { + dma_addr_t addr; + struct page *page; + } scratch; + + gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr, + enum i915_cache_level level); + void (*clear_range)(struct i915_address_space *i915_mm, + unsigned int first_entry, + unsigned int num_entries); + void (*insert_entries)(struct i915_address_space *i915_mm, + struct sg_table *st, + unsigned int first_entry, + enum i915_cache_level cache_level); +}; + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -408,8 +429,7 @@ typedef uint32_t gen6_gtt_pte_t; * the spec. */ struct i915_gtt { - unsigned long start; /* Start offset of used GTT */ - size_t total; /* Total size GTT can map */ + struct i915_address_space base; size_t stolen_size; /* Total size of stolen memory */ unsigned long mappable_end; /* End offset that we can CPU map */ @@ -430,31 +450,22 @@ struct i915_gtt { size_t *stolen, phys_addr_t *mappable_base, unsigned long *mappable_end); void (*gtt_remove)(struct drm_device *dev); - void (*gtt_clear_range)(struct drm_device *dev, - unsigned int first_entry, - unsigned int num_entries); - void (*gtt_insert_entries)(struct drm_device *dev, - struct sg_table *st, - unsigned int pg_start, - enum i915_cache_level cache_level); - gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr, - enum i915_cache_level level); }; -#define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT) +#define i915_gtt_vm ((struct i915_address_space *)&(dev_priv->gtt.base)) struct i915_hw_ppgtt { + struct i915_address_space base; struct drm_mm_node node; - struct drm_device *dev; unsigned num_pd_entries; struct page **pt_pages; uint32_t pd_offset; dma_addr_t *pt_dma_addr; /* pte functions, mirroring the interface of the global gtt. */ - void (*clear_range)(struct i915_hw_ppgtt *ppgtt, + void (*clear_range)(struct i915_address_space *vm, unsigned int first_entry, unsigned int num_entries); - void (*insert_entries)(struct i915_hw_ppgtt *ppgtt, + void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, unsigned int pg_start, enum i915_cache_level cache_level); @@ -1044,7 +1055,7 @@ typedef struct drm_i915_private { enum modeset_restore modeset_restore; struct mutex modeset_restore_lock; - struct i915_gtt gtt; + struct i915_gtt gtt; /* VMA representing the global address space */ struct i915_gem_mm mm; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dfb1a84c..dfd7d34 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -184,7 +184,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned += obj->gtt_space->size; mutex_unlock(&dev->struct_mutex); - args->aper_size = dev_priv->gtt.total; + args->aper_size = i915_gtt_vm->total; args->aper_available_size = args->aper_size - pinned; return 0; @@ -2950,7 +2950,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, u32 size, fence_size, fence_alignment, unfenced_alignment; bool mappable, fenceable; size_t max = map_and_fenceable ? - dev_priv->gtt.mappable_end : dev_priv->gtt.total; + dev_priv->gtt.mappable_end : dev_priv->gtt.base.total; int ret; fence_size = i915_gem_get_gtt_size(dev, @@ -4074,7 +4074,7 @@ int i915_gem_init(struct drm_device *dev) if (intel_enable_ppgtt(dev) && HAS_HW_CONTEXTS(dev)) { i915_gem_setup_global_gtt(dev, 0, dev_priv->gtt.mappable_end, - dev_priv->gtt.total, false); + i915_gtt_vm->total, false); i915_gem_context_init(dev); if (dev_priv->hw_contexts_disabled) { drm_mm_takedown(&dev_priv->mm.gtt_space); @@ -4087,7 +4087,7 @@ ggtt_only: if (HAS_HW_CONTEXTS(dev)) DRM_DEBUG_DRIVER("Context setup failed\n"); i915_gem_setup_global_gtt(dev, 0, dev_priv->gtt.mappable_end, - dev_priv->gtt.total, true); + i915_gtt_vm->total, true); } ret = i915_gem_init_hw(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6954e95..de8939e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -102,7 +102,7 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->dev->dev_private; + struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; gen6_gtt_pte_t __iomem *pd_addr; uint32_t pd_entry; int i; @@ -181,18 +181,18 @@ static int gen6_ppgtt_enable(struct drm_device *dev) } /* PPGTT support for Sandybdrige/Gen6 and later */ -static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt, +static void gen6_ppgtt_clear_range(struct i915_address_space *vm, unsigned first_entry, unsigned num_entries) { - struct drm_i915_private *dev_priv = ppgtt->dev->dev_private; + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); gen6_gtt_pte_t *pt_vaddr, scratch_pte; unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; unsigned last_pte, i; - scratch_pte = ppgtt->pte_encode(dev_priv->gtt.scratch.addr, - I915_CACHE_LLC); + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC); while (num_entries) { last_pte = first_pte + num_entries; @@ -212,11 +212,13 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt, } } -static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt, +static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, unsigned first_entry, enum i915_cache_level cache_level) { + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); gen6_gtt_pte_t *pt_vaddr; unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; @@ -227,7 +229,7 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt, dma_addr_t page_addr; page_addr = sg_page_iter_dma_address(&sg_iter); - pt_vaddr[act_pte] = ppgtt->pte_encode(page_addr, cache_level); + pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level); if (++act_pte == I915_PPGTT_PT_ENTRIES) { kunmap_atomic(pt_vaddr); act_pt++; @@ -241,14 +243,14 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt, static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) { + struct i915_address_space *vm = &ppgtt->base; int i; drm_mm_remove_node(&ppgtt->node); if (ppgtt->pt_dma_addr) { for (i = 0; i < ppgtt->num_pd_entries; i++) - pci_unmap_page(ppgtt->dev->pdev, - ppgtt->pt_dma_addr[i], + pci_unmap_page(vm->dev->pdev, ppgtt->pt_dma_addr[i], 4096, PCI_DMA_BIDIRECTIONAL); } @@ -262,7 +264,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { #define GEN6_PD_ALIGN (PAGE_SIZE * 16) #define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE) - struct drm_device *dev = ppgtt->dev; + struct i915_address_space *vm = &ppgtt->base; + struct drm_device *dev = vm->dev; struct drm_i915_private *dev_priv = dev->dev_private; int i; int ret = -ENOMEM; @@ -277,25 +280,28 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) &ppgtt->node, GEN6_PD_SIZE, GEN6_PD_ALIGN, 0, dev_priv->gtt.mappable_end, - dev_priv->gtt.total, + i915_gtt_vm->total, DRM_MM_TOPDOWN); if (ret) return ret; if (IS_HASWELL(dev)) { - ppgtt->pte_encode = hsw_pte_encode; + vm->pte_encode = hsw_pte_encode; } else if (IS_VALLEYVIEW(dev)) { - ppgtt->pte_encode = byt_pte_encode; + vm->pte_encode = byt_pte_encode; } else { - ppgtt->pte_encode = gen6_pte_encode; + vm->pte_encode = gen6_pte_encode; } ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; ppgtt->enable = gen6_ppgtt_enable; - ppgtt->clear_range = gen6_ppgtt_clear_range; - ppgtt->insert_entries = gen6_ppgtt_insert_entries; ppgtt->cleanup = gen6_ppgtt_cleanup; ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries, GFP_KERNEL); + vm->clear_range = gen6_ppgtt_clear_range; + vm->insert_entries = gen6_ppgtt_insert_entries; + vm->start = 0; + vm->total = 512 * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; + if (!ppgtt->pt_pages) { drm_mm_remove_node(&ppgtt->node); return -ENOMEM; @@ -326,8 +332,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->pt_dma_addr[i] = pt_addr; } - ppgtt->clear_range(ppgtt, 0, - ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES); + vm->clear_range(vm, 0, ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES); DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", ppgtt->node.size >> 20, @@ -357,9 +362,10 @@ err_pt_alloc: int i915_gem_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) { + struct drm_i915_private *dev_priv = dev->dev_private; int ret; - ppgtt->dev = dev; + ppgtt->base.dev = dev; if (INTEL_INFO(dev)->gen < 8) ret = gen6_ppgtt_init(ppgtt); @@ -373,17 +379,17 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - ppgtt->insert_entries(ppgtt, obj->pages, - obj->gtt_space->start >> PAGE_SHIFT, - cache_level); + ppgtt->base.insert_entries(&ppgtt->base, obj->pages, + obj->gtt_space->start >> PAGE_SHIFT, + cache_level); } void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_object *obj) { - ppgtt->clear_range(ppgtt, - obj->gtt_space->start >> PAGE_SHIFT, - obj->base.size >> PAGE_SHIFT); + ppgtt->base.clear_range(&ppgtt->base, + obj->gtt_space->start >> PAGE_SHIFT, + obj->base.size >> PAGE_SHIFT); } extern int intel_iommu_gfx_mapped; @@ -430,8 +436,9 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct drm_i915_gem_object *obj; /* First fill our portion of the GTT with scratch pages */ - dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE, - dev_priv->gtt.total / PAGE_SIZE); + i915_gtt_vm->clear_range(&dev_priv->gtt.base, + i915_gtt_vm->start / PAGE_SIZE, + i915_gtt_vm->total / PAGE_SIZE); if (dev_priv->mm.aliasing_ppgtt) gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); @@ -463,12 +470,12 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) * within the global GTT as well as accessible by the GPU through the GMADR * mapped BAR (dev_priv->mm.gtt->gtt). */ -static void gen6_ggtt_insert_entries(struct drm_device *dev, +static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, unsigned int first_entry, enum i915_cache_level level) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = vm->dev->dev_private; gen6_gtt_pte_t __iomem *gtt_entries = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; int i = 0; @@ -477,8 +484,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev, for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { addr = sg_page_iter_dma_address(&sg_iter); - iowrite32(dev_priv->gtt.pte_encode(addr, level), - >t_entries[i]); + iowrite32(vm->pte_encode(addr, level), >t_entries[i]); i++; } @@ -489,8 +495,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev, * hardware should work, we must keep this posting read for paranoia. */ if (i != 0) - WARN_ON(readl(>t_entries[i-1]) - != dev_priv->gtt.pte_encode(addr, level)); + WARN_ON(readl(>t_entries[i-1]) != + vm->pte_encode(addr, level)); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -500,14 +506,14 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev, POSTING_READ(GFX_FLSH_CNTL_GEN6); } -static void gen6_ggtt_clear_range(struct drm_device *dev, +static void gen6_ggtt_clear_range(struct i915_address_space *vm, unsigned int first_entry, unsigned int num_entries) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = vm->dev->dev_private; gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; - const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; + const int max_entries = (vm->total >> PAGE_SHIFT) - first_entry; int i; if (WARN(num_entries > max_entries, @@ -515,15 +521,15 @@ static void gen6_ggtt_clear_range(struct drm_device *dev, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = dev_priv->gtt.pte_encode(dev_priv->gtt.scratch.addr, - I915_CACHE_LLC); + scratch_pte = vm->pte_encode(vm->scratch.addr, + I915_CACHE_LLC); for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); readl(gtt_base); } -static void i915_ggtt_insert_entries(struct drm_device *dev, +static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, unsigned int pg_start, enum i915_cache_level cache_level) @@ -535,7 +541,7 @@ static void i915_ggtt_insert_entries(struct drm_device *dev, } -static void i915_ggtt_clear_range(struct drm_device *dev, +static void i915_ggtt_clear_range(struct i915_address_space *vm, unsigned int first_entry, unsigned int num_entries) { @@ -549,9 +555,9 @@ void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - dev_priv->gtt.gtt_insert_entries(dev, obj->pages, - obj->gtt_space->start >> PAGE_SHIFT, - cache_level); + i915_gtt_vm->insert_entries(&dev_priv->gtt.base, obj->pages, + obj->gtt_space->start >> PAGE_SHIFT, + cache_level); obj->has_global_gtt_mapping = 1; } @@ -561,9 +567,9 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - dev_priv->gtt.gtt_clear_range(obj->base.dev, - obj->gtt_space->start >> PAGE_SHIFT, - obj->base.size >> PAGE_SHIFT); + i915_gtt_vm->clear_range(&dev_priv->gtt.base, + obj->gtt_space->start >> PAGE_SHIFT, + obj->base.size >> PAGE_SHIFT); obj->has_global_gtt_mapping = 0; } @@ -659,21 +665,21 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, obj->has_global_gtt_mapping = 1; } - dev_priv->gtt.start = start; - dev_priv->gtt.total = end - start; + i915_gtt_vm->start = start; + i915_gtt_vm->total = end - start; /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &dev_priv->mm.gtt_space, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", hole_start, hole_end); - dev_priv->gtt.gtt_clear_range(dev, hole_start / PAGE_SIZE, - (hole_end-hole_start) / PAGE_SIZE); + i915_gtt_vm->clear_range(i915_gtt_vm, hole_start / PAGE_SIZE, + (hole_end-hole_start) / PAGE_SIZE); } /* And finally clear the reserved guard page (if exists) */ if (guard_page) - dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1); + i915_gtt_vm->clear_range(i915_gtt_vm, end / PAGE_SIZE - 1, 1); } static int setup_scratch_page(struct drm_device *dev) @@ -696,8 +702,8 @@ static int setup_scratch_page(struct drm_device *dev) #else dma_addr = page_to_phys(page); #endif - dev_priv->gtt.scratch.page = page; - dev_priv->gtt.scratch.addr = dma_addr; + i915_gtt_vm->scratch.page = page; + i915_gtt_vm->scratch.addr = dma_addr; return 0; } @@ -705,11 +711,12 @@ static int setup_scratch_page(struct drm_device *dev) static void teardown_scratch_page(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - set_pages_wb(dev_priv->gtt.scratch.page, 1); - pci_unmap_page(dev->pdev, dev_priv->gtt.scratch.addr, + + set_pages_wb(i915_gtt_vm->scratch.page, 1); + pci_unmap_page(dev->pdev, i915_gtt_vm->scratch.addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - put_page(dev_priv->gtt.scratch.page); - __free_page(dev_priv->gtt.scratch.page); + put_page(i915_gtt_vm->scratch.page); + __free_page(i915_gtt_vm->scratch.page); } static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -772,8 +779,8 @@ static int gen6_gmch_probe(struct drm_device *dev, if (ret) DRM_ERROR("Scratch setup failed\n"); - dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range; - dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries; + i915_gtt_vm->clear_range = gen6_ggtt_clear_range; + i915_gtt_vm->insert_entries = gen6_ggtt_insert_entries; return ret; } @@ -803,8 +810,8 @@ static int i915_gmch_probe(struct drm_device *dev, intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end); dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev); - dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range; - dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries; + i915_gtt_vm->clear_range = i915_ggtt_clear_range; + i915_gtt_vm->insert_entries = i915_ggtt_insert_entries; return 0; } @@ -827,20 +834,23 @@ int i915_gem_gtt_init(struct drm_device *dev) gtt->gtt_probe = gen6_gmch_probe; gtt->gtt_remove = gen6_gmch_remove; if (IS_HASWELL(dev)) - gtt->pte_encode = hsw_pte_encode; + gtt->base.pte_encode = hsw_pte_encode; else if (IS_VALLEYVIEW(dev)) - gtt->pte_encode = byt_pte_encode; + gtt->base.pte_encode = byt_pte_encode; else - gtt->pte_encode = gen6_pte_encode; + gtt->base.pte_encode = gen6_pte_encode; } - ret = gtt->gtt_probe(dev, >t->total, >t->stolen_size, + ret = gtt->gtt_probe(dev, >t->base.total, >t->stolen_size, >t->mappable_base, >t->mappable_end); if (ret) return ret; + gtt->base.dev = dev; + /* GMADR is the PCI mmio aperture into the global GTT. */ - DRM_INFO("Memory usable by graphics device = %zdM\n", gtt->total >> 20); + DRM_INFO("Memory usable by graphics device = %zdM\n", + gtt->base.total >> 20); DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20); DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20); -- 1.8.2.3