I think this is a nice generalization on it's own, but it's primarily prep work for my PPGTT support. Does this bother anyone? The only down side I can see is we waste 2k of cpu unmappable space (unless we have something else that is <= 2k and doesn't need to be page aligned). It's RFC because I have only hacked this up and haven't thoroughly tested a bunch of error and suspend/resume paths. Cc: Daniel Vetter <daniel.vetter at ffwll.ch> Cc: Chris Wilson <chris at chris-wilson.co.uk> Signed-off-by: Ben Widawsky <ben at bwidawsk.net> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 56 ++++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3877c45..27573d8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -440,6 +440,7 @@ struct i915_hw_ppgtt { uint32_t pd_offset; dma_addr_t *pt_dma_addr; dma_addr_t scratch_page_dma_addr; + struct drm_mm_node node; /* pte functions, mirroring the interface of the global gtt. */ void (*clear_range)(struct i915_hw_ppgtt *ppgtt, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 50df194..b474afb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -75,12 +75,9 @@ static inline gen6_gtt_pte_t gen6_pte_encode(struct drm_device *dev, return pte; } -static int gen6_ppgtt_enable(struct drm_device *dev) +static void gen6_write_pdes(struct drm_i915_private *dev_priv, + struct i915_hw_ppgtt *ppgtt) { - drm_i915_private_t *dev_priv = dev->dev_private; - uint32_t pd_offset; - struct intel_ring_buffer *ring; - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; gen6_gtt_pte_t __iomem *pd_addr; uint32_t pd_entry; int i; @@ -97,6 +94,17 @@ static int gen6_ppgtt_enable(struct drm_device *dev) writel(pd_entry, pd_addr + i); } readl(pd_addr); +} + +static int gen6_ppgtt_enable(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t pd_offset; + struct intel_ring_buffer *ring; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + int i; + + gen6_write_pdes(dev_priv, ppgtt); pd_offset = ppgtt->pd_offset; pd_offset /= 64; /* in cachelines, */ @@ -208,6 +216,8 @@ static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) { int i; + drm_mm_put_block(&ppgtt->node); + if (ppgtt->pt_dma_addr) { for (i = 0; i < ppgtt->num_pd_entries; i++) pci_unmap_page(ppgtt->dev->pdev, @@ -226,15 +236,22 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { struct drm_device *dev = ppgtt->dev; struct drm_i915_private *dev_priv = dev->dev_private; - unsigned first_pd_entry_in_global_pt; + const u32 pde_size = I915_PPGTT_PD_ENTRIES * sizeof(gen6_gtt_pte_t); int i; int ret = -ENOMEM; /* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024 - * entries. For aliasing ppgtt support we just steal them at the end for - * now. */ - first_pd_entry_in_global_pt = - gtt_total_entries(dev_priv->gtt) - I915_PPGTT_PD_ENTRIES; + * entries. Carve out enough space in the cpu unmappable area. + */ + BUG_ON(!drm_mm_initialized(&dev_priv->mm.gtt_space)); + ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, + &ppgtt->node, pde_size, + PAGE_SIZE, 0, + dev_priv->gtt.mappable_end, + dev_priv->gtt.total); + if (ret) + return ret; + ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES; ppgtt->enable = gen6_ppgtt_enable; @@ -243,8 +260,10 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->cleanup = gen6_ppgtt_cleanup; ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries, GFP_KERNEL); - if (!ppgtt->pt_pages) + if (!ppgtt->pt_pages) { + drm_mm_put_block(&ppgtt->node); return -ENOMEM; + } for (i = 0; i < ppgtt->num_pd_entries; i++) { ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL); @@ -274,7 +293,9 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->clear_range(ppgtt, 0, ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES); - ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t); + DRM_DEBUG_DRIVER("Allocated pde space at GSM offset: %lx\n", + ppgtt->node.start / PAGE_SIZE); + ppgtt->pd_offset = ppgtt->node.start / PAGE_SIZE; return 0; @@ -291,6 +312,7 @@ err_pt_alloc: __free_page(ppgtt->pt_pages[i]); } kfree(ppgtt->pt_pages); + drm_mm_put_block(&ppgtt->node); return ret; } @@ -397,6 +419,9 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE, dev_priv->gtt.total / PAGE_SIZE); + if (dev_priv->mm.aliasing_ppgtt) + gen6_write_pdes(dev_priv, dev_priv->mm.aliasing_ppgtt); + list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) { i915_gem_clflush_object(obj); i915_gem_gtt_bind_object(obj, obj->cache_level); @@ -641,12 +666,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev) if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { int ret; - if (INTEL_INFO(dev)->gen <= 7) { - /* PPGTT pdes are stolen from global gtt ptes, so shrink the - * aperture accordingly when using aliasing ppgtt. */ - gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; - } - i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); ret = i915_gem_init_aliasing_ppgtt(dev); @@ -655,7 +674,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev) DRM_ERROR("Aliased PPGTT setup failed %d\n", ret); drm_mm_takedown(&dev_priv->mm.gtt_space); - gtt_size += I915_PPGTT_PD_ENTRIES*PAGE_SIZE; } i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); } -- 1.8.2.1