On Mon, Nov 28, 2011 at 09:35:31PM +0100, Daniel Vetter wrote:
> This adds support to bind/unbind objects and wires it up. Objects are
> only put into the ppgtt when necessary, i.e. at execbuf time.
>
> Objects are still unconditionally put into the global gtt.
>
> v2: Kill the quick hack and explicitly pass cache_level to ppgtt_bind
> like for the global gtt function. Noticed by Chris Wilson.
>
> Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index c918124..9c81cda 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -513,6 +513,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  			    struct drm_file *file,
>  			    struct list_head *objects)
>  {
> +	drm_i915_private_t *dev_priv = ring->dev->dev_private;
>  	struct drm_i915_gem_object *obj;
>  	int ret, retry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> @@ -621,6 +622,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  		}
>
>  		i915_gem_object_unpin(obj);
> +
> +		/* ... and ensure ppgtt mapping exist if needed. */
> +		if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
> +			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> +					       obj, obj->cache_level);
> +
> +			obj->has_aliasing_ppgtt_mapping = 1;
> +		}
>  	}
>
>  	if (ret != -ENOSPC || retry > 1)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index bd9b520..061ae12 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -34,22 +34,31 @@ static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
>  				   unsigned first_entry,
>  				   unsigned num_entries)
>  {
> -	int i, j;
>  	uint32_t *pt_vaddr;
>  	uint32_t scratch_pte;
> +	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
> +	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> +	unsigned last_pte, i;
>
>  	scratch_pte = GEN6_PTE_ADDR_ENCODE(ppgtt->scratch_page_dma_addr);
>  	scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC;
>
> -	for (i = 0; i < ppgtt->num_pd_entries; i++) {
> -		pt_vaddr = kmap_atomic(ppgtt->pt_pages[i]);
> +	while (num_entries) {
> +		last_pte = first_pte + num_entries;
> +		if (last_pte > I915_PPGTT_PT_ENTRIES)
> +			last_pte = I915_PPGTT_PT_ENTRIES;
> +
> +		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
>
> -		for (j = 0; j < I915_PPGTT_PT_ENTRIES; j++)
> -			pt_vaddr[j] = scratch_pte;
> +		for (i = first_pte; i < last_pte; i++)
> +			pt_vaddr[i] = scratch_pte;
>
>  		kunmap_atomic(pt_vaddr);
> -	}
>
> +		num_entries -= last_pte - first_pte;
> +		first_pte = 0;
> +		act_pd++;
> +	}
>  }
>
>  int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
> @@ -162,6 +171,131 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
>  	kfree(ppgtt);
>  }
>
> +static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
> +					 struct scatterlist *sg_list,
> +					 unsigned sg_len,
> +					 unsigned first_entry,
> +					 uint32_t pte_flags)
> +{
> +	uint32_t *pt_vaddr, pte;
> +	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
> +	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> +	unsigned i, j, m, segment_len;
> +	dma_addr_t page_addr;
> +	struct scatterlist *sg;
> +
> +	/* init sg walking */
> +	sg = sg_list;
> +	i = 0;
> +	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
> +	m = 0;
> +
> +	while (i < sg_len) {
> +		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
> +
> +		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
> +			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> +			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
> +			pt_vaddr[j] = pte | pte_flags;
> +
> +			/* grab the next page */
> +			m++;
> +			if (m == segment_len) {
> +				sg = sg_next(sg);
> +				i++;
> +				if (i == sg_len)
> +					break;
> +
> +				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
> +				m = 0;
> +			}
> +		}
> +
> +		kunmap_atomic(pt_vaddr);
> +
> +		first_pte = 0;
> +		act_pd++;
> +	}
> +}
> +
> +static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt,
> +				    unsigned first_entry, unsigned num_entries,
> +				    struct page **pages, uint32_t pte_flags)
> +{
> +	uint32_t *pt_vaddr, pte;
> +	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
> +	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> +	unsigned last_pte, i;
> +	dma_addr_t page_addr;
> +
> +	while (num_entries) {
> +		last_pte = first_pte + num_entries;
> +		last_pte = min_t(unsigned, last_pte, I915_PPGTT_PT_ENTRIES);
> +
> +		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
> +
> +		for (i = first_pte; i < last_pte; i++) {
> +			page_addr = page_to_phys(*pages);
> +			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
> +			pt_vaddr[i] = pte | pte_flags;
> +
> +			pages++;
> +		}
> +
> +		kunmap_atomic(pt_vaddr);
> +
> +		num_entries -= last_pte - first_pte;
> +		first_pte = 0;
> +		act_pd++;
> +	}
> +}

Same comment as 3/11 on these... don't we need a clflush?
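For concreteness, a rough, untested sketch of what I have in mind (not part of this patch; it assumes clflush_cache_range() from <asm/cacheflush.h> is appropriate here and that the GPU fetches the page tables without snooping the CPU cache), shown against i915_ppgtt_insert_pages() and equally applicable to the other two PTE writers:

		for (i = first_pte; i < last_pte; i++) {
			page_addr = page_to_phys(*pages);
			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
			pt_vaddr[i] = pte | pte_flags;

			pages++;
		}

		/* flush the cachelines covering the ptes we just wrote so the
		 * gpu sees them even when the page table mapping isn't snooped */
		clflush_cache_range(&pt_vaddr[first_pte],
				    (last_pte - first_pte) * sizeof(uint32_t));

		kunmap_atomic(pt_vaddr);

If the platform is guaranteed to snoop these writes (LLC machines), an explicit flush might not be needed, but that should be spelled out either way.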