On Tue, 4 Sep 2012 21:02:57 +0100
Chris Wilson <chris at chris-wilson.co.uk> wrote:

> Rather than have multiple data structures for describing our page layout
> in conjunction with the array of pages, we can migrate all users over to
> a scatterlist.
>
> One major advantage, other than unifying the page tracking structures,
> this offers is that we replace the vmalloc'ed array (which can be up to
> a megabyte in size) with a chain of individual pages which helps reduce
> memory pressure.
>
> The disadvantage is that we then do not have a simple array to iterate,
> or to access randomly. The common case for this is in the relocation
> processing, which will typically fit within a single scatterlist page
> and so be almost the same cost as the simple array. For iterating over
> the array, the extra function call could be optimised away, but in
> reality is an insignificant cost of either binding the pages, or
> performing the pwrite/pread.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

Now that my eyes are done bleeding, the easy ones first:

ERROR: space required after that ',' (ctx:VxV)
#69: FILE: drivers/char/agp/intel-gtt.c:99:
+	for_each_sg(st->sgl, sg, num_entries,i)
 	                                     ^

WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
#189: FILE: drivers/gpu/drm/drm_cache.c:117:
+		printk(KERN_ERR "Timed out waiting for cache flush.\n");

WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
#191: FILE: drivers/gpu/drm/drm_cache.c:119:
+	printk(KERN_ERR "Architecture has no drm_cache.c support\n");

In addition to the inline comments, it would have been slightly easier to
review without the s/page/i rename, since it seems to be there for no
compelling reason anyway.
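
For reference, the checkpatch-clean versions would look something like
this (untested, just illustrating the two fixes):

	/* space after the comma in the iterator */
	for_each_sg(st->sgl, sg, num_entries, i)
		sg_set_page(sg, pages[i], PAGE_SIZE, 0);

	/* pr_err() instead of printk(KERN_ERR ...) */
	if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0)
		pr_err("Timed out waiting for cache flush.\n");
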
> ---
>  drivers/char/agp/intel-gtt.c               |  51 +++++-------
>  drivers/gpu/drm/drm_cache.c                |  23 ++++++
>  drivers/gpu/drm/i915/i915_drv.h            |  18 +++--
>  drivers/gpu/drm/i915/i915_gem.c            |  79 ++++++++++++------
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c     |  99 +++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 121 ++++++----------------------
>  drivers/gpu/drm/i915/i915_gem_tiling.c     |  16 ++--
>  drivers/gpu/drm/i915/i915_irq.c            |  25 +++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |   9 ++-
>  include/drm/drmP.h                         |   1 +
>  include/drm/intel-gtt.h                    |  10 +--
>  12 files changed, 236 insertions(+), 219 deletions(-)
>
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index 58e32f7..511d1b1 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -84,40 +84,33 @@ static struct _intel_private {
>  #define IS_IRONLAKE	intel_private.driver->is_ironlake
>  #define HAS_PGTBL_EN	intel_private.driver->has_pgtbl_enable
>
> -int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
> -			 struct scatterlist **sg_list, int *num_sg)
> +static int intel_gtt_map_memory(struct page **pages,
> +				unsigned int num_entries,
> +				struct sg_table *st)
>  {
> -	struct sg_table st;
>  	struct scatterlist *sg;
>  	int i;
>
> -	if (*sg_list)
> -		return 0; /* already mapped (for e.g. resume */
> -
>  	DBG("try mapping %lu pages\n", (unsigned long)num_entries);
>
> -	if (sg_alloc_table(&st, num_entries, GFP_KERNEL))
> +	if (sg_alloc_table(st, num_entries, GFP_KERNEL))
>  		goto err;
>
> -	*sg_list = sg = st.sgl;
> -
> -	for (i = 0 ; i < num_entries; i++, sg = sg_next(sg))
> +	for_each_sg(st->sgl, sg, num_entries,i)
>  		sg_set_page(sg, pages[i], PAGE_SIZE, 0);
>
> -	*num_sg = pci_map_sg(intel_private.pcidev, *sg_list,
> -			     num_entries, PCI_DMA_BIDIRECTIONAL);
> -	if (unlikely(!*num_sg))
> +	if (!pci_map_sg(intel_private.pcidev,
> +			st->sgl, st->nents, PCI_DMA_BIDIRECTIONAL))
>  		goto err;
>
>  	return 0;
>
>  err:
> -	sg_free_table(&st);
> +	sg_free_table(st);
>  	return -ENOMEM;
>  }
> -EXPORT_SYMBOL(intel_gtt_map_memory);
>
> -void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
> +static void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
>  {
>  	struct sg_table st;
>  	DBG("try unmapping %lu pages\n", (unsigned long)mem->page_count);
> @@ -130,7 +123,6 @@ void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
>
>  	sg_free_table(&st);
>  }
> -EXPORT_SYMBOL(intel_gtt_unmap_memory);
>
>  static void intel_fake_agp_enable(struct agp_bridge_data *bridge, u32 mode)
>  {
> @@ -879,8 +871,7 @@ static bool i830_check_flags(unsigned int flags)
>  	return false;
>  }
>
> -void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
> -				 unsigned int sg_len,
> +void intel_gtt_insert_sg_entries(struct sg_table *st,
>  				 unsigned int pg_start,
>  				 unsigned int flags)
>  {
> @@ -892,12 +883,11 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
>
>  	/* sg may merge pages, but we have to separate
>  	 * per-page addr for GTT */
> -	for_each_sg(sg_list, sg, sg_len, i) {
> +	for_each_sg(st->sgl, sg, st->nents, i) {
>  		len = sg_dma_len(sg) >> PAGE_SHIFT;
>  		for (m = 0; m < len; m++) {
>  			dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> -			intel_private.driver->write_entry(addr,
> -							  j, flags);
> +			intel_private.driver->write_entry(addr, j, flags);
>  			j++;
>  		}
>  	}
> @@ -905,8 +895,10 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
>  }
>  EXPORT_SYMBOL(intel_gtt_insert_sg_entries);
>
> -void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
> -			    struct page **pages, unsigned int flags)
> +static void intel_gtt_insert_pages(unsigned int first_entry,
> +				   unsigned int num_entries,
> +				   struct page **pages,
> +				   unsigned int flags)
>  {
>  	int i, j;
>
> @@ -917,7 +909,6 @@ void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
>  	}
>  	readl(intel_private.gtt+j-1);
>  }
> -EXPORT_SYMBOL(intel_gtt_insert_pages);
>
>  static int intel_fake_agp_insert_entries(struct agp_memory *mem,
>  					 off_t pg_start, int type)
> @@ -953,13 +944,15 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
>  		global_cache_flush();
>
>  	if (intel_private.base.needs_dmar) {
> -		ret = intel_gtt_map_memory(mem->pages, mem->page_count,
> -					   &mem->sg_list, &mem->num_sg);
> +		struct sg_table st;
> +
> +		ret = intel_gtt_map_memory(mem->pages, mem->page_count, &st);
>  		if (ret != 0)
>  			return ret;
>
> -		intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg,
> -					    pg_start, type);
> +		intel_gtt_insert_sg_entries(&st, pg_start, type);
> +		mem->sg_list = st.sgl;
> +		mem->num_sg = st.nents;

Can you explain how the corresponding free for the sg_table gets called
here?
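
My best guess at the intended lifetime, going by the unmap side quoted
above: the remove path rebuilds a temporary sg_table header from the
saved sgl/nents before freeing. Something like this sketch (hedged, I'm
reconstructing the body the hunk elides):

	static void intel_gtt_unmap_memory(struct scatterlist *sg_list,
					   int num_sg)
	{
		struct sg_table st;

		pci_unmap_sg(intel_private.pcidev, sg_list,
			     num_sg, PCI_DMA_BIDIRECTIONAL);

		/* rebuild the table header so sg_free_table() can walk
		 * and free the chained scatterlist pages */
		st.sgl = sg_list;
		st.orig_nents = st.nents = num_sg;
		sg_free_table(&st);
	}

If that's the plan, a comment to that effect would help.
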
>  	} else
>  		intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages,
>  				       type);
> diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
> index 08758e0..628a2e0 100644
> --- a/drivers/gpu/drm/drm_cache.c
> +++ b/drivers/gpu/drm/drm_cache.c
> @@ -100,6 +100,29 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
>  EXPORT_SYMBOL(drm_clflush_pages);
>
>  void
> +drm_clflush_sg(struct sg_table *st)
> +{
> +#if defined(CONFIG_X86)
> +	if (cpu_has_clflush) {
> +		struct scatterlist *sg;
> +		int i;
> +
> +		mb();
> +		for_each_sg(st->sgl, sg, st->nents, i)
> +			drm_clflush_page(sg_page(sg));
> +		mb();
> +	}
> +
> +	if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0)
> +		printk(KERN_ERR "Timed out waiting for cache flush.\n");
> +#else
> +	printk(KERN_ERR "Architecture has no drm_cache.c support\n");
> +	WARN_ON_ONCE(1);
> +#endif
> +}
> +EXPORT_SYMBOL(drm_clflush_sg);
> +
> +void
>  drm_clflush_virt_range(char *addr, unsigned long length)
>  {
>  #if defined(CONFIG_X86)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0747472..1a714fa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -992,16 +992,11 @@ struct drm_i915_gem_object {
>
>  	unsigned int has_aliasing_ppgtt_mapping:1;
>  	unsigned int has_global_gtt_mapping:1;
> +	unsigned int has_dma_mapping:1;
>
> -	struct page **pages;
> +	struct sg_table *pages;
>  	int pages_pin_count;
>
> -	/**
> -	 * DMAR support
> -	 */
> -	struct scatterlist *sg_list;
> -	int num_sg;
> -
>  	/* prime dma-buf support */
>  	struct sg_table *sg_table;
>  	void *dma_buf_vmapping;
> @@ -1328,6 +1323,15 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>  void i915_gem_lastclose(struct drm_device *dev);
>
>  int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> +static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
> +{
> +	struct scatterlist *sg = obj->pages->sgl;
> +	while (n >= SG_MAX_SINGLE_ALLOC) {
> +		sg = sg_chain_ptr(sg + SG_MAX_SINGLE_ALLOC - 1);
> +		n -= SG_MAX_SINGLE_ALLOC - 1;
> +	}
> +	return sg_page(sg+n);
> +}
>  static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>  {
>  	BUG_ON(obj->pages == NULL);
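
Took me a minute to convince myself the lookup helper above is right: each
chained block contributes SG_MAX_SINGLE_ALLOC - 1 real entries plus one
chain pointer, and the helper silently assumes exactly one page per sg
entry, which the shmem allocation below does guarantee but which would
break if entries ever got coalesced. Worth a comment. The throwaway check
I used to convince myself (hypothetical helper, not for merging):

	/* verify the one-page-per-entry invariant the lookup relies on */
	static inline bool obj_pages_are_unmerged(struct sg_table *st)
	{
		struct scatterlist *sg;
		int i;

		for_each_sg(st->sgl, sg, st->nents, i)
			if (sg->length != PAGE_SIZE)
				return false;
		return true;
	}
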
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 171bc51..06589a9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -411,6 +411,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
>  	int hit_slowpath = 0;
>  	int prefaulted = 0;
>  	int needs_clflush = 0;
> +	struct scatterlist *sg;
> +	int i;
>
>  	user_data = (char __user *) (uintptr_t) args->data_ptr;
>  	remain = args->size;
> @@ -439,9 +441,15 @@ i915_gem_shmem_pread(struct drm_device *dev,
>
>  	offset = args->offset;
>
> -	while (remain > 0) {
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
>  		struct page *page;
>
> +		if (i < offset >> PAGE_SHIFT)
> +			continue;
> +
> +		if (remain <= 0)
> +			break;
> +
>  		/* Operation in this page
>  		 *
>  		 * shmem_page_offset = offset within page in shmem file
> @@ -452,7 +460,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
>  		if ((shmem_page_offset + page_length) > PAGE_SIZE)
>  			page_length = PAGE_SIZE - shmem_page_offset;
>
> -		page = obj->pages[offset >> PAGE_SHIFT];
> +		page = sg_page(sg);
>  		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
>  			(page_to_phys(page) & (1 << 17)) != 0;
>
> @@ -731,6 +739,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	int hit_slowpath = 0;
>  	int needs_clflush_after = 0;
>  	int needs_clflush_before = 0;
> +	int i;
> +	struct scatterlist *sg;
>
>  	user_data = (char __user *) (uintptr_t) args->data_ptr;
>  	remain = args->size;
> @@ -765,10 +775,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	offset = args->offset;
>  	obj->dirty = 1;
>
> -	while (remain > 0) {
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
>  		struct page *page;
>  		int partial_cacheline_write;
>
> +		if (i < offset >> PAGE_SHIFT)
> +			continue;
> +
> +		if (remain <= 0)
> +			break;
> +
>  		/* Operation in this page
>  		 *
>  		 * shmem_page_offset = offset within page in shmem file
> @@ -787,7 +803,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  			((shmem_page_offset | page_length)
>  				& (boot_cpu_data.x86_clflush_size - 1));
>
> -		page = obj->pages[offset >> PAGE_SHIFT];
> +		page = sg_page(sg);
>  		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
>  			(page_to_phys(page) & (1 << 17)) != 0;
>
> @@ -1633,6 +1649,7 @@ static void
>  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  {
>  	int page_count = obj->base.size / PAGE_SIZE;
> +	struct scatterlist *sg;
>  	int ret, i;
>
>  	BUG_ON(obj->madv == __I915_MADV_PURGED);
> @@ -1653,19 +1670,21 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  	if (obj->madv == I915_MADV_DONTNEED)
>  		obj->dirty = 0;
>
> -	for (i = 0; i < page_count; i++) {
> +	for_each_sg(obj->pages->sgl, sg, page_count, i) {
> +		struct page *page = sg_page(sg);
> +
>  		if (obj->dirty)
> -			set_page_dirty(obj->pages[i]);
> +			set_page_dirty(page);
>
>  		if (obj->madv == I915_MADV_WILLNEED)
> -			mark_page_accessed(obj->pages[i]);
> +			mark_page_accessed(page);
>
> -		page_cache_release(obj->pages[i]);
> +		page_cache_release(page);
>  	}
>  	obj->dirty = 0;
>
> -	drm_free_large(obj->pages);
> -	obj->pages = NULL;
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
>  }
>
>  static int
> @@ -1682,6 +1701,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>  		return -EBUSY;
>
>  	ops->put_pages(obj);
> +	obj->pages = NULL;
>
>  	list_del(&obj->gtt_list);
>  	if (i915_gem_object_is_purgeable(obj))
> @@ -1739,6 +1759,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
>  	int page_count, i;
>  	struct address_space *mapping;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
>  	struct page *page;
>  	gfp_t gfp;
>
> @@ -1749,20 +1771,27 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
>  	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
>
> -	/* Get the list of pages out of our struct file.  They'll be pinned
> -	 * at this point until we release them.
> -	 */
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return -ENOMEM;
> +
>  	page_count = obj->base.size / PAGE_SIZE;
> -	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
> -	if (obj->pages == NULL)
> +	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
> +		sg_free_table(st);
> +		kfree(st);
>  		return -ENOMEM;
> +	}

I think the call here to sg_free_table is bogus.
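
(sg_alloc_table already unwinds its own partial allocation when it fails,
so I'd expect the error path to be just this -- sketch:)

	page_count = obj->base.size / PAGE_SIZE;
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
		kfree(st);	/* the table header is all that's left */
		return -ENOMEM;
	}
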
>
> -	/* Fail silently without starting the shrinker */
> +	/* Get the list of pages out of our struct file.  They'll be pinned
> +	 * at this point until we release them.
> +	 *
> +	 * Fail silently without starting the shrinker
> +	 */
>  	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
>  	gfp = mapping_gfp_mask(mapping);
>  	gfp |= __GFP_NORETRY | __GFP_NOWARN;
>  	gfp &= ~(__GFP_IO | __GFP_WAIT);
> -	for (i = 0; i < page_count; i++) {
> +	for_each_sg(st->sgl, sg, page_count, i) {
>  		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
>  		if (IS_ERR(page)) {
>  			i915_gem_purge(dev_priv, page_count);
> @@ -1785,20 +1814,20 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  			gfp &= ~(__GFP_IO | __GFP_WAIT);
>  		}
>
> -		obj->pages[i] = page;
> +		sg_set_page(sg, page, PAGE_SIZE, 0);
>  	}
>
>  	if (i915_gem_object_needs_bit17_swizzle(obj))
>  		i915_gem_object_do_bit_17_swizzle(obj);
>
> +	obj->pages = st;
>  	return 0;
>
> err_pages:
> -	while (i--)
> -		page_cache_release(obj->pages[i]);
> -
> -	drm_free_large(obj->pages);
> -	obj->pages = NULL;
> +	for_each_sg(st->sgl, sg, i, page_count)
> +		page_cache_release(sg_page(sg));
> +	sg_free_table(st);
> +	kfree(st);
>  	return PTR_ERR(page);
>  }
>
> @@ -2974,7 +3003,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
>
>  	trace_i915_gem_object_clflush(obj);
>
> -	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
> +	drm_clflush_sg(obj->pages);
>  }
>
>  /** Flushes the GTT write domain for the object if it's dirty. */
> @@ -3724,6 +3753,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
>
> +	BUG_ON(obj->pages);
> +
>  	drm_gem_object_release(&obj->base);
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index eca4726..4bb1b94 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -28,33 +28,57 @@
>  #include <linux/dma-buf.h>
>
>  static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
> -					       enum dma_data_direction dir)
> +					     enum dma_data_direction dir)
>  {
>  	struct drm_i915_gem_object *obj = attachment->dmabuf->priv;
> -	struct drm_device *dev = obj->base.dev;
> -	int npages = obj->base.size / PAGE_SIZE;
> -	struct sg_table *sg;
> -	int ret;
> -	int nents;
> +	struct sg_table *st;
> +	struct scatterlist *src, *dst;
> +	int ret, i;
>
> -	ret = i915_mutex_lock_interruptible(dev);
> +	ret = i915_mutex_lock_interruptible(obj->base.dev);
>  	if (ret)
>  		return ERR_PTR(ret);
>
>  	ret = i915_gem_object_get_pages(obj);
>  	if (ret) {
> -		sg = ERR_PTR(ret);
> +		st = ERR_PTR(ret);
> +		goto out;
> +	}
> +
> +	/* Copy sg so that we make an independent mapping */
> +	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
> +	if (st == NULL) {
> +		st = ERR_PTR(-ENOMEM);
> +		goto out;
> +	}
> +
> +	ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
> +	if (ret) {
> +		kfree(st);
> +		st = ERR_PTR(ret);
> +		goto out;
> +	}
> +
> +	src = obj->pages->sgl;
> +	dst = st->sgl;
> +	for (i = 0; i < obj->pages->nents; i++) {
> +		sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
> +		dst = sg_next(dst);
> +		src = sg_next(src);
> +	}
> +
> +	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
> +		sg_free_table(st);
> +		kfree(st);
> +		st = ERR_PTR(-ENOMEM);
>  		goto out;
>  	}
>
> -	/* link the pages into an SG then map the sg */
> -	sg = drm_prime_pages_to_sg(obj->pages, npages);
> -	nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
>  	i915_gem_object_pin_pages(obj);

<bikeshed>
I think the right way to go about this is to add drm_prime_pages_to_st;
since you're pushing the whole st-over-sg thing, other drivers can
leverage it.
</bikeshed>

The lifetime description we discussed on IRC would have helped here as
well.
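
Roughly what I have in mind (hypothetical name, sketch only -- clone an
object's sg_table so each attachment gets an independent mapping; assumes
one page per entry, as i915's tables guarantee):

	struct sg_table *drm_prime_dup_st(struct sg_table *orig)
	{
		struct scatterlist *src, *dst;
		struct sg_table *st;
		int i;

		st = kmalloc(sizeof(*st), GFP_KERNEL);
		if (st == NULL)
			return NULL;

		if (sg_alloc_table(st, orig->nents, GFP_KERNEL)) {
			kfree(st);
			return NULL;
		}

		/* copy the page pointers, entry by entry */
		src = orig->sgl;
		dst = st->sgl;
		for (i = 0; i < orig->nents; i++) {
			sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
			dst = sg_next(dst);
			src = sg_next(src);
		}
		return st;
	}

Then i915_gem_map_dma_buf (and any other exporter) shrinks to allocate,
dup, dma_map_sg.
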
>
>  out:
> -	mutex_unlock(&dev->struct_mutex);
> -	return sg;
> +	mutex_unlock(&obj->base.dev->struct_mutex);
> +	return st;
>  }
>
>  static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
> @@ -80,7 +104,9 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
>  {
>  	struct drm_i915_gem_object *obj = dma_buf->priv;
>  	struct drm_device *dev = obj->base.dev;
> -	int ret;
> +	struct scatterlist *sg;
> +	struct page **pages;
> +	int ret, i;
>
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
> @@ -92,22 +118,33 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
>  	}
>
>  	ret = i915_gem_object_get_pages(obj);
> -	if (ret) {
> -		mutex_unlock(&dev->struct_mutex);
> -		return ERR_PTR(ret);
> -	}
> +	if (ret)
> +		goto error;
>
> -	obj->dma_buf_vmapping = vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
> -	if (!obj->dma_buf_vmapping) {
> -		DRM_ERROR("failed to vmap object\n");
> -		goto out_unlock;
> -	}
> +	ret = -ENOMEM;
> +
> +	pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
> +	if (pages == NULL)
> +		goto error;
> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
> +		pages[i] = sg_page(sg);
> +
> +	obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
> +	drm_free_large(pages);
> +
> +	if (!obj->dma_buf_vmapping)
> +		goto error;
>
>  	obj->vmapping_count = 1;
>  	i915_gem_object_pin_pages(obj);
>  out_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  	return obj->dma_buf_vmapping;
> +
> +error:
> +	mutex_unlock(&dev->struct_mutex);
> +	return ERR_PTR(ret);
>  }
>
>  static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)

The return on vmap failing looks incorrect to me here. Also, I think
leaving the DRM_ERROR would have been nice.
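
I.e. something like (sketch):

	obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
	drm_free_large(pages);

	if (!obj->dma_buf_vmapping) {
		DRM_ERROR("failed to vmap object\n");
		goto error;	/* ret is still -ENOMEM here */
	}
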
> @@ -184,22 +221,19 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
>  };
>
>  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
> -				struct drm_gem_object *gem_obj, int flags)
> +				      struct drm_gem_object *gem_obj, int flags)
>  {
>  	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
>
> -	return dma_buf_export(obj, &i915_dmabuf_ops,
> -			      obj->base.size, 0600);
> +	return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600);
>  }
>
>  struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> -				struct dma_buf *dma_buf)
> +					     struct dma_buf *dma_buf)
>  {
>  	struct dma_buf_attachment *attach;
>  	struct sg_table *sg;
>  	struct drm_i915_gem_object *obj;
> -	int npages;
> -	int size;
>  	int ret;
>
>  	/* is this one of own objects? */
> @@ -223,21 +257,19 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  		goto fail_detach;
>  	}
>
> -	size = dma_buf->size;
> -	npages = size / PAGE_SIZE;
> -
>  	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
>  	if (obj == NULL) {
>  		ret = -ENOMEM;
>  		goto fail_unmap;
>  	}
>
> -	ret = drm_gem_private_object_init(dev, &obj->base, size);
> +	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	if (ret) {
>  		kfree(obj);
>  		goto fail_unmap;
>  	}
>
> +	obj->has_dma_mapping = true;
>  	obj->sg_table = sg;
>  	obj->base.import_attach = attach;
>
> @@ -249,3 +281,4 @@ fail_detach:
>  	dma_buf_detach(dma_buf, attach);
>  	return ERR_PTR(ret);
>  }
> +
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e6b2205..4ab0083 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -210,7 +210,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  		if (ret)
>  			return ret;
>
> -		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
> +		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
> +							     reloc->offset >> PAGE_SHIFT));
>  		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
>  		kunmap_atomic(vaddr);
>  	} else {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 1847731..6746109 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -167,8 +167,7 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
>  }
>
>  static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
> -					 struct scatterlist *sg_list,
> -					 unsigned sg_len,
> +					 const struct sg_table *pages,
>  					 unsigned first_entry,
>  					 uint32_t pte_flags)
>  {
> @@ -180,12 +179,12 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
>  	struct scatterlist *sg;
>
>  	/* init sg walking */
> -	sg = sg_list;
> +	sg = pages->sgl;
>  	i = 0;
>  	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
>  	m = 0;
>
> -	while (i < sg_len) {
> +	while (i < pages->nents) {
>  		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
>
>  		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
> @@ -194,13 +193,11 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
>  			pt_vaddr[j] = pte | pte_flags;
>
>  			/* grab the next page */
> -			m++;
> -			if (m == segment_len) {
> -				sg = sg_next(sg);
> -				i++;
> -				if (i == sg_len)
> +			if (++m == segment_len) {
> +				if (++i == pages->nents)
>  					break;
>
> +				sg = sg_next(sg);
>  				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
>  				m = 0;
>  			}
> @@ -213,44 +210,10 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
>  	}
>  }
>
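As an aside, this "walk every page-sized slot of a possibly coalesced
scatterlist" idiom now shows up here, in the AGP path, and in the GTT
bind; a common iterator might be worth it eventually. For anyone else
reviewing, the idiom boils down to (sketch, hypothetical helper):

	/* visit the per-page DMA address of every slot in a mapped,
	 * possibly coalesced, scatterlist */
	static void for_each_sg_page_addr(struct sg_table *st,
					  void (*fn)(dma_addr_t addr, void *data),
					  void *data)
	{
		struct scatterlist *sg;
		unsigned int len, m;
		int i;

		for_each_sg(st->sgl, sg, st->nents, i) {
			len = sg_dma_len(sg) >> PAGE_SHIFT;
			for (m = 0; m < len; m++)
				fn(sg_dma_address(sg) + (m << PAGE_SHIFT),
				   data);
		}
	}
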
> -static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt,
> -				    unsigned first_entry, unsigned num_entries,
> -				    struct page **pages, uint32_t pte_flags)
> -{
> -	uint32_t *pt_vaddr, pte;
> -	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
> -	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> -	unsigned last_pte, i;
> -	dma_addr_t page_addr;
> -
> -	while (num_entries) {
> -		last_pte = first_pte + num_entries;
> -		last_pte = min_t(unsigned, last_pte, I915_PPGTT_PT_ENTRIES);
> -
> -		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
> -
> -		for (i = first_pte; i < last_pte; i++) {
> -			page_addr = page_to_phys(*pages);
> -			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
> -			pt_vaddr[i] = pte | pte_flags;
> -
> -			pages++;
> -		}
> -
> -		kunmap_atomic(pt_vaddr);
> -
> -		num_entries -= last_pte - first_pte;
> -		first_pte = 0;
> -		act_pd++;
> -	}
> -}
> -
>  void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
>  			    struct drm_i915_gem_object *obj,
>  			    enum i915_cache_level cache_level)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
>  	uint32_t pte_flags = GEN6_PTE_VALID;
>
>  	switch (cache_level) {

Methinks this isn't what you wanted to do.

> @@ -270,26 +233,10 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
>  		BUG();
>  	}
>
> -	if (obj->sg_table) {
> -		i915_ppgtt_insert_sg_entries(ppgtt,
> -					     obj->sg_table->sgl,
> -					     obj->sg_table->nents,
> -					     obj->gtt_space->start >> PAGE_SHIFT,
> -					     pte_flags);
> -	} else if (dev_priv->mm.gtt->needs_dmar) {
> -		BUG_ON(!obj->sg_list);
> -
> -		i915_ppgtt_insert_sg_entries(ppgtt,
> -					     obj->sg_list,
> -					     obj->num_sg,
> -					     obj->gtt_space->start >> PAGE_SHIFT,
> -					     pte_flags);
> -	} else
> -		i915_ppgtt_insert_pages(ppgtt,
> -					obj->gtt_space->start >> PAGE_SHIFT,
> -					obj->base.size >> PAGE_SHIFT,
> -					obj->pages,
> -					pte_flags);
> +	i915_ppgtt_insert_sg_entries(ppgtt,
> +				     obj->sg_table ?: obj->pages,
> +				     obj->gtt_space->start >> PAGE_SHIFT,
> +				     pte_flags);
>  }

I got lost here. Is it: if there is a prime sg_table, use that, otherwise
just use the object's own sg_table? If so, I think has_dma_mapping is more
readable. Also, I wonder if ?: pissed off the clang people?
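
For the record, `?:` is a GNU extension (fine with gcc), but the explicit
form reads better to me regardless; sketch:

	const struct sg_table *pages =
		obj->sg_table ? obj->sg_table : obj->pages;

	i915_ppgtt_insert_sg_entries(ppgtt, pages,
				     obj->gtt_space->start >> PAGE_SHIFT,
				     pte_flags);
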
>
>  void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> @@ -361,44 +308,26 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>
>  int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -
> -	/* don't map imported dma buf objects */
> -	if (dev_priv->mm.gtt->needs_dmar && !obj->sg_table)
> -		return intel_gtt_map_memory(obj->pages,
> -					    obj->base.size >> PAGE_SHIFT,
> -					    &obj->sg_list,
> -					    &obj->num_sg);
> -	else
> +	if (obj->has_dma_mapping)
>  		return 0;
> +
> +	if (!dma_map_sg(&obj->base.dev->pdev->dev,
> +			obj->pages->sgl, obj->pages->nents,
> +			PCI_DMA_BIDIRECTIONAL))
> +		return -ENOSPC;
> +
> +	return 0;
>  }
>
>  void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
>  			      enum i915_cache_level cache_level)
>  {
>  	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
>  	unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
>
> -	if (obj->sg_table) {
> -		intel_gtt_insert_sg_entries(obj->sg_table->sgl,
> -					    obj->sg_table->nents,
> -					    obj->gtt_space->start >> PAGE_SHIFT,
> -					    agp_type);
> -	} else if (dev_priv->mm.gtt->needs_dmar) {
> -		BUG_ON(!obj->sg_list);
> -
> -		intel_gtt_insert_sg_entries(obj->sg_list,
> -					    obj->num_sg,
> -					    obj->gtt_space->start >> PAGE_SHIFT,
> -					    agp_type);
> -	} else
> -		intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
> -				       obj->base.size >> PAGE_SHIFT,
> -				       obj->pages,
> -				       agp_type);
> -
> +	intel_gtt_insert_sg_entries(obj->sg_table ?: obj->pages,
> +				    obj->gtt_space->start >> PAGE_SHIFT,
> +				    agp_type);
>  	obj->has_global_gtt_mapping = 1;
>  }
>
> @@ -418,10 +347,10 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
>
>  	interruptible = do_idling(dev_priv);
>
> -	if (obj->sg_list) {
> -		intel_gtt_unmap_memory(obj->sg_list, obj->num_sg);
> -		obj->sg_list = NULL;
> -	}
> +	if (!obj->has_dma_mapping)
> +		dma_unmap_sg(&dev->pdev->dev,
> +			     obj->pages->sgl, obj->pages->nents,
> +			     PCI_DMA_BIDIRECTIONAL);
>
>  	undo_idling(dev_priv, interruptible);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index b964df5..8093ecd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -470,18 +470,20 @@ i915_gem_swizzle_page(struct page *page)
>  void
>  i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
>  {
> +	struct scatterlist *sg;
>  	int page_count = obj->base.size >> PAGE_SHIFT;
>  	int i;
>
>  	if (obj->bit_17 == NULL)
>  		return;
>
> -	for (i = 0; i < page_count; i++) {
> -		char new_bit_17 = page_to_phys(obj->pages[i]) >> 17;
> +	for_each_sg(obj->pages->sgl, sg, page_count, i) {
> +		struct page *page = sg_page(sg);
> +		char new_bit_17 = page_to_phys(page) >> 17;
>  		if ((new_bit_17 & 0x1) !=
>  		    (test_bit(i, obj->bit_17) != 0)) {
> -			i915_gem_swizzle_page(obj->pages[i]);
> -			set_page_dirty(obj->pages[i]);
> +			i915_gem_swizzle_page(page);
> +			set_page_dirty(page);
>  		}
>  	}
>  }
> @@ -489,6 +491,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
>  void
>  i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
>  {
> +	struct scatterlist *sg;
>  	int page_count = obj->base.size >> PAGE_SHIFT;
>  	int i;
>
> @@ -502,8 +505,9 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
>  		}
>  	}
>
> -	for (i = 0; i < page_count; i++) {
> -		if (page_to_phys(obj->pages[i]) & (1 << 17))
> +	for_each_sg(obj->pages->sgl, sg, page_count, i) {
> +		struct page *page = sg_page(sg);
> +		if (page_to_phys(page) & (1 << 17))
>  			__set_bit(i, obj->bit_17);
>  		else
>  			__clear_bit(i, obj->bit_17);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d601013..dd49046 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -888,20 +888,20 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  			 struct drm_i915_gem_object *src)
>  {
>  	struct drm_i915_error_object *dst;
> -	int page, page_count;
> +	int i, count;
>  	u32 reloc_offset;
>
>  	if (src == NULL || src->pages == NULL)
>  		return NULL;
>
> -	page_count = src->base.size / PAGE_SIZE;
> +	count = src->base.size / PAGE_SIZE;
>
> -	dst = kmalloc(sizeof(*dst) + page_count * sizeof(u32 *), GFP_ATOMIC);
> +	dst = kmalloc(sizeof(*dst) + count * sizeof(u32 *), GFP_ATOMIC);
>  	if (dst == NULL)
>  		return NULL;
>
>  	reloc_offset = src->gtt_offset;
> -	for (page = 0; page < page_count; page++) {
> +	for (i = 0; i < count; i++) {
>  		unsigned long flags;
>  		void *d;
>
> @@ -924,30 +924,33 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  			memcpy_fromio(d, s, PAGE_SIZE);
>  			io_mapping_unmap_atomic(s);
>  		} else {
> +			struct page *page;
>  			void *s;
>
> -			drm_clflush_pages(&src->pages[page], 1);
> +			page = i915_gem_object_get_page(src, i);
> +
> +			drm_clflush_pages(&page, 1);
>
> -			s = kmap_atomic(src->pages[page]);
> +			s = kmap_atomic(page);
>  			memcpy(d, s, PAGE_SIZE);
>  			kunmap_atomic(s);
>
> -			drm_clflush_pages(&src->pages[page], 1);
> +			drm_clflush_pages(&page, 1);
>  		}
>  		local_irq_restore(flags);
>
> -		dst->pages[page] = d;
> +		dst->pages[i] = d;
>
>  		reloc_offset += PAGE_SIZE;
>  	}
> -	dst->page_count = page_count;
> +	dst->page_count = count;
>  	dst->gtt_offset = src->gtt_offset;
>
>  	return dst;
>
> unwind:
> -	while (page--)
> -		kfree(dst->pages[page]);
> +	while (i--)
> +		kfree(dst->pages[i]);
>  	kfree(dst);
>  	return NULL;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 55cdb4d..984a0c5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -464,7 +464,7 @@ init_pipe_control(struct intel_ring_buffer *ring)
>  		goto err_unref;
>
>  	pc->gtt_offset = obj->gtt_offset;
> -	pc->cpu_page = kmap(obj->pages[0]);
> +	pc->cpu_page = kmap(sg_page(obj->pages->sgl));
>  	if (pc->cpu_page == NULL)
>  		goto err_unpin;
>
> @@ -491,7 +491,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
>  		return;
>
>  	obj = pc->obj;
> -	kunmap(obj->pages[0]);
> +
> +	kunmap(sg_page(obj->pages->sgl));
>  	i915_gem_object_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
>
> @@ -1026,7 +1027,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
>  	if (obj == NULL)
>  		return;
>
> -	kunmap(obj->pages[0]);
> +	kunmap(sg_page(obj->pages->sgl));
>  	i915_gem_object_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
>  	ring->status_page.obj = NULL;
> @@ -1053,7 +1054,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
>  	}
>
>  	ring->status_page.gfx_addr = obj->gtt_offset;
> -	ring->status_page.page_addr = kmap(obj->pages[0]);
> +	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
>  	if (ring->status_page.page_addr == NULL) {
>  		ret = -ENOMEM;
>  		goto err_unpin;
> diff --git a/include/drm/drmP.h b/include/drm/drmP.h
> index d6b67bb..d5f0c16 100644
> --- a/include/drm/drmP.h
> +++ b/include/drm/drmP.h
> @@ -1367,6 +1367,7 @@ extern int drm_remove_magic(struct drm_master *master, drm_magic_t magic);
>
>  /* Cache management (drm_cache.c) */
>  void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
> +void drm_clflush_sg(struct sg_table *st);
>  void drm_clflush_virt_range(char *addr, unsigned long length);
>
>  /* Locking IOCTL support (drm_lock.h) */
> diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
> index 8e29d55..2e37e9f 100644
> --- a/include/drm/intel-gtt.h
> +++ b/include/drm/intel-gtt.h
> @@ -30,16 +30,10 @@ void intel_gmch_remove(void);
>  bool intel_enable_gtt(void);
>
>  void intel_gtt_chipset_flush(void);
> -void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg);
> -void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
> -int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
> -			 struct scatterlist **sg_list, int *num_sg);
> -void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
> -				 unsigned int sg_len,
> +void intel_gtt_insert_sg_entries(struct sg_table *st,
>  				 unsigned int pg_start,
>  				 unsigned int flags);
> -void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
> -			    struct page **pages, unsigned int flags);
> +void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
>
>  /* Special gtt memory types */
>  #define AGP_DCACHE_MEMORY	1

-- 
Ben Widawsky, Intel Open Source Technology Center