On Fri, Apr 18, 2014 at 08:27:04AM +0100, Chris Wilson wrote:
> Currently objects for which the hardware needs a contiguous physical
> address are allocated a shadow backing storage to satisfy the constraint.
> This shadow buffer is not wired into the normal obj->pages and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object either via a GTT mmapping of it or by
> rendering into the GEM bo. However, keeping the CPU mmap of the shmemfs
> backing storage coherent with the contiguous shadow is not yet possible.
> Fortuitously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
>
> This allows the physical constraint of the GEM object to be transparent
> to userspace and allows it to efficiently render into or update them via
> the GTT and GPU.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>  drivers/gpu/drm/i915/i915_gem.c | 182 +++++++++++++++++++++++++++-------------
>  include/uapi/drm/i915_drm.h     |   1 +
>  3 files changed, 129 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 12571aac7516..dd43fdc736ac 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1022,6 +1022,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>          case I915_PARAM_CMD_PARSER_VERSION:
>                  value = i915_cmd_parser_get_version();
>                  break;
> +        case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> +                value = 1;
> +                break;
>          default:
>                  DRM_DEBUG("Unknown parameter %d\n", param->param);
>                  return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e302a23031fe..3eb5c07e1018 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -207,41 +207,129 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>          return 0;
>  }
>
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  {
> -        drm_dma_handle_t *phys = obj->phys_handle;
> +        struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +        char *vaddr = obj->phys_handle->vaddr;
> +        struct sg_table *st;
> +        struct scatterlist *sg;
> +        int i;
>
> -        if (!phys)
> -                return;
> +        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +                return -EINVAL;
> +
> +        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +                struct page *page;
> +                char *src;
> +
> +                page = shmem_read_mapping_page(mapping, i);
> +                if (IS_ERR(page))
> +                        return PTR_ERR(page);
> +
> +                src = kmap_atomic(page);
> +                memcpy(vaddr, src, PAGE_SIZE);
> +                kunmap_atomic(src);
> +
> +                page_cache_release(page);
> +                vaddr += PAGE_SIZE;
> +        }
> +
> +        drm_clflush_virt_range(obj->phys_handle->vaddr, obj->base.size);

No longer wc/uc so now we flush.
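
(To spell that out: the old attach path remapped the shadow buffer
write-combining; the fragment below is the code this patch removes
further down:

#ifdef CONFIG_X86
        set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
#endif

so CPU writes never sat in the cache. The copy now goes through a
normal cached kmap, hence the explicit clflush + chipset flush here to
keep the GPU view coherent.)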
> +        i915_gem_chipset_flush(obj->base.dev);
> +
> +        st = kmalloc(sizeof(*st), GFP_KERNEL);
> +        if (st == NULL)
> +                return -ENOMEM;
> +
> +        if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +                kfree(st);
> +                return -ENOMEM;
> +        }
> +
> +        sg = st->sgl;
> +        sg->offset = 0;
> +        sg->length = obj->base.size;
> +
> +        sg_dma_address(sg) = obj->phys_handle->busaddr;
> +        sg_dma_len(sg) = obj->base.size;
>
> -        if (obj->madv == I915_MADV_WILLNEED) {
> +        obj->pages = st;
> +        obj->has_dma_mapping = true;
> +        return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> +        int ret;
> +
> +        BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> +        ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +        if (ret) {
> +                /* In the event of a disaster, abandon all caches and
> +                 * hope for the best.
> +                 */
> +                WARN_ON(ret != -EIO);
> +                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +        }
> +
> +        if (obj->madv == I915_MADV_DONTNEED)
> +                obj->dirty = 0;
> +
> +        if (obj->dirty) {
>                  struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> -                char *vaddr = phys->vaddr;
> +                char *vaddr = obj->phys_handle->vaddr;
>                  int i;
>
>                  for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -                        struct page *page = shmem_read_mapping_page(mapping, i);
> -                        if (!IS_ERR(page)) {
> -                                char *dst = kmap_atomic(page);
> -                                memcpy(dst, vaddr, PAGE_SIZE);
> -                                kunmap_atomic(dst);
> +                        struct page *page;
> +                        char *dst;
> +
> +                        page = shmem_read_mapping_page(mapping, i);
> +                        if (IS_ERR(page))
> +                                continue;
>
> -                                drm_clflush_pages(&page, 1);
> +                        dst = kmap_atomic(page);
> +                        memcpy(dst, vaddr, PAGE_SIZE);
> +                        kunmap_atomic(dst);
>
> -                                set_page_dirty(page);
> +                        set_page_dirty(page);
> +                        if (obj->madv == I915_MADV_WILLNEED)
>                                  mark_page_accessed(page);
> -                                page_cache_release(page);
> -                        }
> +                        page_cache_release(page);
>                          vaddr += PAGE_SIZE;
>                  }
> -                i915_gem_chipset_flush(obj->base.dev);
> +                obj->dirty = 0;
>          }
>
> -#ifdef CONFIG_X86
> -        set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -        drm_pci_free(obj->base.dev, phys);

This gets leaked. I suppose you should still keep
i915_gem_object_detach_phys() around.
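
Something along these lines, say (untested sketch, just reusing the
names already in the patch): keep a trimmed-down detach that hands the
contiguous allocation back once put_pages_phys() has flushed any dirty
data out to shmem, and keep calling it from i915_gem_free_object():

static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
{
        if (obj->phys_handle == NULL)
                return;

        /* put_pages_phys() has already copied the dirty contents back
         * to the shmemfs pages, so just release the pci allocation. */
        drm_pci_free(obj->base.dev, obj->phys_handle);
        obj->phys_handle = NULL;
}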
> -        obj->phys_handle = NULL;
> +        sg_free_table(obj->pages);
> +        kfree(obj->pages);
> +
> +        obj->has_dma_mapping = false;
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> +        .get_pages = i915_gem_object_get_pages_phys,
> +        .put_pages = i915_gem_object_put_pages_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> +        struct i915_vma *vma, *next;
> +        int ret;
> +
> +        drm_gem_object_reference(&obj->base);
> +        list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> +                if (i915_vma_unbind(vma))
> +                        break;
> +
> +        ret = i915_gem_object_put_pages(obj);
> +        drm_gem_object_unreference(&obj->base);
> +
> +        return ret;
>  }
>
>  int
> @@ -249,9 +337,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>                              int align)
>  {
>          drm_dma_handle_t *phys;
> -        struct address_space *mapping;
> -        char *vaddr;
> -        int i;
> +        int ret;
>
>          if (obj->phys_handle) {
>                  if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -266,36 +352,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>          if (obj->base.filp == NULL)
>                  return -EINVAL;
>
> +        ret = drop_pages(obj);
> +        if (ret)
> +                return ret;
> +
>          /* create a new object */
>          phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>          if (!phys)
>                  return -ENOMEM;
>
> -        vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> -        set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> -        mapping = file_inode(obj->base.filp)->i_mapping;
> -        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -                struct page *page;
> -                char *src;
> -
> -                page = shmem_read_mapping_page(mapping, i);
> -                if (IS_ERR(page))
> -                        return PTR_ERR(page);
> -
> -                src = kmap_atomic(page);
> -                memcpy(vaddr, src, PAGE_SIZE);
> -                kunmap_atomic(src);
> -
> -                mark_page_accessed(page);
> -                page_cache_release(page);
> -
> -                vaddr += PAGE_SIZE;
> -        }
> -
>          obj->phys_handle = phys;
> -        return 0;
> +        obj->ops = &i915_gem_phys_ops;
> +
> +        return i915_gem_object_get_pages(obj);
>  }
>
>  static int
> @@ -321,6 +390,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>                  return -EFAULT;
>          }
>
> +        drm_clflush_virt_range(vaddr, args->size);

And need to flush again. Check.

The patch looks sane enough to me, save for the phys_handle leak.

>          i915_gem_chipset_flush(dev);
>          return 0;
>  }
> @@ -1184,11 +1254,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>           * pread/pwrite currently are reading and writing from the CPU
>           * perspective, requiring manual detiling by the client.
>           */
> -        if (obj->phys_handle) {
> -                ret = i915_gem_phys_pwrite(obj, args, file);
> -                goto out;
> -        }
> -
>          if (obj->tiling_mode == I915_TILING_NONE &&
>              (obj->base.filp == NULL ||
>              (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> @@ -1199,8 +1264,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>                   * textures). Fallback to the shmem path in that case. */
>          }
>
> -        if (ret == -EFAULT || ret == -ENOSPC)
> -                ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +        if (ret == -EFAULT || ret == -ENOSPC) {
> +                if (obj->phys_handle)
> +                        ret = i915_gem_phys_pwrite(obj, args, file);
> +                else
> +                        ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +        }
>
>  out:
>          drm_gem_object_unreference(&obj->base);
> @@ -3658,7 +3727,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>           * Stolen memory is always coherent with the GPU as it is explicitly
>           * marked as wc by the system, or the system is cache-coherent.
>           */
> -        if (obj->stolen)
> +        if (obj->stolen || obj->phys_handle)
>                  return false;
>
>          /* If the GPU is snooping the contents of the CPU cache,
> @@ -4518,7 +4587,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>          i915_gem_object_put_pages(obj);
>          i915_gem_object_free_mmap_offset(obj);
>          i915_gem_object_release_stolen(obj);
> -        i915_gem_object_detach_phys(obj);
>
>          BUG_ON(obj->pages);
>
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index beee13a53fbb..cf1425c9f54e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT                27
>  #define I915_PARAM_CMD_PARSER_VERSION    28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>
>  typedef struct drm_i915_getparam {
>          int param;
> --
> 1.9.2

--
Ville Syrjälä
Intel OTC
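
P.S. For the userspace side, the new flag would presumably be probed
the usual way via GETPARAM. A hypothetical libdrm-style snippet (not
part of the patch; has_coherent_phys_gtt() is a made-up helper name):

#include <xf86drm.h>
#include <i915_drm.h>

static int has_coherent_phys_gtt(int fd)
{
        drm_i915_getparam_t gp;
        int value = 0;

        gp.param = I915_PARAM_HAS_COHERENT_PHYS_GTT;
        gp.value = &value;

        /* older kernels reject unknown params with -EINVAL, so a
         * failed ioctl just means the feature is absent */
        if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                return 0;

        return value;
}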