On Thu, Jul 10, 2014 at 09:53:43PM +0100, Chris Wilson wrote: > Currently objects for which the hardware needs a contiguous physical > address are allocated a shadow backing storage to satisfy the contraint. > This shadow buffer is not wired into the normal obj->pages and so the > physical object is incoherent with accesses via the GPU, GTT and CPU. By > setting up the appropriate scatter-gather table, we can allow userspace > to access the physical object via either a GTT mmaping of or by rendering > into the GEM bo. However, keeping the CPU mmap of the shmemfs backing > storage coherent with the contiguous shadow is not yet possible. > Fortuituously, CPU mmaps of objects requiring physical addresses are not > expected to be coherent anyway. > > This allows the physical constraint of the GEM object to be transparent > to userspace and allow it to efficiently render into or update them via > the GTT and GPU. > > v2: Fix leak of pci handle spotted by Ville > v3: Remove the now duplicate call to detach_phys_object during free. > v4: Wait for rendering before pwrite. As this patch makes it possible to > render into the phys object, we should make it correct as well! > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > Reviewed-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> Ok, I guess you can make a case that this fixes a bug, but then the commit message should talk about that instead of only mentioning phys object coherency (which is kinda just a side-effect of the rework to track phys objects in the sg tables like everything else). Plus mentioning that kms_cursors_crc provokes this if people bother to plug in a vga connector or something. With that I'll pull it into -fixes with a cc: stable without requiring testcase for the coherency part. -Daniel > --- > drivers/gpu/drm/i915/i915_dma.c | 3 + > drivers/gpu/drm/i915/i915_drv.h | 6 +- > drivers/gpu/drm/i915/i915_gem.c | 207 +++++++++++++++++++++++++++------------- > include/uapi/drm/i915_drm.h | 1 + > 4 files changed, 150 insertions(+), 67 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 5ae608121f03..1b9798503b77 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data, > case I915_PARAM_CMD_PARSER_VERSION: > value = i915_cmd_parser_get_version(); > break; > + case I915_PARAM_HAS_COHERENT_PHYS_GTT: > + value = 1; > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 211db0653831..f81d787d6b47 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1787,10 +1787,10 @@ struct drm_i915_gem_object { > unsigned long user_pin_count; > struct drm_file *pin_filp; > > - /** for phy allocated objects */ > - drm_dma_handle_t *phys_handle; > - > union { > + /** for phy allocated objects */ > + drm_dma_handle_t *phys_handle; > + > struct i915_gem_userptr { > uintptr_t ptr; > unsigned read_only :1; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 83ccbabdcacf..d083cf5bdbbd 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -209,40 +209,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, > return 0; > } > > -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj) > +static int > +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) > { > - drm_dma_handle_t *phys = obj->phys_handle; > + struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; > + char *vaddr = obj->phys_handle->vaddr; > + struct sg_table *st; > + struct scatterlist *sg; > + int i; > > - if (!phys) > - return; > + if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) > + return -EINVAL; > + > + for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { > + struct page *page; > + char *src; > + > + page = shmem_read_mapping_page(mapping, i); > + if (IS_ERR(page)) > + return PTR_ERR(page); > + > + src = kmap_atomic(page); > + memcpy(vaddr, src, PAGE_SIZE); > + drm_clflush_virt_range(vaddr, PAGE_SIZE); > + kunmap_atomic(src); > + > + page_cache_release(page); > + vaddr += PAGE_SIZE; > + } > + > + i915_gem_chipset_flush(obj->base.dev); > + > + st = kmalloc(sizeof(*st), GFP_KERNEL); > + if (st == NULL) > + return -ENOMEM; > + > + if (sg_alloc_table(st, 1, GFP_KERNEL)) { > + kfree(st); > + return -ENOMEM; > + } > + > + sg = st->sgl; > + sg->offset = 0; > + sg->length = obj->base.size; > > - if (obj->madv == I915_MADV_WILLNEED) { > + sg_dma_address(sg) = obj->phys_handle->busaddr; > + sg_dma_len(sg) = obj->base.size; > + > + obj->pages = st; > + obj->has_dma_mapping = true; > + return 0; > +} > + > +static void > +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) > +{ > + int ret; > + > + BUG_ON(obj->madv == __I915_MADV_PURGED); > + > + ret = i915_gem_object_set_to_cpu_domain(obj, true); > + if (ret) { > + /* In the event of a disaster, abandon all caches and > + * hope for the best. > + */ > + WARN_ON(ret != -EIO); > + obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; > + } > + > + if (obj->madv == I915_MADV_DONTNEED) > + obj->dirty = 0; > + > + if (obj->dirty) { > struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; > - char *vaddr = phys->vaddr; > + char *vaddr = obj->phys_handle->vaddr; > int i; > > for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { > - struct page *page = shmem_read_mapping_page(mapping, i); > - if (!IS_ERR(page)) { > - char *dst = kmap_atomic(page); > - memcpy(dst, vaddr, PAGE_SIZE); > - drm_clflush_virt_range(dst, PAGE_SIZE); > - kunmap_atomic(dst); > - > - set_page_dirty(page); > + struct page *page; > + char *dst; > + > + page = shmem_read_mapping_page(mapping, i); > + if (IS_ERR(page)) > + continue; > + > + dst = kmap_atomic(page); > + drm_clflush_virt_range(vaddr, PAGE_SIZE); > + memcpy(dst, vaddr, PAGE_SIZE); > + kunmap_atomic(dst); > + > + set_page_dirty(page); > + if (obj->madv == I915_MADV_WILLNEED) > mark_page_accessed(page); > - page_cache_release(page); > - } > + page_cache_release(page); > vaddr += PAGE_SIZE; > } > - i915_gem_chipset_flush(obj->base.dev); > + obj->dirty = 0; > } > > -#ifdef CONFIG_X86 > - set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); > -#endif > - drm_pci_free(obj->base.dev, phys); > - obj->phys_handle = NULL; > + sg_free_table(obj->pages); > + kfree(obj->pages); > + > + obj->has_dma_mapping = false; > +} > + > +static void > +i915_gem_object_release_phys(struct drm_i915_gem_object *obj) > +{ > + drm_pci_free(obj->base.dev, obj->phys_handle); > +} > + > +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { > + .get_pages = i915_gem_object_get_pages_phys, > + .put_pages = i915_gem_object_put_pages_phys, > + .release = i915_gem_object_release_phys, > +}; > + > +static int > +drop_pages(struct drm_i915_gem_object *obj) > +{ > + struct i915_vma *vma, *next; > + int ret; > + > + drm_gem_object_reference(&obj->base); > + list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) > + if (i915_vma_unbind(vma)) > + break; > + > + ret = i915_gem_object_put_pages(obj); > + drm_gem_object_unreference(&obj->base); > + > + return ret; > } > > int > @@ -250,9 +347,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, > int align) > { > drm_dma_handle_t *phys; > - struct address_space *mapping; > - char *vaddr; > - int i; > + int ret; > > if (obj->phys_handle) { > if ((unsigned long)obj->phys_handle->vaddr & (align -1)) > @@ -267,41 +362,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, > if (obj->base.filp == NULL) > return -EINVAL; > > + ret = drop_pages(obj); > + if (ret) > + return ret; > + > /* create a new object */ > phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); > if (!phys) > return -ENOMEM; > > - vaddr = phys->vaddr; > -#ifdef CONFIG_X86 > - set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE); > -#endif > - mapping = file_inode(obj->base.filp)->i_mapping; > - for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { > - struct page *page; > - char *src; > - > - page = shmem_read_mapping_page(mapping, i); > - if (IS_ERR(page)) { > -#ifdef CONFIG_X86 > - set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); > -#endif > - drm_pci_free(obj->base.dev, phys); > - return PTR_ERR(page); > - } > - > - src = kmap_atomic(page); > - memcpy(vaddr, src, PAGE_SIZE); > - kunmap_atomic(src); > - > - mark_page_accessed(page); > - page_cache_release(page); > - > - vaddr += PAGE_SIZE; > - } > - > obj->phys_handle = phys; > - return 0; > + obj->ops = &i915_gem_phys_ops; > + > + return i915_gem_object_get_pages(obj); > } > > static int > @@ -312,6 +385,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, > struct drm_device *dev = obj->base.dev; > void *vaddr = obj->phys_handle->vaddr + args->offset; > char __user *user_data = to_user_ptr(args->data_ptr); > + int ret; > + > + /* We manually control the domain here and pretend that it > + * remains coherent i.e. in the GTT domain, like shmem_pwrite. > + */ > + ret = i915_gem_object_wait_rendering(obj, false); > + if (ret) > + return ret; > > if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { > unsigned long unwritten; > @@ -327,6 +408,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, > return -EFAULT; > } > > + drm_clflush_virt_range(vaddr, args->size); > i915_gem_chipset_flush(dev); > return 0; > } > @@ -1047,11 +1129,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > * pread/pwrite currently are reading and writing from the CPU > * perspective, requiring manual detiling by the client. > */ > - if (obj->phys_handle) { > - ret = i915_gem_phys_pwrite(obj, args, file); > - goto out; > - } > - > if (obj->tiling_mode == I915_TILING_NONE && > obj->base.write_domain != I915_GEM_DOMAIN_CPU && > cpu_write_needs_clflush(obj)) { > @@ -1061,8 +1138,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > * textures). Fallback to the shmem path in that case. */ > } > > - if (ret == -EFAULT || ret == -ENOSPC) > - ret = i915_gem_shmem_pwrite(dev, obj, args, file); > + if (ret == -EFAULT || ret == -ENOSPC) { > + if (obj->phys_handle) > + ret = i915_gem_phys_pwrite(obj, args, file); > + else > + ret = i915_gem_shmem_pwrite(dev, obj, args, file); > + } > > out: > drm_gem_object_unreference(&obj->base); > @@ -3499,7 +3580,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, > * Stolen memory is always coherent with the GPU as it is explicitly > * marked as wc by the system, or the system is cache-coherent. > */ > - if (obj->stolen) > + if (obj->stolen || obj->phys_handle) > return false; > > /* If the GPU is snooping the contents of the CPU cache, > @@ -4442,8 +4523,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) > } > } > > - i915_gem_object_detach_phys(obj); > - > /* Stolen objects don't hold a ref, but do hold pin count. Fix that up > * before progressing. */ > if (obj->stolen) > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index ff57f07c3249..c6b229fe4a11 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 > #define I915_PARAM_HAS_WT 27 > #define I915_PARAM_CMD_PARSER_VERSION 28 > +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 > > typedef struct drm_i915_getparam { > int param; > -- > 2.0.1 > -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx