By exporting the ability to map user addresses and insert PTEs representing their backing pages into the GTT, we can exploit UMA in order to utilize normal application data as a texture source or even as a render target (depending upon the capabilities of the chipset). This has a number of uses: zero-copy downloads to the GPU and efficient readback make the intermixed streaming of CPU and GPU operations fairly efficient. This ability has widespread implications, from faster rendering of client-side software rasterisers (chromium), to mitigation of stalls due to readback (firefox), to faster pipelining of texture data (such as pixel buffer objects in GL).

v2: Compile with CONFIG_MMU_NOTIFIER

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |    1 +
 drivers/gpu/drm/i915/i915_dma.c         |    1 +
 drivers/gpu/drm/i915/i915_drv.h         |   21 +++
 drivers/gpu/drm/i915/i915_gem.c         |    9 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  277 +++++++++++++++++++++++++++++++
 include/drm/i915_drm.h                  |   15 ++
 6 files changed, 321 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0f2c549..754d665 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  intel_display.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 69969be..b6e284a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1899,6 +1899,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_ROOT_ONLY|DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5084b29..77c78d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/backlight.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/mmu_notifier.h>
 
 /* General customization:
  */
@@ -927,6 +928,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1072,6 +1074,23 @@ struct drm_i915_gem_object {
 	atomic_t pending_flip;
 };
 
+struct i915_gem_userptr_object {
+	struct drm_i915_gem_object gem;
+	uintptr_t user_ptr;
+	size_t user_size;
+	int read_only;
+
+	struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct mmu_notifier mn;
+#endif
+};
+
+union drm_i915_gem_objects {
+	struct drm_i915_gem_object base;
+	struct i915_gem_userptr_object userptr;
+};
+
 inline static bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
 {
 	return obj->base.import_attach != NULL;
@@ -1333,6 +1352,8 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_set_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8183c0f..cda4dc8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2565,9 +2565,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
+	obj->gtt_offset -= obj->gtt_space->start;
 	drm_mm_put_block(obj->gtt_space);
 	obj->gtt_space = NULL;
-	obj->gtt_offset = 0;
 
 	return 0;
 }
@@ -3083,7 +3083,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	obj->gtt_offset = obj->gtt_space->start;
+	obj->gtt_offset += obj->gtt_space->start;
 
 	fenceable =
 		obj->gtt_space->size == fence_size &&
@@ -3876,6 +3876,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4263,7 +4266,7 @@ i915_gem_load(struct drm_device *dev)
 
 	dev_priv->slab =
 		kmem_cache_create("i915_gem_object",
-				  sizeof(struct drm_i915_gem_object), 0,
+				  sizeof(union drm_i915_gem_objects), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..645290a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+
+static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
+{
+	return container_of(obj, struct i915_gem_userptr_object, gem);
+}
+
+#if defined(CONFIG_MMU_NOTIFIER)
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	struct i915_gem_userptr_object *vmap;
+
+	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+	BUG_ON(vmap->mm != mm);
+	vmap->mm = NULL;
+
+	/* XXX Schedule an eventual unbind? E.g. hook into require request?
+	 * However, locking will be complicated.
+	 */
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	if (vmap->mm) {
+		mmu_notifier_unregister(&vmap->mn, vmap->mm);
+		BUG_ON(vmap->mm);
+	}
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	vmap->mn.ops = &i915_gem_userptr_notifier;
+	return mmu_notifier_register(&vmap->mn, vmap->mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	return 0;
+}
+#endif
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+	int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct page **pvec;
+	int n, pinned, ret;
+
+	if (vmap->mm == NULL)
+		return -EFAULT;
+
+	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)vmap->user_ptr, vmap->user_size))
+		return -EFAULT;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 */
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL) {
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+		if (pvec == NULL)
+			return -ENOMEM;
+	}
+
+	pinned = 0;
+	if (vmap->mm == current->mm)
+		pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
+					       !vmap->read_only, pvec);
+	if (pinned < num_pages) {
+		struct mm_struct *mm = vmap->mm;
+		ret = 0;
+		mutex_unlock(&obj->base.dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		if (vmap->mm != NULL)
+			ret = get_user_pages(current, mm,
+					     vmap->user_ptr + (pinned << PAGE_SHIFT),
+					     num_pages - pinned,
+					     !vmap->read_only, 0,
+					     pvec + pinned,
+					     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&obj->base.dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+
+		if (obj->pages || pinned < num_pages) {
+			ret = obj->pages ? 0 : -EFAULT;
+			goto cleanup_pinned;
+		}
+	}
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto cleanup_pinned;
+	}
+
+	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto cleanup_st;
+	}
+
+	for_each_sg(st->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	drm_free_large(pvec);
+
+	obj->pages = st;
+	return 0;
+
+cleanup_st:
+	kfree(st);
+cleanup_pinned:
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+
+	i915_gem_userptr_release__mmu_notifier(vmap);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct i915_gem_userptr_object *obj;
+	loff_t first_data_page, last_data_page;
+	int num_pages;
+	int ret;
+	u32 handle;
+
+	first_data_page = args->user_ptr / PAGE_SIZE;
+	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+	if (num_pages * PAGE_SIZE > dev_priv->mm.gtt_total)
+		return -E2BIG;
+
+	ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->user_ptr,
+					   args->user_size);
+	if (ret)
+		return ret;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	if (drm_gem_private_object_init(dev, &obj->gem.base,
+					num_pages * PAGE_SIZE)) {
+		i915_gem_object_free(&obj->gem);
+		return -ENOMEM;
+	}
+
+	i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
+	obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+	obj->gem.gtt_offset = offset_in_page(args->user_ptr);
+	obj->user_ptr = args->user_ptr;
+	obj->user_size = args->user_size;
+	obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	obj->mm = current->mm;
+	ret = i915_gem_userptr_init__mmu_notifier(obj);
+	if (ret)
+		return ret;
+
+	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->gem.base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 0e6e135..4f41f8d 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -206,6 +206,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHEING	0x2f
 #define DRM_I915_GEM_GET_CACHEING	0x30
 #define DRM_I915_REG_READ		0x31
+#define DRM_I915_GEM_USERPTR		0x32
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -255,6 +256,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -476,6 +478,19 @@ struct drm_i915_gem_mmap_gtt {
 	__u64 offset;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u32 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	__u32 handle;
-- 
1.7.10.4
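[Not part of the patch; illustration only.] For reviewers, a minimal sketch of how userspace might exercise the new ioctl, using only the struct and ioctl number added above. The helper name and include path are ours, error handling is kept to a bare minimum, and note that in this revision the ioctl is registered DRM_ROOT_ONLY, so the DRM fd must belong to a privileged client.

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* the updated header from this patch */

/* Wrap an existing, page-aligned user allocation in a GEM handle. */
static int gem_userptr(int fd, void *ptr, uint32_t size, int read_only,
		       uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;	/* pages must stay mapped while the bo lives */
	arg.user_size = size;
	arg.flags = read_only ? I915_USERPTR_READ_ONLY : 0;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -errno;

	*handle = arg.handle;		/* an ordinary GEM handle from here on */
	return 0;
}

The returned handle can then be used like any other GEM handle (e.g. as a relocation target in execbuffer), with the usual userspace responsibility of not changing the mapping underneath the GPU, as discussed in the comment in i915_gem_userptr_get_pages().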