Acquiring the backing struct pages for the userptr range is not free; the first client for userptr would insist on frequently creating userptr objects ahead of time and not use them. For that first client, deferring the cost of populating the userptr (calling get_user_pages()) to the actual execbuf was a substantial improvement. However, not all clients are the same, and most would like to validate that the userptr is valid and backed by struct pages upon creation, so offer a I915_USERPTR_POPULATE flag to do just that. Note that big difference between I915_USERPTR_POPULATE and the deferred scheme is that POPULATE is guaranteed to be synchronous, the result is known before the ioctl returns (and the handle exposed). However, due to system memory pressure, the object may be paged out before use, requiring them to be paged back in on execbuf (as may always happen). Testcase: igt/gem_userptr_blits/populate Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx> Cc: Jason Ekstrand <jason@xxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_userptr.c | 63 ++++++++++++++++++++++++++++++++- include/uapi/drm/i915_drm.h | 1 + 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index dbc5818dd28b..f05f1322ec27 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -746,6 +746,64 @@ probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) return ret; } +static int populate(struct drm_i915_gem_object *obj) +{ + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; + const unsigned long addr = obj->userptr.ptr; + struct sg_table *pages; + struct page **pvec; + int pinned; + int ret = 0; + + pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); + if (!pvec) + return -ENOMEM; + + pinned = __get_user_pages_fast(addr, num_pages, + !obj->userptr.read_only, + pvec); + if (pinned < 0) { + ret = pinned; + goto err; + } + + if (pinned < num_pages) { + unsigned int flags; + + flags = 0; + if (!obj->userptr.read_only) + flags |= FOLL_WRITE; + + down_read(¤t->mm->mmap_sem); + do { + ret = get_user_pages(addr + pinned * PAGE_SIZE, + num_pages - pinned, + flags, + pvec + pinned, NULL); + if (ret < 0) + break; + + pinned += ret; + } while (pinned < num_pages); + up_read(¤t->mm->mmap_sem); + if (ret < 0) + goto err; + } + + mutex_lock(&obj->mm.lock); + pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages); + mutex_unlock(&obj->mm.lock); + if (!IS_ERR(pages)) + pinned = 0; + else + ret = PTR_ERR(pages); + +err: + release_pages(pvec, pinned, 0); + kvfree(pvec); + return ret; +} + /** * Creates a new mm object that wraps some normal memory from the process * context - user memory. @@ -799,6 +857,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file if (args->flags & ~(I915_USERPTR_READ_ONLY | I915_USERPTR_PROBE | + I915_USERPTR_POPULATE | I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; @@ -816,7 +875,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file return -ENODEV; } - if (args->flags & I915_USERPTR_PROBE) { + if (args->flags & (I915_USERPTR_PROBE | I915_USERPTR_POPULATE)) { /* * Check that the range pointed to represents real struct * pages and not iomappings (at this moment in time!) @@ -846,6 +905,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file ret = i915_gem_userptr_init__mm_struct(obj); if (ret == 0) ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags); + if (ret == 0 && args->flags & I915_USERPTR_POPULATE) + ret = populate(obj); if (ret == 0) ret = drm_gem_handle_create(file, &obj->base, &handle); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 392ede2ca63e..500c408c912a 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1354,6 +1354,7 @@ struct drm_i915_gem_userptr { __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 #define I915_USERPTR_PROBE 0x2 +#define I915_USERPTR_POPULATE 0x4 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** * Returned handle for the object. -- 2.15.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx