On 06/03/2017 09:29, Chris Wilson wrote:
Before we instantiate/pin the backing store for our use, we
can prepopulate the shmemfs filp efficiently using the a
write into the pagecache. We avoid the penalty of instantiating
all the pages, important if the user is just writing to a few
and never uses the object on the GPU, and using a direct write
into shmemfs allows it to avoid the cost of retrieving a page
(either swapin or clearing-before-use) before it is overwritten.
This can be extended later to provide additional specialisation for
other backends (other than shmemfs).
References: https://bugs.freedesktop.org/show_bug.cgi?id=99107
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Matthew Auld <matthew.william.auld@xxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/i915_gem.c | 78 ++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_object.h | 3 ++
2 files changed, 81 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7c20601fe1de..73d5cee23dd7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1452,6 +1452,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
+ ret = -ENODEV;
+ if (obj->ops->pwrite)
+ ret = obj->ops->pwrite(obj, args);
ops->pwrite_nowait or some good name to signify the conditions?
I am not sure that the obj->mm.pages check can be in the vfunc since
this call happens before the i915_gem_object_wait. Which to me sounds
like we should be explicit at this level that the ops->pwrite is not
allowed to run if obj->mm.pages has already been instantiated.
Or if this is only true for the normal objects, and the future
specialisation may become full, then the standard pattern of
if (obj->ops->pwrite)
ret = obj->ops->pwrite();
else
ret = default_obj_pwrite();
?
In which case ops->pwrite would be the correct name to keep.
The ops->pwrite for the shmemfs objects would then become
i915_gem_object_pwrite_gtt from below followed by default_obj_pwrite.
I don't know, maybe I am complicating it too much?
+ if (ret != -ENODEV)
+ goto err;
+
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
@@ -2575,6 +2581,75 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
goto out_unlock;
}
+static int
+i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
_shmem ? Or _nowait_shmem depending on the outcome of the above. Just
think _gtt is wrong. Or keep it for consistency since get/put pages for
shmem objects are also called _gtt at the moment, so until that is changed?
+ const struct drm_i915_gem_pwrite *arg)
+{
+ struct address_space *mapping = obj->base.filp->f_mapping;
+ char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+ u64 remain, offset;
+ unsigned int pg;
+
+ /* Before we instantiate/pin the backing store for our use, we
+ * can prepopulate the shmemfs filp efficiently using the a
+ * write into the pagecache. We avoid the penalty of instantiating
+ * all the pages, important if the user is just writing to a few
+ * and never uses the object on the GPU, and using a direct write
+ * into shmemfs allows it to avoid the cost of retrieving a page
+ * (either swapin or clearing-before-use) before it is overwritten.
+ */
+ if (READ_ONCE(obj->mm.pages))
+ return -ENODEV;
+
+ /* Before the pages are instantioted the object is treated as being
Typo in instantiated.
+ * in the CPU domain. The pages will be clflushed as required before
+ * use, and we can freely write into the pages directly. If userspace
+ * races pwrite with any other operation; corruption will ensue -
+ * that is userspace's perogative!
Typo in prerogative. (Thanks to automatic spell checker, have to say I
thought it was spelled perogative myself!)
+ */
+
+ remain = arg->size;
+ offset = arg->offset;
+ pg = offset_in_page(offset);
+
+ do {
+ unsigned int len, unwritten;
+ struct page *page;
+ void *data, *vaddr;
+ int err;
+
+ len = PAGE_SIZE - pg;
+ if (len > remain)
+ len = remain;
+
+ err = pagecache_write_begin(obj->base.filp, mapping,
+ offset, len,
+ 0, &page, &data);
+ if (err < 0)
+ return err;
+
+ vaddr = kmap(page);
+ unwritten = copy_from_user(vaddr + pg, user_data, len);
+ kunmap(page);
+
+ err = pagecache_write_end(obj->base.filp, mapping, offset,
+ len, len - unwritten,
+ page, data);
+ if (err < 0)
+ return err;
+
+ if (unwritten)
+ return -EFAULT;
+
+ remain -= len;
+ user_data += len;
+ offset += len;
+ pg = 0;
+ } while (remain);
+
+ return 0;
+}
+
static bool ban_context(const struct i915_gem_context *ctx)
{
return (i915_gem_context_is_bannable(ctx) &&
@@ -3991,8 +4066,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
I915_GEM_OBJECT_IS_SHRINKABLE,
+
.get_pages = i915_gem_object_get_pages_gtt,
.put_pages = i915_gem_object_put_pages_gtt,
+
+ .pwrite = i915_gem_object_pwrite_gtt,
};
struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 33b0dc4782a9..d26155e0a026 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -56,6 +56,9 @@ struct drm_i915_gem_object_ops {
struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
+ int (*pwrite)(struct drm_i915_gem_object *,
+ const struct drm_i915_gem_pwrite *);
+
int (*dmabuf_export)(struct drm_i915_gem_object *);
void (*release)(struct drm_i915_gem_object *);
};
Code looks OK, just would like to discuss the design of the vfunc a bit.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx