In order to allow asynchronous gathering of the pages tracked by
obj->resv, pin the pages before waiting on the reservation, and where
possible perform an early wait before acquiring the object lock (with a
follow-up locked wait to ensure we have exclusive access where
necessary).

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 104 +++++++++++----------
 drivers/gpu/drm/i915/i915_gem.c            |  22 +++--
 3 files changed, 68 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index a93e233cfaa9..84992d590da5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -154,7 +154,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
 	int err;
 
-	err = i915_gem_object_pin_pages(obj);
+	err = i915_gem_object_pin_pages_async(obj);
 	if (err)
 		return err;
 
@@ -175,7 +175,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	int err;
 
-	err = i915_gem_object_pin_pages(obj);
+	err = i915_gem_object_pin_pages_async(obj);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index f9044bbdd429..bda990113124 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -49,17 +49,11 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	if (obj->write_domain == I915_GEM_DOMAIN_WC)
 		return 0;
 
-	/* Flush and acquire obj->pages so that we are coherent through
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
 	 * direct access in memory with previous cached writes through
 	 * shmemfs and that our cache domain tracking remains valid.
 	 * For example, if the obj->filp was moved to swap without us
@@ -67,10 +61,17 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	 * continue to assume that the obj remained out of the CPU cached
 	 * domain.
 	 */
-	ret = i915_gem_object_pin_pages(obj);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out_unpin;
+
 	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 
 	/* Serialise direct access to this object with the barriers for
@@ -91,8 +92,9 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->mm.dirty = true;
 	}
 
+out_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return 0;
+	return ret;
 }
 
 /**
@@ -110,17 +112,11 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
-	/* Flush and acquire obj->pages so that we are coherent through
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
 	 * direct access in memory with previous cached writes through
 	 * shmemfs and that our cache domain tracking remains valid.
 	 * For example, if the obj->filp was moved to swap without us
@@ -128,10 +124,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	 * continue to assume that the obj remained out of the CPU cached
 	 * domain.
 	 */
-	ret = i915_gem_object_pin_pages(obj);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out_unpin;
+
 	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
@@ -152,8 +155,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->mm.dirty = true;
 	}
 
+out_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return 0;
+	return ret;
 }
 
 /**
@@ -603,19 +607,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/*
-	 * Try to flush the object off the GPU without holding the lock.
-	 * We will repeat the flush holding the lock in the normal manner
-	 * to catch cases where we are gazumped.
-	 */
-	err = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   (write_domain ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto out;
-
 	/*
 	 * Proxy objects do not control access to the backing storage, ergo
 	 * they cannot be used as a means to manipulate the cache domain
@@ -636,10 +627,23 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 * continue to assume that the obj remained out of the CPU cached
 	 * domain.
 	 */
-	err = i915_gem_object_pin_pages(obj);
+	err = i915_gem_object_pin_pages_async(obj);
 	if (err)
 		goto out;
 
+	/*
+	 * Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	err = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out_unpin;
+
 	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out_unpin;
@@ -680,25 +684,25 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		goto err_unpin;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		goto err_unlock;
 
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err_unlock;
-
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
 		if (ret)
-			goto err_unpin;
+			goto err_unlock;
 		else
 			goto out;
 	}
@@ -718,10 +722,10 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 	/* return with the pages pinned */
 	return 0;
 
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
 err_unlock:
 	i915_gem_object_unlock(obj);
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
 	return ret;
 }
 
@@ -734,10 +738,14 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		goto err_unpin;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_ALL,
@@ -745,15 +753,11 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	if (ret)
 		goto err_unlock;
 
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err_unlock;
-
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
-			goto err_unpin;
+			goto err_unlock;
 		else
 			goto out;
 	}
@@ -782,9 +786,9 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	/* return with the pages pinned */
 	return 0;
 
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
 err_unlock:
 	i915_gem_object_unlock(obj);
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9e835355eb93..21416b87905e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -512,20 +512,21 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		goto out;
 
-	ret = i915_gem_object_pin_pages(obj);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		goto out;
+		goto out_unpin;
 
 	ret = i915_gem_shmem_pread(obj, args);
 	if (ret == -EFAULT || ret == -ENODEV)
 		ret = i915_gem_gtt_pread(obj, args);
 
+out_unpin:
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
@@ -812,16 +813,16 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (ret != -ENODEV)
 		goto err;
 
+	ret = i915_gem_object_pin_pages_async(obj);
+	if (ret)
+		goto err;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		goto err;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err;
+		goto err_unpin;
 
 	ret = -EFAULT;
 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
@@ -845,6 +846,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 			ret = i915_gem_shmem_pwrite(obj, args);
 	}
 
+err_unpin:
 	i915_gem_object_unpin_pages(obj);
 err:
 	i915_gem_object_put(obj);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx