From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Add Android native sync support with fences exported as file descriptors via the execbuf ioctl (rsvd2 field is used). This is a continuation of Jesse Barnes's previous work, squashed to arrive at the final destination, cleaned up, with some fixes and preliminary light testing. GEM requests are extended with fence structures which are associated with Android sync fences exported to user space via file descriptors. Fences which are waited upon, and while exported to userspace, are referenced and added to the irq_queue so they are signalled when requests are completed. There is no overhead in the case where fences are not requested. Based on patches by Jesse Barnes: drm/i915: Android sync points for i915 v3 drm/i915: add fences to the request struct drm/i915: sync fence fixes/updates To do: * Extend driver data with context id / ring id (TBD). v2: * Code review comments. (Chris Wilson) * ring->add_request() was a wrong thing to call - rebase on top of John Harrison's (drm/i915: Early alloc request) to ensure correct request is present before creating a fence. * Take a request reference from signalling path as well to ensure request sticks around while fence is on the request completion wait queue. v3: * Use worker to unreference on completion so it can lock the mutex from interrupt context. 
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> Cc: John Harrison <John.C.Harrison@xxxxxxxxx> --- drivers/gpu/drm/i915/Kconfig | 14 ++ drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 27 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 24 +++ drivers/gpu/drm/i915/i915_sync.c | 254 +++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 8 +- 6 files changed, 325 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_sync.c diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 4e39ab3..abafe68 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -43,6 +43,20 @@ config DRM_I915_KMS If in doubt, say "Y". +config DRM_I915_SYNC + bool "Enable explicit sync support" + depends on DRM_I915 + default y if STAGING + select STAGING + select ANDROID + select SYNC + help + Choose this option to enable Android native sync support and the + corresponding i915 driver code to expose it. Slightly increases + driver size and pulls in sync support from staging. + + If in doubt, say "Y". 
+ config DRM_I915_FBDEV bool "Enable legacy fbdev support for the modesetting intel driver" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 16e3dc3..bcff481 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -58,6 +58,7 @@ i915-y += intel_audio.o \ intel_sprite.o i915-$(CONFIG_ACPI) += intel_acpi.o intel_opregion.o i915-$(CONFIG_DRM_I915_FBDEV) += intel_fbdev.o +i915-$(CONFIG_DRM_I915_SYNC) += i915_sync.o # modesetting output/encoder code i915-y += dvo_ch7017.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 023f422..c4d254c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -49,6 +49,7 @@ #include <linux/intel-iommu.h> #include <linux/kref.h> #include <linux/pm_qos.h> +#include <linux/fence.h> /* General customization: */ @@ -2092,6 +2093,15 @@ struct drm_i915_gem_request { struct list_head client_list; uint32_t uniq; + +#ifdef CONFIG_DRM_I915_SYNC + /* core fence obj for this request, may be exported */ + struct fence fence; + + wait_queue_t wait; + + struct work_struct unref_work; +#endif }; void i915_gem_request_free(struct kref *req_ref); @@ -2583,6 +2593,23 @@ void i915_init_vm(struct drm_i915_private *dev_priv, void i915_gem_free_object(struct drm_gem_object *obj); void i915_gem_vma_destroy(struct i915_vma *vma); +/* i915_sync.c */ +struct sync_fence; + +#ifdef CONFIG_DRM_I915_SYNC +int i915_create_sync_fence_ring(struct intel_engine_cs *ring, + struct intel_context *ctx, + struct sync_fence **sync_fence, int *fence_fd); +#else +static inline +int i915_create_sync_fence_ring(struct intel_engine_cs *ring, + struct intel_context *ctx, + struct sync_fence **sync_fence, int *fence_fd) +{ + return -ENODEV; +} +#endif + #define PIN_MAPPABLE 0x1 #define PIN_NONBLOCK 0x2 #define PIN_GLOBAL 0x4 diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 
2a3d1a9..b04157a 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -32,6 +32,7 @@ #include "i915_trace.h" #include "intel_drv.h" #include <linux/dma_remapping.h> +#include "../../../staging/android/sync.h" #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) @@ -1356,6 +1357,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, u32 flags; int ret; bool need_relocs; + struct sync_fence *sync_fence = NULL; + int fence_fd = -1; if (!i915_gem_check_execbuffer(args)) return -EINVAL; @@ -1509,8 +1512,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto req_err; + if (args->flags & I915_EXEC_FENCE_OUT) { + ret = i915_create_sync_fence_ring(ring, ctx, + &sync_fence, &fence_fd); + if (ret) + goto req_err; + } + ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args, &eb->vmas, batch_obj, exec_start, flags); + if (ret) + goto exec_err; + + if (sync_fence) { + sync_fence_install(sync_fence, fence_fd); + args->rsvd2 = fence_fd; + sync_fence = NULL; + } + +exec_err: + if (sync_fence) { + fput(sync_fence->file); + put_unused_fd(fence_fd); + } req_err: /* diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c new file mode 100644 index 0000000..77b7bc9 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_sync.c @@ -0,0 +1,254 @@ +/* + * Copyright © 2013-2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included 
in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> + * Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> + * + */ + +#include <drm/drmP.h> +#include <drm/drm_vma_manager.h> +#include <drm/i915_drm.h> +#include "i915_drv.h" +#include "i915_trace.h" +#include "intel_drv.h" +#include <linux/oom.h> +#include <linux/shmem_fs.h> +#include <linux/slab.h> +#include <linux/swap.h> +#include <linux/pci.h> +#include <linux/dma-buf.h> +#include "../../../staging/android/sync.h" + +static DEFINE_SPINLOCK(fence_lock); + +/* + * i915 Android native sync fences. + * + * We implement sync points in terms of i915 seqnos. They're exposed through + * the DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl, and can be mixed and matched + * with other Android timelines and aggregated into sync_fences, etc. + * + * TODO: + * * Display engine fences. + * * Extend driver data with context id / ring id. 
+ */ + +#define to_i915_request(x) container_of(x, struct drm_i915_gem_request, fence) + +static const char *i915_fence_get_driver_name(struct fence *fence) +{ + return "i915"; +} + +static const char *i915_fence_ring_get_timeline_name(struct fence *fence) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + + return req->ring->name; +} + +static void i915_gem_request_unreference_worker(struct work_struct *work) +{ + struct drm_i915_gem_request *req = + container_of(work, struct drm_i915_gem_request, unref_work); + struct drm_device *dev = req->ring->dev; + + mutex_lock(&dev->struct_mutex); + i915_gem_request_unreference(req); + mutex_unlock(&dev->struct_mutex); +} + +static int +i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key) +{ + struct drm_i915_gem_request *req = wait->private; + struct intel_engine_cs *ring = req->ring; + + if (!i915_gem_request_completed(req, false)) + return 0; + + fence_signal_locked(&req->fence); + + __remove_wait_queue(&ring->irq_queue, wait); + ring->irq_put(ring); + + INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker); + schedule_work(&req->unref_work); + + return 0; +} + +static bool i915_fence_ring_enable_signaling(struct fence *fence) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + struct intel_engine_cs *ring = req->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + wait_queue_t *wait = &req->wait; + + /* queue fence wait queue on irq queue and get fence */ + if (i915_gem_request_completed(req, false) || + i915_terminally_wedged(&dev_priv->gpu_error)) + return false; + + if (!ring->irq_get(ring)) + return false; + + if (i915_gem_request_completed(req, false)) { + ring->irq_put(ring); + return true; + } + + wait->flags = 0; + wait->private = req; + wait->func = i915_fence_ring_check; + + i915_gem_request_reference(req); + + __add_wait_queue(&ring->irq_queue, wait); + + return true; +} + +static bool i915_fence_ring_signaled(struct fence 
*fence) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + + return i915_gem_request_completed(req, false); +} + +static signed long +i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + struct drm_i915_private *dev_priv = req->ring->dev->dev_private; + int ret; + s64 timeout; + + timeout = jiffies_to_nsecs(timeout_js); + + ret = __i915_wait_request(req, + atomic_read(&dev_priv->gpu_error.reset_counter), + intr, &timeout, NULL); + if (ret == -ETIME) + return 0; + + return ret; +} + +static int +i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + + if (size < sizeof(req->seqno)) + return -ENOMEM; + + memcpy(data, &req->seqno, + sizeof(req->seqno)); + + return sizeof(req->seqno); +} + +static void i915_fence_ring_value_str(struct fence *fence, char *str, int size) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + + snprintf(str, size, "%u", req->seqno); +} + +static void +i915_fence_ring_timeline_value_str(struct fence *fence, char *str, int size) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + struct intel_engine_cs *ring = req->ring; + + snprintf(str, size, "%u", ring->get_seqno(ring, false)); +} + +static void i915_fence_release(struct fence *fence) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + struct drm_device *dev = req->ring->dev; + + mutex_lock(&dev->struct_mutex); + i915_gem_request_unreference(req); + mutex_unlock(&dev->struct_mutex); +} + +static struct fence_ops i915_fence_ring_ops = { + .get_driver_name = i915_fence_get_driver_name, + .get_timeline_name = i915_fence_ring_get_timeline_name, + .enable_signaling = i915_fence_ring_enable_signaling, + .signaled = i915_fence_ring_signaled, + .wait = i915_fence_ring_wait, + .fill_driver_data = i915_fence_ring_fill_driver_data, + .fence_value_str = 
i915_fence_ring_value_str, + .timeline_value_str = i915_fence_ring_timeline_value_str, + .release = i915_fence_release +}; + +int i915_create_sync_fence_ring(struct intel_engine_cs *ring, + struct intel_context *ctx, + struct sync_fence **sync_fence, int *fence_fd) +{ + struct drm_i915_gem_request *req; + struct sync_fence *sfence; + char ring_name[6] = "ring0"; + int fd; + + if (!intel_ring_initialized(ring)) { + DRM_DEBUG("Ring not initialized!\n"); + return -EAGAIN; + } + + req = ring->outstanding_lazy_request; + if (WARN_ON_ONCE(!req)) + return -ECANCELED; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + DRM_DEBUG("No available file descriptor!\n"); + return fd; + } + + i915_gem_request_reference(req); + + fence_init(&req->fence, &i915_fence_ring_ops, &fence_lock, + ctx->user_handle, req->seqno); + + ring_name[4] += ring->id; + sfence = sync_fence_create_dma(ring_name, &req->fence); + if (!sfence) + goto err; + + *sync_fence = sfence; + *fence_fd = fd; + + return 0; + +err: + i915_gem_request_unreference(req); + put_unused_fd(fd); + return -ENOMEM; +} diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2e559f6e..c197770 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) -#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define 
DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */ __u64 flags; __u64 rsvd1; /* now used for context info */ - __u64 rsvd2; + __u64 rsvd2; /* now used for fence fd */ }; /** Resets the SO write offset registers for transform feedback on gen7. */ @@ -750,7 +750,9 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_HANDLE_LUT (1<<12) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) +#define I915_EXEC_FENCE_OUT (1<<13) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_FENCE_OUT<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 2.2.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx