From: Daniel Vetter <daniel.vetter@xxxxxxxx> The userspace might need some sort of cache coherency management e.g. when CPU and GPU domains are being accessed through dma-buf at the same time. To circumvent this problem there are begin/end coherency markers, that forward directly to existing dma-buf device drivers vfunc hooks. Userspace can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be used like following: - mmap dma-buf fd - for each drawing/upload cycle in CPU 1. SYNC_START ioctl 2. read/write to mmap area or a 2d sub-region of it 3. SYNC_END ioctl. - munamp once you don't need the buffer any more v2 (Tiago): Fix header file type names (u64 -> __u64) v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end dma-buf functions. Check for overflows in start/length. v4 (Tiago): use 2d regions for sync. Cc: Sumit Semwal <sumit.semwal@xxxxxxxxxx> Signed-off-by: Daniel Vetter <daniel.vetter@xxxxxxxxx> Signed-off-by: Tiago Vignatti <tiago.vignatti@xxxxxxxxx> --- I'm unable to test the 2d sync properly, because begin/end access in i915 don't track mapped range for nothing. Documentation/dma-buf-sharing.txt | 13 ++++++ drivers/dma-buf/dma-buf.c | 77 ++++++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_gem_dmabuf.c | 6 ++- include/linux/dma-buf.h | 20 +++++---- include/uapi/linux/dma-buf.h | 57 +++++++++++++++++++++++++ 5 files changed, 150 insertions(+), 23 deletions(-) create mode 100644 include/uapi/linux/dma-buf.h diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt index 480c8de..8061ac0 100644 --- a/Documentation/dma-buf-sharing.txt +++ b/Documentation/dma-buf-sharing.txt @@ -355,6 +355,19 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases: No special interfaces, userspace simply calls mmap on the dma-buf fd. + Also, the userspace might need some sort of cache coherency management e.g. + when CPU and GPU domains are being accessed through dma-buf at the same + time. To circumvent this problem there are begin/end coherency markers, that + forward directly to existing dma-buf device drivers vfunc hooks. Userspace + can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The + sequence would be used like following: + - mmap dma-buf fd + - for each drawing/upload cycle in CPU + 1. SYNC_START ioctl + 2. read/write to mmap area or a 2d sub-region of it + 3. SYNC_END ioctl. + - munamp once you don't need the buffer any more + 2. Supporting existing mmap interfaces in importers Similar to the motivation for kernel cpu access it is again important that diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 155c146..b6a4a06 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -251,11 +251,55 @@ out: return events; } +static long dma_buf_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct dma_buf *dmabuf; + struct dma_buf_sync sync; + enum dma_data_direction direction; + + dmabuf = file->private_data; + + if (!is_dma_buf_file(file)) + return -EINVAL; + + if (cmd != DMA_BUF_IOCTL_SYNC) + return -ENOTTY; + + if (copy_from_user(&sync, (void __user *) arg, sizeof(sync))) + return -EFAULT; + + if (sync.flags & DMA_BUF_SYNC_RW) + direction = DMA_BIDIRECTIONAL; + else if (sync.flags & DMA_BUF_SYNC_READ) + direction = DMA_FROM_DEVICE; + else if (sync.flags & DMA_BUF_SYNC_WRITE) + direction = DMA_TO_DEVICE; + else + return -EINVAL; + + if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK) + return -EINVAL; + + /* TODO: check for overflowing the buffer's size - how so, checking region by + * region here? Maybe need to check for the other parameters as well. */ + + if (sync.flags & DMA_BUF_SYNC_END) + dma_buf_end_cpu_access(dmabuf, sync.stride_bytes, sync.bytes_per_pixel, + sync.num_regions, sync.regions, direction); + else + dma_buf_begin_cpu_access(dmabuf, sync.stride_bytes, sync.bytes_per_pixel, + sync.num_regions, sync.regions, direction); + + return 0; +} + static const struct file_operations dma_buf_fops = { .release = dma_buf_release, .mmap = dma_buf_mmap_internal, .llseek = dma_buf_llseek, .poll = dma_buf_poll, + .unlocked_ioctl = dma_buf_ioctl, }; /* @@ -539,14 +583,17 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment); * preparations. Coherency is only guaranteed in the specified range for the * specified access direction. * @dmabuf: [in] buffer to prepare cpu access for. - * @start: [in] start of range for cpu access. - * @len: [in] length of range for cpu access. - * @direction: [in] length of range for cpu access. + * @stride_bytes: [in] stride in bytes for cpu access. + * @bytes_per_pixel: [in] bytes per pixel of the region for cpu access. + * @num_regions: [in] number of regions. + * @region: [in] vector of 2-dimensional regions for cpu access. + * @direction: [in] direction of range for cpu access. * * Can return negative error values, returns 0 on success. */ -int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len, - enum dma_data_direction direction) +int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes, + size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[], + enum dma_data_direction direction) { int ret = 0; @@ -554,8 +601,8 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len, return -EINVAL; if (dmabuf->ops->begin_cpu_access) - ret = dmabuf->ops->begin_cpu_access(dmabuf, start, - len, direction); + ret = dmabuf->ops->begin_cpu_access(dmabuf, stride_bytes, bytes_per_pixel, + num_regions, regions, direction); return ret; } @@ -567,19 +614,23 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access); * actions. Coherency is only guaranteed in the specified range for the * specified access direction. * @dmabuf: [in] buffer to complete cpu access for. - * @start: [in] start of range for cpu access. - * @len: [in] length of range for cpu access. - * @direction: [in] length of range for cpu access. + * @stride_bytes: [in] stride in bytes for cpu access. + * @bytes_per_pixel: [in] bytes per pixel of the region for cpu access. + * @num_regions: [in] number of regions. + * @regions: [in] vector of 2-dimensional regions for cpu access. + * @direction: [in] direction of range for cpu access. * * This call must always succeed. */ -void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len, - enum dma_data_direction direction) +void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes, + size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[], + enum dma_data_direction direction) { WARN_ON(!dmabuf); if (dmabuf->ops->end_cpu_access) - dmabuf->ops->end_cpu_access(dmabuf, start, len, direction); + dmabuf->ops->end_cpu_access(dmabuf, stride_bytes, bytes_per_pixel, + num_regions, regions, direction); } EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 95cbfff..e5bb7a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -212,7 +212,8 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * return 0; } -static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction) +static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel, + size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; @@ -228,7 +229,8 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size return ret; } -static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction) +static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel, + size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index f98bd70..ed457cb 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -33,6 +33,8 @@ #include <linux/fence.h> #include <linux/wait.h> +#include <uapi/linux/dma-buf.h> + struct device; struct dma_buf; struct dma_buf_attachment; @@ -93,10 +95,10 @@ struct dma_buf_ops { /* after final dma_buf_put() */ void (*release)(struct dma_buf *); - int (*begin_cpu_access)(struct dma_buf *, size_t, size_t, - enum dma_data_direction); - void (*end_cpu_access)(struct dma_buf *, size_t, size_t, - enum dma_data_direction); + int (*begin_cpu_access)(struct dma_buf *, size_t, size_t, size_t, + struct dma_buf_sync_region [], enum dma_data_direction); + void (*end_cpu_access)(struct dma_buf *, size_t, size_t, size_t, + struct dma_buf_sync_region [], enum dma_data_direction); void *(*kmap_atomic)(struct dma_buf *, unsigned long); void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *); void *(*kmap)(struct dma_buf *, unsigned long); @@ -224,10 +226,12 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, enum dma_data_direction); void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); -int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len, - enum dma_data_direction dir); -void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len, - enum dma_data_direction dir); +int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, + size_t bytes_per_pixel, size_t num_regions, + struct dma_buf_sync_region regions[], enum dma_data_direction dir); +void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, + size_t bytes_per_pixel, size_t num_regions, + struct dma_buf_sync_region regions[], enum dma_data_direction dir); void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long); void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *); void *dma_buf_kmap(struct dma_buf *, unsigned long); diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h new file mode 100644 index 0000000..c63b578 --- /dev/null +++ b/include/uapi/linux/dma-buf.h @@ -0,0 +1,57 @@ +/* + * Framework for buffer objects that can be shared across devices/subsystems. + * + * Copyright(C) 2015 Intel Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _DMA_BUF_UAPI_H_ +#define _DMA_BUF_UAPI_H_ + +enum dma_buf_sync_flags { + DMA_BUF_SYNC_READ = (1 << 0), + DMA_BUF_SYNC_WRITE = (2 << 0), + DMA_BUF_SYNC_RW = (3 << 0), + DMA_BUF_SYNC_START = (0 << 2), + DMA_BUF_SYNC_END = (1 << 2), + + DMA_BUF_SYNC_VALID_FLAGS_MASK = DMA_BUF_SYNC_RW | + DMA_BUF_SYNC_END +}; + +/* 2-dimensional region, used for multi-range flush. This can be used to + * synchronize the CPU by batching several sub-regions, smaller than the + * mapped dma-buf, all at once. */ +struct dma_buf_sync_region { + __u64 x; + __u64 y; + __u64 width; + __u64 height; +}; + +/* begin/end dma-buf functions used for userspace mmap. */ +struct dma_buf_sync { + enum dma_buf_sync_flags flags; + + __u64 stride_bytes; + __u32 bytes_per_pixel; + __u32 num_regions; + + struct dma_buf_sync_region regions[]; +}; + +#define DMA_BUF_BASE 'b' +#define DMA_BUF_IOCTL_SYNC _IOWR(DMA_BUF_BASE, 0, struct dma_buf_sync) + +#endif -- 2.1.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx