Support an extended version of the VIRTIO_GPU_CMD_SUBMIT_3D command that allows passing in-fence IDs to the host for waiting, removing the need for expensive host-guest round trips when waiting for fences on the guest side. Guest userspace must enable the new VIRTGPU_CONTEXT_PARAM_FENCE_PASSING context parameter and the host must support the new VIRTIO_GPU_F_FENCE_PASSING feature in order to activate fence passing for a given virtio-gpu context. An array of in-fence IDs is then prepended to the VIRTIO_GPU_CMD_SUBMIT_3D data, and the previously unused padding field of the command is reused to carry the number of in-fences. A new VIRTGPU_EXECBUF_SHARED_FENCE flag is added to the job submission UAPI and must be set by userspace if it wants to make the job's fence shareable with/on the host. Certain jobs won't want to share their fence; in particular, Venus will benefit from this flag. Link: https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1138 Link: https://gitlab.freedesktop.org/digetx/qemu/-/commits/native-context-iris Link: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4679609 Signed-off-by: Dmitry Osipenko <dmitry.osipenko@xxxxxxxxxxxxx> --- drivers/gpu/drm/virtio/virtgpu_drv.c | 1 + drivers/gpu/drm/virtio/virtgpu_drv.h | 11 ++- drivers/gpu/drm/virtio/virtgpu_fence.c | 15 +++- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 11 ++- drivers/gpu/drm/virtio/virtgpu_kms.c | 8 +- drivers/gpu/drm/virtio/virtgpu_submit.c | 99 ++++++++++++++++++++++++- drivers/gpu/drm/virtio/virtgpu_vq.c | 7 +- include/uapi/drm/virtgpu_drm.h | 3 + include/uapi/linux/virtio_gpu.h | 11 ++- 9 files changed, 152 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 644b8ee51009..544918bd38e9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -148,6 +148,7 @@ static unsigned int features[] = { VIRTIO_GPU_F_RESOURCE_UUID, VIRTIO_GPU_F_RESOURCE_BLOB, VIRTIO_GPU_F_CONTEXT_INIT, + VIRTIO_GPU_F_FENCE_PASSING, }; static 
struct virtio_driver virtio_gpu_driver = { .feature_table = features, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 8513b671f871..1dc503cb53de 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -149,6 +149,7 @@ struct virtio_gpu_fence { struct virtio_gpu_fence_event *e; struct virtio_gpu_fence_driver *drv; struct list_head node; + bool host_shareable; }; struct virtio_gpu_vbuffer { @@ -246,6 +247,7 @@ struct virtio_gpu_device { bool has_resource_blob; bool has_host_visible; bool has_context_init; + bool has_fence_passing; struct virtio_shm_region host_visible_region; struct drm_mm host_visible_mm; @@ -273,6 +275,7 @@ struct virtio_gpu_fpriv { uint32_t num_rings; uint64_t base_fence_ctx; uint64_t ring_idx_mask; + bool fence_passing_enabled; struct mutex context_lock; }; @@ -367,7 +370,9 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev, void *data, uint32_t data_size, uint32_t ctx_id, struct virtio_gpu_object_array *objs, - struct virtio_gpu_fence *fence); + struct virtio_gpu_fence *fence, + uint32_t cmd_size, + unsigned int num_in_fences); void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev, uint32_t ctx_id, uint64_t offset, uint32_t level, @@ -420,6 +425,9 @@ virtio_gpu_cmd_set_scanout_blob(struct virtio_gpu_device *vgdev, uint32_t width, uint32_t height, uint32_t x, uint32_t y); +void virtio_gpu_cmd_in_fence(struct virtio_gpu_device *vgdev, + uint32_t ctx_id, uint64_t fence_id); + /* virtgpu_display.c */ int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); @@ -439,6 +447,7 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev, struct virtio_gpu_fence *fence); void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev, u64 fence_id); +struct virtio_gpu_fence *to_virtio_gpu_fence(struct dma_fence *dma_fence); /* virtgpu_object.c */ void 
virtio_gpu_cleanup_object(struct virtio_gpu_object *bo); diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c index f28357dbde35..1fd3cfeca2f5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fence.c +++ b/drivers/gpu/drm/virtio/virtgpu_fence.c @@ -27,9 +27,6 @@ #include "virtgpu_drv.h" -#define to_virtio_gpu_fence(x) \ - container_of(x, struct virtio_gpu_fence, f) - static const char *virtio_gpu_get_driver_name(struct dma_fence *f) { return "virtio_gpu"; @@ -71,6 +68,14 @@ static const struct dma_fence_ops virtio_gpu_fence_ops = { .timeline_value_str = virtio_gpu_timeline_value_str, }; +struct virtio_gpu_fence *to_virtio_gpu_fence(struct dma_fence *dma_fence) +{ + if (dma_fence->ops != &virtio_gpu_fence_ops) + return NULL; + + return container_of(dma_fence, struct virtio_gpu_fence, f); +} + struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev, uint64_t base_fence_ctx, uint32_t ring_idx) @@ -122,6 +127,10 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev, cpu_to_le32(VIRTIO_GPU_FLAG_INFO_RING_IDX); cmd_hdr->ring_idx = (u8)fence->ring_idx; } + + if (fence->host_shareable) + cmd_hdr->flags |= + cpu_to_le32(VIRTIO_GPU_FLAG_FENCE_SHAREABLE); } void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev, diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index b24b11f25197..3028786c59cd 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -514,7 +514,8 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, return PTR_ERR(buf); virtio_gpu_cmd_submit(vgdev, buf, rc_blob->cmd_size, - vfpriv->ctx_id, NULL, NULL); + vfpriv->ctx_id, NULL, NULL, + rc_blob->cmd_size, 0); } if (guest_blob) @@ -642,6 +643,14 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev, vfpriv->ring_idx_mask = value; break; + case VIRTGPU_CONTEXT_PARAM_FENCE_PASSING: + if (!vgdev->has_fence_passing && 
value) { + ret = -EINVAL; + goto out_unlock; + } + + vfpriv->fence_passing_enabled = !!value; + break; default: ret = -EINVAL; goto out_unlock; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 5a3b5aaed1f3..9f4617a75edd 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -197,12 +197,16 @@ int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev) if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_CONTEXT_INIT)) { vgdev->has_context_init = true; } + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_FENCE_PASSING)) { + vgdev->has_fence_passing = true; + } - DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible", + DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible %cfence_passing", vgdev->has_virgl_3d ? '+' : '-', vgdev->has_edid ? '+' : '-', vgdev->has_resource_blob ? '+' : '-', - vgdev->has_host_visible ? '+' : '-'); + vgdev->has_host_visible ? '+' : '-', + vgdev->has_fence_passing ? '+' : '-'); DRM_INFO("features: %ccontext_init\n", vgdev->has_context_init ? 
'+' : '-'); diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c index 3c00135ead45..129d063029a6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_submit.c +++ b/drivers/gpu/drm/virtio/virtgpu_submit.c @@ -25,6 +25,11 @@ struct virtio_gpu_submit_post_dep { u64 point; }; +struct virtio_gpu_in_fence { + u64 id; + u32 context; +}; + struct virtio_gpu_submit { struct virtio_gpu_submit_post_dep *post_deps; unsigned int num_out_syncobjs; @@ -32,6 +37,9 @@ struct virtio_gpu_submit { struct drm_syncobj **in_syncobjs; unsigned int num_in_syncobjs; + struct virtio_gpu_in_fence *in_fences; + unsigned int num_in_fences; + struct virtio_gpu_object_array *buflist; struct drm_virtgpu_execbuffer *exbuf; struct virtio_gpu_fence *out_fence; @@ -41,6 +49,8 @@ struct virtio_gpu_submit { struct drm_file *file; int out_fence_fd; u64 fence_ctx; + u32 data_size; + u32 cmd_size; u32 ring_idx; void *buf; }; @@ -48,11 +58,44 @@ struct virtio_gpu_submit { static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit, struct dma_fence *in_fence) { + struct virtio_gpu_fence *fence = to_virtio_gpu_fence(in_fence); u32 context = submit->fence_ctx + submit->ring_idx; + struct virtio_gpu_in_fence *vfence, *in_fences; + u32 i; if (dma_fence_match_context(in_fence, context)) return 0; + if (fence && fence->host_shareable && + submit->vfpriv->fence_passing_enabled) { + /* + * Merge sync_file + syncobj in-fences to avoid sending more + * than one fence per-context to host. Use latest fence from + * the same context. 
+ */ + for (i = 0; i < submit->num_in_fences; i++) { + vfence = &submit->in_fences[i]; + + if (dma_fence_match_context(in_fence, vfence->context)) { + vfence->id = max(vfence->id, fence->fence_id); + return 0; + } + } + + in_fences = krealloc_array(submit->in_fences, + submit->num_in_fences + 1, + sizeof(*in_fences), GFP_KERNEL); + if (!in_fences) + return -ENOMEM; + + in_fences[submit->num_in_fences].id = fence->fence_id; + in_fences[submit->num_in_fences].context = context; + submit->in_fences = in_fences; + submit->num_in_fences++; + + return 0; + } + return dma_fence_wait(in_fence, true); } @@ -331,6 +374,7 @@ static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit) virtio_gpu_reset_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs); virtio_gpu_free_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs); virtio_gpu_free_post_deps(submit->post_deps, submit->num_out_syncobjs); + kfree(submit->in_fences); if (!IS_ERR(submit->buf)) kvfree(submit->buf); @@ -348,12 +392,51 @@ static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit) fput(submit->sync_file->file); } -static void virtio_gpu_submit(struct virtio_gpu_submit *submit) +static int virtio_gpu_attach_in_fences(struct virtio_gpu_submit *submit) { - virtio_gpu_cmd_submit(submit->vgdev, submit->buf, submit->exbuf->size, + size_t in_fences_size = sizeof(u64) * submit->num_in_fences; + size_t new_data_size = submit->data_size + in_fences_size; + void *buf = submit->buf; + u64 *in_fences; + unsigned int i; + + if (new_data_size < submit->data_size) + return -EINVAL; + + buf = kvrealloc(buf, submit->data_size, new_data_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memmove(buf + in_fences_size, buf, submit->data_size); + in_fences = buf; + + for (i = 0; i < submit->num_in_fences; i++) + in_fences[i] = cpu_to_le64(submit->in_fences[i].id); + + submit->data_size = new_data_size; + submit->buf = buf; + + return 0; +} + +static int virtio_gpu_submit(struct virtio_gpu_submit 
*submit) +{ + int err; + + if (submit->num_in_fences) { + err = virtio_gpu_attach_in_fences(submit); + if (err) + return err; + } + + virtio_gpu_cmd_submit(submit->vgdev, submit->buf, submit->data_size, submit->vfpriv->ctx_id, submit->buflist, - submit->out_fence); + submit->out_fence, submit->cmd_size, + submit->num_in_fences); + virtio_gpu_notify(submit->vgdev); + + return 0; } static void virtio_gpu_complete_submit(struct virtio_gpu_submit *submit) @@ -401,6 +484,12 @@ static int virtio_gpu_init_submit(struct virtio_gpu_submit *submit, } } + if ((exbuf->flags & VIRTGPU_EXECBUF_SHARED_FENCE) && + vfpriv->fence_passing_enabled && out_fence) + out_fence->host_shareable = true; + + submit->data_size = exbuf->size; + submit->cmd_size = exbuf->size; submit->out_fence = out_fence; submit->fence_ctx = fence_ctx; submit->ring_idx = ring_idx; @@ -527,7 +616,9 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, if (ret) goto cleanup; - virtio_gpu_submit(&submit); + ret = virtio_gpu_submit(&submit); + if (ret) + goto cleanup; /* * Set up usr-out data after submitting the job to optimize diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index b1a00c0c25a7..29d462b69bad 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -1079,7 +1079,9 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev, void *data, uint32_t data_size, uint32_t ctx_id, struct virtio_gpu_object_array *objs, - struct virtio_gpu_fence *fence) + struct virtio_gpu_fence *fence, + uint32_t cmd_size, + unsigned int num_in_fences) { struct virtio_gpu_cmd_submit *cmd_p; struct virtio_gpu_vbuffer *vbuf; @@ -1093,7 +1095,8 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev, cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_SUBMIT_3D); cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id); - cmd_p->size = cpu_to_le32(data_size); + cmd_p->size = cpu_to_le32(cmd_size); + cmd_p->num_in_fences = cpu_to_le32(num_in_fences); 
virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence); } diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index b1d0e56565bc..fd486fdf0441 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -52,10 +52,12 @@ extern "C" { #define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 #define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 #define VIRTGPU_EXECBUF_RING_IDX 0x04 +#define VIRTGPU_EXECBUF_SHARED_FENCE 0x08 #define VIRTGPU_EXECBUF_FLAGS (\ VIRTGPU_EXECBUF_FENCE_FD_IN |\ VIRTGPU_EXECBUF_FENCE_FD_OUT |\ VIRTGPU_EXECBUF_RING_IDX |\ + VIRTGPU_EXECBUF_SHARED_FENCE |\ 0) struct drm_virtgpu_map { @@ -198,6 +200,7 @@ struct drm_virtgpu_resource_create_blob { #define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001 #define VIRTGPU_CONTEXT_PARAM_NUM_RINGS 0x0002 #define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003 +#define VIRTGPU_CONTEXT_PARAM_FENCE_PASSING 0x0004 struct drm_virtgpu_context_set_param { __u64 param; __u64 value; diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index f556fde07b76..c3182c8255cf 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -65,6 +65,11 @@ */ #define VIRTIO_GPU_F_CONTEXT_INIT 4 +/* + * VIRTIO_GPU_CMD_SUBMIT_3D + */ +#define VIRTIO_GPU_F_FENCE_PASSING 5 + enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -133,6 +138,10 @@ enum virtio_gpu_shm_id { * of the command ring that needs to used when creating the fence */ #define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1) +/* + * The fence is shareable between host contexts if flag is set. + */ +#define VIRTIO_GPU_FLAG_FENCE_SHAREABLE (1 << 2) struct virtio_gpu_ctrl_hdr { __le32 type; @@ -304,7 +313,7 @@ struct virtio_gpu_ctx_resource { struct virtio_gpu_cmd_submit { struct virtio_gpu_ctrl_hdr hdr; __le32 size; - __le32 padding; + __le32 num_in_fences; }; #define VIRTIO_GPU_CAPSET_VIRGL 1 -- 2.41.0