On 2023-02-28 03:33, Christian König wrote: > This adds the infrastructure for an execution context for GEM buffers > which is similar to the existinc TTMs execbuf util and intended to replace > it in the long term. > > The basic functionality is that we abstracts the necessary loop to lock > many different GEM buffers with automated deadlock and duplicate handling. > > v2: drop xarray and use dynamic resized array instead, the locking > overhead is unecessary and measureable. > v3: drop duplicate tracking, radeon is really the only one needing that. > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > --- > Documentation/gpu/drm-mm.rst | 12 ++ > drivers/gpu/drm/Kconfig | 6 + > drivers/gpu/drm/Makefile | 2 + > drivers/gpu/drm/drm_exec.c | 249 +++++++++++++++++++++++++++++++++++ > include/drm/drm_exec.h | 115 ++++++++++++++++ > 5 files changed, 384 insertions(+) > create mode 100644 drivers/gpu/drm/drm_exec.c > create mode 100644 include/drm/drm_exec.h > > diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst > index a79fd3549ff8..a52e6f4117d6 100644 > --- a/Documentation/gpu/drm-mm.rst > +++ b/Documentation/gpu/drm-mm.rst > @@ -493,6 +493,18 @@ DRM Sync Objects > .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c > :export: > > +DRM Execution context > +===================== > + > +.. kernel-doc:: drivers/gpu/drm/drm_exec.c > + :doc: Overview > + > +.. kernel-doc:: include/drm/drm_exec.h > + :internal: > + > +.. kernel-doc:: drivers/gpu/drm/drm_exec.c > + :export: > + > GPU Scheduler > ============= > > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig > index 17d252dc25e2..84a5fc28c48d 100644 > --- a/drivers/gpu/drm/Kconfig > +++ b/drivers/gpu/drm/Kconfig > @@ -200,6 +200,12 @@ config DRM_TTM > GPU memory types. Will be enabled automatically if a device driver > uses it. > > +config DRM_EXEC > + tristate > + depends on DRM > + help > + Execution context for command submissions > + > config DRM_BUDDY > tristate > depends on DRM > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile > index ab4460fcd63f..d40defbb0347 100644 > --- a/drivers/gpu/drm/Makefile > +++ b/drivers/gpu/drm/Makefile > @@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o > # > # Memory-management helpers > # > +# > +obj-$(CONFIG_DRM_EXEC) += drm_exec.o > > obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o > > diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c > new file mode 100644 > index 000000000000..df546cc5a227 > --- /dev/null > +++ b/drivers/gpu/drm/drm_exec.c > @@ -0,0 +1,249 @@ > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ > + > +#include <drm/drm_exec.h> > +#include <drm/drm_gem.h> > +#include <linux/dma-resv.h> > + > +/** > + * DOC: Overview > + * > + * This component mainly abstracts the retry loop necessary for locking > + * multiple GEM objects while preparing hardware operations (e.g. command > + * submissions, page table updates etc..). > + * > + * If a contention is detected while locking a GEM object the cleanup procedure > + * unlocks all previously locked GEM objects and locks the contended one first > + * before locking any further objects. > + * > + * After an object is locked fences slots can optionally be reserved on the > + * dma_resv object inside the GEM object. > + * > + * A typical usage pattern should look like this:: > + * > + * struct drm_gem_object *obj; > + * struct drm_exec exec; > + * unsigned long index; > + * int ret; > + * > + * drm_exec_init(&exec, true); > + * drm_exec_while_not_all_locked(&exec) { > + * ret = drm_exec_prepare_obj(&exec, boA, 1); > + * drm_exec_continue_on_contention(&exec); > + * if (ret) > + * goto error; > + * > + * ret = drm_exec_lock(&exec, boB, 1); > + * drm_exec_continue_on_contention(&exec); > + * if (ret) > + * goto error; > + * } > + * > + * drm_exec_for_each_locked_object(&exec, index, obj) { > + * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ); > + * ... > + * } > + * drm_exec_fini(&exec); Maybe add the error: label here to show how recovery is to be had. > + * > + * See struct dma_exec for more details. > + */ > + > +/* Dummy value used to initially enter the retry loop */ > +#define DRM_EXEC_DUMMY (void*)~0 > + > +/* Unlock all objects and drop references */ > +static void drm_exec_unlock_all(struct drm_exec *exec) > +{ > + struct drm_gem_object *obj; > + unsigned long index; > + > + drm_exec_for_each_locked_object(exec, index, obj) { > + dma_resv_unlock(obj->resv); > + drm_gem_object_put(obj); > + } > + > + if (exec->prelocked) { > + dma_resv_unlock(exec->prelocked->resv); > + drm_gem_object_put(exec->prelocked); > + exec->prelocked = NULL; > + } > +} > + > +/** > + * drm_exec_init - initialize a drm_exec object > + * @exec: the drm_exec object to initialize > + * @interruptible: if locks should be acquired interruptible > + * > + * Initialize the object and make sure that we can track locked and duplicate > + * objects. > + */ > +void drm_exec_init(struct drm_exec *exec, bool interruptible) > +{ > + exec->interruptible = interruptible; > + exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); > + > + /* If allocation here fails, just delay that till the first use */ > + exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0; I didn't see a subsequent allocation for objects. Does it make sense to continue here if we weren't able to allocate? It seems that the "first use" is most likely immediately after drm_exec_init(), and if kmalloc() failed, perhaps things are only about to get worse. (Pet peeve :-) "till" --> "until".) > + exec->num_objects = 0; > + exec->contended = DRM_EXEC_DUMMY; > + exec->prelocked = NULL; > +} > +EXPORT_SYMBOL(drm_exec_init); > + > +/** > + * drm_exec_fini - finalize a drm_exec object > + * @exec: the drm_exec object to finilize > + * > + * Unlock all locked objects, drop the references to objects and free all memory > + * used for tracking the state. > + */ > +void drm_exec_fini(struct drm_exec *exec) > +{ > + drm_exec_unlock_all(exec); > + kvfree(exec->objects); > + if (exec->contended != DRM_EXEC_DUMMY) { > + drm_gem_object_put(exec->contended); > + ww_acquire_fini(&exec->ticket); > + } > +} > +EXPORT_SYMBOL(drm_exec_fini); > + > +/** > + * drm_exec_cleanup - cleanup when contention is detected > + * @exec: the drm_exec object to cleanup > + * > + * Cleanup the current state and return true if we should stay inside the retry > + * loop, false if there wasn't any contention detected and we can keep the > + * objects locked. > + */ > +bool drm_exec_cleanup(struct drm_exec *exec) > +{ > + if (likely(!exec->contended)) { > + ww_acquire_done(&exec->ticket); > + return false; > + } > + > + if (likely(exec->contended == DRM_EXEC_DUMMY)) { > + exec->contended = NULL; > + ww_acquire_init(&exec->ticket, &reservation_ww_class); > + return true; > + } > + > + drm_exec_unlock_all(exec); > + exec->num_objects = 0; > + return true; > +} > +EXPORT_SYMBOL(drm_exec_cleanup); > + > +/* Track the locked object in the xa and reserve fences */ > +static int drm_exec_obj_locked(struct drm_exec *exec, > + struct drm_gem_object *obj) > +{ > + if (unlikely(exec->num_objects == exec->max_objects)) { > + size_t size = exec->max_objects * sizeof(void *); > + void *tmp; > + > + tmp = kvrealloc(exec->objects, size, size + PAGE_SIZE, > + GFP_KERNEL); > + if (!tmp) > + return -ENOMEM; > + > + exec->objects = tmp; > + exec->max_objects += PAGE_SIZE / sizeof(void *); > + } > + drm_gem_object_get(obj); > + exec->objects[exec->num_objects++] = obj; > + > + return 0; > +} > + > +/* Make sure the contended object is locked first */ > +static int drm_exec_lock_contended(struct drm_exec *exec) > +{ > + struct drm_gem_object *obj = exec->contended; > + int ret; > + > + if (likely(!obj)) > + return 0; > + > + if (exec->interruptible) { > + ret = dma_resv_lock_slow_interruptible(obj->resv, > + &exec->ticket); > + if (unlikely(ret)) > + goto error_dropref; > + } else { > + dma_resv_lock_slow(obj->resv, &exec->ticket); > + } > + > + ret = drm_exec_obj_locked(exec, obj); > + if (unlikely(ret)) { > + dma_resv_unlock(obj->resv); > + goto error_dropref; > + } > + > + swap(exec->prelocked, obj); > + > +error_dropref: > + /* Always cleanup the contention so that error handling can kick in */ > + drm_gem_object_put(obj); > + exec->contended = NULL; > + return ret; > +} > + > +/** > + * drm_exec_prepare_obj - prepare a GEM object for use > + * @exec: the drm_exec object with the state > + * @obj: the GEM object to prepare > + * @num_fences: how many fences to reserve > + * > + * Prepare a GEM object for use by locking it and reserving fence slots. All > + * succesfully locked objects are put into the locked container. Duplicates > + * detected as well and automatically moved into the duplicates container. > + * > + * Returns: -EDEADLK if a contention is detected, -ENOMEM when memory > + * allocation failed and zero for success. > + */ > +int drm_exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, > + unsigned int num_fences) > +{ > + int ret; > + > + ret = drm_exec_lock_contended(exec); > + if (unlikely(ret)) > + return ret; > + > + if (exec->prelocked == obj) { > + drm_gem_object_put(exec->prelocked); > + exec->prelocked = NULL; > + > + return dma_resv_reserve_fences(obj->resv, num_fences); > + } > + > + if (exec->interruptible) > + ret = dma_resv_lock_interruptible(obj->resv, &exec->ticket); > + else > + ret = dma_resv_lock(obj->resv, &exec->ticket); > + > + if (unlikely(ret == -EDEADLK)) { > + drm_gem_object_get(obj); > + exec->contended = obj; > + return -EDEADLK; > + } > + > + if (unlikely(ret)) > + return ret; > + > + ret = drm_exec_obj_locked(exec, obj); > + if (ret) > + goto error_unlock; > + > + /* Keep locked when reserving fences fails */ > + return dma_resv_reserve_fences(obj->resv, num_fences); > + > +error_unlock: > + dma_resv_unlock(obj->resv); > + return ret; > +} > +EXPORT_SYMBOL(drm_exec_prepare_obj); > + > +MODULE_DESCRIPTION("DRM execution context"); > +MODULE_LICENSE("Dual MIT/GPL"); > diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h > new file mode 100644 > index 000000000000..65e518c01db3 > --- /dev/null > +++ b/include/drm/drm_exec.h > @@ -0,0 +1,115 @@ > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ > + > +#ifndef __DRM_EXEC_H__ > +#define __DRM_EXEC_H__ > + > +#include <linux/ww_mutex.h> > + > +struct drm_gem_object; > + > +/** > + * struct drm_exec - Execution context > + */ > +struct drm_exec { > + /** > + * @interruptible: If locks should be taken interruptible > + */ > + bool interruptible; > + > + /** > + * @ticket: WW ticket used for acquiring locks > + */ > + struct ww_acquire_ctx ticket; > + > + /** > + * @num_objects: number of objects locked > + */ > + unsigned int num_objects; "num_objects" may be confused with the size of "objects" array. Maybe "num_locked" would be clearer? > + > + /** > + * @max_objects: maximum objects in array > + */ > + unsigned int max_objects; > + > + /** > + * @objects: array of the locked objects > + */ > + struct drm_gem_object **objects; > + > + /** > + * @contended: contended GEM object we backet of for > + */ > + struct drm_gem_object *contended; Perhaps "backed off for" in the comment? > + > + /** > + * @prelocked: already locked GEM object because of contention > + */ > + struct drm_gem_object *prelocked; Could this be a subset of the objects array as opposed to a single object? -- Regards, Luben