Add a work pool to the reset domain. The work pool is used to schedule
any task in the reset domain. If a successful reset of the domain is
indicated by a flag in the reset context, all queued work items are
drained; their work handlers won't be executed.

Signed-off-by: Lijo Lazar <lijo.lazar@xxxxxxx>
---
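Note (not part of the commit, everything between '---' and the diffstat is
dropped by git am): a minimal, hypothetical usage sketch of the new
interface. Only amdgpu_reset_schedule_work(), the amdgpu_reset_work_func_t
signature and the AMDGPU_RESET_SCHEDULE_NOW / AMDGPU_RESET_CANCEL_ALL flag
bits come from this patch; the handler, the caller and their names below
are illustrative assumptions only.

/* Hypothetical handler passed to amdgpu_reset_schedule_work() */
static void example_reset_work_handler(struct amdgpu_reset_context *reset_context)
{
	/* ... perform the reset for reset_context->reset_req_dev ... */

	/*
	 * On success, ask the domain to cancel and drain all work still
	 * queued, without running their handlers.
	 */
	reset_context->flags |= (1U << AMDGPU_RESET_CANCEL_ALL);
}

/* Hypothetical caller */
static int example_schedule_reset(struct amdgpu_device *adev)
{
	struct amdgpu_reset_context reset_context = { 0 };

	reset_context.reset_req_dev = adev;
	/* Optional: wait for the handler instead of returning immediately */
	reset_context.flags |= (1U << AMDGPU_RESET_SCHEDULE_NOW);

	/* -EBUSY means all AMDGPU_MAX_RESET_WORK slots are in use */
	return amdgpu_reset_schedule_work(adev, &reset_context,
					  example_reset_work_handler);
}

The handler receives a copy of the caller's context (amdgpu_reset_init_work()
does a memcpy), so setting AMDGPU_RESET_CANCEL_ALL from inside the handler is
what puts the domain into drain mode.
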
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 104 +++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h |  22 +++
 2 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 02d874799c16..713362a60c9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -117,6 +117,51 @@ void amdgpu_reset_destroy_reset_domain(struct kref *ref)
 	kvfree(reset_domain);
 }
 
+static void amdgpu_reset_domain_cancel_all_work(struct work_struct *work)
+{
+	struct amdgpu_reset_domain *reset_domain =
+		container_of(work, struct amdgpu_reset_domain, clear);
+	int i;
+
+	for (i = 0; i < AMDGPU_MAX_RESET_WORK; ++i)
+		if (atomic_cmpxchg(&reset_domain->work[i].in_use, 1, 0))
+			cancel_work(&reset_domain->work[i].work);
+
+	drain_workqueue(reset_domain->wq);
+	reset_domain->drain = false;
+}
+
+static void amdgpu_reset_work_handler(struct work_struct *work)
+{
+	struct amdgpu_reset_work *reset_work =
+		container_of(work, struct amdgpu_reset_work, work);
+
+	/* Don't do anything if reset domain is in drain mode */
+	if (reset_work->domain->drain)
+		return;
+
+	reset_work->handler(&reset_work->context);
+	if (reset_work->context.flags & (1U << AMDGPU_RESET_CANCEL_ALL)) {
+		reset_work->domain->drain = true;
+		schedule_work(&reset_work->domain->clear);
+	}
+
+	atomic_set(&reset_work->in_use, 0);
+}
+
+static void
+amdgpu_reset_init_work_pool(struct amdgpu_reset_domain *reset_domain)
+{
+	int i;
+
+	for (i = 0; i < AMDGPU_MAX_RESET_WORK; ++i) {
+		INIT_WORK(&reset_domain->work[i].work,
+			  amdgpu_reset_work_handler);
+		atomic_set(&reset_domain->work[i].in_use, 0);
+		reset_domain->work[i].domain = reset_domain;
+	}
+}
+
 struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
 							     char *wq_name)
 {
@@ -139,6 +184,8 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
 
 	}
 
+	INIT_WORK(&reset_domain->clear, amdgpu_reset_domain_cancel_all_work);
+	amdgpu_reset_init_work_pool(reset_domain);
 	atomic_set(&reset_domain->in_gpu_reset, 0);
 	atomic_set(&reset_domain->reset_res, 0);
 	init_rwsem(&reset_domain->sem);
@@ -152,12 +199,67 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
 	down_write(&reset_domain->sem);
 }
 
-
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
 {
 	atomic_set(&reset_domain->in_gpu_reset, 0);
 	up_write(&reset_domain->sem);
 }
 
+static int
+amdgpu_reset_domain_get_work(struct amdgpu_reset_domain *reset_domain,
+			     struct amdgpu_reset_work **reset_work)
+{
+	int i;
+	if (!reset_work)
+		return -EINVAL;
+
+	*reset_work = NULL;
+	for (i = 0; i < AMDGPU_MAX_RESET_WORK; ++i) {
+		if (!atomic_cmpxchg(&reset_domain->work[i].in_use, 0, 1)) {
+			*reset_work = &reset_domain->work[i];
+			return 0;
+		}
+	}
+	/* All resources occupied */
+
+	return -EBUSY;
+}
+
+static void amdgpu_reset_init_work(struct amdgpu_reset_work *reset_work,
+				   struct amdgpu_reset_context *reset_context,
+				   amdgpu_reset_work_func_t reset_work_handler)
+{
+	memcpy(&reset_work->context, reset_context, sizeof(*reset_context));
+	reset_work->handler = reset_work_handler;
+}
+
+int amdgpu_reset_schedule_work(struct amdgpu_device *adev,
+			       struct amdgpu_reset_context *reset_context,
+			       amdgpu_reset_work_func_t reset_work_handler)
+{
+	struct amdgpu_reset_work *reset_work;
+	int ret;
+
+	if (!reset_context || !reset_context->reset_req_dev ||
+	    !reset_work_handler)
+		return -EINVAL;
+
+	ret = amdgpu_reset_domain_get_work(adev->reset_domain, &reset_work);
+
+	if (ret)
+		return ret;
+
+	if (!ret) {
+		amdgpu_reset_init_work(reset_work, reset_context,
+				       reset_work_handler);
+
+		queue_work(adev->reset_domain->wq, &reset_work->work);
+
+		if (reset_context->flags & (1U << AMDGPU_RESET_SCHEDULE_NOW))
+			flush_work(&reset_work->work);
+	}
+
+	return ret;
+}
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 471d789b33a5..d1393050d3ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -27,12 +27,16 @@
 #include "amdgpu.h"
 
 #define AMDGPU_RESET_MAX_HANDLERS 5
+#define AMDGPU_MAX_RESET_WORK 5
 
 enum AMDGPU_RESET_FLAGS {
 
 	AMDGPU_NEED_FULL_RESET = 0,
 	AMDGPU_SKIP_HW_RESET = 1,
 	AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
+	AMDGPU_RESET_XCP = 3,
+	AMDGPU_RESET_SCHEDULE_NOW = 4,
+	AMDGPU_RESET_CANCEL_ALL = 5,
 };
 
 struct amdgpu_reset_context {
@@ -80,13 +84,28 @@ enum amdgpu_reset_domain_type {
 	XGMI_HIVE
 };
 
+typedef void (*amdgpu_reset_work_func_t)(
+	struct amdgpu_reset_context *reset_context);
+
+struct amdgpu_reset_work {
+	struct work_struct work;
+	struct amdgpu_reset_context context;
+	struct amdgpu_reset_domain *domain;
+	atomic_t in_use;
+
+	amdgpu_reset_work_func_t handler;
+};
+
 struct amdgpu_reset_domain {
 	struct kref refcount;
 	struct workqueue_struct *wq;
 	enum amdgpu_reset_domain_type type;
+	struct amdgpu_reset_work work[AMDGPU_MAX_RESET_WORK];
+	struct work_struct clear;
 	struct rw_semaphore sem;
 	atomic_t in_gpu_reset;
 	atomic_t reset_res;
+	bool drain;
 };
 
 
@@ -129,6 +148,9 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
+int amdgpu_reset_schedule_work(struct amdgpu_device *adev,
+			       struct amdgpu_reset_context *reset_context,
+			       amdgpu_reset_work_func_t handler);
 
 #define for_each_handler(i, handler, reset_ctl)  \
 	for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
-- 
2.25.1