From: Sharat Masetty <smasetty@xxxxxxxxxxxxxx> There is a race condition issue between the IRQ context trying to trigger preemption and the user context trying to submit commands to the GPU. The check in a5xx_flush() API only updates the wptr if the GPU is not in preemption. In the cases where we move from PREEMPT_START to PREEMPT_NONE there is a small window where the preempt state is still in START but the CPU context switches to the user thread which is in the a5xx_flush() call to update the wptr, but fails to update the wptr to the GPU since the preempt state is not PREEMPT_NONE. This leads to a GPU stall. Introduce a new intermediate state PREEMPT_ABORT and change preempt_trigger() to use gpu's current ring instead of the ring retrieved from get_next_ring() while in this state. Signed-off-by: Sharat Masetty <smasetty@xxxxxxxxxxxxxx> Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> --- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 8 +++++++- drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index f062a90..6fb8c2f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -56,6 +56,8 @@ struct a5xx_gpu { * PREEMPT_NONE - no preemption in progress. Next state START. * PREEMPT_START - The trigger is evaulating if preemption is possible. Next * states: TRIGGERED, NONE + * PREEMPT_ABORT - An intermediate state before moving back to NONE. Next + * state: NONE. * PREEMPT_TRIGGERED: A preemption has been executed on the hardware. Next * states: FAULTED, PENDING * PREEMPT_FAULTED: A preemption timed out (never completed). This will trigger @@ -67,6 +69,7 @@ struct a5xx_gpu { enum preempt_state { PREEMPT_NONE = 0, PREEMPT_START, + PREEMPT_ABORT, PREEMPT_TRIGGERED, PREEMPT_FAULTED, PREEMPT_PENDING, @@ -154,7 +157,10 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, /* Return true if we are in a preempt state */ static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu) { - return !(atomic_read(&a5xx_gpu->preempt_state) == PREEMPT_NONE); + int preempt_state = atomic_read(&a5xx_gpu->preempt_state); + + return !(preempt_state == PREEMPT_NONE || + preempt_state == PREEMPT_ABORT); } #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 6a3767d..40f4840 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -122,9 +122,20 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) * one do nothing except to update the wptr to the latest and greatest */ if (!ring || (a5xx_gpu->cur_ring == ring)) { - update_wptr(gpu, ring); - - /* Set the state back to NONE */ + /* + * Its possible that while a preemption request is in progress + * from an irq context, a user context trying to submit might + * fail to update the write pointer, because it determines + * that the preempt state is not PREEMPT_NONE. + * + * Close the race by introducing an intermediate + * state PREEMPT_ABORT to let the submit path + * know that the ringbuffer is not going to change + * and can safely update the write pointer. + */ + + set_preempt_state(a5xx_gpu, PREEMPT_ABORT); + update_wptr(gpu, a5xx_gpu->cur_ring); set_preempt_state(a5xx_gpu, PREEMPT_NONE); return; } -- 1.9.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel