Re: [PATCH v4 09/14] drm/panfrost: Make sure job interrupts are masked before resetting

Steven Price <steven.price@xxxxxxx> · Mon, 28 Jun 2021 10:47:53 +0100



On 28/06/2021 08:42, Boris Brezillon wrote:
> This is not yet needed because we let active jobs be killed during by
> the reset and we don't really bother making sure they can be restarted.
> But once we start adding soft-stop support, controlling when we deal
> with the remaining interrrupts and making sure those are handled before
> the reset is issued gets tricky if we keep job interrupts active.
> 
> Let's prepare for that and mask+flush job IRQs before issuing a reset.
> 
> v4:
> * Add a comment explaining why we WARN_ON(!job) in the irq handler
> * Keep taking the job_lock when evicting stalled jobs
> 
> v3:
> * New patch
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>

Reviewed-by: Steven Price <steven.price@xxxxxxx>

> ---
>  drivers/gpu/drm/panfrost/panfrost_job.c | 27 ++++++++++++++++++++-----
>  1 file changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 98193a557a2d..4bd4d11377b7 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -34,6 +34,7 @@ struct panfrost_queue_state {
>  struct panfrost_job_slot {
>  	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
>  	spinlock_t job_lock;
> +	int irq;
>  };
>  
>  static struct panfrost_job *
> @@ -400,6 +401,15 @@ static void panfrost_reset(struct panfrost_device *pfdev,
>  	if (bad)
>  		drm_sched_increase_karma(bad);
>  
> +	/* Mask job interrupts and synchronize to make sure we won't be
> +	 * interrupted during our reset.
> +	 */
> +	job_write(pfdev, JOB_INT_MASK, 0);
> +	synchronize_irq(pfdev->js->irq);
> +
> +	/* Schedulers are stopped and interrupts are masked+flushed, we don't
> +	 * need to protect the 'evict unfinished jobs' lock with the job_lock.
> +	 */
>  	spin_lock(&pfdev->js->job_lock);
>  	for (i = 0; i < NUM_JOB_SLOTS; i++) {
>  		if (pfdev->jobs[i]) {
> @@ -502,7 +512,14 @@ static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
>  			struct panfrost_job *job;
>  
>  			job = pfdev->jobs[j];
> -			/* Only NULL if job timeout occurred */
> +			/* The only reason this job could be NULL is if the
> +			 * job IRQ handler is called just after the
> +			 * in-flight job eviction in the reset path, and
> +			 * this shouldn't happen because the job IRQ has
> +			 * been masked and synchronized when this eviction
> +			 * happens.
> +			 */
> +			WARN_ON(!job);
>  			if (job) {
>  				pfdev->jobs[j] = NULL;
>  
> @@ -562,7 +579,7 @@ static void panfrost_reset_work(struct work_struct *work)
>  int panfrost_job_init(struct panfrost_device *pfdev)
>  {
>  	struct panfrost_job_slot *js;
> -	int ret, j, irq;
> +	int ret, j;
>  
>  	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
>  
> @@ -572,11 +589,11 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>  
>  	spin_lock_init(&js->job_lock);
>  
> -	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
> -	if (irq <= 0)
> +	js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
> +	if (js->irq <= 0)
>  		return -ENODEV;
>  
> -	ret = devm_request_threaded_irq(pfdev->dev, irq,
> +	ret = devm_request_threaded_irq(pfdev->dev, js->irq,
>  					panfrost_job_irq_handler,
>  					panfrost_job_irq_handler_thread,
>  					IRQF_SHARED, KBUILD_MODNAME "-job",
>