[PATCH 5/8] drm/amdgpu: move priority decay logic into amd_sched_priority_ctr

deathsimple@xxxxxxxxxxx (Christian König) · Fri, 9 Jun 2017 12:54:42 +0200

Am 09.06.2017 um 00:06 schrieb Andres Rodriguez:
> So that it can be re-used.
>
> A priority counter can be used to track priority requests. If no active
> requests exists, the counter will default to ()->default_priority.
>
> The re-factored version is now allowed to decay below NORMAL. This
> allows us to create requests that lower the priority. This didn't
> matter for rings, as no priority below NORMAL has an effect.

NAK to the whole approach. This handling is amdgpu specific and not 
related to the gpu scheduler in any way.

Christian.

>
> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      |  69 +++++----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h      |   6 +-
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 122 ++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  21 +++++
>   4 files changed, 164 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 2d8b20a7..159ab0e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -142,115 +142,86 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
>   /**
>    * amdgpu_ring_undo - reset the wptr
>    *
>    * @ring: amdgpu_ring structure holding ring information
>    *
>    * Reset the driver's copy of the wptr (all asics).
>    */
>   void amdgpu_ring_undo(struct amdgpu_ring *ring)
>   {
>   	ring->wptr = ring->wptr_old;
>   
>   	if (ring->funcs->end_use)
>   		ring->funcs->end_use(ring);
>   }
>   
> +static void amdgpu_ring_priority_set(struct amd_sched_priority_ctr *p,
> +				     enum amd_sched_priority priority)
> +{
> +	struct amdgpu_ring *ring = container_of(p, struct amdgpu_ring,
> +						priority_ctr);
> +
> +	if (ring->funcs->set_priority)
> +		ring->funcs->set_priority(ring, priority);
> +}
> +
>   /**
>    * amdgpu_ring_priority_put - restore a ring's priority
>    *
>    * @ring: amdgpu_ring structure holding the information
>    * @priority: target priority
>    *
>    * Release a request for executing at @priority
>    */
>   void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
>   			      enum amd_sched_priority priority)
>   {
> -	int i;
> -
> -	if (!ring->funcs->set_priority)
> -		return;
> -
> -	if (atomic_dec_return(&ring->num_jobs[priority]) > 0)
> -		return;
> -
> -	/* no need to restore if the job is already at the lowest priority */
> -	if (priority == AMD_SCHED_PRIORITY_NORMAL)
> -		return;
> -
> -	mutex_lock(&ring->priority_mutex);
> -	/* something higher prio is executing, no need to decay */
> -	if (ring->priority > priority)
> -		goto out_unlock;
> -
> -	/* decay priority to the next level with a job available */
> -	for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) {
> -		if (i == AMD_SCHED_PRIORITY_NORMAL
> -				|| atomic_read(&ring->num_jobs[i])) {
> -			ring->priority = i;
> -			ring->funcs->set_priority(ring, i);
> -			break;
> -		}
> -	}
> -
> -out_unlock:
> -	mutex_unlock(&ring->priority_mutex);
> +	if (ring->funcs->set_priority)
> +		amd_sched_priority_ctr_put(&ring->priority_ctr, priority);
>   }
>   
>   /**
>    * amdgpu_ring_priority_get - change the ring's priority
>    *
>    * @ring: amdgpu_ring structure holding the information
>    * @priority: target priority
>    *
>    * Request a ring's priority to be raised to @priority (refcounted).
>    */
>   void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
>   			      enum amd_sched_priority priority)
>   {
> -	if (!ring->funcs->set_priority)
> -		return;
> -
> -	atomic_inc(&ring->num_jobs[priority]);
> -
> -	mutex_lock(&ring->priority_mutex);
> -	if (priority <= ring->priority)
> -		goto out_unlock;
> -
> -	ring->priority = priority;
> -	ring->funcs->set_priority(ring, priority);
> -
> -out_unlock:
> -	mutex_unlock(&ring->priority_mutex);
> +	if (ring->funcs->set_priority)
> +		amd_sched_priority_ctr_get(&ring->priority_ctr, priority);
>   }
>   
>   /**
>    * amdgpu_ring_init - init driver ring struct.
>    *
>    * @adev: amdgpu_device pointer
>    * @ring: amdgpu_ring structure holding ring information
>    * @max_ndw: maximum number of dw for ring alloc
>    * @nop: nop packet for this ring
>    *
>    * Initialize the driver information for the selected ring (all asics).
>    * Returns 0 on success, error on failure.
>    */
>   int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>   		     unsigned max_dw, struct amdgpu_irq_src *irq_src,
>   		     unsigned irq_type)
>   {
> -	int r, i;
> +	int r;
>   
>   	if (ring->adev == NULL) {
>   		if (adev->num_rings >= AMDGPU_MAX_RINGS)
>   			return -EINVAL;
>   
>   		ring->adev = adev;
>   		ring->idx = adev->num_rings++;
>   		adev->rings[ring->idx] = ring;
>   		r = amdgpu_fence_driver_init_ring(ring,
>   			amdgpu_sched_hw_submission);
>   		if (r)
>   			return r;
>   	}
>   
>   	if (ring->funcs->support_64bit_ptrs) {
> @@ -312,55 +283,55 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>   	/* Allocate ring buffer */
>   	if (ring->ring_obj == NULL) {
>   		r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
>   					    AMDGPU_GEM_DOMAIN_GTT,
>   					    &ring->ring_obj,
>   					    &ring->gpu_addr,
>   					    (void **)&ring->ring);
>   		if (r) {
>   			dev_err(adev->dev, "(%d) ring create failed\n", r);
>   			return r;
>   		}
>   		amdgpu_ring_clear_ring(ring);
>   	}
>   
>   	ring->max_dw = max_dw;
> -	ring->priority = AMD_SCHED_PRIORITY_NORMAL;
> -	mutex_init(&ring->priority_mutex);
>   	INIT_LIST_HEAD(&ring->lru_list);
>   	amdgpu_ring_lru_touch(adev, ring);
> -
> -	for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i)
> -		atomic_set(&ring->num_jobs[i], 0);
> +	amd_sched_priority_ctr_init(&ring->priority_ctr,
> +				    AMD_SCHED_PRIORITY_NORMAL,
> +				    amdgpu_ring_priority_set);
>   
>   	if (amdgpu_debugfs_ring_init(adev, ring)) {
>   		DRM_ERROR("Failed to register debugfs file for rings !\n");
>   	}
>   
>   	return 0;
>   }
>   
>   /**
>    * amdgpu_ring_fini - tear down the driver ring struct.
>    *
>    * @adev: amdgpu_device pointer
>    * @ring: amdgpu_ring structure holding ring information
>    *
>    * Tear down the driver information for the selected ring (all asics).
>    */
>   void amdgpu_ring_fini(struct amdgpu_ring *ring)
>   {
> +	amd_sched_priority_ctr_fini(&ring->priority_ctr);
> +
>   	ring->ready = false;
>   
>   	if (ring->funcs->support_64bit_ptrs) {
>   		amdgpu_wb_free_64bit(ring->adev, ring->cond_exe_offs);
>   		amdgpu_wb_free_64bit(ring->adev, ring->fence_offs);
>   		amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs);
>   		amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs);
>   	} else {
>   		amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
>   		amdgpu_wb_free(ring->adev, ring->fence_offs);
>   		amdgpu_wb_free(ring->adev, ring->rptr_offs);
>   		amdgpu_wb_free(ring->adev, ring->wptr_offs);
>   	}
>   
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 7348769..2a04c0d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -147,30 +147,31 @@ struct amdgpu_ring_funcs {
>   	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>   	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
>   	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
>   	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
>   	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
>   	/* priority functions */
>   	void (*set_priority) (struct amdgpu_ring *ring,
>   			      enum amd_sched_priority priority);
>   };
>   
>   struct amdgpu_ring {
>   	struct amdgpu_device		*adev;
>   	const struct amdgpu_ring_funcs	*funcs;
>   	struct amdgpu_fence_driver	fence_drv;
>   	struct amd_gpu_scheduler	sched;
> +	struct amd_sched_priority_ctr	priority_ctr;
>   	struct list_head		lru_list;
>   
>   	struct amdgpu_bo	*ring_obj;
>   	volatile uint32_t	*ring;
>   	unsigned		rptr_offs;
>   	u64			wptr;
>   	u64			wptr_old;
>   	unsigned		ring_size;
>   	unsigned		max_dw;
>   	int			count_dw;
>   	uint64_t		gpu_addr;
>   	uint64_t		ptr_mask;
>   	uint32_t		buf_mask;
>   	bool			ready;
>   	u32			idx;
> @@ -181,35 +182,30 @@ struct amdgpu_ring {
>   	uint64_t                mqd_gpu_addr;
>   	void                    *mqd_ptr;
>   	uint64_t                eop_gpu_addr;
>   	u32			doorbell_index;
>   	bool			use_doorbell;
>   	unsigned		wptr_offs;
>   	unsigned		fence_offs;
>   	uint64_t		current_ctx;
>   	char			name[16];
>   	unsigned		cond_exe_offs;
>   	u64			cond_exe_gpu_addr;
>   	volatile u32		*cond_exe_cpu_addr;
>   	unsigned		vm_inv_eng;
>   	bool			has_compute_vm_bug;
>   
> -	atomic_t		num_jobs[AMD_SCHED_PRIORITY_MAX];
> -	struct mutex		priority_mutex;
> -	/* protected by priority_mutex */
> -	int			priority;
> -
>   #if defined(CONFIG_DEBUG_FS)
>   	struct dentry *ent;
>   #endif
>   };
>   
>   int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
>   void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
>   void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
>   void amdgpu_ring_commit(struct amdgpu_ring *ring);
>   void amdgpu_ring_undo(struct amdgpu_ring *ring);
>   void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
>   			      enum amd_sched_priority priority);
>   void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
>   			      enum amd_sched_priority priority);
>   int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 38cea6f..a203736 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -488,30 +488,152 @@ int amd_sched_job_init(struct amd_sched_job *job,
>   	job->sched = sched;
>   	job->s_entity = entity;
>   	job->s_fence = amd_sched_fence_create(entity, owner);
>   	if (!job->s_fence)
>   		return -ENOMEM;
>   	job->id = atomic64_inc_return(&sched->job_id_count);
>   
>   	INIT_WORK(&job->finish_work, amd_sched_job_finish);
>   	INIT_LIST_HEAD(&job->node);
>   	INIT_DELAYED_WORK(&job->work_tdr, amd_sched_job_timedout);
>   
>   	return 0;
>   }
>   
>   /**
> + * Initialize a priority_ctr
> + *
> + * @p: priority_ctr to initialize
> + * @default_priority: priority to be used when no active requests exist
> + * @set_priority: callback for changing the underlying resource's priority
> + *
> + * The @set_priority callback will be invoked whenever a transition
> + * happens into a new priority state, e.g. NORMAL->HIGH or HIGH->NORMAL. It
> + * is guaranteed to be invoked exactly once per state transition.
> + *
> + */
> +void amd_sched_priority_ctr_init(struct amd_sched_priority_ctr *p,
> +				 enum amd_sched_priority default_priority,
> +				 amd_sched_set_priority_func_t set_priority)
> +{
> +	int i;
> +
> +	for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i)
> +		atomic_set(&p->requests[i], 0);
> +
> +	p->default_priority = default_priority;
> +	p->priority = p->default_priority;
> +	p->set_priority = set_priority;
> +	mutex_init(&p->mutex);
> +}
> +
> +/**
> + * Cleanup a priority_ctr
> + *
> + * @p: priority_ctr to cleanup
> + *
> + * Must be called when the priority_ctr is no longer needed
> + */
> +void amd_sched_priority_ctr_fini(struct amd_sched_priority_ctr *p)
> +{
> +	p->priority = AMD_SCHED_PRIORITY_INVALID;
> +}
> +
> +/**
> + * Begin a request to set the current priority to @priority
> + *
> + * @p: priority_ctr
> + * @priority: requested priority
> + *
> + * Requests are reference counted.
> + *
> + * The current priority will be the highest priority for which an
> + * active request exists.
> + *
> + * If no active requests exist, the resource will default to
> + * p->default_priority
> + */
> +void amd_sched_priority_ctr_get(struct amd_sched_priority_ctr *p,
> +				enum amd_sched_priority priority)
> +{
> +	if (WARN_ON(priority > AMD_SCHED_PRIORITY_MAX
> +			|| priority < AMD_SCHED_PRIORITY_MIN))
> +		return;
> +
> +	if (atomic_inc_return(&p->requests[priority]) > 1)
> +		return;
> +
> +	mutex_lock(&p->mutex);
> +
> +	/* A request with higher priority already exists */
> +	if (priority <= p->priority)
> +		goto out_unlock;
> +
> +	p->set_priority(p, priority);
> +	p->priority = priority;
> +
> +out_unlock:
> +	mutex_unlock(&p->mutex);
> +}
> +
> +/**
> + * End a request to set the current priority to @priority
> + *
> + * @p: priority_ctr
> + * @priority: requested priority
> + *
> + * Refer to @amd_sched_priority_ctr_get for more details.
> + */
> +void amd_sched_priority_ctr_put(struct amd_sched_priority_ctr *p,
> +				enum amd_sched_priority priority)
> +{
> +	int i;
> +	enum amd_sched_priority decayed_priority = AMD_SCHED_PRIORITY_INVALID;
> +
> +	if (WARN_ON(priority > AMD_SCHED_PRIORITY_MAX
> +			|| priority < AMD_SCHED_PRIORITY_MIN))
> +		return;
> +
> +	if (atomic_dec_return(&p->requests[priority]) > 0)
> +		return;
> +
> +	mutex_lock(&p->mutex);
> +
> +	/* something higher prio is executing, no need to decay */
> +	if (p->priority > priority)
> +		goto out_unlock;
> +
> +	/* decay priority to the next level with an active request */
> +	for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) {
> +		if (atomic_read(&p->requests[i])) {
> +			decayed_priority = i;
> +			break;
> +		}
> +	}
> +
> +	/* If no requests are active, use the default priority */
> +	if (decayed_priority == AMD_SCHED_PRIORITY_INVALID)
> +		decayed_priority = p->default_priority;
> +
> +	p->set_priority(p, decayed_priority);
> +	p->priority = decayed_priority;
> +
> +out_unlock:
> +	mutex_unlock(&p->mutex);
> +}
> +
> +/**
>    * Return ture if we can push more jobs to the hw.
>    */
>   static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
>   {
>   	return atomic_read(&sched->hw_rq_count) <
>   		sched->hw_submission_limit;
>   }
>   
>   /**
>    * Wake up the scheduler when it is ready
>    */
>   static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
>   {
>   	if (amd_sched_ready(sched))
>   		wake_up_interruptible(&sched->wake_up_worker);
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index da040bc..b9283b5 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -17,30 +17,31 @@
>    * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>    * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>    * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>    * OTHER DEALINGS IN THE SOFTWARE.
>    *
>    */
>   
>   #ifndef _GPU_SCHEDULER_H_
>   #define _GPU_SCHEDULER_H_
>   
>   #include <linux/kfifo.h>
>   #include <linux/dma-fence.h>
>   
>   struct amd_gpu_scheduler;
>   struct amd_sched_rq;
> +struct amd_sched_priority_ctr;
>   
>   /**
>    * A scheduler entity is a wrapper around a job queue or a group
>    * of other entities. Entities take turns emitting jobs from their
>    * job queues to corresponding hardware ring based on scheduling
>    * policy.
>   */
>   struct amd_sched_entity {
>   	struct list_head		list;
>   	struct amd_sched_rq		*rq;
>   	struct amd_gpu_scheduler	*sched;
>   
>   	spinlock_t			queue_lock;
>   	struct kfifo                    job_queue;
>   
> @@ -112,30 +113,41 @@ struct amd_sched_backend_ops {
>   	void (*timedout_job)(struct amd_sched_job *sched_job);
>   	void (*free_job)(struct amd_sched_job *sched_job);
>   };
>   
>   enum amd_sched_priority {
>   	AMD_SCHED_PRIORITY_MIN,
>   	AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
>   	AMD_SCHED_PRIORITY_NORMAL,
>   	AMD_SCHED_PRIORITY_HIGH_SW,
>   	AMD_SCHED_PRIORITY_HIGH_HW,
>   	AMD_SCHED_PRIORITY_KERNEL,
>   	AMD_SCHED_PRIORITY_MAX,
>   	AMD_SCHED_PRIORITY_INVALID = -1
>   };
>   
> +typedef void (*amd_sched_set_priority_func_t)(struct amd_sched_priority_ctr *p,
> +					      enum amd_sched_priority priority);
> +
> +struct amd_sched_priority_ctr {
> +	struct mutex			mutex;
> +	atomic_t			requests[AMD_SCHED_PRIORITY_MAX];
> +	enum amd_sched_priority		priority;
> +	enum amd_sched_priority		default_priority;
> +	amd_sched_set_priority_func_t	set_priority;
> +};
> +
>   /**
>    * One scheduler is implemented for each hardware ring
>   */
>   struct amd_gpu_scheduler {
>   	const struct amd_sched_backend_ops	*ops;
>   	uint32_t			hw_submission_limit;
>   	long				timeout;
>   	const char			*name;
>   	struct amd_sched_rq		sched_rq[AMD_SCHED_PRIORITY_MAX];
>   	wait_queue_head_t		wake_up_worker;
>   	wait_queue_head_t		job_scheduled;
>   	atomic_t			hw_rq_count;
>   	atomic64_t			job_id_count;
>   	struct task_struct		*thread;
>   	struct list_head	ring_mirror_list;
> @@ -166,16 +178,25 @@ int amd_sched_job_init(struct amd_sched_job *job,
>   		       struct amd_gpu_scheduler *sched,
>   		       struct amd_sched_entity *entity,
>   		       void *owner);
>   void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
>   void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
>   bool amd_sched_dependency_optimized(struct dma_fence* fence,
>   				    struct amd_sched_entity *entity);
>   void amd_sched_job_kickout(struct amd_sched_job *s_job);
>   
>   static inline enum amd_sched_priority
>   amd_sched_get_job_priority(struct amd_sched_job *job)
>   {
>   	return (job->s_entity->rq - job->sched->sched_rq);
>   }
>   
> +void amd_sched_priority_ctr_init(struct amd_sched_priority_ctr *p,
> +				 enum amd_sched_priority default_priority,
> +				 amd_sched_set_priority_func_t set_priority);
> +void amd_sched_priority_ctr_fini(struct amd_sched_priority_ctr *p);
> +void amd_sched_priority_ctr_get(struct amd_sched_priority_ctr *p,
> +				enum amd_sched_priority priority);
> +void amd_sched_priority_ctr_put(struct amd_sched_priority_ctr *p,
> +				enum amd_sched_priority priority);
> +
>   #endif