[PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A minor update to this patch is attached. The rest of the series is
unchanged and rebased cleanly on 4.17-rc2 on my system.

Regards,
  Felix


On 2018-04-10 05:33 PM, Felix Kuehling wrote:
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c         | 10 +++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c     | 25 +++++++++++++++++------
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h     |  7 ++++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c |  9 ++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  9 ++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  |  9 ++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h             |  1 +
>  7 files changed, 63 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 36c9269e..5d7cccc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value)
>  	}
>  }
>  
> +void write_kernel_doorbell64(void __iomem *db, u64 value)
> +{
> +	if (db) {
> +		WARN(((unsigned long)db & 7) != 0,
> +		     "Unaligned 64-bit doorbell");
> +		writeq(value, (u64 __iomem *)db);
> +		pr_debug("writing %llu to doorbell address 0x%p\n", value, db);
> +	}
> +}
> +
>  unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>  					struct kfd_process *process,
>  					unsigned int doorbell_id)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 9f38161..476951d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
>  	kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
>  	kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
>  
> -	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
> +	retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
>  					&kq->wptr_mem);
>  
>  	if (retval != 0)
> @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>  	size_t available_size;
>  	size_t queue_size_dwords;
>  	uint32_t wptr, rptr;
> +	uint64_t wptr64;
>  	unsigned int *queue_address;
>  
>  	/* When rptr == wptr, the buffer is empty.
> @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>  	 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
>  	 */
>  	rptr = *kq->rptr_kernel;
> -	wptr = *kq->wptr_kernel;
> +	wptr = kq->pending_wptr;
> +	wptr64 = kq->pending_wptr64;
>  	queue_address = (unsigned int *)kq->pq_kernel_addr;
>  	queue_size_dwords = kq->queue->properties.queue_size / 4;
>  
> @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>  		while (wptr > 0) {
>  			queue_address[wptr] = kq->nop_packet;
>  			wptr = (wptr + 1) % queue_size_dwords;
> +			wptr64++;
>  		}
>  	}
>  
>  	*buffer_ptr = &queue_address[wptr];
>  	kq->pending_wptr = wptr + packet_size_in_dwords;
> +	kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
>  
>  	return 0;
>  
> @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
>  	pr_debug("\n");
>  #endif
>  
> -	*kq->wptr_kernel = kq->pending_wptr;
> -	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
> -				kq->pending_wptr);
> +	kq->ops_asic_specific.submit_packet(kq);
>  }
>  
>  static void rollback_packet(struct kernel_queue *kq)
>  {
> -	kq->pending_wptr = *kq->wptr_kernel;
> +	if (kq->dev->device_info->doorbell_size == 8) {
> +		kq->pending_wptr64 = *kq->wptr64_kernel;
> +		kq->pending_wptr = *kq->wptr_kernel %
> +			(kq->queue->properties.queue_size / 4);
> +	} else {
> +		kq->pending_wptr = *kq->wptr_kernel;
> +	}
>  }
>  
>  struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
> @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>  	case CHIP_HAWAII:
>  		kernel_queue_init_cik(&kq->ops_asic_specific);
>  		break;
> +
> +	case CHIP_VEGA10:
> +	case CHIP_RAVEN:
> +		kernel_queue_init_v9(&kq->ops_asic_specific);
> +		break;
>  	default:
>  		WARN(1, "Unexpected ASIC family %u",
>  		     dev->device_info->asic_family);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> index 5940531..97aff20 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> @@ -72,6 +72,7 @@ struct kernel_queue {
>  	struct kfd_dev		*dev;
>  	struct mqd_manager	*mqd;
>  	struct queue		*queue;
> +	uint64_t		pending_wptr64;
>  	uint32_t		pending_wptr;
>  	unsigned int		nop_packet;
>  
> @@ -79,7 +80,10 @@ struct kernel_queue {
>  	uint32_t		*rptr_kernel;
>  	uint64_t		rptr_gpu_addr;
>  	struct kfd_mem_obj	*wptr_mem;
> -	uint32_t		*wptr_kernel;
> +	union {
> +		uint64_t	*wptr64_kernel;
> +		uint32_t	*wptr_kernel;
> +	};
>  	uint64_t		wptr_gpu_addr;
>  	struct kfd_mem_obj	*pq;
>  	uint64_t		pq_gpu_addr;
> @@ -97,5 +101,6 @@ struct kernel_queue {
>  
>  void kernel_queue_init_cik(struct kernel_queue_ops *ops);
>  void kernel_queue_init_vi(struct kernel_queue_ops *ops);
> +void kernel_queue_init_v9(struct kernel_queue_ops *ops);
>  
>  #endif /* KFD_KERNEL_QUEUE_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
> index a90eb44..19e54ac 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
> @@ -26,11 +26,13 @@
>  static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
>  			enum kfd_queue_type type, unsigned int queue_size);
>  static void uninitialize_cik(struct kernel_queue *kq);
> +static void submit_packet_cik(struct kernel_queue *kq);
>  
>  void kernel_queue_init_cik(struct kernel_queue_ops *ops)
>  {
>  	ops->initialize = initialize_cik;
>  	ops->uninitialize = uninitialize_cik;
> +	ops->submit_packet = submit_packet_cik;
>  }
>  
>  static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
> @@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
>  static void uninitialize_cik(struct kernel_queue *kq)
>  {
>  }
> +
> +static void submit_packet_cik(struct kernel_queue *kq)
> +{
> +	*kq->wptr_kernel = kq->pending_wptr;
> +	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
> +				kq->pending_wptr);
> +}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index ece7d59..684a3bf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -29,11 +29,13 @@
>  static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
>  			enum kfd_queue_type type, unsigned int queue_size);
>  static void uninitialize_v9(struct kernel_queue *kq);
> +static void submit_packet_v9(struct kernel_queue *kq);
>  
>  void kernel_queue_init_v9(struct kernel_queue_ops *ops)
>  {
>  	ops->initialize = initialize_v9;
>  	ops->uninitialize = uninitialize_v9;
> +	ops->submit_packet = submit_packet_v9;
>  }
>  
>  static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
> @@ -58,6 +60,13 @@ static void uninitialize_v9(struct kernel_queue *kq)
>  	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
>  }
>  
> +static void submit_packet_v9(struct kernel_queue *kq)
> +{
> +	*kq->wptr64_kernel = kq->pending_wptr64;
> +	write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
> +				kq->pending_wptr64);
> +}
> +
>  static int pm_map_process_v9(struct packet_manager *pm,
>  		uint32_t *buffer, struct qcm_process_device *qpd)
>  {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
> index f9019ef..bf20c6d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
> @@ -29,11 +29,13 @@
>  static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
>  			enum kfd_queue_type type, unsigned int queue_size);
>  static void uninitialize_vi(struct kernel_queue *kq);
> +static void submit_packet_vi(struct kernel_queue *kq);
>  
>  void kernel_queue_init_vi(struct kernel_queue_ops *ops)
>  {
>  	ops->initialize = initialize_vi;
>  	ops->uninitialize = uninitialize_vi;
> +	ops->submit_packet = submit_packet_vi;
>  }
>  
>  static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
> @@ -58,6 +60,13 @@ static void uninitialize_vi(struct kernel_queue *kq)
>  	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
>  }
>  
> +static void submit_packet_vi(struct kernel_queue *kq)
> +{
> +	*kq->wptr_kernel = kq->pending_wptr;
> +	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
> +				kq->pending_wptr);
> +}
> +
>  unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
>  {
>  	union PM4_MES_TYPE_3_HEADER header;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 06b210b..10d5b54 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -769,6 +769,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
>  void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
>  u32 read_kernel_doorbell(u32 __iomem *db);
>  void write_kernel_doorbell(void __iomem *db, u32 value);
> +void write_kernel_doorbell64(void __iomem *db, u64 value);
>  unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>  					struct kfd_process *process,
>  					unsigned int doorbell_id);

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-drm-amdkfd-Add-64-bit-doorbell-and-wptr-support-to-k.patch
Type: text/x-patch
Size: 9339 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20180424/567fb64f/attachment-0001.bin>


[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux