[PATCH 066/117] drm/amdgpu: add initial vcn support and decode tests

alexdeucher@xxxxxxxxx (Alex Deucher) · Wed, 10 May 2017 14:46:57 -0400

From: Leo Liu <leo.liu@xxxxxxx>

VCN is the new media block on Raven. Add core support
along with the ring and IB tests for decode.

Signed-off-by: Leo Liu <leo.liu at amd.com>
Acked-by: Alex Deucher <alexander.deucher at amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile     |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  40 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 425 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  35 +++
 4 files changed, 500 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2c80df0..f3c8245 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -99,6 +99,10 @@ amdgpu-y += \
 	vce_v3_0.o \
 	vce_v4_0.o
 
+# add VCN block
+amdgpu-y += \
+	amdgpu_vcn.o
+
 # add amdkfd interfaces
 amdgpu-y += \
 	 amdgpu_amdkfd.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1fbb698..e884f5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1181,6 +1181,31 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb);
 void amdgpu_get_pcie_info(struct amdgpu_device *adev);
 
 /*
+ * VCN
+ */
+#define AMDGPU_VCN_STACK_SIZE		(200*1024)	/* added to the VCPU BO size in amdgpu_vcn_sw_init() */
+#define AMDGPU_VCN_HEAP_SIZE		(256*1024)	/* added to the VCPU BO size in amdgpu_vcn_sw_init() */
+#define AMDGPU_VCN_SESSION_SIZE		(50*1024)	/* amdgpu_vcn_sw_init() reserves 40 of these */
+#define AMDGPU_VCN_FIRMWARE_OFFSET	256
+#define AMDGPU_VCN_MAX_ENC_RINGS	3	/* length of amdgpu_vcn.ring_enc[] */
+
+struct amdgpu_vcn {	/* per-device VCN state, embedded in struct amdgpu_device */
+	struct amdgpu_bo	*vcpu_bo;	/* BO sized for firmware + stack + heap + sessions */
+	void			*cpu_addr;	/* CPU address of vcpu_bo from amdgpu_bo_create_kernel() */
+	uint64_t		gpu_addr;	/* GPU address of vcpu_bo from amdgpu_bo_create_kernel() */
+	unsigned		fw_version;
+	void			*saved_bo;	/* kmalloc'ed copy of vcpu_bo made by amdgpu_vcn_suspend() */
+	struct delayed_work	idle_work;	/* runs amdgpu_vcn_idle_work_handler() */
+	const struct firmware	*fw;	/* VCN firmware */
+	struct amdgpu_ring	ring_dec;	/* decode ring */
+	struct amdgpu_ring	ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
+	struct amdgpu_irq_src	irq;
+	struct amd_sched_entity entity_dec;	/* scheduler entity set up for ring_dec */
+	struct amd_sched_entity entity_enc;
+	uint32_t                srbm_soft_reset;
+};
+
+/*
  * SDMA
  */
 struct amdgpu_sdma_instance {
@@ -1576,11 +1601,18 @@ struct amdgpu_device {
 	/* sdma */
 	struct amdgpu_sdma		sdma;
 
-	/* uvd */
-	struct amdgpu_uvd		uvd;
+	union {
+		struct {
+			/* uvd */
+			struct amdgpu_uvd		uvd;
+
+			/* vce */
+			struct amdgpu_vce		vce;
+		};
 
-	/* vce */
-	struct amdgpu_vce		vce;
+		/* vcn */
+		struct amdgpu_vcn		vcn;
+	};
 
 	/* firmwares */
 	struct amdgpu_firmware		firmware;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
new file mode 100644
index 0000000..97b09b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_vcn.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#include "vega10/soc15ip.h"
+#include "raven1/VCN/vcn_1_0_offset.h"
+
+/* 1 second timeout */
+#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+/* Firmware Names */
+#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
+
+MODULE_FIRMWARE(FIRMWARE_RAVEN);
+
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
+
+/**
+ * amdgpu_vcn_sw_init - load the VCN firmware and allocate the VCPU buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initializes the idle work, requests and validates the firmware image for
+ * the ASIC, allocates the VRAM buffer object sized for the firmware, stack,
+ * heap and 40 sessions, and sets up the scheduler entity for the decode ring.
+ *
+ * Anything acquired here is released again when a later step fails, so a
+ * failed call leaves adev->vcn.fw and adev->vcn.vcpu_bo set to NULL.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported ASIC, or the error code
+ * of the step that failed.
+ */
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	struct amd_sched_rq *rq;
+	unsigned long bo_size;
+	const char *fw_name;
+	const struct common_firmware_header *hdr;
+	unsigned version_major, version_minor, family_id;
+	int r;
+
+	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+
+	switch (adev->asic_type) {
+	case CHIP_RAVEN:
+		fw_name = FIRMWARE_RAVEN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
+	if (r) {
+		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
+			fw_name);
+		return r;
+	}
+
+	r = amdgpu_ucode_validate(adev->vcn.fw);
+	if (r) {
+		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
+			fw_name);
+		goto err_release_fw;
+	}
+
+	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
+	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+	/* the three values are plain unsigned, so print them with %u */
+	DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
+		version_major, version_minor, family_id);
+
+
+	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
+		  +  AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
+		  +  AMDGPU_VCN_SESSION_SIZE * 40;
+	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
+				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+		goto err_release_fw;
+	}
+
+	ring = &adev->vcn.ring_dec;
+	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
+				  rq, amdgpu_sched_jobs);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up VCN dec run queue.\n");
+		goto err_free_bo;
+	}
+
+	return 0;
+
+err_free_bo:
+	/* same call amdgpu_vcn_sw_fini() uses to drop the VCPU buffer */
+	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
+			      &adev->vcn.gpu_addr,
+			      (void **)&adev->vcn.cpu_addr);
+
+err_release_fw:
+	release_firmware(adev->vcn.fw);
+	adev->vcn.fw = NULL;
+	return r;
+}
+
+/**
+ * amdgpu_vcn_sw_fini - release the software state of the VCN block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the suspend-time copy of the VCPU buffer (if any), tears down the
+ * decode scheduler entity, frees the VCPU buffer object, finalizes the
+ * decode ring and releases the firmware image.
+ *
+ * Always returns 0.
+ */
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_vcn *vcn = &adev->vcn;
+	struct amdgpu_ring *dec = &vcn->ring_dec;
+
+	kfree(vcn->saved_bo);
+
+	amd_sched_entity_fini(&dec->sched, &vcn->entity_dec);
+
+	amdgpu_bo_free_kernel(&vcn->vcpu_bo, &vcn->gpu_addr,
+			      (void **)&vcn->cpu_addr);
+
+	amdgpu_ring_fini(dec);
+
+	release_firmware(vcn->fw);
+
+	return 0;
+}
+
+/**
+ * amdgpu_vcn_suspend - save the contents of the VCPU buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cancels the idle work and copies the whole VCPU buffer object into a
+ * kmalloc'ed buffer stored in adev->vcn.saved_bo.
+ *
+ * Returns 0 on success (or when no VCPU buffer exists), -ENOMEM when the
+ * copy cannot be allocated.
+ */
+int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+{
+	struct amdgpu_vcn *vcn = &adev->vcn;
+	unsigned bytes;
+
+	/* nothing to save when the VCPU buffer was never allocated */
+	if (!vcn->vcpu_bo)
+		return 0;
+
+	cancel_delayed_work_sync(&vcn->idle_work);
+
+	bytes = amdgpu_bo_size(vcn->vcpu_bo);
+
+	vcn->saved_bo = kmalloc(bytes, GFP_KERNEL);
+	if (vcn->saved_bo == NULL)
+		return -ENOMEM;
+
+	memcpy_fromio(vcn->saved_bo, vcn->cpu_addr, bytes);
+
+	return 0;
+}
+
+/**
+ * amdgpu_vcn_resume - (re)populate the VCPU buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * When amdgpu_vcn_suspend() left a saved copy, that copy is written back and
+ * freed. Otherwise the ucode from the firmware image is copied to the start
+ * of the buffer and the remainder of the buffer is zeroed.
+ *
+ * Returns 0 on success, -EINVAL when no VCPU buffer exists.
+ */
+int amdgpu_vcn_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_vcn *vcn = &adev->vcn;
+	const struct common_firmware_header *hdr;
+	unsigned remaining, ucode_bytes, ucode_off;
+	void *dst;
+
+	if (!vcn->vcpu_bo)
+		return -EINVAL;
+
+	remaining = amdgpu_bo_size(vcn->vcpu_bo);
+	dst = vcn->cpu_addr;
+
+	if (vcn->saved_bo) {
+		/* a suspend-time copy exists: restore it verbatim */
+		memcpy_toio(dst, vcn->saved_bo, remaining);
+		kfree(vcn->saved_bo);
+		vcn->saved_bo = NULL;
+		return 0;
+	}
+
+	/* no saved copy: load the ucode, then clear what follows it */
+	hdr = (const struct common_firmware_header *)vcn->fw->data;
+	ucode_off = le32_to_cpu(hdr->ucode_array_offset_bytes);
+	ucode_bytes = le32_to_cpu(hdr->ucode_size_bytes);
+
+	memcpy_toio(dst, vcn->fw->data + ucode_off, ucode_bytes);
+	memset_io(dst + ucode_bytes, 0, remaining - ucode_bytes);
+
+	return 0;
+}
+
+/**
+ * amdgpu_vcn_dec_send_msg - submit a decode message buffer
+ *
+ * @ring: ring to submit on
+ * @bo: buffer object holding the message; the caller's reference is
+ *      consumed whether the call succeeds or fails
+ * @direct: true to schedule the IB directly on @ring, false to submit the
+ *          job through the decode scheduler entity
+ * @fence: if non-NULL, receives a reference to the submission fence
+ *
+ * Reserves and validates @bo, then builds a 16-dword IB that writes the GPU
+ * address of @bo to UVD_GPCOM_VCPU_DATA0/DATA1, writes 0 to
+ * UVD_GPCOM_VCPU_CMD and pads the rest with UVD_NO_OP writes. The buffer is
+ * fenced with the resulting fence.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+			       bool direct, struct dma_fence **fence)
+{
+	struct ttm_validate_buffer tv;
+	struct ww_acquire_ctx ticket;
+	struct list_head head;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	struct dma_fence *f = NULL;
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t addr;
+	int i, r;
+
+	memset(&tv, 0, sizeof(tv));
+	tv.bo = &bo->tbo;
+
+	INIT_LIST_HEAD(&head);
+	list_add(&tv.head, &head);
+
+	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
+	if (r)
+		goto err_unref;
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+	if (r)
+		goto err;
+
+	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+	if (r)
+		goto err;
+
+	ib = &job->ibs[0];
+	addr = amdgpu_bo_gpu_offset(bo);
+	/* low and high halves of the message address, then the command */
+	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
+	ib->ptr[1] = addr;
+	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
+	ib->ptr[3] = addr >> 32;
+	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
+	ib->ptr[5] = 0;
+	/* fill the IB up to 16 dwords with NO_OP register writes */
+	for (i = 6; i < 16; i += 2) {
+		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
+		ib->ptr[i+1] = 0;
+	}
+	ib->length_dw = 16;
+
+	if (direct) {
+		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+		job->fence = dma_fence_get(f);
+		if (r)
+			goto err_free;
+
+		amdgpu_job_free(job);
+	} else {
+		r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
+				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+		if (r)
+			goto err_free;
+	}
+
+	ttm_eu_fence_buffer_objects(&ticket, &head, f);
+
+	if (fence)
+		*fence = dma_fence_get(f);
+	amdgpu_bo_unref(&bo);
+	dma_fence_put(f);
+
+	return 0;
+
+err_free:
+	amdgpu_job_free(job);
+
+err:
+	ttm_eu_backoff_reservation(&ticket, &head);
+
+err_unref:
+	/*
+	 * The success path drops the caller's reference on the message BO and
+	 * the callers return our result without touching the BO again, so the
+	 * reference has to be dropped on failure as well or the BO is leaked.
+	 */
+	amdgpu_bo_unref(&bo);
+	return r;
+}
+
+/**
+ * amdgpu_vcn_dec_get_create_msg - build and send a decode create message
+ *
+ * @ring: ring to submit on
+ * @handle: value stored in dword 2 of the message
+ * @fence: passed through to amdgpu_vcn_dec_send_msg()
+ *
+ * Allocates a VRAM buffer object, fills it with a 1024-dword create message
+ * and hands it to amdgpu_vcn_dec_send_msg() for direct submission.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+			      struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_bo *bo;
+	uint32_t *msg;
+	int r, i;
+
+	/*
+	 * 1024 dwords are written below, so request that many bytes instead
+	 * of depending on any rounding done by amdgpu_bo_create().
+	 */
+	r = amdgpu_bo_create(adev, 1024 * sizeof(uint32_t), PAGE_SIZE, true,
+			     AMDGPU_GEM_DOMAIN_VRAM,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+			     NULL, NULL, &bo);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (r) {
+		amdgpu_bo_unref(&bo);
+		return r;
+	}
+
+	r = amdgpu_bo_kmap(bo, (void **)&msg);
+	if (r) {
+		amdgpu_bo_unreserve(bo);
+		amdgpu_bo_unref(&bo);
+		return r;
+	}
+
+	/* stitch together a VCN create msg */
+	msg[0] = cpu_to_le32(0x00000de4);
+	msg[1] = cpu_to_le32(0x00000000);
+	msg[2] = cpu_to_le32(handle);
+	msg[3] = cpu_to_le32(0x00000000);
+	msg[4] = cpu_to_le32(0x00000000);
+	msg[5] = cpu_to_le32(0x00000000);
+	msg[6] = cpu_to_le32(0x00000000);
+	msg[7] = cpu_to_le32(0x00000780);
+	msg[8] = cpu_to_le32(0x00000440);
+	msg[9] = cpu_to_le32(0x00000000);
+	msg[10] = cpu_to_le32(0x01b37000);
+	/* zero the remainder of the 1024-dword message */
+	for (i = 11; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unreserve(bo);
+
+	return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
+}
+
+/**
+ * amdgpu_vcn_dec_get_destroy_msg - build and send a decode destroy message
+ *
+ * @ring: ring to submit on
+ * @handle: value stored in dword 2 of the message
+ * @direct: passed through to amdgpu_vcn_dec_send_msg()
+ * @fence: passed through to amdgpu_vcn_dec_send_msg()
+ *
+ * Allocates a VRAM buffer object, fills it with a 1024-dword destroy message
+ * and hands it to amdgpu_vcn_dec_send_msg().
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+			       bool direct, struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_bo *bo;
+	uint32_t *msg;
+	int r, i;
+
+	/*
+	 * 1024 dwords are written below, so request that many bytes instead
+	 * of depending on any rounding done by amdgpu_bo_create().
+	 */
+	r = amdgpu_bo_create(adev, 1024 * sizeof(uint32_t), PAGE_SIZE, true,
+			     AMDGPU_GEM_DOMAIN_VRAM,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+			     NULL, NULL, &bo);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (r) {
+		amdgpu_bo_unref(&bo);
+		return r;
+	}
+
+	r = amdgpu_bo_kmap(bo, (void **)&msg);
+	if (r) {
+		amdgpu_bo_unreserve(bo);
+		amdgpu_bo_unref(&bo);
+		return r;
+	}
+
+	/* stitch together a VCN destroy msg */
+	msg[0] = cpu_to_le32(0x00000de4);
+	msg[1] = cpu_to_le32(0x00000002);
+	msg[2] = cpu_to_le32(handle);
+	msg[3] = cpu_to_le32(0x00000000);
+	/* zero the remainder of the 1024-dword message */
+	for (i = 4; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unreserve(bo);
+
+	return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
+}
+
+/**
+ * amdgpu_vcn_idle_work_handler - delayed work run after the idle timeout
+ *
+ * @work: the work member embedded in adev->vcn.idle_work
+ *
+ * While the decode ring still has emitted fences the work re-arms itself.
+ * Otherwise it calls amdgpu_dpm_enable_uvd(adev, false) when DPM is enabled,
+ * or amdgpu_asic_set_uvd_clocks(adev, 0, 0) when it is not.
+ */
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, vcn.idle_work.work);
+
+	/* fences still outstanding on the decode ring: look again later */
+	if (amdgpu_fence_count_emitted(&adev->vcn.ring_dec) != 0) {
+		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+		return;
+	}
+
+	if (adev->pm.dpm_enabled) {
+		amdgpu_dpm_enable_uvd(adev, false);
+	} else {
+		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+	}
+}
+
+/**
+ * amdgpu_vcn_ring_begin_use - ring callback run before the ring is used
+ *
+ * @ring: ring about to be used
+ *
+ * Cancels the idle work. Only when cancel_delayed_work_sync() reports that
+ * there was nothing to cancel does it call amdgpu_dpm_enable_uvd(adev, true)
+ * (DPM enabled) or amdgpu_asic_set_uvd_clocks(adev, 53300, 40000) (DPM
+ * disabled).
+ */
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* idle work was still queued and got cancelled: leave clocks alone */
+	if (cancel_delayed_work_sync(&adev->vcn.idle_work))
+		return;
+
+	if (adev->pm.dpm_enabled) {
+		amdgpu_dpm_enable_uvd(adev, true);
+	} else {
+		amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
+	}
+}
+
+/**
+ * amdgpu_vcn_ring_end_use - ring callback run after the ring was used
+ *
+ * @ring: ring that was used
+ *
+ * Schedules the idle work to run after VCN_IDLE_TIMEOUT.
+ */
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_vcn_dec_ring_test_ib - IB test for the decode ring
+ *
+ * @ring: ring to test
+ * @timeout: timeout passed to dma_fence_wait_timeout()
+ *
+ * Sends a create message followed by a destroy message for handle 1 and
+ * waits for the fence of the destroy message.
+ *
+ * Returns 0 on success, -ETIMEDOUT when the wait times out, or the error
+ * code of the step that failed.
+ */
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct dma_fence *fence;
+	long r;
+
+	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		return r;
+	}
+
+	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		return r;
+	}
+
+	r = dma_fence_wait_timeout(fence, false, timeout);
+	if (r > 0) {
+		/* fence signalled before the timeout expired */
+		DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+		r = 0;
+	} else if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+	}
+
+	dma_fence_put(fence);
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
new file mode 100644
index 0000000..a32182c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VCN_H__
+#define __AMDGPU_VCN_H__
+
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
+int amdgpu_vcn_suspend(struct amdgpu_device *adev);
+int amdgpu_vcn_resume(struct amdgpu_device *adev);
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+#endif
-- 
2.5.5