Move microcode loading to be target specific. While this results in a bit more code duplication (especially between A3XX/A4XX) this gives us more flexibility for newer targets that don't need to keep an extra copy of the firmware data around in memory. Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> --- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 83 ++++++++++++++++++++++----------- drivers/gpu/drm/msm/adreno/a3xx_gpu.h | 2 + drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 69 ++++++++++++++++++++------- drivers/gpu/drm/msm/adreno/a4xx_gpu.h | 2 + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 24 +++++++--- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 30 ------------ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 3 -- 7 files changed, 127 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 4baef27..fbc2d11 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -69,11 +69,58 @@ static bool a3xx_me_init(struct msm_gpu *gpu) return a3xx_idle(gpu); } -static int a3xx_hw_init(struct msm_gpu *gpu) +static int a3xx_ucode_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu); + const struct firmware *fw; uint32_t *ptr, len; + int i; + + if (!a3xx_gpu->pm4) { + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw); + if (IS_ERR(fw)) + return PTR_ERR(fw); + + a3xx_gpu->pm4 = fw; + } + + if (!a3xx_gpu->pfp) { + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw); + if (IS_ERR(fw)) + return PTR_ERR(fw); + + a3xx_gpu->pfp = fw; + } + + /* Load PM4: */ + ptr = (uint32_t *)(a3xx_gpu->pm4->data); + len = a3xx_gpu->pm4->size / 4; + DBG("loading PM4 ucode version: %x", ptr[1]); + + gpu_write(gpu, REG_AXXX_CP_DEBUG, + AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE | + AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE); + gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0); + for (i = 1; i < len; i++) + gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]); + + /* Load PFP: */ + ptr = (uint32_t *)(a3xx_gpu->pfp->data); + len = a3xx_gpu->pfp->size / 4; + DBG("loading PFP ucode version: %x", ptr[5]); + + gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0); + for (i = 1; i < len; i++) + gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]); + + return 0; +} + +static int a3xx_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu); int i, ret; DBG("%s", gpu->name); @@ -225,6 +272,10 @@ static int a3xx_hw_init(struct msm_gpu *gpu) if (ret) return ret; + ret = a3xx_ucode_init(gpu); + if (ret) + return ret; + /* setup access protection: */ gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007); @@ -249,33 +300,6 @@ static int a3xx_hw_init(struct msm_gpu *gpu) /* VBIF registers */ gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000); - /* NOTE: PM4/micro-engine firmware registers look to be the same - * for a2xx and a3xx.. we could possibly push that part down to - * adreno_gpu base class. Or push both PM4 and PFP but - * parameterize the pfp ucode addr/data registers.. - */ - - /* Load PM4: */ - ptr = (uint32_t *)(adreno_gpu->pm4->data); - len = adreno_gpu->pm4->size / 4; - DBG("loading PM4 ucode version: %x", ptr[1]); - - gpu_write(gpu, REG_AXXX_CP_DEBUG, - AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE | - AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE); - gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0); - for (i = 1; i < len; i++) - gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]); - - /* Load PFP: */ - ptr = (uint32_t *)(adreno_gpu->pfp->data); - len = adreno_gpu->pfp->size / 4; - DBG("loading PFP ucode version: %x", ptr[5]); - - gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0); - for (i = 1; i < len; i++) - gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]); - /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */ if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) || adreno_is_a320(adreno_gpu)) { @@ -326,6 +350,9 @@ static void a3xx_destroy(struct msm_gpu *gpu) DBG("%s", gpu->name); + release_firmware(a3xx_gpu->pm4); + release_firmware(a3xx_gpu->pfp); + adreno_gpu_cleanup(adreno_gpu); #ifdef CONFIG_MSM_OCMEM diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h index ab60dc9..e4fcc0d 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h @@ -32,6 +32,8 @@ struct a3xx_gpu { /* if OCMEM is used for GMEM: */ uint32_t ocmem_base; void *ocmem_hdl; + + const struct firmware *pm4, *pfp; }; #define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 8199a4b..7d0a858 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -141,12 +141,55 @@ static bool a4xx_me_init(struct msm_gpu *gpu) return a4xx_idle(gpu); } -static int a4xx_hw_init(struct msm_gpu *gpu) +static int a4xx_ucode_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu); + const struct firmware *fw; uint32_t *ptr, len; - int i, ret; + int i; + + if (!a4xx_gpu->pm4) { + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw); + if (IS_ERR(fw)) + return PTR_ERR(fw); + + a4xx_gpu->pm4 = fw; + } + + if (!a4xx_gpu->pfp) { + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw); + if (IS_ERR(fw)) + return PTR_ERR(fw); + + a4xx_gpu->pfp = fw; + } + + /* Load PM4: */ + ptr = (uint32_t *)(a4xx_gpu->pm4->data); + len = a4xx_gpu->pm4->size / 4; + DBG("loading PM4 ucode version: %u", ptr[0]); + gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0); + for (i = 1; i < len; i++) + gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]); + + /* Load PFP: */ + ptr = (uint32_t *)(a4xx_gpu->pfp->data); + len = a4xx_gpu->pfp->size / 4; + DBG("loading PFP ucode version: %u", ptr[0]); + + gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0); + for (i = 1; i < len; i++) + gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]); + + return 0; +} + +static int a4xx_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu); + int ret; if (adreno_is_a420(adreno_gpu)) { gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); @@ -273,22 +316,9 @@ static int a4xx_hw_init(struct msm_gpu *gpu) if (ret) return ret; - /* Load PM4: */ - ptr = (uint32_t *)(adreno_gpu->pm4->data); - len = adreno_gpu->pm4->size / 4; - DBG("loading PM4 ucode version: %u", ptr[0]); - gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0); - for (i = 1; i < len; i++) - gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]); - - /* Load PFP: */ - ptr = (uint32_t *)(adreno_gpu->pfp->data); - len = adreno_gpu->pfp->size / 4; - DBG("loading PFP ucode version: %u", ptr[0]); - - gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0); - for (i = 1; i < len; i++) - gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]); + ret = a4xx_ucode_init(gpu); + if (ret) + return ret; /* clear ME_HALT to start micro engine */ gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0); @@ -324,6 +354,9 @@ static void a4xx_destroy(struct msm_gpu *gpu) DBG("%s", gpu->name); + release_firmware(a4xx_gpu->pm4); + release_firmware(a4xx_gpu->pfp); + adreno_gpu_cleanup(adreno_gpu); #ifdef CONFIG_MSM_OCMEM diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h index f757184..2c80af7 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h @@ -27,6 +27,8 @@ struct a4xx_gpu { /* if OCMEM is used for GMEM: */ uint32_t ocmem_base; void *ocmem_hdl; + + const struct firmware *pm4, *pfp; }; #define to_a4xx_gpu(x) container_of(x, struct a4xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 7e09d44..e8e726e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -434,20 +434,30 @@ static int a5xx_preempt_start(struct msm_gpu *gpu) static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, - const struct firmware *fw, u64 *iova) + const char *fwname, u64 *iova) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + const struct firmware *fw; struct drm_gem_object *bo; void *ptr; + fw = adreno_request_fw(adreno_gpu, fwname); + if (IS_ERR(fw)) + return ERR_CAST(fw); + ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4, MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova); - if (IS_ERR(ptr)) - return ERR_CAST(ptr); + if (IS_ERR(ptr)) { + bo = ERR_CAST(ptr); + goto out; + } memcpy(ptr, &fw->data[4], fw->size - 4); msm_gem_put_vaddr(bo); +out: + release_firmware(fw); return bo; } @@ -458,8 +468,8 @@ static int a5xx_ucode_init(struct msm_gpu *gpu) int ret; if (!a5xx_gpu->pm4_bo) { - a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pm4, - &a5xx_gpu->pm4_iova); + a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, + adreno_gpu->info->pm4fw, &a5xx_gpu->pm4_iova); if (IS_ERR(a5xx_gpu->pm4_bo)) { ret = PTR_ERR(a5xx_gpu->pm4_bo); @@ -471,8 +481,8 @@ static int a5xx_ucode_init(struct msm_gpu *gpu) } if (!a5xx_gpu->pfp_bo) { - a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pfp, - &a5xx_gpu->pfp_iova); + a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu, + adreno_gpu->info->pfpfw, &a5xx_gpu->pfp_iova); if (IS_ERR(a5xx_gpu->pfp_bo)) { ret = PTR_ERR(a5xx_gpu->pfp_bo); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index de63ff2..9253550 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -138,29 +138,6 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) return ERR_PTR(-ENOENT); } -static int adreno_load_fw(struct adreno_gpu *adreno_gpu) -{ - const struct firmware *fw; - - if (adreno_gpu->pm4) - return 0; - - fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw); - if (IS_ERR(fw)) - return PTR_ERR(fw); - adreno_gpu->pm4 = fw; - - fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw); - if (IS_ERR(fw)) { - release_firmware(adreno_gpu->pm4); - adreno_gpu->pm4 = NULL; - return PTR_ERR(fw); - } - adreno_gpu->pfp = fw; - - return 0; -} - int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -168,10 +145,6 @@ int adreno_hw_init(struct msm_gpu *gpu) DBG("%s", gpu->name); - ret = adreno_load_fw(adreno_gpu); - if (ret) - return ret; - for (i = 0; i < gpu->nr_rings; i++) { struct msm_ringbuffer *ring = gpu->rb[i]; @@ -569,8 +542,5 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) { - release_firmware(adreno_gpu->pm4); - release_firmware(adreno_gpu->pfp); - msm_gpu_cleanup(&adreno_gpu->base); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 8d3d0a9..bfdaaf2 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -114,9 +114,6 @@ struct adreno_gpu { FW_LOCATION_HELPER, } fwloc; - /* firmware: */ - const struct firmware *pm4, *pfp; - /* * Register offsets are different between some GPUs. * GPU specific offsets will be exported by GPU specific -- 1.9.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel