> On Sep 7, 2021, at 20:37, Koenig, Christian <Christian.Koenig@xxxxxxx> wrote:
>
> On 07.09.21 at 14:26, xinhui pan wrote:
>> There is one dedicated IB pool for IB tests, so let's use it for the UVD
>> msg too.
>>
>> For some older HW, use one reserved BO at a specific range.
>>
>> Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 173 +++++++++++++++---------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |   2 +
>>  2 files changed, 112 insertions(+), 63 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> index d451c359606a..b741ab7b2468 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> @@ -299,8 +299,35 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>  	}
>>
>>  	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
>> -	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>> +	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>  		adev->uvd.address_64_bit = true;
>> +	} else {
>> +		struct amdgpu_bo *bo = NULL;
>> +
>> +		r = amdgpu_bo_create_reserved(adev, PAGE_SIZE, PAGE_SIZE,
>> +					      AMDGPU_GEM_DOMAIN_VRAM,
>> +					      &bo, NULL, &adev->uvd.ib_bo_cpu_addr);
>> +		if (r)
>> +			return r;
>> +		amdgpu_bo_kunmap(bo);
>> +		amdgpu_bo_unpin(bo);
>> +		r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>> +					     0, 256 << 20);
>> +		if (r) {
>> +			amdgpu_bo_unreserve(bo);
>> +			amdgpu_bo_unref(&bo);
>> +			return r;
>> +		}
>> +		r = amdgpu_bo_kmap(bo, &adev->uvd.ib_bo_cpu_addr);
>> +		if (r) {
>> +			amdgpu_bo_unpin(bo);
>> +			amdgpu_bo_unreserve(bo);
>> +			amdgpu_bo_unref(&bo);
>> +			return r;
>> +		}
>> +		adev->uvd.ib_bo = bo;
>> +		amdgpu_bo_unreserve(bo);
>
> Can we have the same handling for both cases please? E.g. always allocate
> a single BO in SW init and place it appropriately.

One single BO is not good; validating the BO into GTT or VRAM still causes
memory allocation. I will try using two BOs instead. More aggressively, I
will let the VCE/VCN/UVD IB tests share the same two BOs.

> And I think we need some kind of lock to prevent concurrent use of the BO.

It takes the reservation lock; see the sketch below.

> Christian.
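To spell that out, both IB-test paths serialize on the shared BO roughly
like this (a minimal sketch of the pattern the hunks in this patch use;
the helper name is made up and the error unwinding is simplified):

/*
 * Sketch: every user of the shared msg BO first takes its reservation
 * (dma_resv) lock, then waits for the previous user's fence, so two
 * concurrent IB tests can never scribble over each other's msg.
 */
static int uvd_use_shared_ib_bo(struct amdgpu_device *adev)
{
	struct amdgpu_bo *bo = adev->uvd.ib_bo;
	struct dma_fence *f = NULL;
	long r;

	r = amdgpu_bo_reserve(bo, true);	/* take the reservation lock */
	if (r)
		return r;

	/* don't reuse the buffer while the previous msg is still in flight */
	r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
				  msecs_to_jiffies(10));
	if (r == 0)
		r = -ETIMEDOUT;
	if (r < 0)
		goto out;

	/*
	 * ... build the msg through adev->uvd.ib_bo_cpu_addr and submit it
	 * with amdgpu_uvd_send_msg(), which hands back fence f ...
	 */

	if (f)
		amdgpu_bo_fence(bo, f, false);	/* publish our fence on the BO */
	r = 0;
out:
	amdgpu_bo_unreserve(bo);		/* drop the reservation lock */
	return r;
}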
>
>> +	}
>>
>>  	switch (adev->asic_type) {
>>  	case CHIP_TONGA:
>> @@ -342,6 +369,8 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>  		for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>  			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>  	}
>> +	amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL,
>> +			      (void **)&adev->uvd.ib_bo_cpu_addr);
>>  	release_firmware(adev->uvd.fw);
>>
>>  	return 0;
>> @@ -1066,7 +1095,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
>>  	return 0;
>>  }
>>
>> -static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>> +static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, uint64_t addr,
>>  			       bool direct, struct dma_fence **fence)
>>  {
>>  	struct amdgpu_device *adev = ring->adev;
>> @@ -1074,29 +1103,15 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>  	struct amdgpu_job *job;
>>  	struct amdgpu_ib *ib;
>>  	uint32_t data[4];
>> -	uint64_t addr;
>>  	long r;
>>  	int i;
>>  	unsigned offset_idx = 0;
>>  	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>
>> -	amdgpu_bo_kunmap(bo);
>> -	amdgpu_bo_unpin(bo);
>> -
>> -	if (!ring->adev->uvd.address_64_bit) {
>> -		struct ttm_operation_ctx ctx = { true, false };
>> -
>> -		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>> -		amdgpu_uvd_force_into_uvd_segment(bo);
>> -		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>> -		if (r)
>> -			goto err;
>> -	}
>> -
>>  	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>  				     AMDGPU_IB_POOL_DELAYED, &job);
>>  	if (r)
>> -		goto err;
>> +		return r;
>>
>>  	if (adev->asic_type >= CHIP_VEGA10) {
>>  		offset_idx = 1 + ring->me;
>> @@ -1110,7 +1125,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>  	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
>>
>>  	ib = &job->ibs[0];
>> -	addr = amdgpu_bo_gpu_offset(bo);
>>  	ib->ptr[0] = data[0];
>>  	ib->ptr[1] = addr;
>>  	ib->ptr[2] = data[1];
>> @@ -1123,33 +1137,13 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>  	}
>>  	ib->length_dw = 16;
>>
>> -	if (direct) {
>> -		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
>> -					  msecs_to_jiffies(10));
>> -		if (r == 0)
>> -			r = -ETIMEDOUT;
>> -		if (r < 0)
>> -			goto err_free;
>> -
>> +	if (direct)
>>  		r = amdgpu_job_submit_direct(job, ring, &f);
>> -		if (r)
>> -			goto err_free;
>> -	} else {
>> -		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
>> -				     AMDGPU_SYNC_ALWAYS,
>> -				     AMDGPU_FENCE_OWNER_UNDEFINED);
>> -		if (r)
>> -			goto err_free;
>> -
>> +	else
>>  		r = amdgpu_job_submit(job, &adev->uvd.entity,
>> -				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
>> +			      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
>> +	if (r)
>> +		goto err_free;
>>
>>  	if (fence)
>>  		*fence = dma_fence_get(f);
>> @@ -1159,10 +1153,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>
>>  err_free:
>>  	amdgpu_job_free(job);
>> -
>> -err:
>> -	amdgpu_bo_unreserve(bo);
>> -	amdgpu_bo_unref(&bo);
>>  	return r;
>>  }
>>
>> @@ -1173,16 +1163,31 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>  			       struct dma_fence **fence)
>>  {
>>  	struct amdgpu_device *adev = ring->adev;
>> -	struct amdgpu_bo *bo = NULL;
>> +	struct amdgpu_bo *bo = adev->uvd.ib_bo;
>> +	struct dma_fence *f = NULL;
>> +	struct amdgpu_ib ib;
>>  	uint32_t *msg;
>>  	int r, i;
>>
>> -	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>> -				      AMDGPU_GEM_DOMAIN_GTT,
>> -				      &bo, NULL, (void **)&msg);
>> -	if (r)
>> -		return r;
>> -
>> +	if (bo) {
>> +		r = amdgpu_bo_reserve(bo, true);
>> +		if (r)
>> +			return r;
>> +		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
>> +					  msecs_to_jiffies(10));
>> +		if (r == 0)
>> +			r = -ETIMEDOUT;
>> +		if (r < 0)
>> +			goto err;
>> +		ib.gpu_addr = amdgpu_bo_gpu_offset(bo);
>> +		msg = adev->uvd.ib_bo_cpu_addr;
>> +	} else {
>> +		memset(&msg, 0, sizeof(msg));
>> +		r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
>> +				  AMDGPU_IB_POOL_DIRECT,
>> +				  &ib);
>> +		msg = ib.ptr;
>> +	}
>>
>>  	/* stitch together an UVD create msg */
>>  	msg[0] = cpu_to_le32(0x00000de4);
>>  	msg[1] = cpu_to_le32(0x00000000);
>> @@ -1198,23 +1203,52 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>  	for (i = 11; i < 1024; ++i)
>>  		msg[i] = cpu_to_le32(0x0);
>>
>> -	return amdgpu_uvd_send_msg(ring, bo, true, fence);
>> +	r = amdgpu_uvd_send_msg(ring, ib.gpu_addr, true, &f);
>> +	if (r)
>> +		goto err;
>> +	if (bo)
>> +		amdgpu_bo_fence(bo, f, false);
>> +	else
>> +		amdgpu_ib_free(adev, &ib, f);
>> +	if (fence)
>> +		*fence = dma_fence_get(f);
>> +	dma_fence_put(f);
>> +err:
>> +	if (bo)
>> +		amdgpu_bo_unreserve(bo);
>> +	return r;
>>  }
>>
>>  int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>  			       bool direct, struct dma_fence **fence)
>>  {
>>  	struct amdgpu_device *adev = ring->adev;
>> -	struct amdgpu_bo *bo = NULL;
>> +	struct amdgpu_bo *bo = adev->uvd.ib_bo;
>> +	struct dma_fence *f = NULL;
>> +	struct amdgpu_ib ib;
>>  	uint32_t *msg;
>>  	int r, i;
>>
>> -	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>> -				      AMDGPU_GEM_DOMAIN_GTT,
>> -				      &bo, NULL, (void **)&msg);
>> -	if (r)
>> -		return r;
>> -
>> +	if (bo) {
>> +		r = amdgpu_bo_reserve(bo, true);
>> +		if (r)
>> +			return r;
>> +		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
>> +					  msecs_to_jiffies(10));
>> +		if (r == 0)
>> +			r = -ETIMEDOUT;
>> +		if (r < 0)
>> +			goto err;
>> +		ib.gpu_addr = amdgpu_bo_gpu_offset(bo);
>> +		msg = adev->uvd.ib_bo_cpu_addr;
>> +	} else {
>> +		memset(&msg, 0, sizeof(msg));
>> +		r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
>> +				  direct ?
>> +				  AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
>> +				  &ib);
>> +		msg = ib.ptr;
>> +	}
>>
>>  	/* stitch together an UVD destroy msg */
>>  	msg[0] = cpu_to_le32(0x00000de4);
>>  	msg[1] = cpu_to_le32(0x00000002);
>> @@ -1223,7 +1257,20 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>  	for (i = 4; i < 1024; ++i)
>>  		msg[i] = cpu_to_le32(0x0);
>>
>> -	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>> +	r = amdgpu_uvd_send_msg(ring, ib.gpu_addr, true, &f);
>> +	if (r)
>> +		goto err;
>> +	if (bo)
>> +		amdgpu_bo_fence(bo, f, false);
>> +	else
>> +		amdgpu_ib_free(adev, &ib, f);
>> +	if (fence)
>> +		*fence = dma_fence_get(f);
>> +	dma_fence_put(f);
>> +err:
>> +	if (bo)
>> +		amdgpu_bo_unreserve(bo);
>> +	return r;
>>  }
>>
>>  static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> index edbb8194ee81..3ff49daf558c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> @@ -68,6 +68,8 @@ struct amdgpu_uvd {
>>  	/* store image width to adjust nb memory state */
>>  	unsigned decode_image_width;
>>  	uint32_t keyselect;
>> +	struct amdgpu_bo *ib_bo;
>> +	void *ib_bo_cpu_addr;
>>  };
>>
>>  int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>
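P.S.: for anyone skimming the sw_init hunk above, the pre-UVD-5 path boils
down to the pattern below (sketch only, with a made-up helper name; the
real hunk also unpins/unrefs on each error path):

/*
 * Sketch: UVD before v5.0 cannot address the full 64-bit range, so the
 * msg BO has to sit in low VRAM. amdgpu_bo_create_reserved() pins with
 * no placement restriction, hence the unpin and the range-restricted
 * re-pin into the first 256 MB.
 */
static int uvd_alloc_low_vram_ib_bo(struct amdgpu_device *adev,
				    struct amdgpu_bo **bo_out, void **cpu_addr)
{
	struct amdgpu_bo *bo = NULL;
	int r;

	r = amdgpu_bo_create_reserved(adev, PAGE_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, cpu_addr);
	if (r)
		return r;

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);

	/* pin again, this time restricted to VRAM offsets [0, 256MB) */
	r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
				     0, 256 << 20);
	if (!r)
		r = amdgpu_bo_kmap(bo, cpu_addr);

	*bo_out = bo;
	amdgpu_bo_unreserve(bo);
	return r;
}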