[AMD Official Use Only]

We need to take this lock. The IB test can be triggered through debugfs;
these days I usually test it by cat'ing the gpu recovery and
amdgpu_test_ib entries in debugfs.
________________________________________
From: Koenig, Christian <Christian.Koenig@xxxxxxx>
Sent: September 10, 2021 18:02
To: Pan, Xinhui; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Deucher, Alexander
Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

*sigh* yeah, you are probably right. Wouldn't be too simple if this were
easy, would it?

In this case you can also skip taking the reservation lock for the
pre-allocated BO, since there should absolutely be only one user at a
time.

Christian.

On 10.09.21 11:42, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> Oh, god. The UVD free handler submits a delayed msg which depends on
> the scheduler, with the reservation lock held.
> This is not allowed, as commit c8e42d57859d ("drm/amdgpu: implement
> more ib pools (v2)") says: any jobs which schedule IBs without
> dependence on the gpu scheduler should use the DIRECT pool.
>
> Looks like we should only use the reserved BO for direct IB submission.
> As for delayed IB submission, we could allocate a new one dynamically.
> ________________________________________
> From: Koenig, Christian <Christian.Koenig@xxxxxxx>
> Sent: September 10, 2021 16:53
> To: Pan, Xinhui; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Deucher, Alexander
> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> It should not, unless we are OOM, which should not happen during driver
> initialization.
>
> But there is another problem I'm thinking about: do we still use UVD IB
> submissions to forcefully tear down UVD sessions when a process
> crashes?
>
> If yes, that stuff could easily deadlock with an IB test executed
> during GPU reset.
>
> Christian.
>
> On 10.09.21 10:18, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> I am wondering if amdgpu_bo_pin would change the BO's placement in
>> the future.
>> For now, the new placement is calculated as new = old ∩ new.
>>
>> ________________________________________
>> From: Koenig, Christian <Christian.Koenig@xxxxxxx>
>> Sent: September 10, 2021 14:24
>> To: Pan, Xinhui; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
>> Cc: Deucher, Alexander
>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> On 10.09.21 02:38, xinhui pan wrote:
>>> Move the BO allocation into sw_init.
>>>
>>> Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>>  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>>  4 files changed, 49 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> index d451c359606a..e2eaac941d37 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>          const char *fw_name;
>>>          const struct common_firmware_header *hdr;
>>>          unsigned family_id;
>>> +        struct amdgpu_bo *bo = NULL;
>>> +        void *addr;
>>>          int i, j, r;
>>>
>>>          INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>                  adev->uvd.filp[i] = NULL;
>>>          }
>>>
>>> +        r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>> +                                      AMDGPU_GEM_DOMAIN_GTT,
>>> +                                      &bo, NULL, &addr);
>>> +        if (r)
>>> +                return r;
>>> +
>>>          /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>> -        if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>>> +        if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>>                  adev->uvd.address_64_bit = true;
>>> +                amdgpu_bo_kunmap(bo);
>>> +                amdgpu_bo_unpin(bo);
>>> +                r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>> +                                             0, 256 << 20);
>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
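>>
>> Roughly the pattern the old amdgpu_uvd_send_msg() code below uses, e.g.
>> (untested sketch only, to illustrate; re-pinning and re-mapping of the
>> BO after the move are left out):
>>
>>         struct ttm_operation_ctx ctx = { true, false };
>>
>>         /* force the pre-allocated BO into the first 256MB of VRAM */
>>         amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>         amdgpu_uvd_force_into_uvd_segment(bo);
>>         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>         if (r) {
>>                 amdgpu_bo_unreserve(bo);
>>                 amdgpu_bo_unref(&bo);
>>                 return r;
>>         }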
>>
>>> +                if (r) {
>>> +                        amdgpu_bo_unreserve(bo);
>>> +                        amdgpu_bo_unref(&bo);
>>> +                        return r;
>>> +                }
>>> +                r = amdgpu_bo_kmap(bo, &addr);
>>> +                if (r) {
>>> +                        amdgpu_bo_unpin(bo);
>>> +                        amdgpu_bo_unreserve(bo);
>>> +                        amdgpu_bo_unref(&bo);
>>> +                        return r;
>>> +                }
>>> +        }
>>> +        adev->uvd.ib_bo = bo;
>>> +        amdgpu_bo_unreserve(bo);
>>>
>>>          switch (adev->asic_type) {
>>>          case CHIP_TONGA:
>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>>                  for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>>                          amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>>          }
>>> +        amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>>          release_firmware(adev->uvd.fw);
>>>
>>>          return 0;
>>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>          unsigned offset_idx = 0;
>>>          unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>>
>>> -        amdgpu_bo_kunmap(bo);
>>> -        amdgpu_bo_unpin(bo);
>>> -
>>> -        if (!ring->adev->uvd.address_64_bit) {
>>> -                struct ttm_operation_ctx ctx = { true, false };
>>> -
>>> -                amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>> -                amdgpu_uvd_force_into_uvd_segment(bo);
>>> -                r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>> -                if (r)
>>> -                        goto err;
>>> -        }
>>> -
>>>          r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>>                                       AMDGPU_IB_POOL_DELAYED, &job);
>>>          if (r)
>>> -                goto err;
>>> +                return r;
>>>
>>>          if (adev->asic_type >= CHIP_VEGA10) {
>>>                  offset_idx = 1 + ring->me;
>>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>          }
>>>
>>>          amdgpu_bo_fence(bo, f, false);
>>> -        amdgpu_bo_unreserve(bo);
>>> -        amdgpu_bo_unref(&bo);
>>>
>>>          if (fence)
>>>                  *fence = dma_fence_get(f);
>>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>
>>>  err_free:
>>>          amdgpu_job_free(job);
>>> -
>>> -err:
>>> -        amdgpu_bo_unreserve(bo);
>>> -        amdgpu_bo_unref(&bo);
>>>          return r;
>>>  }
>>>
>>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>                                struct dma_fence **fence)
>>>  {
>>>          struct amdgpu_device *adev = ring->adev;
>>> -        struct amdgpu_bo *bo = NULL;
>>> +        struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>          uint32_t *msg;
>>>          int r, i;
>>>
>>> -        r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> -                                      AMDGPU_GEM_DOMAIN_GTT,
>>> -                                      &bo, NULL, (void **)&msg);
>>> +        r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>          if (r)
>>>                  return r;
>>>
>>> +        msg = amdgpu_bo_kptr(bo);
>>>          /* stitch together an UVD create msg */
>>>          msg[0] = cpu_to_le32(0x00000de4);
>>>          msg[1] = cpu_to_le32(0x00000000);
>>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>          for (i = 11; i < 1024; ++i)
>>>                  msg[i] = cpu_to_le32(0x0);
>>>
>>> -        return amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> +        r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> +
>>> +        amdgpu_bo_unreserve(bo);
>>> +        return r;
>>>  }
>>>
>>>  int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>                                 bool direct, struct dma_fence **fence)
>>>  {
>>>          struct amdgpu_device *adev = ring->adev;
>>> -        struct amdgpu_bo *bo = NULL;
>>> +        struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>          uint32_t *msg;
>>>          int r, i;
>>>
>>> -        r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> -                                      AMDGPU_GEM_DOMAIN_GTT,
>>> -                                      &bo, NULL, (void **)&msg);
>>> +        r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
>> the clean side.
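>>
>> I.e. something like this instead of the raw ttm call (untested sketch;
>> note that amdgpu_bo_reserve() with no_intr=true blocks instead of
>> trylocking):
>>
>>         r = amdgpu_bo_reserve(bo, true);
>>         if (r)
>>                 return r;
>>         ...
>>         amdgpu_bo_unreserve(bo);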
>>
>> Lockdep will sooner or later complain that we reserve a BO in the reset
>> path, but that is mostly a theoretical problem and existed before. So
>> I'm fine with sticking with that for now cause the problem was there
>> before as well.
>>
>> It's just that trylock might not work because the BO is busy with
>> indirect submission.
>>
>> Apart from those two minor issues the patch looks good to me.
>>
>> Thanks,
>> Christian.
>>
>>>          if (r)
>>>                  return r;
>>>
>>> +        msg = amdgpu_bo_kptr(bo);
>>>          /* stitch together an UVD destroy msg */
>>>          msg[0] = cpu_to_le32(0x00000de4);
>>>          msg[1] = cpu_to_le32(0x00000002);
>>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>          for (i = 4; i < 1024; ++i)
>>>                  msg[i] = cpu_to_le32(0x0);
>>>
>>> -        return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> +        r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> +
>>> +        amdgpu_bo_unreserve(bo);
>>> +        return r;
>>>  }
>>>
>>>  static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> index edbb8194ee81..76ac9699885d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>>          /* store image width to adjust nb memory state */
>>>          unsigned decode_image_width;
>>>          uint32_t keyselect;
>>> +        struct amdgpu_bo *ib_bo;
>>>  };
>>>
>>>  int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> index bc571833632e..301c0cea7164 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>>  static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>  {
>>>          struct dma_fence *fence = NULL;
>>> -        struct amdgpu_bo *bo = NULL;
>>> +        struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>          long r;
>>>
>>> -        r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> -                                      AMDGPU_GEM_DOMAIN_VRAM,
>>> -                                      &bo, NULL, NULL);
>>> +        r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>          if (r)
>>>                  return r;
>>>
>>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>>  error:
>>>          dma_fence_put(fence);
>>> -        amdgpu_bo_unpin(bo);
>>>          amdgpu_bo_unreserve(bo);
>>> -        amdgpu_bo_unref(&bo);
>>>          return r;
>>>  }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> index b6e82d75561f..efa270288029 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>>  static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>  {
>>>          struct dma_fence *fence = NULL;
>>> -        struct amdgpu_bo *bo = NULL;
>>> +        struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>          long r;
>>>
>>> -        r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> -                                      AMDGPU_GEM_DOMAIN_VRAM,
>>> -                                      &bo, NULL, NULL);
>>> +        r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>          if (r)
>>>                  return r;
>>>
>>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>>  error:
>>>          dma_fence_put(fence);
>>> -        amdgpu_bo_unpin(bo);
>>>          amdgpu_bo_unreserve(bo);
>>> -        amdgpu_bo_unref(&bo);
>>>          return r;
>>>  }