[AMD Official Use Only - General]

OK, I see. Thanks for the explanations.

BR
Evan

> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig@xxxxxxx>
> Sent: Tuesday, March 21, 2023 1:32 AM
> To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; Christian König
> <ckoenig.leichtzumerken@xxxxxxxxx>; Quan, Evan <Evan.Quan@xxxxxxx>;
> amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>
> Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access performance
> using sdma
>
> Ah, yes! GART doesn't need to be ready to make a GTT allocation.
>
> When GART becomes ready, it will be filled with all the buffers which
> were allocated before it was ready.
>
> So this is perfectly fine.
>
> Thanks,
> Christian.
>
> Am 20.03.23 um 18:24 schrieb Kim, Jonathan:
> > [Public]
> >
> > This was a long time ago, but I think we agreed allocation was OK
> > before GART was ready.
> > IIRC, a related scenario was also mentioned where APUs needed to work
> > without VRAM but allocations were still required (I don't know the
> > details regarding that).
> > I vaguely remember that requiring GART readiness for the bounce buffer
> > allocation caused some problems elsewhere.
> > Have any problems been observed with the bounce buffer being allocated
> > before GART is ready?
> >
> > Thanks,
> >
> > Jon
> >> -----Original Message-----
> >> From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx>
> >> Sent: Monday, March 20, 2023 1:02 PM
> >> To: Quan, Evan <Evan.Quan@xxxxxxx>; Kim, Jonathan
> >> <Jonathan.Kim@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> >> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Koenig, Christian
> >> <Christian.Koenig@xxxxxxx>
> >> Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access
> >> performance using sdma
> >>
> >> Caution: This message originated from an External Source. Use proper
> >> caution when opening attachments, clicking links, or responding.
> >>
> >>
> >> I don't think so. Have we recently re-ordered something here?
> >>
> >> Christian.
> >>
> >> Am 20.03.23 um 08:05 schrieb Quan, Evan:
> >>> [AMD Official Use Only - General]
> >>>
> >>> I happened to find that the sdma_access_bo allocation from GTT seems
> >>> to be performed before GART is ready.
> >>> That means "amdgpu_gart_map" is skipped since adev->gart.ptr is
> >>> still NULL.
> >>> Is that done intentionally?
> >>>
> >>> Evan
> >>>> -----Original Message-----
> >>>> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of
> >>>> Jonathan Kim
> >>>> Sent: Wednesday, January 5, 2022 3:12 AM
> >>>> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> >>>> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Kim, Jonathan
> >>>> <Jonathan.Kim@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>
> >>>> Subject: [PATCH] drm/amdgpu: improve debug VRAM access performance
> >>>> using sdma
> >>>>
> >>>> For better performance during VRAM access for debugged processes,
> >>>> do read/write copies over SDMA.
> >>>>
> >>>> In order to support post-mortem debugging on a broken device, fall
> >>>> back to stable MMIO access when GPU recovery is disabled or when job
> >>>> submission timeouts are set to max. Failed SDMA access should
> >>>> automatically fall back to MMIO access.
> >>>>
> >>>> Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
> >>>> page-table updates and TLB flushes on access.
> >>>>
> >>>> Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
> >>>> ---
> >>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78 +++++++++++++++++++++++++
> >>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
> >>>>  2 files changed, 82 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >>>> index 367abed1d6e6..512df4c09772 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >>>> @@ -48,6 +48,7 @@
> >>>>  #include <drm/ttm/ttm_range_manager.h>
> >>>>
> >>>>  #include <drm/amdgpu_drm.h>
> >>>> +#include <drm/drm_drv.h>
> >>>>
> >>>>  #include "amdgpu.h"
> >>>>  #include "amdgpu_object.h"
> >>>> @@ -1429,6 +1430,70 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
> >>>>  	}
> >>>>  }
> >>>>
> >>>> +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
> >>>> +					unsigned long offset, void *buf,
> >>>> +					int len, int write)
> >>>> +{
> >>>> +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> >>>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> >>>> +	struct amdgpu_job *job;
> >>>> +	struct dma_fence *fence;
> >>>> +	uint64_t src_addr, dst_addr;
> >>>> +	unsigned int num_dw;
> >>>> +	int r, idx;
> >>>> +
> >>>> +	if (len != PAGE_SIZE)
> >>>> +		return -EINVAL;
> >>>> +
> >>>> +	if (!adev->mman.sdma_access_ptr)
> >>>> +		return -EACCES;
> >>>> +
> >>>> +	r = drm_dev_enter(adev_to_drm(adev), &idx);
> >>>> +	if (r)
> >>>> +		return r;
> >>>> +
> >>>> +	if (write)
> >>>> +		memcpy(adev->mman.sdma_access_ptr, buf, len);
> >>>> +
> >>>> +	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> >>>> +	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
> >>>> +	if (r)
> >>>> +		goto out;
> >>>> +
> >>>> +	src_addr = write ? amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo) :
> >>>> +			amdgpu_bo_gpu_offset(abo);
> >>>> +	dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
> >>>> +			amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
> >>>> +	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false);
> >>>> +
> >>>> +	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
> >>>> +	WARN_ON(job->ibs[0].length_dw > num_dw);
> >>>> +
> >>>> +	r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
> >>>> +	if (r) {
> >>>> +		amdgpu_job_free(job);
> >>>> +		goto out;
> >>>> +	}
> >>>> +
> >>>> +	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
> >>>> +		r = -ETIMEDOUT;
> >>>> +	dma_fence_put(fence);
> >>>> +
> >>>> +	if (!(r || write))
> >>>> +		memcpy(buf, adev->mman.sdma_access_ptr, len);
> >>>> +out:
> >>>> +	drm_dev_exit(idx);
> >>>> +	return r;
> >>>> +}
> >>>> +
> >>>> +static inline bool amdgpu_ttm_allow_post_mortem_debug(struct amdgpu_device *adev)
> >>>> +{
> >>>> +	return amdgpu_gpu_recovery == 0 ||
> >>>> +		adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
> >>>> +		adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
> >>>> +		adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
> >>>> +		adev->video_timeout == MAX_SCHEDULE_TIMEOUT;
> >>>> +}
> >>>> +
> >>>>  /**
> >>>>   * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
> >>>>   *
> >>>> @@ -1453,6 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> >>>>  	if (bo->resource->mem_type != TTM_PL_VRAM)
> >>>>  		return -EIO;
> >>>>
> >>>> +	if (!amdgpu_ttm_allow_post_mortem_debug(adev) &&
> >>>> +			!amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
> >>>> +		return len;
> >>>> +
> >>>>  	amdgpu_res_first(bo->resource, offset, len, &cursor);
> >>>>  	while (cursor.remaining) {
> >>>>  		size_t count, size = cursor.size;
> >>>> @@ -1793,6 +1862,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> >>>>  		return r;
> >>>>  	}
> >>>>
> >>>> +	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> >>>> +				AMDGPU_GEM_DOMAIN_GTT,
> >>>> +				&adev->mman.sdma_access_bo, NULL,
> >>>> +				&adev->mman.sdma_access_ptr))
> >>>> +		DRM_WARN("Debug VRAM access will use slowpath MM access\n");
> >>>> +
> >>>>  	return 0;
> >>>>  }
> >>>>
> >>>> @@ -1823,6 +1898,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
> >>>>  	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
> >>>>  	ttm_device_fini(&adev->mman.bdev);
> >>>>  	adev->mman.initialized = false;
> >>>> +	if (adev->mman.sdma_access_ptr)
> >>>> +		amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
> >>>> +					&adev->mman.sdma_access_ptr);
> >>>>  	DRM_INFO("amdgpu: ttm finalized\n");
> >>>>  }
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> >>>> index 91a087f9dc7c..b0116c4a768f 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> >>>> @@ -98,6 +98,10 @@ struct amdgpu_mman {
> >>>>  	u64		fw_vram_usage_size;
> >>>>  	struct amdgpu_bo	*fw_vram_usage_reserved_bo;
> >>>>  	void		*fw_vram_usage_va;
> >>>> +
> >>>> +	/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
> >>>> +	struct amdgpu_bo	*sdma_access_bo;
> >>>> +	void			*sdma_access_ptr;
> >>>>  };
> >>>>
> >>>>  struct amdgpu_copy_mem {
> >>>> @@ -193,5 +197,4 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
> >>>>  int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
> >>>>
> >>>>  void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
> >>>> -
> >>>>  #endif
> >>>> --
> >>>> 2.25.1
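
As a footnote to the GART-readiness discussion at the top of the thread: the behaviour Christian describes (a GTT allocation made before GART is up is simply recorded, and the table is filled in once it becomes ready) can be sketched in a few lines of stand-alone C. This is only an illustration of the deferred-binding idea, not amdgpu code; every name below (fake_gart, fake_gart_map, fake_gart_recover) is hypothetical.

/*
 * Stand-alone sketch of deferred binding -- not amdgpu code.
 * A buffer bound before the table exists is only recorded;
 * once the table comes up, every recorded bind is replayed into it.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_BUFFERS 16

struct fake_gart {
	uint64_t *ptr;			/* NULL until the table is "ready" */
	unsigned int num_entries;
};

struct fake_bo {
	unsigned int gpu_offset;	/* entry index in the table */
	uint64_t dma_addr;		/* value the entry should hold */
};

static struct fake_bo *bound[MAX_BUFFERS];
static unsigned int num_bound;

/* Bind one buffer; skip the table write while the table is not ready. */
static void fake_gart_map(struct fake_gart *gart, struct fake_bo *bo)
{
	if (num_bound < MAX_BUFFERS)
		bound[num_bound++] = bo;

	if (!gart->ptr) {
		printf("table not ready, deferring bind of entry %u\n",
		       bo->gpu_offset);
		return;
	}
	gart->ptr[bo->gpu_offset] = bo->dma_addr;
}

/* Called once the table exists: fill it with every earlier allocation. */
static void fake_gart_recover(struct fake_gart *gart)
{
	unsigned int i;

	for (i = 0; i < num_bound; i++)
		gart->ptr[bound[i]->gpu_offset] = bound[i]->dma_addr;
}

int main(void)
{
	struct fake_gart gart = { .ptr = NULL, .num_entries = MAX_BUFFERS };
	struct fake_bo early = { .gpu_offset = 3, .dma_addr = 0x1000 };

	fake_gart_map(&gart, &early);		/* before the table is up */

	gart.ptr = calloc(gart.num_entries, sizeof(*gart.ptr));
	if (!gart.ptr)
		return 1;
	fake_gart_recover(&gart);		/* table ready: replay binds */

	printf("entry 3 = 0x%llx\n", (unsigned long long)gart.ptr[3]);
	free(gart.ptr);
	return 0;
}

The point of the sketch is only that a bind attempted while the table pointer is still NULL is not lost: it is replayed by the recover step once the table is ready, which is why the early sdma_access_bo allocation discussed above is harmless.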