Add
@Min, Frank for the review.
Regards,
Hawking
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx>
On Behalf Of Wang, Kevin(Yang)
Sent: Monday, June 28, 2021 10:44
To: Lazar, Lijo <Lijo.Lazar@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>
Subject: Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
[AMD Official Use Only]
[AMD Official Use Only]
On 6/25/2021 8:54 AM, Kevin Wang wrote:
> 1. add non-aligned address support in amdgpu_device_vram_access()
> 2. reduce duplicate code in amdgpu_ttm_access_memory()
>
> because the MM_INDEX{HI}/DATA are protected register, it is not working
> in sriov environment when mmio protect feature is enabled (in some asics),
> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
> to handle non-aligned address by default, it will cause the register(MM_DATA)
> is always return 0.
>
> with the patch, the memory will be accessed in the aper way first.
> (in visible memory or enable pcie large-bar feature), then using
> MM_INDEX{HI}/MM_DATA to access rest vram memroy.
>
> Signed-off-by: Kevin Wang <kevin1.wang@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 42 ++-----------
> 3 files changed, 58 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d14b4968a026..8095d9a9c857 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
> int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>
> void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> - uint32_t *buf, size_t size, bool write);
> + void *buf, size_t size, bool write);
> uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
> uint32_t reg, uint32_t acc_flags);
> void amdgpu_device_wreg(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e6702d136a6d..2047e3c2b365 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
> amdgpu_acpi_is_power_shift_control_supported());
> }
>
> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
> + uint32_t *value, bool write)
> +{
> + BUG_ON(!IS_ALIGNED(pos, 4));
> +
> + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> + if (write)
> + WREG32_NO_KIQ(mmMM_DATA, *value);
> + else
> + *value = RREG32_NO_KIQ(mmMM_DATA);
> +}
> +
> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + while (size) {
> + uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> + uint64_t bytes = 4 - (pos & 0x3);
> + uint32_t shift = (pos & 0x3) * 8;
> + uint32_t mask = 0xffffffff << shift;
> + uint32_t value = 0;
> +
> + if (size < bytes) {
> + mask &= 0xffffffff >> (bytes - size) * 8;
> + bytes = size;
> + }
> +
> + if (mask != 0xffffffff) {
> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
> + if (write) {
> + value &= ~mask;
> + value |= (*(uint32_t *)buf << shift) & mask;
> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
> + } else {
> + value = (value & mask) >> shift;
> + memcpy(buf, &value, bytes);
> + }
> + } else {
> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
> + }
> +
> + pos += bytes;
> + buf += bytes;
> + size -= bytes;
> + }
> +}
> +
> /*
> * VRAM access helper functions
> */
> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
> * @write: true - write to vram, otherwise - read from vram
> */
> void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> - uint32_t *buf, size_t size, bool write)
> + void *buf, size_t size, bool write)
> {
> unsigned long flags;
> - uint32_t hi = ~0;
> uint64_t last;
>
> -
> #ifdef CONFIG_64BIT
> last = min(pos + size, adev->gmc.visible_vram_size);
> if (last > pos) {
> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> return;
>
> pos += count;
> - buf += count / 4;
> + buf += count;
> size -= count;
> }
> #endif
> -
> spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> - for (last = pos + size; pos < last; pos += 4) {
> - uint32_t tmp = pos >> 31;
> -
> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> - if (tmp != hi) {
> - WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
> - hi = tmp;
> - }
> - if (write)
> - WREG32_NO_KIQ(mmMM_DATA, *buf++);
> - else
> - *buf++ = RREG32_NO_KIQ(mmMM_DATA);
> - }
> + amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
> spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 6297363ab740..cf5b8bdc2dd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> struct amdgpu_res_cursor cursor;
> - unsigned long flags;
> - uint32_t value = 0;
> int ret = 0;
>
> if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
> amdgpu_res_first(&bo->mem, offset, len, &cursor);
> while (cursor.remaining) {
> - uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> - uint64_t bytes = 4 - (cursor.start & 3);
> - uint32_t shift = (cursor.start & 3) * 8;
> - uint32_t mask = 0xffffffff << shift;
> -
> - if (cursor.size < bytes) {
> - mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> - bytes = cursor.size;
> - }
> + amdgpu_device_vram_access(adev, cursor.start,
> + buf, cursor.size,
> + write);
>
> - if (mask != 0xffffffff) {
> - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
The new logic seems to skip this mmio_idx_lock for accessing index/data
pair ragisters. BTW, where is the visible memory aperture check?
Thanks
Lijo
the mmio_idx_lock lock is in amdgpu_device_vram_access() function.
this patch is extending the amdgpu_device_vram_access() function to support un-aligned address support.
> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> - value = RREG32_NO_KIQ(mmMM_DATA);
> - if (write) {
> - value &= ~mask;
> - value |= (*(uint32_t *)buf << shift) & mask;
> - WREG32_NO_KIQ(mmMM_DATA, value);
> - }
> - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> - if (!write) {
> - value = (value & mask) >> shift;
> - memcpy(buf, &value, bytes);
> - }
> - } else {
> - bytes = cursor.size & ~0x3ULL;
> - amdgpu_device_vram_access(adev, cursor.start,
> - (uint32_t *)buf, bytes,
> - write);
> - }
> -
> - ret += bytes;
> - buf = (uint8_t *)buf + bytes;
> - amdgpu_res_next(&cursor, bytes);
> + ret += cursor.size;
> + buf += cursor.size;
> + amdgpu_res_next(&cursor, cursor.size);
> }
>
> return ret;
>
|