[AMD Official Use Only]
<comments inline>
From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx>
Sent: Tuesday, July 13, 2021 3:11 PM To: Wang, Kevin(Yang) <Kevin1.Wang@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx> Cc: Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Min, Frank <Frank.Min@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx> Subject: Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Am 13.07.21 um 05:23 schrieb Kevin Wang:
> 1. using vram aper to access vram if possible > 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is > enabled. > > Signed-off-by: Kevin Wang <kevin1.wang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++------- > 1 file changed, 89 insertions(+), 37 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 2aa2eb5de37a..8f6f605bc459 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -1407,6 +1407,91 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, > return ttm_bo_eviction_valuable(bo, place); > } > > +static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device *adev, loff_t pos, > + uint32_t *value, bool write) Please drop the _vram_ and _align_ part from the name. Just amdgpu_device_mm_access. [kevin]: I will correct it in next patch.
> +{ > + unsigned long flags; > + > + BUG_ON(!IS_ALIGNED(pos, 4)); > + > + spin_lock_irqsave(&adev->mmio_idx_lock, flags); > + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); > + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); > + if (write) > + WREG32_NO_KIQ(mmMM_DATA, *value); > + else > + *value = RREG32_NO_KIQ(mmMM_DATA); > + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); > +} That should still be in amdgpu_device.c and you completely messed up the drm_dev_enter()/drm_dev_exit() annotation. Looks like you are working on an old branch where that is not yet merged? [kevin]: yes, I'm working on amd-staging-drm-next branch, the following patch (from drm-next-misc) is not merged into this branch.
This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.
v5:
Protect more places wher memcopy_to/form_io takes place
Protect IB submissions
v6: Switch to !drm_dev_enter instead of scoping entire code
with brackets.
v7:
Drop guard of HW ring commands emission protection since they
are in GART and not in MMIO.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx>
> + > +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, > + void *buf, size_t size, bool write) > +{ > + while (size) { > + uint64_t aligned_pos = ALIGN_DOWN(pos, 4); > + uint64_t bytes = 4 - (pos & 0x3); > + uint32_t shift = (pos & 0x3) * 8; > + uint32_t mask = 0xffffffff << shift; > + uint32_t value = 0; > + > + if (size < bytes) { > + mask &= 0xffffffff >> (bytes - size) * 8; > + bytes = size; > + } > + > + if (mask != 0xffffffff) { > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false); > + if (write) { > + value &= ~mask; > + value |= (*(uint32_t *)buf << shift) & mask; > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true); > + } else { > + value = (value & mask) >> shift; > + memcpy(buf, &value, bytes); > + } > + } else { > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write); > + } > + > + pos += bytes; > + buf += bytes; > + size -= bytes; > + } > +} > + > +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos, > + void *buf, size_t size, bool write) > +{ > + uint64_t last; > + > +#ifdef CONFIG_64BIT > + last = min(pos + size, adev->gmc.visible_vram_size); > + if (last > pos) { > + void __iomem *addr = adev->mman.aper_base_kaddr + pos; > + size_t count = last - pos; > + > + if (write) { > + memcpy_toio(addr, buf, count); > + mb(); > + amdgpu_device_flush_hdp(adev, NULL); > + } else { > + amdgpu_device_invalidate_hdp(adev, NULL); > + mb(); > + memcpy_fromio(buf, addr, count); > + } > + > + if (count == size) > + return; > + > + pos += count; > + buf += count; > + size -= count; > + } > +#endif I would put this as a separate function into amdgpu_device.c. But all of this should only be the second step since we need a much smaller patch for backporting first. > + > + amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write); > +} > + > /** > * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object. > * > @@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); > struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); > struct amdgpu_res_cursor cursor; > - unsigned long flags; > - uint32_t value = 0; > int ret = 0; > > if (bo->mem.mem_type != TTM_PL_VRAM) > @@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > > amdgpu_res_first(&bo->mem, offset, len, &cursor); > while (cursor.remaining) { > - uint64_t aligned_pos = cursor.start & ~(uint64_t)3; > - uint64_t bytes = 4 - (cursor.start & 3); > - uint32_t shift = (cursor.start & 3) * 8; > - uint32_t mask = 0xffffffff << shift; > - > - if (cursor.size < bytes) { > - mask &= 0xffffffff >> (bytes - cursor.size) * 8; > - bytes = cursor.size; > - } > - > - if (mask != 0xffffffff) { > - spin_lock_irqsave(&adev->mmio_idx_lock, flags); > - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); > - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); > - value = RREG32_NO_KIQ(mmMM_DATA); > - if (write) { > - value &= ~mask; > - value |= (*(uint32_t *)buf << shift) & mask; > - WREG32_NO_KIQ(mmMM_DATA, value); > - } > - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); > - if (!write) { > - value = (value & mask) >> shift; > - memcpy(buf, &value, bytes); > - } This here is the problematic part and should use amdgpu_ttm_vram_access() instead. That should be implemented first since we might need to backport that. Regards, Christian. > - } else { > - bytes = cursor.size & ~0x3ULL; > - amdgpu_device_vram_access(adev, cursor.start, > - (uint32_t *)buf, bytes, > - write); > - } > - > - ret += bytes; > - buf = (uint8_t *)buf + bytes; > - amdgpu_res_next(&cursor, bytes); > + amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write); > + ret += cursor.size; > + buf += cursor.size; > + amdgpu_res_next(&cursor, cursor.size); > } > > return ret; |
_______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx