On 6/24/2024 5:19 PM, Christian König wrote: > Am 24.06.24 um 11:52 schrieb Lazar, Lijo: >> >> On 6/24/2024 3:08 PM, Christian König wrote: >>> Am 24.06.24 um 08:34 schrieb Lazar, Lijo: >>>> On 6/24/2024 12:01 PM, Vignesh Chander wrote: >>>>> correctly handle the case when trylock fails when gpu is >>>>> about to be reset by dropping the request instead of using mmio >>>>> >>>>> Signed-off-by: Vignesh Chander <Vignesh.Chander@xxxxxxx> >>>> Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx> >>>> >>>> Thanks, >>>> Lijo >>>> >>>>> --- >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 >>>>> ++++++++++++---------- >>>>> 1 file changed, 21 insertions(+), 17 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>> index a7ce8280b17ce7..3cfd24699d691d 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>> @@ -613,10 +613,11 @@ uint32_t amdgpu_device_rreg(struct >>>>> amdgpu_device *adev, >>>>> if ((reg * 4) < adev->rmmio_size) { >>>>> if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && >>>>> - amdgpu_sriov_runtime(adev) && >>>>> - down_read_trylock(&adev->reset_domain->sem)) { >>>>> - ret = amdgpu_kiq_rreg(adev, reg, 0); >>>>> - up_read(&adev->reset_domain->sem); >>>>> + amdgpu_sriov_runtime(adev) { >>>>> + if (down_read_trylock(&adev->reset_domain->sem)) { >>>>> + ret = amdgpu_kiq_rreg(adev, reg, 0); >>>>> + up_read(&adev->reset_domain->sem); >>>>> + } >>> What has actually changed here? As far as I can see that isn't >>> functionally different to the existing code. >>> >> In earlier logic, if it fails to get the lock, it takes the 'else' path. >> The 'else' path is not meant for sriov/vf. > > Yeah, but the read or write is then just silently dropped. > > That can't be correct either. > These are void funcs. Moreover, the drops will happen if there is concurrent access from another thread while a reset is going on. That is expected and those accesses during a reset won't help anyway. Thanks, Lijo > Regards, > Christian. > >> >> Thanks, >> Lijo >> >>> Regards, >>> Christian. >>> >>>>> } else { >>>>> ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); >>>>> } >>>>> @@ -681,10 +682,11 @@ uint32_t amdgpu_device_xcc_rreg(struct >>>>> amdgpu_device *adev, >>>>> &rlcg_flag)) { >>>>> ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, >>>>> GET_INST(GC, xcc_id)); >>>>> } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && >>>>> - amdgpu_sriov_runtime(adev) && >>>>> - down_read_trylock(&adev->reset_domain->sem)) { >>>>> - ret = amdgpu_kiq_rreg(adev, reg, xcc_id); >>>>> - up_read(&adev->reset_domain->sem); >>>>> + amdgpu_sriov_runtime(adev) { >>>>> + if (down_read_trylock(&adev->reset_domain->sem)) { >>>>> + ret = amdgpu_kiq_rreg(adev, reg, xcc_id); >>>>> + up_read(&adev->reset_domain->sem); >>>>> + } >>>>> } else { >>>>> ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); >>>>> } >>>>> @@ -740,10 +742,11 @@ void amdgpu_device_wreg(struct amdgpu_device >>>>> *adev, >>>>> if ((reg * 4) < adev->rmmio_size) { >>>>> if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && >>>>> - amdgpu_sriov_runtime(adev) && >>>>> - down_read_trylock(&adev->reset_domain->sem)) { >>>>> - amdgpu_kiq_wreg(adev, reg, v, 0); >>>>> - up_read(&adev->reset_domain->sem); >>>>> + amdgpu_sriov_runtime(adev) { >>>>> + if (down_read_trylock(&adev->reset_domain->sem)) { >>>>> + amdgpu_kiq_wreg(adev, reg, v, 0); >>>>> + up_read(&adev->reset_domain->sem); >>>>> + } >>>>> } else { >>>>> writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); >>>>> } >>>>> @@ -812,11 +815,12 @@ void amdgpu_device_xcc_wreg(struct >>>>> amdgpu_device *adev, >>>>> &rlcg_flag)) { >>>>> amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, >>>>> GET_INST(GC, xcc_id)); >>>>> } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && >>>>> - amdgpu_sriov_runtime(adev) && >>>>> - down_read_trylock(&adev->reset_domain->sem)) { >>>>> - amdgpu_kiq_wreg(adev, reg, v, xcc_id); >>>>> - up_read(&adev->reset_domain->sem); >>>>> - } else { >>>>> + amdgpu_sriov_runtime(adev) { >>>>> + if (down_read_trylock(&adev->reset_domain->sem)) { >>>>> + amdgpu_kiq_wreg(adev, reg, v, xcc_id); >>>>> + up_read(&adev->reset_domain->sem); >>>>> + } >>>>> + } else { >>>>> writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); >>>>> } >>>>> } else { >