Re: [PATCH 2/9] drm/amdgpu: Add helper funcs for jpeg devcoredump

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 1/28/2025 5:06 PM, Sundararaju, Sathishkumar wrote:
> Hi Lijo,
> 
> 
> On 1/28/2025 3:04 PM, Lazar, Lijo wrote:
>>
>> On 1/28/2025 2:39 PM, Sathishkumar S wrote:
>>> Add devcoredump helper functions that can be reused for all jpeg
>>> versions.
>>>
>>> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@xxxxxxx>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 59 ++++++++++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h |  7 +++
>>>   2 files changed, 66 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/
>>> drm/amd/amdgpu/amdgpu_jpeg.c
>>> index b6d2eb049f54..70f1e0e88f4b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
>>> @@ -452,3 +452,62 @@ void amdgpu_jpeg_sysfs_reset_mask_fini(struct
>>> amdgpu_device *adev)
>>>               device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
>>>       }
>>>   }
>>> +
>>> +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block,
>>> +                   const struct amdgpu_hwip_reg_entry *reg, u32
>>> reg_count)
>>> +{
>>> +    struct amdgpu_device *adev = ip_block->adev;
>>> +    u32 inst_off, inst_id, is_powered;
>>> +    int i, j;
>>> +
>>> +    if (!adev->jpeg.ip_dump)
>>> +        return;
>>> +
>>> +    for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>>> +        if (adev->jpeg.harvest_config & (1 << i))
>>> +            continue;
>>> +
>>> +        inst_id = GET_INST(JPEG, i);
>>> +        inst_off = i * reg_count;
>>> +        /* check power status from UVD_JPEG_POWER_STATUS */
>>> +        adev->jpeg.ip_dump[inst_off] =
>>> RREG32(SOC15_REG_ENTRY_OFFSET_INST(reg[0],
>>> +                                          inst_id));
>>> +        is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
>>> +
>>> +        if (is_powered)
>>> +            for (j = 1; j < reg_count; j++)
>>> +                adev->jpeg.ip_dump[inst_off + j] =
>>> +                    RREG32(SOC15_REG_ENTRY_OFFSET_INST(reg[j],
>>> +                                       inst_id));
>>> +    }
>>> +}
>>> +
>>> +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block,
>>> struct drm_printer *p,
>>> +                const struct amdgpu_hwip_reg_entry *reg, u32 reg_count)
>>> +{
>>> +    struct amdgpu_device *adev = ip_block->adev;
>>> +    u32 inst_off, is_powered;
>>> +    int i, j;
>>> +
>>> +    if (!adev->jpeg.ip_dump)
>>> +        return;
>>> +
>>> +    drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
>>> +    for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>>> +        if (adev->jpeg.harvest_config & (1 << i)) {
>>> +            drm_printf(p, "\nHarvested Instance:JPEG%d Skipping
>>> dump\n", i);
>>> +            continue;
>>> +        }
>>> +
>>> +        inst_off = i * reg_count;
>>> +        is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
>>> +
>>> +        if (is_powered) {
>>> +            drm_printf(p, "Active Instance:JPEG%d\n", i);
>>> +            for (j = 0; j < reg_count; j++)
>>> +                drm_printf(p, "%-50s \t 0x%08x\n", reg[j].reg_name,
>>> +                       adev->jpeg.ip_dump[inst_off + j]);
>>> +        } else
>>> +            drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
>>> +    }
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/
>>> drm/amd/amdgpu/amdgpu_jpeg.h
>>> index eb2096dcf1a6..1d334f35d8a8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
>>> @@ -92,6 +92,8 @@
>>>           *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value;    \
>>>       } while (0)
>>>   +struct amdgpu_hwip_reg_entry;
>>> +
>>>   enum amdgpu_jpeg_caps {
>>>       AMDGPU_JPEG_RRMT_ENABLED,
>>>   };
>>> @@ -137,6 +139,7 @@ struct amdgpu_jpeg {
>>>       bool    indirect_sram;
>>>       uint32_t supported_reset;
>>>       uint32_t caps;
>>> +    u32 *ip_dump;
>> It's better to keep this at per jpeg instance level (amdgpu_jpeg_inst).
>> If the hang happens for a single jpeg ring, that will help rather than
>> dumping out reg settings for all instances.
> The devcoredump infra in amdgpu does not propagate the job itself or the
> hung instance, so there is no feasible way of doing this, meaning
> dumping only the affected instance.
> And if every instance is represented by an ip_block then I can implement
> this with a flag to handle per instance case, but I doubt it is, as seen
> from the below commit
> fba4761ca00f drm/amdgpu: partially revert VCN IP block instancing support
> 

Even if the API in its current form doesn't support per instance dump,
suggest to keept the reg_dump data struct in jpeg_instance. That way it
doesn't need any calculation to find the right offset etc.

On the other hand, a single copy may be maintained for the reg address
list if that is feasible. As I see the address calculation is dynamic
based on the instance id and it may not make sense to keep it in all
instances.

Thanks,
Lijo

> Regards,
> Sathish
>>
>> Thanks,
>> Lijo
>>
>>>   };
>>>     int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
>>> @@ -161,5 +164,9 @@ int amdgpu_jpeg_psp_update_sram(struct
>>> amdgpu_device *adev, int inst_idx,
>>>   void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
>>>   int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
>>>   void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
>>> +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block,
>>> +                   const struct amdgpu_hwip_reg_entry *reg, u32
>>> reg_count);
>>> +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block,
>>> struct drm_printer *p,
>>> +                const struct amdgpu_hwip_reg_entry *reg, u32
>>> reg_count);
>>>     #endif /*__AMDGPU_JPEG_H__*/
> 




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux