Re: [RFC v2 01/15] drm/amdgpu: add helper functions to track status for ras manager

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 1/13/2025 7:12 AM, Jiang Liu wrote:
> Add helper functions to track status for ras manager and ip blocks.
> 
> Signed-off-by: Jiang Liu <gerry@xxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 38 +++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 +++++++
>  3 files changed, 85 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 5e55a44f9eef..f0f773659faf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -377,12 +377,28 @@ int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
>  
>  #define AMDGPU_MAX_IP_NUM 16
>  
> +enum amdgpu_marker {
> +	// Markers for IRQs, used for both ip blocks and ras blocks.
> +	AMDGPU_MARKER_IRQ0 = 32,
> +	AMDGPU_MARKER_IRQ1,
> +	AMDGPU_MARKER_IRQ2,
> +	AMDGPU_MARKER_IRQ3,
> +	AMDGPU_MARKER_IRQ4,
> +	AMDGPU_MARKER_IRQ5,
> +	AMDGPU_MARKER_IRQ6,
> +	AMDGPU_MARKER_IRQ7,
> +	AMDGPU_MARKER_IRQ_MAX = 63,
> +};
> +
> +#define AMDGPU_MARKER_IRQ(idx)		(AMDGPU_MARKER_IRQ0 + (idx))
> +
>  struct amdgpu_ip_block_status {
>  	bool valid;
>  	bool sw;
>  	bool hw;
>  	bool late_initialized;
>  	bool hang;
> +	uint64_t markers;
>  };
>  

Maintaining these fine-grained levels at the IP layer doesn't look like a
proper solution. Either the IP or RAS block has the required IRQs
enabled, or it has them disabled. Unwinding them needs to be tracked at
the IRQ object layer and not here.

Thanks,
Lijo

>  struct amdgpu_ip_block_version {
> @@ -410,6 +426,28 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
>  int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
>  			       const struct amdgpu_ip_block_version *ip_block_version);
>  
> +static inline void amdgpu_ip_block_set_marker(struct amdgpu_ip_block *ip_block,
> +					      enum amdgpu_marker marker)
> +{
> +	WARN_ON(marker > 63);
> +	WARN_ON(ip_block->status.markers & (0x1ull << marker));
> +	ip_block->status.markers |= 0x1ull << (int)marker;
> +}
> +
> +static inline bool amdgpu_ip_block_test_and_clear_marker(struct amdgpu_ip_block *ip_block,
> +							 enum amdgpu_marker marker)
> +{
> +	bool set = false;
> +	uint64_t value = 0x1ull << (int)marker;
> +
> +	if ((ip_block->status.markers & value) != 0) {
> +		ip_block->status.markers &= ~value;
> +		set = true;
> +	}
> +
> +	return set;
> +}
> +
>  /*
>   * BIOS.
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index f0924aa3f4e4..5e19d820ab34 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -5207,3 +5207,40 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
>  
>  	return con->is_rma;
>  }
> +
> +bool amdgpu_ras_test_marker(struct amdgpu_device *adev,
> +			    struct ras_common_if *head, int marker)
> +{
> +	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
> +
> +	if (obj && obj->markers & (0x1ull << marker))
> +		return true;
> +
> +	return false;
> +}
> +
> +void amdgpu_ras_set_marker(struct amdgpu_device *adev,
> +			   struct ras_common_if *head, int marker)
> +{
> +	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
> +
> +	WARN_ON(marker > 63);
> +	WARN_ON(obj->markers & (0x1ull << marker));
> +	if (obj)
> +		obj->markers |= 0x1ull << marker;
> +}
> +
> +bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev,
> +				      struct ras_common_if *head, int marker)
> +{
> +	bool set = false;
> +	uint64_t value = 0x1ull << marker;
> +	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
> +
> +	if (obj && (obj->markers & value) != 0) {
> +		obj->markers &= ~value;
> +		set = true;
> +	}
> +
> +	return set;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 82db986c36a0..35881087b17b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -634,6 +634,8 @@ struct ras_manager {
>  	struct ras_common_if head;
>  	/* reference count */
>  	int use;
> +	/* Flags for status tracking */
> +	uint64_t markers;
>  	/* ras block link */
>  	struct list_head node;
>  	/* the device */
> @@ -977,4 +979,12 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
>  				const char *fmt, ...);
>  
>  bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
> +
> +bool amdgpu_ras_test_marker(struct amdgpu_device *adev,
> +			    struct ras_common_if *head, int marker);
> +void amdgpu_ras_set_marker(struct amdgpu_device *adev,
> +			   struct ras_common_if *head, int marker);
> +bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev,
> +				      struct ras_common_if *head,
> +				      int marker);
>  #endif




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux