Re: [PATCH v2] drm/amdgpu: Expose ras version & schema info

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 10/11/2023 2:55 PM, Asad Kamal wrote:
Expose ras table version & schema info to sysfs

v2: Updated schema to get poison support info
from ras context, removed asic specific checks

Signed-off-by: Asad Kamal <asad.kamal@xxxxxxx>

One nit inline. With/without that change,

Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx>

---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 51 +++++++++++++++++++++++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  3 ++
  2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9c8203e87859..cb9e48fb40d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1370,6 +1370,22 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
  	return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
  }
+static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct amdgpu_ras *con =
+		container_of(attr, struct amdgpu_ras, version_attr);
+	return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version);
+}
+
+static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct amdgpu_ras *con =
+		container_of(attr, struct amdgpu_ras, schema_attr);
+	return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
+}
+
  static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
  {
  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1379,11 +1395,13 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
  				RAS_FS_NAME);
  }
-static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
+static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
  {
  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
  	struct attribute *attrs[] = {
  		&con->features_attr.attr,
+		&con->version_attr.attr,

+		&con->schema_attr.attr,
  		NULL
  	};
  	struct attribute_group group = {
@@ -1459,7 +1477,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
  	if (amdgpu_bad_page_threshold != 0)
  		amdgpu_ras_sysfs_remove_bad_page_node(adev);
- amdgpu_ras_sysfs_remove_feature_node(adev);
+	amdgpu_ras_sysfs_remove_dev_attr_node(adev);
return 0;
  }
@@ -1582,6 +1600,10 @@ static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
  		amdgpu_ras_sysfs_badpages_read, NULL, 0);
  static DEVICE_ATTR(features, S_IRUGO,
  		amdgpu_ras_sysfs_features_read, NULL);
+static DEVICE_ATTR(version, 0444,
+		amdgpu_ras_sysfs_version_show, NULL);
+static DEVICE_ATTR(schema, 0444,
+		amdgpu_ras_sysfs_schema_show, NULL);
  static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
  {
  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1590,6 +1612,8 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
  	};
  	struct attribute *attrs[] = {
  		&con->features_attr.attr,
+		&con->version_attr.attr,
+		&con->schema_attr.attr,
  		NULL
  	};
  	struct bin_attribute *bin_attrs[] = {
@@ -1598,11 +1622,20 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
  	};
  	int r;
+ group.attrs = attrs;
+
  	/* add features entry */
  	con->features_attr = dev_attr_features;
-	group.attrs = attrs;
  	sysfs_attr_init(attrs[0]);
+ /* add version entry */
+	con->version_attr = dev_attr_version;
+	sysfs_attr_init(attrs[1]);
+
+	/* add schema entry */
+	con->schema_attr = dev_attr_schema;
+	sysfs_attr_init(attrs[2]);
+
  	if (amdgpu_bad_page_threshold != 0) {
  		/* add bad_page_features entry */
  		bin_attr_gpu_vram_bad_pages.private = NULL;
@@ -2594,6 +2627,14 @@ static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
  	}
  }
+static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
+{
+	return  (amdgpu_ras_is_poison_mode_supported(adev) << (ffs(AMDGPU_RAS_ERROR__POISON) - 1)) |

It's simpler and more readable with a ternary operator.

Thanks,
Lijo

+			AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE |
+			AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE |
+			AMDGPU_RAS_ERROR__PARITY;
+}
+
  int amdgpu_ras_init(struct amdgpu_device *adev)
  {
  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -2636,6 +2677,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
con->update_channel_flag = false;
  	con->features = 0;
+	con->schema = 0;
  	INIT_LIST_HEAD(&con->head);
  	/* Might need get this flag from vbios. */
  	con->flags = RAS_DEFAULT_FLAGS;
@@ -2691,6 +2733,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
amdgpu_ras_query_poison_mode(adev); + /* Get RAS schema for particular SOC */
+	con->schema = amdgpu_get_ras_schema(adev);
+
  	if (amdgpu_ras_fs_init(adev)) {
  		r = -EINVAL;
  		goto release_con;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 7999d202c9bc..728f98c6fc1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -389,9 +389,12 @@ struct amdgpu_ras {
  	/* ras infrastructure */
  	/* for ras itself. */
  	uint32_t features;
+	uint32_t schema;
  	struct list_head head;
  	/* sysfs */
  	struct device_attribute features_attr;
+	struct device_attribute version_attr;
+	struct device_attribute schema_attr;
  	struct bin_attribute badpages_attr;
  	struct dentry *de_ras_eeprom_table;
  	/* block array */



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux