Expose ras table version & schema info to sysfs Signed-off-by: Asad Kamal <asad.kamal@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 58 +++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 11 +++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9c8203e87859..362e32350af7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1370,6 +1370,22 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, return sysfs_emit(buf, "feature mask: 0x%x\n", con->features); } +static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, version_attr); + return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version); +} + +static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, schema_attr); + return sysfs_emit(buf, "schema: 0x%x\n", con->schema); +} + static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1379,11 +1395,13 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) RAS_FS_NAME); } -static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) +static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct attribute *attrs[] = { &con->features_attr.attr, + &con->version_attr.attr, + &con->schema_attr.attr, NULL }; struct attribute_group group = { @@ -1459,7 +1477,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) if (amdgpu_bad_page_threshold != 0) amdgpu_ras_sysfs_remove_bad_page_node(adev); - amdgpu_ras_sysfs_remove_feature_node(adev); + amdgpu_ras_sysfs_remove_dev_attr_node(adev); return 0; } @@ -1582,6 +1600,10 @@ static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, amdgpu_ras_sysfs_badpages_read, NULL, 0); static DEVICE_ATTR(features, S_IRUGO, amdgpu_ras_sysfs_features_read, NULL); +static DEVICE_ATTR(version, 0444, + amdgpu_ras_sysfs_version_show, NULL); +static DEVICE_ATTR(schema, 0444, + amdgpu_ras_sysfs_schema_show, NULL); static int amdgpu_ras_fs_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1590,6 +1612,8 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) }; struct attribute *attrs[] = { &con->features_attr.attr, + &con->version_attr.attr, + &con->schema_attr.attr, NULL }; struct bin_attribute *bin_attrs[] = { @@ -1598,11 +1622,20 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) }; int r; + group.attrs = attrs; + /* add features entry */ con->features_attr = dev_attr_features; - group.attrs = attrs; sysfs_attr_init(attrs[0]); + /* add version entry */ + con->version_attr = dev_attr_version; + sysfs_attr_init(attrs[1]); + + /* add schema entry */ + con->schema_attr = dev_attr_schema; + sysfs_attr_init(attrs[2]); + if (amdgpu_bad_page_threshold != 0) { /* add bad_page_features entry */ bin_attr_gpu_vram_bad_pages.private = NULL; @@ -2594,6 +2627,21 @@ static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) } } +static int amdgpu_get_ras_schema(struct amdgpu_device *adev) +{ + if (amdgpu_ras_asic_supported(adev)) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(13, 0, 6): + return AMDGPU_RAS_POISON_SUPPORTED | AMDGPU_RAS_SBC_SUPPORTED | + AMDGPU_RAS_DED_SUPPORTED | AMDGPU_RAS_PARITY_SUPPORTED; + default: + return 0; + } + + } else + return 0; +} + int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2636,10 +2684,14 @@ int amdgpu_ras_init(struct amdgpu_device *adev) con->update_channel_flag = false; con->features = 0; + con->schema = 0; INIT_LIST_HEAD(&con->head); /* Might need get this flag from vbios. */ con->flags = RAS_DEFAULT_FLAGS; + /* Get RAS schema for particular SOC */ + con->schema = amdgpu_get_ras_schema(adev); + /* initialize nbio ras function ahead of any other * ras functions so hardware fatal error interrupt * can be enabled as early as possible */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 7999d202c9bc..ebd53736606c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -38,6 +38,14 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_INST_MASK 0xfffff000 #define AMDGPU_RAS_INST_SHIFT 0xc +/* Bit mask for RAS error types, SBC=Single bit correctable error + * DED = Double Bit error detection + */ +#define AMDGPU_RAS_PARITY_SUPPORTED BIT(0) +#define AMDGPU_RAS_SBC_SUPPORTED BIT(1) +#define AMDGPU_RAS_DED_SUPPORTED BIT(2) +#define AMDGPU_RAS_POISON_SUPPORTED BIT(3) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -389,9 +397,12 @@ struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ uint32_t features; + uint32_t schema; struct list_head head; /* sysfs */ struct device_attribute features_attr; + struct device_attribute version_attr; + struct device_attribute schema_attr; struct bin_attribute badpages_attr; struct dentry *de_ras_eeprom_table; /* block array */ -- 2.42.0