Tip: ffs(AMDGPU_RAS_ERROR__POISON) - 1) == __ffs(AMDGPU_RAS_ERROR__POISON) if you want to get a value which start from 0, you can use __ffs() directly. Best Regards, Kevin -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Lazar, Lijo Sent: Wednesday, October 11, 2023 5:58 PM To: Kamal, Asad <Asad.Kamal@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Ma, Le <Le.Ma@xxxxxxx>; Zhang, Morris <Shiwu.Zhang@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx> Subject: Re: [PATCH v2] drm/amdgpu: Expose ras version & schema info On 10/11/2023 2:55 PM, Asad Kamal wrote: > Expose ras table version & schema info to sysfs > > v2: Updated schema to get poison support info from ras context, > removed asic specific checks > > Signed-off-by: Asad Kamal <asad.kamal@xxxxxxx> One nit inline. With/without that change, Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 51 +++++++++++++++++++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ++ > 2 files changed, 51 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 9c8203e87859..cb9e48fb40d2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -1370,6 +1370,22 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, > return sysfs_emit(buf, "feature mask: 0x%x\n", con->features); > } > > +static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev, > + struct device_attribute *attr, char *buf) { > + struct amdgpu_ras *con = > + container_of(attr, struct amdgpu_ras, version_attr); > + return sysfs_emit(buf, "table version: 0x%x\n", > +con->eeprom_control.tbl_hdr.version); > +} > + > +static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev, > + struct device_attribute *attr, char *buf) { > + struct amdgpu_ras *con = > + container_of(attr, struct amdgpu_ras, schema_attr); > + return sysfs_emit(buf, "schema: 0x%x\n", con->schema); } > + > static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) > { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1379,11 > +1395,13 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) > RAS_FS_NAME); > } > > -static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device > *adev) > +static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device > +*adev) > { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > struct attribute *attrs[] = { > &con->features_attr.attr, > + &con->version_attr.attr, > + &con->schema_attr.attr, > NULL > }; > struct attribute_group group = { > @@ -1459,7 +1477,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) > if (amdgpu_bad_page_threshold != 0) > amdgpu_ras_sysfs_remove_bad_page_node(adev); > > - amdgpu_ras_sysfs_remove_feature_node(adev); > + amdgpu_ras_sysfs_remove_dev_attr_node(adev); > > return 0; > } > @@ -1582,6 +1600,10 @@ static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, > amdgpu_ras_sysfs_badpages_read, NULL, 0); > static DEVICE_ATTR(features, S_IRUGO, > amdgpu_ras_sysfs_features_read, NULL); > +static DEVICE_ATTR(version, 0444, > + amdgpu_ras_sysfs_version_show, NULL); static DEVICE_ATTR(schema, > +0444, > + amdgpu_ras_sysfs_schema_show, NULL); > static int amdgpu_ras_fs_init(struct amdgpu_device *adev) > { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1590,6 > +1612,8 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) > }; > struct attribute *attrs[] = { > &con->features_attr.attr, > + &con->version_attr.attr, > + &con->schema_attr.attr, > NULL > }; > struct bin_attribute *bin_attrs[] = { @@ -1598,11 +1622,20 @@ > static int amdgpu_ras_fs_init(struct amdgpu_device *adev) > }; > int r; > > + group.attrs = attrs; > + > /* add features entry */ > con->features_attr = dev_attr_features; > - group.attrs = attrs; > sysfs_attr_init(attrs[0]); > > + /* add version entry */ > + con->version_attr = dev_attr_version; > + sysfs_attr_init(attrs[1]); > + > + /* add schema entry */ > + con->schema_attr = dev_attr_schema; > + sysfs_attr_init(attrs[2]); > + > if (amdgpu_bad_page_threshold != 0) { > /* add bad_page_features entry */ > bin_attr_gpu_vram_bad_pages.private = NULL; @@ -2594,6 +2627,14 @@ > static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) > } > } > > +static int amdgpu_get_ras_schema(struct amdgpu_device *adev) { > + return (amdgpu_ras_is_poison_mode_supported(adev) << > +(ffs(AMDGPU_RAS_ERROR__POISON) - 1)) | It's simpler and more readable with a ternary operator. Thanks, Lijo > + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE | > + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE | > + AMDGPU_RAS_ERROR__PARITY; > +} > + > int amdgpu_ras_init(struct amdgpu_device *adev) > { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2636,6 > +2677,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) > > con->update_channel_flag = false; > con->features = 0; > + con->schema = 0; > INIT_LIST_HEAD(&con->head); > /* Might need get this flag from vbios. */ > con->flags = RAS_DEFAULT_FLAGS; > @@ -2691,6 +2733,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev) > > amdgpu_ras_query_poison_mode(adev); > > + /* Get RAS schema for particular SOC */ > + con->schema = amdgpu_get_ras_schema(adev); > + > if (amdgpu_ras_fs_init(adev)) { > r = -EINVAL; > goto release_con; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 7999d202c9bc..728f98c6fc1c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -389,9 +389,12 @@ struct amdgpu_ras { > /* ras infrastructure */ > /* for ras itself. */ > uint32_t features; > + uint32_t schema; > struct list_head head; > /* sysfs */ > struct device_attribute features_attr; > + struct device_attribute version_attr; > + struct device_attribute schema_attr; > struct bin_attribute badpages_attr; > struct dentry *de_ras_eeprom_table; > /* block array */