[AMD Official Use Only - AMD Internal Distribution Only] The series is: Reviewed-by: Tao Zhou <tao.zhou1@xxxxxxx> > -----Original Message----- > From: Wang, Yang(Kevin) <KevinYang.Wang@xxxxxxx> > Sent: Wednesday, July 3, 2024 5:03 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Zhou1, Tao > <Tao.Zhou1@xxxxxxx> > Subject: [PATCH 4/4] drm/amdgpu: add ras event state device attribute support > > add amdgpu ras 'event_state' sysfs device attribute support > > Signed-off-by: Yang Wang <kevinyang.wang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 56 +++++++++++++++++++++++- > - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 7 +++- > 2 files changed, 58 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 11f8c37a97ef..d84e4f841ecc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -1731,6 +1731,39 @@ static ssize_t > amdgpu_ras_sysfs_schema_show(struct device *dev, > return sysfs_emit(buf, "schema: 0x%x\n", con->schema); } > > +static struct { > + enum ras_event_type type; > + const char *name; > +} dump_event[] = { > + {RAS_EVENT_TYPE_ISR, "Fault Error"}, > + {RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"}, > + {RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"}, }; > + > +static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev, > + struct device_attribute *attr, > char *buf) { > + struct amdgpu_ras *con = > + container_of(attr, struct amdgpu_ras, event_state_attr); > + struct ras_event_manager *event_mgr = con->event_mgr; > + struct ras_event_state *event_state; > + int i, size = 0; > + > + if (!event_mgr) > + return -EINVAL; > + > + size += sysfs_emit_at(buf, size, "current seqno: %llu\n", > atomic64_read(&event_mgr->seqno)); > + for (i = 0; i < ARRAY_SIZE(dump_event); i++) { > + event_state = &event_mgr->event_state[dump_event[i].type]; > + size += sysfs_emit_at(buf, size, "%s : count:%llu, > last_seqno:%llu\n", > + dump_event[i].name, > + atomic64_read(&event_state->count), > + event_state->last_seqno); > + } > + > + return (ssize_t)size; > +} > + > static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device > *adev) { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1748,6 > +1781,7 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct > amdgpu_device *adev) > &con->features_attr.attr, > &con->version_attr.attr, > &con->schema_attr.attr, > + &con->event_state_attr.attr, > NULL > }; > struct attribute_group group = { > @@ -1980,6 +2014,8 @@ static DEVICE_ATTR(version, 0444, > amdgpu_ras_sysfs_version_show, NULL); static > DEVICE_ATTR(schema, 0444, > amdgpu_ras_sysfs_schema_show, NULL); > +static DEVICE_ATTR(event_state, 0444, > + amdgpu_ras_sysfs_event_state_show, NULL); > static int amdgpu_ras_fs_init(struct amdgpu_device *adev) { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1990,6 > +2026,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) > &con->features_attr.attr, > &con->version_attr.attr, > &con->schema_attr.attr, > + &con->event_state_attr.attr, > NULL > }; > struct bin_attribute *bin_attrs[] = { > @@ -2012,6 +2049,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device > *adev) > con->schema_attr = dev_attr_schema; > sysfs_attr_init(attrs[2]); > > + /* add event_state entry */ > + con->event_state_attr = dev_attr_event_state; > + sysfs_attr_init(attrs[3]); > + > if (amdgpu_bad_page_threshold != 0) { > /* add bad_page_features entry */ > bin_attr_gpu_vram_bad_pages.private = NULL; @@ -3440,13 > +3481,17 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev) > > static void ras_event_mgr_init(struct ras_event_manager *mgr) { > + struct ras_event_state *event_state; > int i; > > memset(mgr, 0, sizeof(*mgr)); > atomic64_set(&mgr->seqno, 0); > > - for (i = 0; i < ARRAY_SIZE(mgr->last_seqno); i++) > - mgr->last_seqno[i] = RAS_EVENT_INVALID_ID; > + for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) { > + event_state = &mgr->event_state[i]; > + event_state->last_seqno = RAS_EVENT_INVALID_ID; > + atomic64_set(&event_state->count, 0); > + } > } > > static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev) @@ - > 3960,6 +4005,7 @@ static struct ras_event_manager* > __get_ras_event_mgr(struct amdgpu_device *adev) int > amdgpu_ras_mark_ras_event(struct amdgpu_device *adev, enum > ras_event_type type) { > struct ras_event_manager *event_mgr; > + struct ras_event_state *event_state; > > if (type >= RAS_EVENT_TYPE_COUNT) > return -EINVAL; > @@ -3968,7 +4014,9 @@ int amdgpu_ras_mark_ras_event(struct > amdgpu_device *adev, enum ras_event_type ty > if (!event_mgr) > return -EINVAL; > > - event_mgr->last_seqno[type] = atomic64_inc_return(&event_mgr- > >seqno); > + event_state = &event_mgr->event_state[type]; > + event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno); > + atomic64_inc(&event_state->count); > > return 0; > } > @@ -3989,7 +4037,7 @@ u64 amdgpu_ras_acquire_event_id(struct > amdgpu_device *adev, enum ras_event_type > if (!event_mgr) > return RAS_EVENT_INVALID_ID; > > - id = event_mgr->last_seqno[type]; > + id = event_mgr->event_state[type].last_seqno; > break; > case RAS_EVENT_TYPE_INVALID: > default: > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 6086da67fa4e..189e2bf53a44 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -436,10 +436,14 @@ enum ras_event_type { > RAS_EVENT_TYPE_POISON_CONSUMPTION, > RAS_EVENT_TYPE_COUNT, > }; > +struct ras_event_state { > + u64 last_seqno; > + atomic64_t count; > +}; > > struct ras_event_manager { > atomic64_t seqno; > - u64 last_seqno[RAS_EVENT_TYPE_COUNT]; > + struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT]; > }; > > struct ras_event_id { > @@ -493,6 +497,7 @@ struct amdgpu_ras { > struct device_attribute features_attr; > struct device_attribute version_attr; > struct device_attribute schema_attr; > + struct device_attribute event_state_attr; > struct bin_attribute badpages_attr; > struct dentry *de_ras_eeprom_table; > /* block array */ > -- > 2.34.1