[AMD Official Use Only] The series is: Reviewed-by: Tao Zhou <tao.zhou1@xxxxxxx> Please make sure basic RAS tests are successful before submitting the series. > -----Original Message----- > From: Chai, Thomas <YiPeng.Chai@xxxxxxx> > Sent: Wednesday, December 29, 2021 2:32 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Chai, Thomas <YiPeng.Chai@xxxxxxx>; Zhang, Hawking > <Hawking.Zhang@xxxxxxx>; Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Clements, > John <John.Clements@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx> > Subject: [PATCH V3 01/12] drm/amdgpu: Unify ras block interface for each ras > block > > 1. Define unified ops interface for each block. > 2. Add ras_block_match function pointer in ops interface, each ras block can > customize special match function to identify itself. > 3. Add amdgpu_ras_block_match_default new function. If a ras block doesn't > define .ras_block_match, default execute amdgpu_ras_block_match_default to > identify this ras block. > 4. Define unified basic ras block data for each ras block. > 5. Create dedicated amdgpu device ras block link list to manage all of the ras > blocks. > 6. Add amdgpu_ras_register_ras_block new function interface for each ras block > to register itself to ras controlling block. 
> > Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 46 ++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 28 +++++++++++++ > 4 files changed, 78 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index db1505455761..eddf230856e2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1151,6 +1151,8 @@ struct amdgpu_device { > bool barrier_has_auto_waitcnt; > > struct amdgpu_reset_control *reset_cntl; > + > + struct list_head ras_list; > }; > > static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) diff > --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 73ec46140d68..0980396ee709 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -3578,6 +3578,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, > > INIT_LIST_HEAD(&adev->reset_list); > > + INIT_LIST_HEAD(&adev->ras_list); > + > INIT_DELAYED_WORK(&adev->delayed_init_work, > amdgpu_device_delayed_init_work_handler); > INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 90f0db3b4f65..9dd698354e04 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -862,6 +862,40 @@ static int amdgpu_ras_enable_all_features(struct > amdgpu_device *adev, } > /* feature ctl end */ > > +int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object* > +block_obj, enum amdgpu_ras_block block) { > + if(!block_obj) > + return -EINVAL; > + > + if (block_obj->block == block) > + return 0; > + > + return -EINVAL; > +} > + > +static struct amdgpu_ras_block_object* 
amdgpu_ras_get_ras_block(struct > amdgpu_device *adev, > + enum amdgpu_ras_block block, > uint32_t sub_block_index) { > + struct amdgpu_ras_block_object *obj, *tmp; > + > + if (block >= AMDGPU_RAS_BLOCK__LAST) > + return NULL; > + > + if (!amdgpu_ras_is_supported(adev, block)) > + return NULL; > + > + list_for_each_entry_safe(obj, tmp, &adev->ras_list, node) { > + if (obj->ras_block_match) { > + if (obj->ras_block_match(obj, block, sub_block_index) > == 0) > + return obj; > + } else { > + if (amdgpu_ras_block_match_default(obj, block) == 0) > + return obj; > + } > + } > + > + return NULL; > +} > > void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, > struct ras_common_if *ras_block, @@ - > 2739,3 +2773,15 @@ static void > amdgpu_register_bad_pages_mca_notifier(void) > } > } > #endif > +/* Register each ip ras block into amdgpu ras */ int > +amdgpu_ras_register_ras_block(struct amdgpu_device *adev, > + struct amdgpu_ras_block_object* ras_block_obj) { > + if (!adev || !ras_block_obj) > + return -EINVAL; > + > + INIT_LIST_HEAD(&ras_block_obj->node); > + list_add_tail(&ras_block_obj->node, &adev->ras_list); > + > + return 0; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index cdd0010a5389..9dbe8d49b891 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -469,6 +469,33 @@ struct ras_debug_if { > }; > int op; > }; > + > +struct amdgpu_ras_block_object { > + /* block name */ > + char name[32]; > + > + enum amdgpu_ras_block block; > + > + uint32_t sub_block_index; > + > + /* ras block link */ > + struct list_head node; > + > + int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, > enum amdgpu_ras_block block, uint32_t sub_block_index); > + int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); > + void (*ras_fini)(struct amdgpu_device *adev); > + const struct amdgpu_ras_block_hw_ops *hw_ops; }; > + > +struct 
amdgpu_ras_block_hw_ops { > + int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); > + void (*query_ras_error_count)(struct amdgpu_device *adev,void > *ras_error_status); > + void (*query_ras_error_status)(struct amdgpu_device *adev); > + void (*query_ras_error_address)(struct amdgpu_device *adev, void > *ras_error_status); > + void (*reset_ras_error_count)(struct amdgpu_device *adev); > + void (*reset_ras_error_status)(struct amdgpu_device *adev); }; > + > /* work flow > * vbios > * 1: ras feature enable (enabled by default) @@ -652,4 +679,5 @@ const char > *get_ras_block_str(struct ras_common_if *ras_block); > > bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev); > > +int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct > +amdgpu_ras_block_object* ras_block_obj); > #endif > -- > 2.25.1