[AMD Official Use Only - General] > -----Original Message----- > From: Stanley.Yang <Stanley.Yang@xxxxxxx> > Sent: Monday, June 19, 2023 9:50 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; > Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx> > Cc: Yang, Stanley <Stanley.Yang@xxxxxxx> > Subject: [PATCH Review V2 1/1] drm/amdgpu: Remove redundant poison > consumption handler function > > The function callback handle_poison_consumption and callback function > poison_consumption_handler are almost same to handle poison consumption, > remove poison_consumption_handler. > > Changed from V1: > Add handle poison consumption function for VCN2.6, VCN4.0, > JPEG2.6 and JPEG4.0, return false when handle VCN/JPEGP poison > consumption. > > Signed-off-by: Stanley.Yang <Stanley.Yang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 --------- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ---- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 +++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ++- > drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c | 12 +++++++++--- > drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 9 +++++++++ > drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 9 +++++++++ > drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 9 +++++++++ > drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 9 +++++++++ > 9 files changed, 50 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index a33d4bc34cee..c15dbdb2e0f9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -840,15 +840,6 @@ int amdgpu_gfx_ras_sw_init(struct amdgpu_device > *adev) > return 0; > } > > -int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, > - struct amdgpu_iv_entry *entry) > -{ > - if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler) > - return adev->gfx.ras->poison_consumption_handler(adev, > entry); > - > - return 0; > -} > - > int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, > void *err_data, > struct amdgpu_iv_entry *entry) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index d0c3f2955821..95b80bc8cdb9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -273,8 +273,6 @@ struct amdgpu_gfx_ras { > int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, > struct amdgpu_irq_src *source, > struct amdgpu_iv_entry *entry); > - int (*poison_consumption_handler)(struct amdgpu_device *adev, > - struct amdgpu_iv_entry > *entry); > }; > > struct amdgpu_gfx_shadow_info { > @@ -538,8 +536,6 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device > *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, > uint32_t ucode_id); > > int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev); -int > amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, > - struct amdgpu_iv_entry > *entry); > > bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id); int > amdgpu_gfx_sysfs_init(struct amdgpu_device *adev); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 5b6525d8dace..9ce7c7537751 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -1668,7 +1668,7 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct > amdgpu_device *adev) static void > amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj, > struct amdgpu_iv_entry *entry) > { > - bool poison_stat = false; > + bool poison_stat = true; > struct amdgpu_device *adev = obj->adev; > struct amdgpu_ras_block_object *block_obj = > amdgpu_ras_get_ras_block(adev, obj->head.block, 0); @@ - > 1694,15 +1694,13 @@ static void > amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * > amdgpu_umc_poison_handler(adev, false); > > if (block_obj->hw_ops && block_obj->hw_ops- > >handle_poison_consumption) > - poison_stat = block_obj->hw_ops- > >handle_poison_consumption(adev); > + poison_stat = block_obj->hw_ops- > >handle_poison_consumption(adev, > +entry); [Tao] !block_obj->hw_ops->handle_poison_consumption is allowed, we can add the following code to avoid adding handle_poison_consumption for vcn and jpeg. else poison_stat = false; For the change in GFX, it's better for Thomas to take a look. > > /* gpu reset is fallback for failed and default cases */ > - if (poison_stat) { > + if (poison_stat != true) { > dev_info(adev->dev, "GPU reset for %s RAS poison consumption > is issued!\n", > block_obj->ras_comm.name); > amdgpu_ras_reset_gpu(adev); > - } else { > - amdgpu_gfx_poison_consumption_handler(adev, entry); > } > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 46bf1889a9d7..03f3b3774b85 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -564,7 +564,8 @@ struct amdgpu_ras_block_hw_ops { > void (*reset_ras_error_count)(struct amdgpu_device *adev); > void (*reset_ras_error_status)(struct amdgpu_device *adev); > bool (*query_poison_status)(struct amdgpu_device *adev); > - bool (*handle_poison_consumption)(struct amdgpu_device *adev); > + bool (*handle_poison_consumption)(struct amdgpu_device *adev, > + struct amdgpu_iv_entry *entry); > }; > > /* work flow > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c > index 26d6286d86c9..5b7eac547a05 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c > @@ -78,7 +78,7 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct > amdgpu_device *adev, > return 0; > } > > -static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device > *adev, > +static bool gfx_v11_0_3_handle_poison_consumption(struct amdgpu_device > +*adev, > struct amdgpu_iv_entry *entry) > { > /* Workaround: when vmid and pasid are both zero, trigger gpu reset in > KGD. */ @@ -99,10 +99,16 @@ static int > gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, > amdgpu_ras_reset_gpu(adev); > } > > - return 0; > + return true; > } > > +struct amdgpu_ras_block_hw_ops gfx_v11_0_3_ras_ops = { > + .handle_poison_consumption = > gfx_v11_0_3_handle_poison_consumption, > +}; > + > struct amdgpu_gfx_ras gfx_v11_0_3_ras = { > + .ras_block = { > + .hw_ops = &gfx_v11_0_3_ras_ops, > + }, > .rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq, > - .poison_consumption_handler = > gfx_v11_0_3_poison_consumption_handler, > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c > b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c > index aadb74de52bc..3d60610a9c70 100644 > --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c > +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c > @@ -809,8 +809,17 @@ static bool jpeg_v2_6_query_ras_poison_status(struct > amdgpu_device *adev) > return !!poison_stat; > } > > +static bool jpeg_v2_6_handle_poison_consumption(struct amdgpu_device > *adev, > + struct amdgpu_iv_entry *entry) > +{ > + /* TODO: reset IP engine instead of reset GPU */ > + > + return false; > +} > + > const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = { > .query_poison_status = jpeg_v2_6_query_ras_poison_status, > + .handle_poison_consumption = jpeg_v2_6_handle_poison_consumption, > }; > > static struct amdgpu_jpeg_ras jpeg_v2_6_ras = { diff --git > a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c > index a707d407fbd0..328e3de14e35 100644 > --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c > @@ -818,8 +818,17 @@ static bool jpeg_v4_0_query_ras_poison_status(struct > amdgpu_device *adev) > return !!poison_stat; > } > > +static bool jpeg_v4_0_handle_poison_consumption(struct amdgpu_device > *adev, > + struct amdgpu_iv_entry *entry) > +{ > + /* TODO: reset IP engine instead of reset GPU */ > + > + return false; > +} > + > const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = { > .query_poison_status = jpeg_v4_0_query_ras_poison_status, > + .handle_poison_consumption = jpeg_v4_0_handle_poison_consumption, > }; > > static struct amdgpu_jpeg_ras jpeg_v4_0_ras = { diff --git > a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > index bb1875f926f1..e6dbea441caa 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > @@ -1974,8 +1974,17 @@ static bool vcn_v2_6_query_poison_status(struct > amdgpu_device *adev) > return !!poison_stat; > } > > +static bool vcn_v2_6_handle_poison_consumption(struct amdgpu_device *adev, > + struct amdgpu_iv_entry *entry) > +{ > + /* TODO: reset IP engine instead of reset GPU */ > + > + return false; > +} > + > const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = { > .query_poison_status = vcn_v2_6_query_poison_status, > + .handle_poison_consumption = vcn_v2_6_handle_poison_consumption, > }; > > static struct amdgpu_vcn_ras vcn_v2_6_ras = { diff --git > a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > index b48bb5212488..eb8625a6df92 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > @@ -2134,8 +2134,17 @@ static bool vcn_v4_0_query_ras_poison_status(struct > amdgpu_device *adev) > return !!poison_stat; > } > > +static bool vcn_v4_0_handle_poison_consumption(struct amdgpu_device *adev, > + struct amdgpu_iv_entry *entry) > +{ > + /* TODO: reset IP engine instead of reset GPU */ > + > + return false; > +} > + > const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = { > .query_poison_status = vcn_v4_0_query_ras_poison_status, > + .handle_poison_consumption = vcn_v4_0_handle_poison_consumption, > }; > > static struct amdgpu_vcn_ras vcn_v4_0_ras = { > -- > 2.25.1