On 10/14/2024 01:04, Zhuo, Qiuxu wrote: >> From: Avadhut Naik <avadhut.naik@xxxxxxx> >> [...] >> Subject: [PATCH v5 1/5] x86/mce: Add wrapper for struct mce to export vendor >> [...] >> --- a/arch/x86/include/asm/mce.h >> +++ b/arch/x86/include/asm/mce.h >> @@ -187,6 +187,14 @@ enum mce_notifier_prios { >> MCE_PRIO_HIGHEST = MCE_PRIO_CEC >> }; >> >> +/** >> + * struct mce_hw_err - Hardware Error Record. >> + * @m: Machine Check record. >> + */ >> +struct mce_hw_err { >> + struct mce m; >> +}; >> + > > Just some thoughts: > > I noticed that many places only care about the 'mce' structure instead of the new 'mce_hw_err', > such as the EDAC drivers in the notifier chains. > > Could we still pass these users with the 'mce' structure instead of 'mce_hw_err', > since they only care about 'mce'? However, they still have the opportunity to > get 'mce_hw_err' from 'mce' on demand with the following macro. [1] > > #define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m) > > So, for those codes that need 'mce_hw_err', they can retrieve it from 'mce' > on demand by: > > struct mce_hw_err *err = to_mce_hw_err(mce); > > For those codes that don't need 'mce_hw_err', there are > no changes to them. I.e., they do not need to perform the extra operations > like below over and over: > > struct mce *mce = &err->m; > Thanks for the suggestion. Will try this out and update. >> [...] > >> static int mce_early_notifier(struct notifier_block *nb, unsigned long val, >> void *data) >> { >> - struct mce *m = (struct mce *)data; >> + struct mce_hw_err *err = (struct mce_hw_err *)data; > > If using [1], then it can be: > > struct mce_hw_err *err = to_mce_hw_err(data); > >> - if (!m) >> + if (!err) >> return NOTIFY_DONE; >> >> /* Emit the trace record: */ >> - trace_mce_record(m); >> + trace_mce_record(err); >> >> set_bit(0, &mce_need_notify); >> >> @@ -597,7 +606,8 @@ static struct notifier_block early_nb = { static int >> uc_decode_notifier(struct notifier_block *nb, unsigned long val, >> void *data) >> { >> - struct mce *mce = (struct mce *)data; >> + struct mce_hw_err *err = (struct mce_hw_err *)data; >> + struct mce *mce = &err->m; > > If using [1], then there is no need for these changes. > >> [...] >> diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c >> b/arch/x86/kernel/cpu/mce/dev-mcelog.c >> index af44fd5dbd7c..d991ef370efd 100644 >> --- a/arch/x86/kernel/cpu/mce/dev-mcelog.c >> +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c >> @@ -36,7 +36,7 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); >> static int dev_mce_log(struct notifier_block *nb, unsigned long val, >> void *data) >> { >> - struct mce *mce = (struct mce *)data; >> + struct mce *mce = &((struct mce_hw_err *)data)->m; >> unsigned int entry; >> >> if (mce->kflags & MCE_HANDLED_CEC) > > If using [1], then there is no need for these changes. > >> [...] >> @@ -83,8 +83,8 @@ void mce_gen_pool_process(struct work_struct >> *__unused) >> >> head = llist_reverse_order(head); >> llist_for_each_entry_safe(node, tmp, head, llnode) { >> - mce = &node->mce; >> - blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, >> mce); >> + err = &node->err; >> + blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, >> err); > > If using [1], then continue to pass 'mce' as before: > > mce = &node->err.m; > blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); > >> [...] >> diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index >> ca87a0939135..4864191918db 100644 >> --- a/drivers/acpi/acpi_extlog.c >> +++ b/drivers/acpi/acpi_extlog.c >> @@ -134,7 +134,7 @@ static int print_extlog_rcd(const char *pfx, static int >> extlog_print(struct notifier_block *nb, unsigned long val, >> void *data) >> { >> - struct mce *mce = (struct mce *)data; >> + struct mce *mce = &((struct mce_hw_err *)data)->m; >> int bank = mce->bank; >> int cpu = mce->extcpu; >> struct acpi_hest_generic_status *estatus, *tmp; diff --git > > If using [1], then there is no need for these changes. > >> a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index >> d48a388b796e..b917988db794 100644 >> --- a/drivers/acpi/nfit/mce.c >> +++ b/drivers/acpi/nfit/mce.c >> @@ -13,7 +13,7 @@ >> static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, >> void *data) >> { >> - struct mce *mce = (struct mce *)data; >> + struct mce *mce = &((struct mce_hw_err *)data)->m; >> struct acpi_nfit_desc *acpi_desc; >> struct nfit_spa *nfit_spa; >> > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index >> 91e0a88ef904..d1e47cba0ff2 100644 >> --- a/drivers/edac/i7core_edac.c >> +++ b/drivers/edac/i7core_edac.c >> @@ -1810,7 +1810,7 @@ static void i7core_check_error(struct mem_ctl_info >> *mci, struct mce *m) static int i7core_mce_check_error(struct notifier_block >> *nb, unsigned long val, >> void *data) >> { >> - struct mce *mce = (struct mce *)data; >> + struct mce *mce = &((struct mce_hw_err *)data)->m; >> struct i7core_dev *i7_dev; >> struct mem_ctl_info *mci; >> > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index >> 189a2fc29e74..091126a42884 100644 >> --- a/drivers/edac/igen6_edac.c >> +++ b/drivers/edac/igen6_edac.c >> @@ -919,7 +919,7 @@ static int ecclog_nmi_handler(unsigned int cmd, struct >> pt_regs *regs) static int ecclog_mce_handler(struct notifier_block *nb, >> unsigned long val, >> void *data) >> { >> - struct mce *mce = (struct mce *)data; >> + struct mce *mce = &((struct mce_hw_err *)data)->m; >> char *type; >> >> if (mce->kflags & MCE_HANDLED_CEC) > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index >> 8130c3dc64da..c5fae99de781 100644 >> --- a/drivers/edac/mce_amd.c >> +++ b/drivers/edac/mce_amd.c >> @@ -792,7 +792,7 @@ static const char *decode_error_status(struct mce *m) >> static int amd_decode_mce(struct notifier_block *nb, unsigned long val, void >> *data) { >> - struct mce *m = (struct mce *)data; >> + struct mce *m = &((struct mce_hw_err *)data)->m; >> unsigned int fam = x86_family(m->cpuid); >> int ecc; >> > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index >> f93f2f2b1cf2..a3008f6eb2b1 100644 >> --- a/drivers/edac/pnd2_edac.c >> +++ b/drivers/edac/pnd2_edac.c >> @@ -1366,7 +1366,7 @@ static void pnd2_unregister_mci(struct mem_ctl_info >> *mci) >> */ >> static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, >> void *data) { >> - struct mce *mce = (struct mce *)data; >> + struct mce *mce = &((struct mce_hw_err *)data)->m; >> struct mem_ctl_info *mci; >> struct dram_addr daddr; >> char *type; > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index >> d5f12219598a..f771c36540cf 100644 >> --- a/drivers/edac/sb_edac.c >> +++ b/drivers/edac/sb_edac.c >> @@ -3256,7 +3256,7 @@ static void sbridge_mce_output_error(struct >> mem_ctl_info *mci, static int sbridge_mce_check_error(struct notifier_block >> *nb, unsigned long val, >> void *data) >> { >> - struct mce *mce = (struct mce *)data; >> + struct mce *mce = &((struct mce_hw_err *)data)->m; >> struct mem_ctl_info *mci; >> char *type; >> > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index >> 85713646957b..5fa3a9038a77 100644 >> --- a/drivers/edac/skx_common.c >> +++ b/drivers/edac/skx_common.c >> @@ -644,7 +644,7 @@ static bool skx_error_in_mem(const struct mce *m) int >> skx_mce_check_error(struct notifier_block *nb, unsigned long val, >> void *data) >> { >> - struct mce *mce = (struct mce *)data; >> + struct mce *mce = &((struct mce_hw_err *)data)->m; >> struct decoded_addr res; >> struct mem_ctl_info *mci; >> char *type; > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >> index 1a1395c5fff1..88648a89fbdf 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >> @@ -4160,7 +4160,7 @@ static struct amdgpu_device *find_adev(uint32_t >> node_id) static int amdgpu_bad_page_notifier(struct notifier_block *nb, >> unsigned long val, void *data) >> { >> - struct mce *m = (struct mce *)data; >> + struct mce *m = &((struct mce_hw_err *)data)->m; >> struct amdgpu_device *adev = NULL; >> uint32_t gpu_id = 0; >> uint32_t umc_inst = 0, ch_inst = 0; > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index >> 90de737fbc90..78dd4b192992 100644 >> --- a/drivers/ras/amd/fmpm.c >> +++ b/drivers/ras/amd/fmpm.c >> @@ -400,7 +400,7 @@ static void retire_dram_row(u64 addr, u64 id, u32 cpu) >> >> static int fru_handle_mem_poison(struct notifier_block *nb, unsigned long >> val, void *data) { >> - struct mce *m = (struct mce *)data; >> + struct mce *m = &((struct mce_hw_err *)data)->m; >> struct fru_rec *rec; >> >> if (!mce_is_memory_error(m)) > > If using [1], then there is no need for these changes. > >> diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index >> e440b15fbabc..be785746f587 100644 >> --- a/drivers/ras/cec.c >> +++ b/drivers/ras/cec.c >> @@ -534,7 +534,7 @@ static int __init create_debugfs_nodes(void) static int >> cec_notifier(struct notifier_block *nb, unsigned long val, >> void *data) >> { >> - struct mce *m = (struct mce *)data; >> + struct mce *m = &((struct mce_hw_err *)data)->m; >> >> if (!m) >> return NOTIFY_DONE; > > If using [1], then there is no need for these changes. > >> [...] -- Thanks, Avadhut Naik