Hi Borislav > -----Original Message----- > From: Borislav Petkov <bp@xxxxxxxxx> > Sent: Wednesday, August 10, 2022 2:46 AM > To: Justin He <Justin.He@xxxxxxx> > Cc: Kani, Toshi <toshi.kani@xxxxxxx>; Rafael J. Wysocki <rafael@xxxxxxxxxx>; > Len Brown <lenb@xxxxxxxxxx>; James Morse <James.Morse@xxxxxxx>; > Tony Luck <tony.luck@xxxxxxxxx>; Mauro Carvalho Chehab > <mchehab@xxxxxxxxxx>; Robert Richter <rric@xxxxxxxxxx>; Shuai Xue > <xueshuai@xxxxxxxxxxxxxxxxx>; Jarkko Sakkinen <jarkko@xxxxxxxxxx>; ACPI > Devel Maling List <linux-acpi@xxxxxxxxxxxxxxx>; Linux Kernel Mailing List > <linux-kernel@xxxxxxxxxxxxxxx>; open list:EDAC-CORE > <linux-edac@xxxxxxxxxxxxxxx> > Subject: Re: 回复: [PATCH] ACPI: APEI: move edac_init ahead of ghes > platform drv register > [...] > --- > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index > d91ad378c00d..0919317b8313 100644 > --- a/drivers/acpi/apei/ghes.c > +++ b/drivers/acpi/apei/ghes.c > @@ -118,6 +118,9 @@ module_param_named(disable, ghes_disable, bool, > 0); static LIST_HEAD(ghes_hed); static DEFINE_MUTEX(ghes_list_mutex); > > +static LIST_HEAD(ghes_devs); > +static DEFINE_MUTEX(ghes_devs_mutex); > + > /* > * Because the memory area used to transfer hardware error information > * from BIOS to Linux can be determined only in NMI, IRQ or timer @@ > -1376,7 +1379,11 @@ static int ghes_probe(struct platform_device > *ghes_dev) > > platform_set_drvdata(ghes_dev, ghes); > > - ghes_edac_register(ghes, &ghes_dev->dev); > + ghes->dev = &ghes_dev->dev; > + > + mutex_lock(&ghes_devs_mutex); > + list_add_tail(&ghes->elist, &ghes_devs); > + mutex_unlock(&ghes_devs_mutex); > > /* Handle any pending errors right away */ > spin_lock_irqsave(&ghes_notify_lock_irq, flags); @@ -1440,8 +1447,6 > @@ static int ghes_remove(struct platform_device *ghes_dev) > > ghes_fini(ghes); > > - ghes_edac_unregister(ghes); > - > kfree(ghes); > > platform_set_drvdata(ghes_dev, NULL); > @@ -1497,3 +1502,25 @@ void __init acpi_ghes_init(void) > else > pr_info(GHES_PFX "Failed to enable APEI firmware first > mode.\n"); } > + > +/* > + * Known x86 systems that prefer GHES error reporting: > + */ > +static struct acpi_platform_list plat_list[] = { > + {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, > + { } /* End */ > +}; > + > +struct list_head *ghes_get_devices(bool force) { > + int idx = -1; > + > + if (IS_ENABLED(CONFIG_X86)) { > + idx = acpi_match_platform_list(plat_list); > + if (idx < 0 && !force) > + return NULL; > + } > + > + return &ghes_devs; > +} > +EXPORT_SYMBOL_GPL(ghes_get_devices); > diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index > 17562cf1fe97..df45db81858b 100644 > --- a/drivers/edac/Kconfig > +++ b/drivers/edac/Kconfig > @@ -53,8 +53,8 @@ config EDAC_DECODE_MCE > has been initialized. > > config EDAC_GHES > - bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" > - depends on ACPI_APEI_GHES && (EDAC=y) > + tristate "Output ACPI APEI/GHES BIOS detected errors via EDAC" > + depends on ACPI_APEI_GHES > select UEFI_CPER > help > Not all machines support hardware-driven error report. Some of those > diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index > c8fa7dcfdbd0..da6d1a9e107d 100644 > --- a/drivers/edac/ghes_edac.c > +++ b/drivers/edac/ghes_edac.c > @@ -59,6 +59,8 @@ module_param(force_load, bool, 0); > > static bool system_scanned; > > +static struct list_head *ghes_devs; > + > /* Memory Device - Type 17 of SMBIOS spec */ struct memdev_dmi_entry > { > u8 type; > @@ -376,34 +378,15 @@ void ghes_edac_report_mem_error(int sev, struct > cper_sec_mem_err *mem_err) > spin_unlock_irqrestore(&ghes_lock, flags); } > > -/* > - * Known systems that are safe to enable this module. > - */ > -static struct acpi_platform_list plat_list[] = { > - {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, > - { } /* End */ > -}; > - > -int ghes_edac_register(struct ghes *ghes, struct device *dev) > +static int ghes_edac_register(struct device *dev) > { > bool fake = false; > struct mem_ctl_info *mci; > struct ghes_pvt *pvt; > struct edac_mc_layer layers[1]; > unsigned long flags; > - int idx = -1; > int rc = 0; > > - if (IS_ENABLED(CONFIG_X86)) { > - /* Check if safe to enable on this system */ > - idx = acpi_match_platform_list(plat_list); > - if (!force_load && idx < 0) > - return -ENODEV; > - } else { > - force_load = true; > - idx = 0; > - } > - > /* finish another registration/unregistration instance first */ > mutex_lock(&ghes_reg_mutex); > > @@ -447,7 +430,7 @@ int ghes_edac_register(struct ghes *ghes, struct > device *dev) > pr_info("This system has a very crappy BIOS: It doesn't even list the > DIMMS.\n"); > pr_info("Its SMBIOS info is wrong. It is doubtful that the error report > would\n"); > pr_info("work on such system. Use this driver with caution\n"); > - } else if (idx < 0) { > + } else if (force_load) { > pr_info("This EDAC driver relies on BIOS to enumerate memory and > get error reports.\n"); > pr_info("Unfortunately, not all BIOSes reflect the memory layout > correctly.\n"); > pr_info("So, the end result of using this driver varies from vendor to > vendor.\n"); @@ -517,7 +500,7 @@ int ghes_edac_register(struct ghes *ghes, > struct device *dev) > return rc; > } > > -void ghes_edac_unregister(struct ghes *ghes) > +static void ghes_edac_unregister(struct ghes *ghes) > { > struct mem_ctl_info *mci; > unsigned long flags; > @@ -551,3 +534,30 @@ void ghes_edac_unregister(struct ghes *ghes) > unlock: > mutex_unlock(&ghes_reg_mutex); > } > + > +static int __init ghes_edac_init(void) > +{ > + struct ghes *g, *g_tmp; > + > + ghes_devs = ghes_get_devices(force_load); > + if (ghes_devs) Should it be changed to if(!ghes_devs) ? Otherwise, this ghes_edac_init() on Arm will always return with ENODEV because ! ghes_get_devices(). What do you think of it? -- Cheers, Justin (Jia He) > + return -ENODEV; > + > + list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) { > + ghes_edac_register(g->dev); > + } > + > + return 0; > +} > +module_init(ghes_edac_init); > + > +static void __exit ghes_edac_exit(void) { > + struct ghes *g, *g_tmp; > + > + > + list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) { > + ghes_edac_unregister(g); > + } > +} > +module_exit(ghes_edac_exit); > diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index > 34fb3431a8f3..f39b75c3f9c6 100644 > --- a/include/acpi/ghes.h > +++ b/include/acpi/ghes.h > @@ -27,6 +27,8 @@ struct ghes { > struct timer_list timer; > unsigned int irq; > }; > + struct device *dev; > + struct list_head elist; > }; > > struct ghes_estatus_node { > @@ -69,35 +71,13 @@ int ghes_register_vendor_record_notifier(struct > notifier_block *nb); > * @nb: pointer to the notifier_block structure of the vendor record handler. > */ > void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); > +struct list_head *ghes_get_devices(bool force); #else static inline > +struct list_head *ghes_get_devices(bool force) { return NULL; } > #endif > > int ghes_estatus_pool_init(int num_ghes); > > -/* From drivers/edac/ghes_edac.c */ > - > -#ifdef CONFIG_EDAC_GHES > -void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err > *mem_err); > - > -int ghes_edac_register(struct ghes *ghes, struct device *dev); > - > -void ghes_edac_unregister(struct ghes *ghes); > - > -#else > -static inline void ghes_edac_report_mem_error(int sev, > - struct cper_sec_mem_err *mem_err) > -{ > -} > - > -static inline int ghes_edac_register(struct ghes *ghes, struct device *dev) -{ > - return -ENODEV; > -} > - > -static inline void ghes_edac_unregister(struct ghes *ghes) -{ -} -#endif > - > static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata) > { > return gdata->revision >> 8; > > -- > Regards/Gruss, > Boris. > > https://people.kernel.org/tglx/notes-about-netiquette