Presently the vendor specific HW errors, in the non-standard format,
are not reported to the vendor drivers for the recovery.

This patch adds support to notify the vendor specific HW errors to the
registered kernel drivers.

Signed-off-by: Shiju Jose <shiju.jose@xxxxxxxxxx>
---
 drivers/acpi/apei/ghes.c | 129 +++++++++++++++++++++++++++++++++++++++++++++--
 include/acpi/ghes.h      |  49 +++++++++++++++++++
 2 files changed, 173 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a66e00f..374d197 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -477,6 +477,85 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 #endif
 }
 
+struct ghes_error_notify {
+	struct list_head list;
+	struct rcu_head rcu_head;
+	guid_t sec_type; /* guid of the error record */
+	error_handle handle; /* error handler function */
+	void *data; /* handler driver's private data if any */
+};
+
+/* List to store the registered error handling functions */
+static DEFINE_MUTEX(ghes_error_notify_mutex);
+static LIST_HEAD(ghes_error_notify_list);
+static refcount_t ghes_ref_count;
+
+/**
+ * ghes_error_notify_register - register an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data.
+ *
+ * Return: 0 on success, -EINVAL if @handle is NULL, -ENOMEM if out of memory.
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle, void *data)
+{
+	struct ghes_error_notify *err_notify;
+
+	if (!handle)
+		return -EINVAL;
+
+	/* Allocate before locking so the -ENOMEM path holds no mutex. */
+	err_notify = kzalloc(sizeof(*err_notify), GFP_KERNEL);
+	if (!err_notify)
+		return -ENOMEM;
+
+	err_notify->handle = handle;
+	guid_copy(&err_notify->sec_type, &sec_type);
+	err_notify->data = data;
+
+	mutex_lock(&ghes_error_notify_mutex);
+	list_add_rcu(&err_notify->list, &ghes_error_notify_list);
+	mutex_unlock(&ghes_error_notify_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_register);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function.
+ * @sec_type: sec_type of the corresponding CPER.
+ * @handle: pointer to the error handling function.
+ *
+ * return none.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle)
+{
+	struct ghes_error_notify *err_notify;
+	bool found = false;
+
+	/* Updaters are serialised by the mutex; no RCU read lock needed. */
+	mutex_lock(&ghes_error_notify_mutex);
+	list_for_each_entry(err_notify, &ghes_error_notify_list, list) {
+		if (guid_equal(&err_notify->sec_type, &sec_type) &&
+		    err_notify->handle == handle) {
+			list_del_rcu(&err_notify->list);
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&ghes_error_notify_mutex);
+
+	if (!found)
+		return;
+
+	/* Wait for handlers still running in ghes_do_proc() to finish. */
+	synchronize_rcu();
+	kfree(err_notify);
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_unregister);
+
 static void ghes_do_proc(struct ghes *ghes,
 			 const struct acpi_hest_generic_status *estatus)
 {
@@ -485,6 +564,8 @@ static void ghes_do_proc(struct ghes *ghes,
 	guid_t *sec_type;
 	guid_t *fru_id = &NULL_UUID_LE;
 	char *fru_text = "";
+	bool is_notify = false;
+	struct ghes_error_notify *err_notify;
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
@@ -512,11 +593,32 @@ static void ghes_do_proc(struct ghes *ghes,
 
 			log_arm_hw_error(err);
 		} else {
-			void *err = acpi_hest_get_payload(gdata);
-
-			log_non_standard_event(sec_type, fru_id, fru_text,
-					       sec_sev, err,
-					       gdata->error_data_length);
+			/* Decide per section whether a handler consumed it. */
+			is_notify = false;
+
+			rcu_read_lock();
+			list_for_each_entry_rcu(err_notify,
+						&ghes_error_notify_list, list) {
+				if (guid_equal(&err_notify->sec_type,
+					       sec_type)) {
+					/* Handlers run in interrupt context
+					 * inside an RCU read-side section,
+					 * so they must not sleep.
+					 */
+					err_notify->handle(gdata, sev,
+							   err_notify->data);
+					is_notify = true;
+				}
+			}
+			rcu_read_unlock();
+
+			if (!is_notify) {
+				void *err = acpi_hest_get_payload(gdata);
+
+				log_non_standard_event(sec_type, fru_id,
+						       fru_text, sec_sev, err,
+						       gdata->error_data_length);
+			}
 		}
 	}
 }
@@ -1217,6 +1319,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
 
 	ghes_edac_register(ghes, &ghes_dev->dev);
 
+	if (!refcount_read(&ghes_ref_count))
+		refcount_set(&ghes_ref_count, 1);
+	else
+		refcount_inc(&ghes_ref_count);
+
 	/* Handle any pending errors right away */
 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
 	ghes_proc(ghes);
@@ -1237,6 +1344,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
 	int rc;
 	struct ghes *ghes;
 	struct acpi_hest_generic *generic;
+	struct ghes_error_notify *err_notify, *tmp;
 
 	ghes = platform_get_drvdata(ghes_dev);
 	generic = ghes->generic;
@@ -1279,6 +1387,17 @@ static int ghes_remove(struct platform_device *ghes_dev)
 
 	ghes_fini(ghes);
 
+	if (refcount_dec_and_test(&ghes_ref_count) &&
+	    !list_empty(&ghes_error_notify_list)) {
+		mutex_lock(&ghes_error_notify_mutex);
+		list_for_each_entry_safe(err_notify, tmp,
+					 &ghes_error_notify_list, list) {
+			list_del_rcu(&err_notify->list);
+			kfree_rcu(err_notify, rcu_head);
+		}
+		mutex_unlock(&ghes_error_notify_mutex);
+	}
+
 	ghes_edac_unregister(ghes);
 
 	kfree(ghes);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index e3f1cdd..d480537 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -50,6 +50,55 @@ enum {
 	GHES_SEV_PANIC = 0x3,
 };
 
+/**
+ * error_handle - error handling function for the hw errors.
+ * This handle function is called in the interrupt context.
+ * @gdata: acpi_hest_generic_data.
+ * @sev: error severity of the entire error event defined in the
+ *	ACPI spec table generic error status block.
+ * @data: handler driver's private data.
+ *
+ * return : none.
+ */
+typedef void (*error_handle)(struct acpi_hest_generic_data *gdata, int sev,
+			     void *data);
+
+#ifdef CONFIG_ACPI_APEI_GHES
+/**
+ * ghes_error_notify_register - register an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data.
+ *
+ * return : 0 - SUCCESS, non-zero - FAIL.
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle,
+			       void *data);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER.
+ * @handle: pointer to the error handling function.
+ *
+ * return none.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle);
+
+#else
+static inline int ghes_error_notify_register(guid_t sec_type,
+					     error_handle handle, void *data)
+{
+	return -ENODEV;
+}
+
+static inline void ghes_error_notify_unregister(guid_t sec_type,
+						error_handle handle)
+{
+}
+#endif
+
 int ghes_estatus_pool_init(int num_ghes);
 
 /* From drivers/edac/ghes_edac.c */
-- 
1.9.1