On 21/10/16 18:30, Tyler Baicar wrote:
A RAS (Reliability, Availability, Serviceability) controller may be a separate processor running in parallel with OS execution, and may generate error records for consumption by the OS. If the RAS controller produces multiple error records, then they may be overwritten before the OS has consumed them. The Generic Hardware Error Source (GHES) v2 structure introduces the capability for the OS to acknowledge the consumption of the error record generated by the RAS controller. A RAS controller supporting GHESv2 shall wait for the acknowledgment before writing a new error record, thus eliminating the race condition. Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@xxxxxxxxxxxxxx> Signed-off-by: Richard Ruigrok <rruigrok@xxxxxxxxxxxxxx> Signed-off-by: Tyler Baicar <tbaicar@xxxxxxxxxxxxxx> Signed-off-by: Naveen Kaje <nkaje@xxxxxxxxxxxxxx> --- drivers/acpi/apei/ghes.c | 42 ++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/apei/hest.c | 7 +++++-- include/acpi/ghes.h | 5 ++++- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 60746ef..7d020b0 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -45,6 +45,7 @@ #include <linux/aer.h> #include <linux/nmi.h> +#include <acpi/actbl1.h> #include <acpi/ghes.h> #include <acpi/apei.h> #include <asm/tlbflush.h> @@ -79,6 +80,10 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) +#define HEST_TYPE_GENERIC_V2(ghes) \ + ((struct acpi_hest_header *)ghes->generic)->type == \ + ACPI_HEST_TYPE_GENERIC_ERROR_V2 + /* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain @@ -248,7 +253,15 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); if (!ghes) return ERR_PTR(-ENOMEM); + ghes->generic = generic; + if (HEST_TYPE_GENERIC_V2(ghes)) { + rc = apei_map_generic_address( + 
&ghes->generic_v2->read_ack_register); + if (rc) + goto err_unmap;
I think this should be "goto err_free"; see more below.
+ } + rc = apei_map_generic_address(&generic->error_status_address); if (rc) goto err_free; @@ -270,6 +283,9 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) err_unmap: apei_unmap_generic_address(&generic->error_status_address); + if (HEST_TYPE_GENERIC_V2(ghes)) + apei_unmap_generic_address( + &ghes->generic_v2->read_ack_register);
If we hit the error while mapping read_ack_register, we might end up trying to unmap error_status_address, which has not been mapped yet. The read_ack_register unmap hunk should be moved below, to err_free.
err_free: kfree(ghes); return ERR_PTR(rc); @@ -279,6 +295,9 @@ static void ghes_fini(struct ghes *ghes) { kfree(ghes->estatus); apei_unmap_generic_address(&ghes->generic->error_status_address); + if (HEST_TYPE_GENERIC_V2(ghes)) + apei_unmap_generic_address( + &ghes->generic_v2->read_ack_register); } static inline int ghes_severity(int severity) @@ -648,6 +667,23 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); }
+static int ghes_do_read_ack(struct acpi_hest_generic_v2 *generic_v2)
nit: We are actually writing something to the read_ack_register. The name read_ack_register (which may be as per the standard) and, more importantly, the function name (ghes_do_read_ack) sound a bit misleading. The rest looks fine to me. Suzuki -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html