On Tue, Apr 18, 2017 at 05:05:13PM -0600, Tyler Baicar wrote: > A RAS (Reliability, Availability, Serviceability) controller > may be a separate processor running in parallel with OS > execution, and may generate error records for consumption by > the OS. If the RAS controller produces multiple error records, > then they may be overwritten before the OS has consumed them. > > The Generic Hardware Error Source (GHES) v2 structure > introduces the capability for the OS to acknowledge the > consumption of the error record generated by the RAS > controller. A RAS controller supporting GHESv2 shall wait for > the acknowledgment before writing a new error record, thus > eliminating the race condition. > > Add support for parsing of GHESv2 sub-tables as well. > > Signed-off-by: Tyler Baicar <tbaicar@xxxxxxxxxxxxxx> > CC: Jonathan (Zhixiong) Zhang <zjzhang@xxxxxxxxxxxxxx> > Reviewed-by: James Morse <james.morse@xxxxxxx> > --- > drivers/acpi/apei/ghes.c | 55 +++++++++++++++++++++++++++++++++++++++++++++--- > drivers/acpi/apei/hest.c | 7 ++++-- > include/acpi/ghes.h | 5 ++++- > 3 files changed, 61 insertions(+), 6 deletions(-) > > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c > index 79b3c9c..6d87ab7 100644 > --- a/drivers/acpi/apei/ghes.c > +++ b/drivers/acpi/apei/ghes.c > @@ -46,6 +46,7 @@ > #include <linux/nmi.h> > #include <linux/sched/clock.h> > > +#include <acpi/actbl1.h> > #include <acpi/ghes.h> > #include <acpi/apei.h> > #include <asm/tlbflush.h> > @@ -80,6 +81,10 @@ > ((struct acpi_hest_generic_status *) \ > ((struct ghes_estatus_node *)(estatus_node) + 1)) > > +#define IS_HEST_TYPE_GENERIC_V2(ghes) \ > + ((struct acpi_hest_header *)ghes->generic)->type == \ This is a nasty hack: casting the ghes->generic pointer to a pointer to its first member, which is an acpi_hest_header. 
Why isn't this a nice inline function with proper dereferencing: static inline bool is_hest_type_generic_v2(struct ghes *ghes) { return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; } ? Also, please integrate scripts/checkpatch.pl in your patch creation workflow. Some of the warnings/errors *actually* make sense. > /* > * This driver isn't really modular, however for the time being, > * continuing to use module_param is the easiest way to remain > @@ -240,6 +245,17 @@ static int ghes_estatus_pool_expand(unsigned long len) > return 0; > } > > +static int map_gen_v2(struct ghes *ghes) > +{ > + return apei_map_generic_address(&ghes->generic_v2->read_ack_register); > +} > + > +static void unmap_gen_v2(struct ghes *ghes) > +{ > + apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); > + return; > +} Like this one, for example: WARNING: void function return statements are not generally useful #89: FILE: drivers/acpi/apei/ghes.c:257: + return; +} > + > static struct ghes *ghes_new(struct acpi_hest_generic *generic) > { > struct ghes *ghes; > @@ -249,10 +265,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) > ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); > if (!ghes) > return ERR_PTR(-ENOMEM); > + > ghes->generic = generic; > + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { > + rc = map_gen_v2(ghes); > + if (rc) > + goto err_free; > + } > + > rc = apei_map_generic_address(&generic->error_status_address); > if (rc) > - goto err_free; > + goto err_unmap_read_ack_addr; > error_block_length = generic->error_block_length; > if (error_block_length > GHES_ESTATUS_MAX_SIZE) { > pr_warning(FW_WARN GHES_PFX > @@ -264,13 +287,16 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) > ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); > if (!ghes->estatus) { > rc = -ENOMEM; > - goto err_unmap; > + goto err_unmap_status_addr; > } > > return ghes; > > -err_unmap: > +err_unmap_status_addr: > 
apei_unmap_generic_address(&generic->error_status_address); > +err_unmap_read_ack_addr: > + if (IS_HEST_TYPE_GENERIC_V2(ghes)) > + unmap_gen_v2(ghes); > err_free: > kfree(ghes); > return ERR_PTR(rc); > @@ -280,6 +306,8 @@ static void ghes_fini(struct ghes *ghes) > { > kfree(ghes->estatus); > apei_unmap_generic_address(&ghes->generic->error_status_address); > + if (IS_HEST_TYPE_GENERIC_V2(ghes)) > + unmap_gen_v2(ghes); > } > > static inline int ghes_severity(int severity) > @@ -649,6 +677,21 @@ static void ghes_estatus_cache_add( > rcu_read_unlock(); > } > > +static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2) If you rename this function parameter to something shorter, say gv2, for example... > +{ > + int rc; > + u64 val = 0; > + > + rc = apei_read(&val, &generic_v2->read_ack_register); > + if (rc) > + return rc; > + > + val &= generic_v2->read_ack_preserve << generic_v2->read_ack_register.bit_offset; > + val |= generic_v2->read_ack_write << generic_v2->read_ack_register.bit_offset; ... you can align those two nicely while remaining within the 80-column width: val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; and make them readable at a quick glance. > + > + return apei_write(val, &generic_v2->read_ack_register); > +} > + > static int ghes_proc(struct ghes *ghes) > { > int rc; > @@ -661,6 +704,12 @@ static int ghes_proc(struct ghes *ghes) > ghes_estatus_cache_add(ghes->generic, ghes->estatus); > } > ghes_do_proc(ghes, ghes->estatus); This needs a comment explaining why v2 needs to ACK the error. The commit message is not necessarily something we'll find quickly in the future. > + > + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { > + rc = ghes_ack_error(ghes->generic_v2); > + if (rc) > + return rc; > + } > out: > ghes_clear_estatus(ghes); > return rc; -- Regards/Gruss, Boris. Good mailing practices for 400: avoid top-posting and trim the reply.