Re: [PATCH V3 01/10] acpi: apei: read ack upon ghes record consumption

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Tyler,

A few comments below.

Tyler Baicar <tbaicar@xxxxxxxxxxxxxx> writes:

> A RAS (Reliability, Availability, Serviceability) controller
> may be a separate processor running in parallel with OS
> execution, and may generate error records for consumption by
> the OS. If the RAS controller produces multiple error records,
> then they may be overwritten before the OS has consumed them.
>
> The Generic Hardware Error Source (GHES) v2 structure
> introduces the capability for the OS to acknowledge the
> consumption of the error record generated by the RAS
> controller. A RAS controller supporting GHESv2 shall wait for
> the acknowledgment before writing a new error record, thus
> eliminating the race condition.
>
> Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@xxxxxxxxxxxxxx>
> Signed-off-by: Richard Ruigrok <rruigrok@xxxxxxxxxxxxxx>
> Signed-off-by: Tyler Baicar <tbaicar@xxxxxxxxxxxxxx>
> Signed-off-by: Naveen Kaje <nkaje@xxxxxxxxxxxxxx>
> ---
>  drivers/acpi/apei/ghes.c | 41 +++++++++++++++++++++++++++++++++++++++++
>  drivers/acpi/apei/hest.c |  7 +++++--
>  include/acpi/ghes.h      |  1 +
>  3 files changed, 47 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 60746ef..3021f0e 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -45,6 +45,7 @@
>  #include <linux/aer.h>
>  #include <linux/nmi.h>
>  
> +#include <acpi/actbl1.h>
>  #include <acpi/ghes.h>
>  #include <acpi/apei.h>
>  #include <asm/tlbflush.h>
> @@ -244,10 +245,22 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
>  	struct ghes *ghes;
>  	unsigned int error_block_length;
>  	int rc;
> +	struct acpi_hest_header *hest_hdr;
>  
>  	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
>  	if (!ghes)
>  		return ERR_PTR(-ENOMEM);
> +
> +	hest_hdr = (struct acpi_hest_header *)generic;
> +	if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) {
> +		ghes->generic_v2 = (struct acpi_hest_generic_v2 *)generic;
> +		rc = apei_map_generic_address(
> +			&ghes->generic_v2->read_ack_register);
> +		if (rc)
> +			goto err_unmap;
> +	} else
> +		ghes->generic_v2 = NULL;

Since you kzalloc ghes, shouldn't ghes->generic_v2 be NULL already?

> +
>  	ghes->generic = generic;
>  	rc = apei_map_generic_address(&generic->error_status_address);
>  	if (rc)
> @@ -270,6 +283,9 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
>  
>  err_unmap:
>  	apei_unmap_generic_address(&generic->error_status_address);
> +	if (ghes->generic_v2)
> +		apei_unmap_generic_address(
> +			&ghes->generic_v2->read_ack_register);
>  err_free:
>  	kfree(ghes);
>  	return ERR_PTR(rc);
> @@ -279,6 +295,9 @@ static void ghes_fini(struct ghes *ghes)
>  {
>  	kfree(ghes->estatus);
>  	apei_unmap_generic_address(&ghes->generic->error_status_address);
> +	if (ghes->generic_v2)
> +		apei_unmap_generic_address(
> +			&ghes->generic_v2->read_ack_register);
>  }
>  
>  static inline int ghes_severity(int severity)
> @@ -648,6 +667,22 @@ static void ghes_estatus_cache_add(
>  	rcu_read_unlock();
>  }
>  
> +static int ghes_do_read_ack(struct acpi_hest_generic_v2 *generic_v2)
> +{
> +	int rc;
> +	u64 val = 0;
> +
> +	rc = apei_read(&val, &generic_v2->read_ack_register);
> +	if (rc)
> +		return rc;
> +	val &= generic_v2->read_ack_preserve <<
> +		generic_v2->read_ack_register.bit_offset;
> +	val |= generic_v2->read_ack_write;

Reading the spec, it is not clear whether you need the left shift
above.

Having said that, if you do it for read_ack_preserve, do you also need
to left shift read_ack_write by read_ack_register.bit_offset?

> +	rc = apei_write(val, &generic_v2->read_ack_register);
> +
> +	return rc;
> +}
> +
>  static int ghes_proc(struct ghes *ghes)
>  {
>  	int rc;
> @@ -660,6 +695,12 @@ static int ghes_proc(struct ghes *ghes)
>  			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
>  	}
>  	ghes_do_proc(ghes, ghes->estatus);
> +
> +	if (ghes->generic_v2) {
> +		rc = ghes_do_read_ack(ghes->generic_v2);
> +		if (rc)
> +			return rc;
> +	}
>  out:
>  	ghes_clear_estatus(ghes);
>  	return 0;
> diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
> index 792a0d9..ef725a9 100644
> --- a/drivers/acpi/apei/hest.c
> +++ b/drivers/acpi/apei/hest.c
> @@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
>  	[ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
>  	[ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
>  	[ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
> +	[ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2),
>  };
>  
>  static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
> @@ -146,7 +147,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void
>  {
>  	int *count = data;
>  
> -	if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR)
> +	if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR ||
> +	    hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2)
>  		(*count)++;
>  	return 0;
>  }
> @@ -157,7 +159,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
>  	struct ghes_arr *ghes_arr = data;
>  	int rc, i;
>  
> -	if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
> +	if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
> +	    hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
>  		return 0;
>  
>  	if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
> index 720446c..d0108b6 100644
> --- a/include/acpi/ghes.h
> +++ b/include/acpi/ghes.h
> @@ -14,6 +14,7 @@
>  
>  struct ghes {
>  	struct acpi_hest_generic *generic;
> +	struct acpi_hest_generic_v2 *generic_v2;

You either have a GHES or a GHESv2 structure. Instead of duplication,
could this be represented as a union?

Thanks,
Punit

>  	struct acpi_hest_generic_status *estatus;
>  	u64 buffer_paddr;
>  	unsigned long flags;
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux