[PATCH 1/4] acpi: add find error record in BERT function

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a helper function that searches through the BERT records and matches
memory-based errors that fall within the given resource range. A
callback function is passed in by the caller to process the matched
memory records. This is in preparation for adding bad memory ranges
for nvdimm from the BERT.

Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx>
Cc: Ying Huang <ying.huang@xxxxxxxxx>
---
 drivers/acpi/apei/bert.c |  137 ++++++++++++++++++++++++++++++++++++++++++----
 include/linux/acpi.h     |   10 +++
 2 files changed, 134 insertions(+), 13 deletions(-)

diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index 12771fcf0417..9569c15bd616 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <linux/io.h>
+#include <acpi/ghes.h>
 
 #include "apei-internal.h"
 
@@ -34,33 +35,36 @@
 
 static int bert_disable;
 
-static void __init bert_print_all(struct acpi_bert_region *region,
-				  unsigned int region_len)
+static int bert_process_region(struct acpi_bert_region *region,
+		unsigned int region_len,
+		int (*process)(struct acpi_hest_generic_status *estatus,
+			void *data), void *data)
 {
 	struct acpi_hest_generic_status *estatus =
 		(struct acpi_hest_generic_status *)region;
 	int remain = region_len;
 	u32 estatus_len;
+	int rc;
 
 	if (!estatus->block_status)
-		return;
+		return -ENXIO;
 
 	while (remain > sizeof(struct acpi_bert_region)) {
 		if (cper_estatus_check(estatus)) {
 			pr_err(FW_BUG "Invalid error record.\n");
-			return;
+			return -ENXIO;
 		}
 
 		estatus_len = cper_estatus_len(estatus);
 		if (remain < estatus_len) {
-			pr_err(FW_BUG "Truncated status block (length: %u).\n",
-			       estatus_len);
-			return;
+			pr_err(FW_BUG "Truncated status block (len: %u).\n",
+					estatus_len);
+			return -ENXIO;
 		}
 
-		pr_info_once("Error records from previous boot:\n");
-
-		cper_estatus_print(KERN_INFO HW_ERR, estatus);
+		rc = process(estatus, data);
+		if (rc < 0)
+			return rc;
 
 		/*
 		 * Because the boot error source is "one-time polled" type,
@@ -72,10 +76,22 @@ static void __init bert_print_all(struct acpi_bert_region *region,
 		estatus = (void *)estatus + estatus_len;
 		/* No more error records. */
 		if (!estatus->block_status)
-			return;
+			return -ENXIO;
 
 		remain -= estatus_len;
 	}
+
+	return 0;
+}
+
+static int __init bert_print(struct acpi_hest_generic_status *estatus,
+		void *data)
+{
+	pr_info_once("Error records from previous boot:\n");
+	/* Record callback used at boot: dump @estatus; @data is unused. */
+	cper_estatus_print(KERN_INFO HW_ERR, estatus);
+	/* Always 0, so bert_process_region() keeps walking the records. */
+	return 0;
+}
 
 static int __init setup_bert_disable(char *str)
@@ -86,7 +102,7 @@ static int __init setup_bert_disable(char *str)
 }
 __setup("bert_disable", setup_bert_disable);
 
-static int __init bert_check_table(struct acpi_table_bert *bert_tab)
+static int bert_check_table(struct acpi_table_bert *bert_tab)
 {
 	if (bert_tab->header.length < sizeof(struct acpi_table_bert) ||
 	    bert_tab->region_length < sizeof(struct acpi_bert_region))
@@ -138,7 +154,8 @@ static int __init bert_init(void)
 		goto out_fini;
 	boot_error_region = ioremap_cache(bert_tab->address, region_len);
 	if (boot_error_region) {
-		bert_print_all(boot_error_region, region_len);
+		bert_process_region(boot_error_region, region_len,
+				bert_print, NULL);
 		iounmap(boot_error_region);
 	} else {
 		rc = -ENOMEM;
@@ -152,3 +169,97 @@ static int __init bert_init(void)
 }
 
 late_initcall(bert_init);
+
+/* Search state: errors in addr..addr+len-1 are passed to cb(data, ...). */
+struct mem_err_cb_ctx {
+	void (*cb)(void *data, u64 addr, u64 len);
+	void *data;
+	u64 addr;
+	u64 len;
+};
+
+/*
+ * bert_process_region() callback: for each platform memory error section
+ * of @estatus with a valid physical address inside the range held in @data
+ * (struct mem_err_cb_ctx), call ctx->cb with that address and a length of
+ * L1_CACHE_BYTES. Corrected records are skipped. Returns the match count.
+ */
+static int bert_process_mem_err(struct acpi_hest_generic_status *estatus,
+		void *data)
+{
+	struct mem_err_cb_ctx *ctx = data;
+	u64 last = ctx->addr + ctx->len - 1;	/* inclusive end of range */
+	struct acpi_hest_generic_data *gdata;
+	struct cper_sec_mem_err *mem_err;
+	int matches = 0;
+
+	if (estatus->error_severity == CPER_SEV_CORRECTED)
+		return 0;
+
+	apei_estatus_for_each_section(estatus, gdata) {
+		if (!guid_equal((guid_t *)gdata->section_type,
+				&CPER_SEC_PLATFORM_MEM))
+			continue;
+
+		mem_err = acpi_hest_get_payload(gdata);
+		if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+			continue;
+
+		if (mem_err->physical_addr < ctx->addr ||
+		    mem_err->physical_addr > last)
+			continue;
+
+		ctx->cb(ctx->data, mem_err->physical_addr, L1_CACHE_BYTES);
+		matches++;
+	}
+
+	return matches;
+}
+
+/*
+ * Call @cb(@data, address, length) for each BERT memory error found in the
+ * @len bytes starting at @addr. Returns 0 if ACPI is disabled or no BERT
+ * exists; otherwise bert_process_region()'s result or a negative errno.
+ */
+int bert_find_mem_error_record(void (*cb)(void *data, u64 addr, u64 len),
+		void *data, u64 addr, u64 len)
+{
+	struct mem_err_cb_ctx ctx = {
+		.cb = cb, .data = data, .addr = addr, .len = len,
+	};
+	struct acpi_table_bert *bert_tab;
+	struct acpi_bert_region *bert_region;
+	unsigned int region_len;
+	acpi_status status;
+	int rc;
+
+	if (acpi_disabled)
+		return 0;
+
+	status = acpi_get_table(ACPI_SIG_BERT, 0,
+			(struct acpi_table_header **)&bert_tab);
+	if (status == AE_NOT_FOUND)
+		return 0;
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	rc = bert_check_table(bert_tab);
+	if (rc)
+		goto put_table;	/* table was obtained above: release it */
+
+	region_len = bert_tab->region_length;
+	bert_region = acpi_os_map_memory(bert_tab->address, region_len);
+	if (!bert_region) {
+		rc = -ENOMEM;
+		goto put_table;
+	}
+
+	rc = bert_process_region(bert_region, region_len,
+				bert_process_mem_err, &ctx);
+	acpi_os_unmap_memory(bert_region, region_len);
+put_table:
+	acpi_put_table((struct acpi_table_header *)bert_tab);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(bert_find_mem_error_record);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 968173ec2726..57ed7b39f386 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1146,6 +1146,10 @@ int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr);
 					  (&ACPI_PROBE_TABLE_END(t) -	\
 					   &ACPI_PROBE_TABLE(t)));	\
 	})
+
+int bert_find_mem_error_record(
+		void (*cb)(void *data, u64 addr, u64 len),
+		void *data, u64 addr, u64 len);
 #else
 static inline int acpi_dev_get_property(struct acpi_device *adev,
 					const char *name, acpi_object_type type,
@@ -1247,6 +1251,12 @@ acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode,
 		     (void *) data }
 
 #define acpi_probe_device_table(t)	({ int __r = 0; __r;})
+/* Stub for this #else branch; static inline avoids duplicate definitions. */
+static inline int bert_find_mem_error_record(void (*cb)(void *data, u64 addr,
+		u64 len), void *data, u64 addr, u64 len)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #ifdef CONFIG_ACPI_TABLE_UPGRADE

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux