Hi, On 8/11/2023 1:48 AM, Avadhut Naik wrote: > ACPI Platform Health Assessment Table (PHAT) enables a platform to expose > an extensible set of platform health related telemetry. The telemetry is > exposed through Firmware Version and Firmware Health Data Records which > provide version data and health-related information of their associated > components respectively. > > Additionally, the platform also provides Reset Reason Health Record in > the PHAT table highlighting the cause of last system reset or boot in case > of both expected and unexpected events. Vendor-specific data capturing the > underlying state of the system during reset can also be optionally provided > through the record.[1] > > Add support to parse the PHAT table during system bootup and have its > information logged into the dmesg buffer. > > [1] ACPI specification 6.5, section 5.2.31.5 > > Signed-off-by: Avadhut Naik <avadhut.naik@xxxxxxx> > --- > .../admin-guide/kernel-parameters.txt | 4 + > drivers/acpi/Kconfig | 9 + > drivers/acpi/Makefile | 1 + > drivers/acpi/phat.c | 270 ++++++++++++++++++ > include/acpi/actbl2.h | 18 ++ > 5 files changed, 302 insertions(+) > create mode 100644 drivers/acpi/phat.c > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 722b6eca2e93..33b932302ece 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -4490,6 +4490,10 @@ > allocator. This parameter is primarily for debugging > and performance comparison. > > + phat_disable= [ACPI] > + Disable PHAT table parsing and logging of Firmware > + Version and Health Data records. > + > pirq= [SMP,APIC] Manual mp-table setup > See Documentation/arch/x86/i386/IO-APIC.rst. > > diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig > index 00dd309b6682..06a7dd6e5a40 100644 > --- a/drivers/acpi/Kconfig > +++ b/drivers/acpi/Kconfig > @@ -96,6 +96,15 @@ config ACPI_FPDT > This table provides information on the timing of the system > boot, S3 suspend and S3 resume firmware code paths. > > +config ACPI_PHAT > + bool "ACPI Platform Health Assessment Table (PHAT) support" > + depends on X86_64 || ARM64 > + help > + Enable support for Platform Health Assessment Table (PHAT). > + This table exposes an extensible set of platform health > + related telemetry through Firmware Version and Firmware Health > + Data Records. > + > config ACPI_LPIT > bool > depends on X86_64 > diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile > index 3fc5a0d54f6e..93a4ec57ba6d 100644 > --- a/drivers/acpi/Makefile > +++ b/drivers/acpi/Makefile > @@ -69,6 +69,7 @@ acpi-$(CONFIG_ACPI_WATCHDOG) += acpi_watchdog.o > acpi-$(CONFIG_ACPI_PRMT) += prmt.o > acpi-$(CONFIG_ACPI_PCC) += acpi_pcc.o > acpi-$(CONFIG_ACPI_FFH) += acpi_ffh.o > +acpi-$(CONFIG_ACPI_PHAT) += phat.o > > # Address translation > acpi-$(CONFIG_ACPI_ADXL) += acpi_adxl.o > diff --git a/drivers/acpi/phat.c b/drivers/acpi/phat.c > new file mode 100644 > index 000000000000..6006dd7615fa > --- /dev/null > +++ b/drivers/acpi/phat.c > @@ -0,0 +1,270 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Platform Health Assessment Table (PHAT) support > + * > + * Copyright (C) 2023 Advanced Micro Devices, Inc. > + * > + * Author: Avadhut Naik <avadhut.naik@xxxxxxx> > + * > + * This file implements parsing of the Platform Health Assessment Table > + * through which a platform can expose an extensible set of platform > + * health related telemetry. The telemetry is exposed through Firmware > + * Version Data Records and Firmware Health Data Records. Additionally, > + * a platform, through system firmware, also exposes Reset Reason Health > + * Record to inform the operating system of the cause of last system > + * reset or boot. > + * > + * For more information on PHAT, please refer to ACPI specification > + * version 6.5, section 5.2.31 > + */ > + > +#include <linux/acpi.h> > + > +static int phat_disable __initdata; > +static const char *prefix = "ACPI PHAT: "; Wouldn't it be better if you used pr_fmt macro instead ? > + > +/* Reset Reason Health Record GUID */ > +static const guid_t reset_guid = > + GUID_INIT(0x7a014ce2, 0xf263, 0x4b77, > + 0xb8, 0x8a, 0xe6, 0x33, 0x6b, 0x78, 0x2c, 0x14); > + > +static struct { u8 mask; const char *str; } const reset_sources[] = { > + {BIT(0), "Unknown source"}, > + {BIT(1), "Hardware Source"}, > + {BIT(2), "Firmware Source"}, > + {BIT(3), "Software initiated reset"}, > + {BIT(4), "Supervisor initiated reset"}, > +}; > + > +static struct { u8 val; const char *str; } const reset_reasons[] = { > + {0, "UNKNOWN"}, > + {1, "COLD BOOT"}, > + {2, "COLD RESET"}, > + {3, "WARM RESET"}, > + {4, "UPDATE"}, > + {32, "UNEXPECTED RESET"}, > + {33, "FAULT"}, > + {34, "TIMEOUT"}, > + {35, "THERMAL"}, > + {36, "POWER LOSS"}, > + {37, "POWER BUTTON"}, > +}; > + > +/* > + * Print the last PHAT Version Element associated with a Firmware > + * Version Data Record. > + * Firmware Version Data Record consists of an array of PHAT Version > + * Elements with each entry in the array representing a modification > + * undertaken on a given platform component. > + * In the event the array has multiple entries, minimize logs on the > + * console and print only the last version element since it denotes > + * the currently running instance of the component. > + */ > +static int phat_version_data_parse(const char *pfx, > + struct acpi_phat_version_data *version) > +{ > + char newpfx[64]; > + u32 num_elems = version->element_count - 1; > + struct acpi_phat_version_element *element; > + int offset = sizeof(struct acpi_phat_version_data); > + > + if (!version->element_count) { > + pr_info("%sNo PHAT Version Elements found.\n", prefix); > + return 0; > + } > + > + offset += num_elems * sizeof(struct acpi_phat_version_element); > + element = (void *)version + offset; > + > + pr_info("%sPHAT Version Element:\n", pfx); > + snprintf(newpfx, sizeof(newpfx), "%s ", pfx); > + pr_info("%sComponent ID: %pUl\n", newpfx, element->guid); > + pr_info("%sVersion: 0x%llx\n", newpfx, element->version_value); > + snprintf(newpfx, sizeof(newpfx), KERN_INFO "%s ", pfx); > + print_hex_dump(newpfx, "Producer ID: ", DUMP_PREFIX_NONE, 16, 4, > + &element->producer_id, sizeof(element->producer_id), true); I do have to admit that all this dancing with pfx and newpfx confuses me. Couldn't you just use pr_fmt for everything printed using pr_* family of functions ? print_hex_dump() is not impacted by pr_fmt, as it just uses printk to do it's printing. > + > + return 0; > +} > + > +/* > + * Print the Reset Reason Health Record > + */ > +static int phat_reset_reason_parse(const char *pfx, > + struct acpi_phat_health_data *record) > +{ > + int idx; > + void *data; > + u32 data_len; > + char newpfx[64]; > + struct acpi_phat_reset_reason *rr; > + struct acpi_phat_vendor_reset_data *vdata; > + > + rr = (void *)record + record->device_specific_offset; > + > + for (idx = 0; idx < ARRAY_SIZE(reset_sources); idx++) { > + if (!rr->reset_source) { > + pr_info("%sUnknown Reset Source.\n", pfx); > + break; > + } > + if (rr->reset_source & reset_sources[idx].mask) { > + pr_info("%sReset Source: 0x%x\t%s\n", pfx, reset_sources[idx].mask, > + reset_sources[idx].str); > + /* According to ACPI v6.5 Table 5.168, Sub-Source is > + * defined only for Software initiated reset. > + */ > + if (idx == 0x3 && rr->reset_sub_source) > + pr_info("%sReset Sub-Source: %s\n", pfx, > + rr->reset_sub_source == 0x1 ? > + "Operating System" : "Hypervisor"); > + break; > + } > + } > + > + for (idx = 0; idx < ARRAY_SIZE(reset_reasons); idx++) { > + if (rr->reset_reason == reset_reasons[idx].val) { > + pr_info("%sReset Reason: 0x%x\t%s\n", pfx, reset_reasons[idx].val, > + reset_reasons[idx].str); > + break; > + } > + } > + > + if (!rr->vendor_count) > + return 0; > + > + pr_info("%sReset Reason Vendor Data:\n", pfx); > + vdata = (void *)rr + sizeof(*rr); > + > + for (idx = 0; idx < rr->vendor_count; idx++) { > + snprintf(newpfx, sizeof(newpfx), "%s ", pfx); > + data_len = vdata->length - sizeof(*vdata); > + data = (void *)vdata + sizeof(*vdata); > + pr_info("%sVendor Data ID: %pUl\n", newpfx, vdata->vendor_id); > + pr_info("%sRevision: 0x%x\n", newpfx, vdata->revision); > + snprintf(newpfx, sizeof(newpfx), KERN_INFO "%s ", pfx); > + print_hex_dump(newpfx, "Data: ", DUMP_PREFIX_NONE, 16, 4, > + data, data_len, false); > + vdata = (void *)vdata + vdata->length; > + } > + > + return 0; > +} > + > +/* > + * Print the Firmware Health Data Record. > + */ > +static int phat_health_data_parse(const char *pfx, > + struct acpi_phat_health_data *record) > +{ > + void *data; > + u32 data_len; > + char newpfx[64]; > + > + pr_info("%sHealth Records.\n", pfx); > + snprintf(newpfx, sizeof(newpfx), "%s ", pfx); > + pr_info("%sDevice Signature: %pUl\n", newpfx, record->device_guid); > + > + switch (record->health) { > + case ACPI_PHAT_ERRORS_FOUND: > + pr_info("%sAmHealthy: Errors found\n", newpfx); > + break; > + case ACPI_PHAT_NO_ERRORS: > + pr_info("%sAmHealthy: No errors found.\n", newpfx); > + break; > + case ACPI_PHAT_UNKNOWN_ERRORS: > + pr_info("%sAmHealthy: Unknown.\n", newpfx); > + break; > + case ACPI_PHAT_ADVISORY: > + pr_info("%sAmHealthy: Advisory – additional device-specific data exposed.\n", > + newpfx); > + break; > + default: > + break; > + } > + > + if (!record->device_specific_offset) > + return 0; > + > + /* Reset Reason Health Record has a unique GUID and is created as > + * a Health Record in the PHAT table. Check if this Health Record > + * is a Reset Reason Health Record. > + */ > + if (guid_equal((guid_t *)record->device_guid, &reset_guid)) { > + phat_reset_reason_parse(newpfx, record); > + return 0; > + } > + > + data = (void *)record + record->device_specific_offset; > + data_len = record->header.length - record->device_specific_offset; > + snprintf(newpfx, sizeof(newpfx), KERN_INFO "%s ", pfx); > + print_hex_dump(newpfx, "Device Data: ", DUMP_PREFIX_NONE, 16, 4, > + data, data_len, false); > + > + return 0; > +} > + > +static int parse_phat_table(const char *pfx, struct acpi_table_phat *phat_tab) > +{ > + char newpfx[64]; > + u32 offset = sizeof(*phat_tab); > + struct acpi_phat_header *phat_header; > + > + snprintf(newpfx, sizeof(newpfx), "%s ", pfx); > + > + while (offset < phat_tab->header.length) { > + phat_header = (void *)phat_tab + offset; > + switch (phat_header->type) { > + case ACPI_PHAT_TYPE_FW_VERSION_DATA: > + phat_version_data_parse(newpfx, (struct acpi_phat_version_data *) > + phat_header); > + break; > + case ACPI_PHAT_TYPE_FW_HEALTH_DATA: > + phat_health_data_parse(newpfx, (struct acpi_phat_health_data *) > + phat_header); > + break; > + default: > + break; > + } > + offset += phat_header->length; > + } > + return 0; > +} > + > +static int __init setup_phat_disable(char *str) > +{ > + phat_disable = 1; > + return 1; > +} > +__setup("phat_disable", setup_phat_disable); > + > +static int __init acpi_phat_init(void) > +{ > + acpi_status status; > + struct acpi_table_phat *phat_tab; > + > + if (acpi_disabled) > + return 0; > + > + if (phat_disable) { > + pr_err("%sPHAT support has been disabled.\n", prefix); > + return 0; > + } > + > + status = acpi_get_table(ACPI_SIG_PHAT, 0, > + (struct acpi_table_header **)&phat_tab); > + > + if (status == AE_NOT_FOUND) { > + pr_info("%sPHAT Table not found.\n", prefix); > + return 0; > + } else if (ACPI_FAILURE(status)) { > + pr_err("%sFailed to get PHAT Table: %s.\n", prefix, > + acpi_format_exception(status)); > + return -EINVAL; > + } > + > + pr_info("%sPlatform Telemetry Records.\n", prefix); > + parse_phat_table(prefix, phat_tab); So for now you're only dumping tables to the dmesg output ? Are you planning to create some sysfs interfaces similar to let's say EINJ ? > + > + return 0; > +} > +late_initcall(acpi_phat_init); > diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h > index 0029336775a9..c263893cbc7f 100644 > --- a/include/acpi/actbl2.h > +++ b/include/acpi/actbl2.h > @@ -2360,6 +2360,24 @@ struct acpi_phat_health_data { > #define ACPI_PHAT_UNKNOWN_ERRORS 2 > #define ACPI_PHAT_ADVISORY 3 > > +/* Reset Reason Health Record Structure */ > + > +struct acpi_phat_reset_reason { > + u8 supported_reset_sources; > + u8 reset_source; > + u8 reset_sub_source; > + u8 reset_reason; > + u16 vendor_count; > +}; > + > +/* Reset Reason Health Record Vendor Data Entry */ > + > +struct acpi_phat_vendor_reset_data { > + u8 vendor_id[16]; > + u16 length; > + u16 revision; > +}; > + > /******************************************************************************* > * > * PMTT - Platform Memory Topology Table (ACPI 5.0)