On 2017/8/18 22:23, Dongjiu Geng wrote: > This implements APEI GHES Table by passing the error CPER info > to the guest via a fw_cfg_blob. After a CPER info is recorded, an > SEA(Synchronous External Abort)/SEI(SError Interrupt) exception > will be injected into the guest OS. > > Below is the table layout, the max number of error soure is 11, > which is classified by notification type. > > etc/acpi/tables etc/hardware_errors > ==================== ========================================== > + +--------------------------+ +------------------+ > | | HEST | | address | +--------------+ > | +--------------------------+ | registers | | Error Status | > | | GHES0 | | +----------------+ | Data Block 0 | > | +--------------------------+ +--------->| |status_address0 |------------->| +------------+ > | | ................. | | | +----------------+ | | CPER | > | | error_status_address-----+-+ +------->| |status_address1 |----------+ | | CPER | > | | ................. | | | +----------------+ | | | .... | > | | read_ack_register--------+-+ | | ............. | | | | CPER | > | | read_ack_preserve | | | +------------------+ | | +-+------------+ > | | read_ack_write | | | +----->| |status_address10|--------+ | | Error Status | > + +--------------------------+ | | | | +----------------+ | | | Data Block 1 | > | | GHES1 | +-+-+----->| | ack_value0 | | +-->| +------------+ > + +--------------------------+ | | | +----------------+ | | | CPER | > | | ................. | | | +--->| | ack_value1 | | | | CPER | > | | error_status_address-----+---+ | | | +----------------+ | | | .... | > | | ................. | | | | | ............. | | | | CPER | > | | read_ack_register--------+-----+-+ | +----------------+ | +-+------------+ > | | read_ack_preserve | | +->| | ack_value10 | | | |.......... | > | | read_ack_write | | | | +----------------+ | | +------------+ > + +--------------------------| | | | | Error Status | > | | ............... 
| | | | | Data Block 10| > + +--------------------------+ | | +---->| +------------+ > | | GHES10 | | | | | CPER | > + +--------------------------+ | | | | CPER | > | | ................. | | | | | .... | > | | error_status_address-----+-----+ | | | CPER | > | | ................. | | +-+------------+ > | | read_ack_register--------+---------+ > | | read_ack_preserve | > | | read_ack_write | > + +--------------------------+ > > For GHESv2 error source, the OSPM must acknowledges the error via Read Ack register. > so user space must check the ack value to avoid read-write race condition. > > Signed-off-by: Dongjiu Geng <gengdongjiu@xxxxxxxxxx> > --- > hw/acpi/aml-build.c | 2 + > hw/acpi/hest_ghes.c | 345 ++++++++++++++++++++++++++++++++++++++++++++ > hw/arm/virt-acpi-build.c | 6 + > include/hw/acpi/aml-build.h | 1 + > include/hw/acpi/hest_ghes.h | 47 ++++++ > 5 files changed, 401 insertions(+) > create mode 100644 hw/acpi/hest_ghes.c Doesn't the new file need to be added to hw/acpi/Makefile.objs? 
> create mode 100644 include/hw/acpi/hest_ghes.h > > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c > index 36a6cc4..6849e5f 100644 > --- a/hw/acpi/aml-build.c > +++ b/hw/acpi/aml-build.c > @@ -1561,6 +1561,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables) > tables->table_data = g_array_new(false, true /* clear */, 1); > tables->tcpalog = g_array_new(false, true /* clear */, 1); > tables->vmgenid = g_array_new(false, true /* clear */, 1); > + tables->hardware_errors = g_array_new(false, true /* clear */, 1); > tables->linker = bios_linker_loader_init(); > } > > @@ -1571,6 +1572,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre) > g_array_free(tables->table_data, true); > g_array_free(tables->tcpalog, mfre); > g_array_free(tables->vmgenid, mfre); > + g_array_free(tables->hardware_errors, mfre); > } > > /* Build rsdt table */ > diff --git a/hw/acpi/hest_ghes.c b/hw/acpi/hest_ghes.c > new file mode 100644 > index 0000000..ff6b5ef > --- /dev/null > +++ b/hw/acpi/hest_ghes.c > @@ -0,0 +1,345 @@ > +/* > + * APEI GHES table Generation > + * > + * Copyright (C) 2017 huawei. > + * > + * Author: Dongjiu Geng <gengdongjiu@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ Please unify this header comment between this file and hest_ghes.h by referring to other files. 
> + > +#include "qemu/osdep.h" > +#include "qmp-commands.h" unnecessary include > +#include "hw/acpi/acpi.h" > +#include "hw/acpi/aml-build.h" > +#include "hw/acpi/hest_ghes.h" > +#include "hw/nvram/fw_cfg.h" > +#include "sysemu/sysemu.h" > +#include "qemu/error-report.h" > + > +/* The structure that stands for the layout > + * GHES_ERRORS_FW_CFG_FILE fw_cfg blob > + * > + * etc/hardware_errors > + * ========================================== > + * +------------------+ > + * | address | +--------------+ > + * | registers | | Error Status | > + * | +----------------+ | Data Block 0 | > + * | |status_address0 |------------->| +------------+ > + * | +----------------+ | | CPER | > + * | |status_address1 |----------+ | | CPER | > + * | +----------------+ | | | .... | > + * | |............. | | | | CPER | > + * | +----------------+ | | +------------+ > + * | |status_address10|-----+ | | Error Status | > + * | +----------------+ | | | Data Block 1 | > + * | |ack_value0 | | +-->| +------------+ > + * | +----------------+ | | | CPER | > + * | |ack_value1 | | | | CPER | > + * | +----------------+ | | | .... | > + * | | ............. | | | | CPER | > + * | +----------------+ | +-+------------+ > + * | |ack_value10 | | | |.......... | > + * | +----------------+ | | +------------+ > + * | | Error Status | > + * | | Data Block10 | > + * +------->+------------+ > + * | | CPER | > + * | | CPER | > + * | | .... 
| > + * | | CPER | > + * +-+------------+ > + */ > +struct hardware_errors_buffer { > + /* Generic Error Status Block register */ > + uint64_t gesb_address[GHES_ACPI_HEST_NOTIFY_RESERVED]; > + uint64_t ack_value[GHES_ACPI_HEST_NOTIFY_RESERVED]; > + char gesb[GHES_MAX_RAW_DATA_LENGTH][GHES_ACPI_HEST_NOTIFY_RESERVED]; > +}; > + > +static int ghes_record_cper(uint64_t error_block_address, > + uint64_t error_physical_addr) > +{ > + AcpiGenericErrorStatus block; > + AcpiGenericErrorData *gdata; > + UefiCperSecMemErr *mem_err; > + uint64_t current_block_length; > + unsigned char *buffer; > + /* memory section */ > + char mem_section_id_le[] = {0x14, 0x11, 0xBC, 0xA5, 0x64, 0x6F, 0xDE, > + 0x4E, 0xB8, 0x63, 0x3E, 0x83, 0xED, 0x7C, > + 0x83, 0xB1}; > + > + cpu_physical_memory_read(error_block_address, &block, > + sizeof(AcpiGenericErrorStatus)); > + > + /* Get the current generic error status block length */ > + current_block_length = sizeof(AcpiGenericErrorStatus) + > + le32_to_cpu(block.data_length); > + > + /* If the Generic Error Status Block is NULL, update > + * the block header > + */ > + if (!block.block_status) { > + block.block_status = ACPI_GEBS_UNCORRECTABLE; > + block.error_severity = ACPI_CPER_SEV_RECOVERABLE; > + } > + > + block.data_length += cpu_to_le32(sizeof(AcpiGenericErrorData)); > + block.data_length += cpu_to_le32(sizeof(UefiCperSecMemErr)); > + > + /* check whether it runs out of the preallocated memory */ > + if ((le32_to_cpu(block.data_length) + sizeof(AcpiGenericErrorStatus)) > > + GHES_MAX_RAW_DATA_LENGTH) { > + error_report("Record CPER out of boundary!!!"); > + return GHES_CPER_FAIL; > + } > + > + /* Write back the Generic Error Status Block to guest memory */ > + cpu_physical_memory_write(error_block_address, &block, > + sizeof(AcpiGenericErrorStatus)); > + > + /* Fill in Generic Error Data Entry */ > + buffer = g_malloc0(sizeof(AcpiGenericErrorData) + > + sizeof(UefiCperSecMemErr)); > + > + > + memset(buffer, 0, sizeof(AcpiGenericErrorData) 
+ sizeof(UefiCperSecMemErr)); > + gdata = (AcpiGenericErrorData *)buffer; > + > + /* Memory section */ > + memcpy(&(gdata->section_type_le), &mem_section_id_le, > + sizeof(mem_section_id_le)); > + > + /* error severity is recoverable */ > + gdata->error_severity = ACPI_CPER_SEV_RECOVERABLE; > + gdata->revision = 0x300; /* the revision number is 0x300 */ > + gdata->error_data_length = cpu_to_le32(sizeof(UefiCperSecMemErr)); > + > + mem_err = (UefiCperSecMemErr *) (gdata + 1); > + > + /* User space only handle the memory section CPER */ > + > + /* Hard code to Multi-bit ECC error */ > + mem_err->validation_bits |= cpu_to_le32(UEFI_CPER_MEM_VALID_ERROR_TYPE); > + mem_err->error_type = cpu_to_le32(UEFI_CPER_MEM_ERROR_TYPE_MULTI_ECC); > + > + /* Record the physical address at which the memory error occurred */ > + mem_err->validation_bits |= cpu_to_le32(UEFI_CPER_MEM_VALID_PA); > + mem_err->physical_addr = cpu_to_le32(error_physical_addr); > + > + /* Write back the Generic Error Data Entry to guest memory */ > + cpu_physical_memory_write(error_block_address + current_block_length, > + buffer, sizeof(AcpiGenericErrorData) + sizeof(UefiCperSecMemErr)); > + > + g_free(buffer); > + return GHES_CPER_OK; > +} > + > +static void > +build_address(GArray *table_data, BIOSLinker *linker, > + uint32_t dst_patched_offset, uint32_t src_offset, > + uint8_t address_space_id , uint8_t register_bit_width, > + uint8_t register_bit_offset, uint8_t access_size) > +{ > + uint32_t address_size = sizeof(struct AcpiGenericAddress) - > + offsetof(struct AcpiGenericAddress, address); > + > + /* Address space */ > + build_append_int_noprefix(table_data, address_space_id, 1); > + /* register bit width */ > + build_append_int_noprefix(table_data, register_bit_width, 1); > + /* register bit offset */ > + build_append_int_noprefix(table_data, register_bit_offset, 1); > + /* access size */ > + build_append_int_noprefix(table_data, access_size, 1); > + acpi_data_push(table_data, address_size); > + > + 
/* Patch address of ERRORS fw_cfg blob into the TABLE fw_cfg blob so OSPM > + * can retrieve and read it. the address size is 64 bits. > + */ > + bios_linker_loader_add_pointer(linker, > + ACPI_BUILD_TABLE_FILE, dst_patched_offset, sizeof(uint64_t), > + GHES_ERRORS_FW_CFG_FILE, src_offset); > +} > + > +void ghes_build_acpi(GArray *table_data, GArray *hardware_error, > + BIOSLinker *linker) > +{ > + uint32_t ghes_start = table_data->len; > + uint32_t address_size, error_status_address_offset; > + uint32_t read_ack_register_offset, i; > + > + address_size = sizeof(struct AcpiGenericAddress) - > + offsetof(struct AcpiGenericAddress, address); > + > + error_status_address_offset = ghes_start + > + sizeof(AcpiHardwareErrorSourceTable) + > + offsetof(AcpiGenericHardwareErrorSourceV2, error_status_address) + > + offsetof(struct AcpiGenericAddress, address); > + > + read_ack_register_offset = ghes_start + > + sizeof(AcpiHardwareErrorSourceTable) + > + offsetof(AcpiGenericHardwareErrorSourceV2, read_ack_register) + > + offsetof(struct AcpiGenericAddress, address); > + > + acpi_data_push(hardware_error, > + offsetof(struct hardware_errors_buffer, ack_value)); > + for (i = 0; i < GHES_ACPI_HEST_NOTIFY_RESERVED; i++) > + /* Initialize read ack register */ > + build_append_int_noprefix((void *)hardware_error, 1, 8); > + > + /* Reserved the total size for ERRORS fw_cfg blob > + */ > + acpi_data_push(hardware_error, sizeof(struct hardware_errors_buffer)); > + > + /* Allocate guest memory for the Data fw_cfg blob */ > + bios_linker_loader_alloc(linker, GHES_ERRORS_FW_CFG_FILE, hardware_error, > + 1, false); > + /* Reserve table header size */ > + acpi_data_push(table_data, sizeof(AcpiTableHeader)); > + > + build_append_int_noprefix(table_data, GHES_ACPI_HEST_NOTIFY_RESERVED, 4); > + > + for (i = 0; i < GHES_ACPI_HEST_NOTIFY_RESERVED; i++) { > + build_append_int_noprefix(table_data, > + ACPI_HEST_SOURCE_GENERIC_ERROR_V2, 2); /* type */ > + /* source id */ > + 
build_append_int_noprefix(table_data, cpu_to_le16(i), 2); > + /* related source id */ > + build_append_int_noprefix(table_data, 0xffff, 2); > + build_append_int_noprefix(table_data, 0, 1); /* flags */ > + > + /* Currently only enable SEA notification type to avoid the kernel > + * warning, reserve the space for other notification error source > + */ > + if (i == ACPI_HEST_NOTIFY_SEA) { > + build_append_int_noprefix(table_data, 1, 1); /* enabled */ > + } else { > + build_append_int_noprefix(table_data, 0, 1); /* enabled */ > + } > + > + /* The number of error status block per generic hardware error source */ > + build_append_int_noprefix(table_data, 1, 4); > + /* Max sections per record */ > + build_append_int_noprefix(table_data, 1, 4); > + /* Max raw data length */ > + build_append_int_noprefix(table_data, GHES_MAX_RAW_DATA_LENGTH, 4); > + > + /* Build error status address*/ > + build_address(table_data, linker, error_status_address_offset + i * > + sizeof(AcpiGenericHardwareErrorSourceV2), i * address_size, > + AML_SYSTEM_MEMORY, 0x40, 0, 4 /* QWord access */); > + > + /* Hardware error notification structure */ > + build_append_int_noprefix(table_data, i, 1); /* type */ > + /* length */ > + build_append_int_noprefix(table_data, sizeof(AcpiHestNotify), 1); > + build_append_int_noprefix(table_data, 0, 26); > + > + /* Error Status Block Length */ > + build_append_int_noprefix(table_data, > + cpu_to_le32(GHES_MAX_RAW_DATA_LENGTH), 4); > + > + /* Build read ack register */ > + build_address(table_data, linker, read_ack_register_offset + i * > + sizeof(AcpiGenericHardwareErrorSourceV2), > + offsetof(struct hardware_errors_buffer, ack_value) + > + i * address_size, AML_SYSTEM_MEMORY, 0x40, 0, > + 4 /* QWord access */); > + > + /* Read ack preserve */ > + build_append_int_noprefix(table_data, cpu_to_le64(0xfffffffe), 8); > + > + /* Read ack write */ > + build_append_int_noprefix(table_data, cpu_to_le64(0x1), 8); > + } > + > + for (i = 0; i < 
GHES_ACPI_HEST_NOTIFY_RESERVED; i++) > + /* Patch address of generic error status block into > + * the address register so OSPM can retrieve and read it. > + */ > + bios_linker_loader_add_pointer(linker, > + GHES_ERRORS_FW_CFG_FILE, address_size * i, address_size, > + GHES_ERRORS_FW_CFG_FILE, > + offsetof(struct hardware_errors_buffer, gesb) + > + i * GHES_MAX_RAW_DATA_LENGTH); > + > + /* Patch address of ERRORS fw_cfg blob into the ADDR fw_cfg blob > + * so QEMU can write the ERRORS there. The address is expected to be > + * < 4GB, but write 64 bits anyway. > + */ > + bios_linker_loader_write_pointer(linker, GHES_DATA_ADDR_FW_CFG_FILE, > + 0, address_size, GHES_ERRORS_FW_CFG_FILE, > + offsetof(struct hardware_errors_buffer, gesb)); > + > + build_header(linker, table_data, > + (void *)(table_data->data + ghes_start), "HEST", > + table_data->len - ghes_start, 1, NULL, "GHES"); > +} > + > +static GhesState ges; > +void ghes_add_fw_cfg(FWCfgState *s, GArray *hardware_error) > +{ > + > + size_t request_block_size = sizeof(uint64_t) + GHES_MAX_RAW_DATA_LENGTH; > + size_t size = GHES_ACPI_HEST_NOTIFY_RESERVED * request_block_size; > + > + /* Create a read-only fw_cfg file for GHES */ > + fw_cfg_add_file(s, GHES_ERRORS_FW_CFG_FILE, hardware_error->data, > + size); > + /* Create a read-write fw_cfg file for Address */ > + fw_cfg_add_file_callback(s, GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL, > + &ges.ghes_addr_le, sizeof(ges.ghes_addr_le), false); > +} > + > +bool ghes_update_guest(uint32_t notify, uint64_t physical_address) > +{ > + uint64_t error_block_addr; > + uint64_t ack_value_addr, ack_value = 0; > + int loop = 0, ack_value_size; > + bool ret = GHES_CPER_FAIL; > + > + ack_value_size = (offsetof(struct hardware_errors_buffer, gesb) - > + offsetof(struct hardware_errors_buffer, ack_value)) / > + GHES_ACPI_HEST_NOTIFY_RESERVED; > + > + if (physical_address && notify < GHES_ACPI_HEST_NOTIFY_RESERVED) { > + error_block_addr = ges.ghes_addr_le + notify * 
GHES_MAX_RAW_DATA_LENGTH; > + error_block_addr = le32_to_cpu(error_block_addr); > + > + ack_value_addr = ges.ghes_addr_le - > + (GHES_ACPI_HEST_NOTIFY_RESERVED - notify) * ack_value_size; > +retry: > + cpu_physical_memory_read(ack_value_addr, &ack_value, ack_value_size); > + if (!ack_value) { > + if (loop < 3) { > + usleep(100 * 1000); > + loop++; > + goto retry; > + } else { > + error_report("Last time OSPM does not acknowledge the error," > + " record CPER failed this time, set the ack value to" > + " avoid blocking next time CPER record! exit"); > + ack_value = 1; > + cpu_physical_memory_write(ack_value_addr, > + &ack_value, ack_value_size); > + return ret; > + } > + } else { > + /* A zero value in ghes_addr means that BIOS has not yet written > + * the address > + */ > + if (error_block_addr) { > + ack_value = 0; > + cpu_physical_memory_write(ack_value_addr, > + &ack_value, ack_value_size); > + ret = ghes_record_cper(error_block_addr, physical_address); > + } > + } > + } > + return ret; > +} > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c > index 3d78ff6..def1ec1 100644 > --- a/hw/arm/virt-acpi-build.c > +++ b/hw/arm/virt-acpi-build.c > @@ -45,6 +45,7 @@ > #include "hw/arm/virt.h" > #include "sysemu/numa.h" > #include "kvm_arm.h" > +#include "hw/acpi/hest_ghes.h" > > #define ARM_SPI_BASE 32 > #define ACPI_POWER_BUTTON_DEVICE "PWRB" > @@ -771,6 +772,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) > acpi_add_table(table_offsets, tables_blob); > build_spcr(tables_blob, tables->linker, vms); > > + acpi_add_table(table_offsets, tables_blob); > + ghes_build_acpi(tables_blob, tables->hardware_errors, tables->linker); > + So we add this table unconditionally. Is there any bad impact if QEMU runs on old kvm? Does it need to check whether KVM supports RAS? 
> if (nb_numa_nodes > 0) { > acpi_add_table(table_offsets, tables_blob); > build_srat(tables_blob, tables->linker, vms); > @@ -887,6 +891,8 @@ void virt_acpi_setup(VirtMachineState *vms) > fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, > acpi_data_len(tables.tcpalog)); > > + ghes_add_fw_cfg(vms->fw_cfg, tables.hardware_errors); > + > build_state->rsdp_mr = acpi_add_rom_blob(build_state, tables.rsdp, > ACPI_BUILD_RSDP_FILE, 0); > > diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h > index 88d0738..7f7b55c 100644 > --- a/include/hw/acpi/aml-build.h > +++ b/include/hw/acpi/aml-build.h > @@ -211,6 +211,7 @@ struct AcpiBuildTables { > GArray *rsdp; > GArray *tcpalog; > GArray *vmgenid; > + GArray *hardware_errors; > BIOSLinker *linker; > } AcpiBuildTables; > > diff --git a/include/hw/acpi/hest_ghes.h b/include/hw/acpi/hest_ghes.h > new file mode 100644 > index 0000000..0772756 > --- /dev/null > +++ b/include/hw/acpi/hest_ghes.h > @@ -0,0 +1,47 @@ > +/* > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * Authors: > + * Dongjiu Geng <gengdongjiu@xxxxxxxxxx> > + * > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. 
> + */ > + > +#ifndef ACPI_GHES_H > +#define ACPI_GHES_H > + > +#include "hw/acpi/bios-linker-loader.h" > + > +#define GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors" > +#define GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr" > + > +#define GHES_GAS_ADDRESS_OFFSET 4 > +#define GHES_ERROR_STATUS_ADDRESS_OFFSET 20 > +#define GHES_NOTIFICATION_STRUCTURE 32 > + > +#define GHES_CPER_OK 1 > +#define GHES_CPER_FAIL 0 > + > +#define GHES_ACPI_HEST_NOTIFY_RESERVED 11 > +/* The max size in Bytes for one error block */ > +#define GHES_MAX_RAW_DATA_LENGTH 0x1000 > + > + > +typedef struct GhesState { > + uint64_t ghes_addr_le; > +} GhesState; > + > +void ghes_build_acpi(GArray *table_data, GArray *hardware_error, > + BIOSLinker *linker); > +void ghes_add_fw_cfg(FWCfgState *s, GArray *hardware_errors); > +bool ghes_update_guest(uint32_t notify, uint64_t error_physical_addr); > +#endif > -- Shannon