> > On Tue, 14 May 2019 04:18:23 -0700 > Dongjiu Geng <gengdongjiu@xxxxxxxxxx> wrote: > > > Add SIGBUS signal handler. In this handler, it checks the SIGBUS type, > > translates the host VA delivered by host to guest PA, then fill this > > PA to guest APEI GHES memory, then notify guest according to the SIGBUS type. > > > > If guest accesses the poisoned memory, it generates Synchronous > > External Abort(SEA). Then host kernel gets an APEI notification and > > call memory_failure() to unmapped the affected page for the guest's > > stage 2, finally return to guest. > > > > Guest continues to access PG_hwpoison page, it will trap to KVM as > > stage2 fault, then a SIGBUS_MCEERR_AR synchronous signal is delivered > > to Qemu, Qemu record this error address into guest APEI GHES memory > > and notify guest using Synchronous-External-Abort(SEA). > > > > Suggested-by: James Morse <james.morse@xxxxxxx> > > Signed-off-by: Dongjiu Geng <gengdongjiu@xxxxxxxxxx> > Hi Dongjiu, > > Good to see this moving forwards again. > > A few really minor things inline. Jonathan, It is good to see your comments, thanks for the review. > > Thanks, > > Jonathan > > > --- > > hw/acpi/acpi_ghes.c | 177 ++++++++++++++++++++++++++++++++++++++++++++ > > include/hw/acpi/acpi_ghes.h | 6 +- > > include/sysemu/kvm.h | 2 +- > > target/arm/kvm64.c | 39 ++++++++++ > > 4 files changed, 222 insertions(+), 2 deletions(-) > > > > diff --git a/hw/acpi/acpi_ghes.c b/hw/acpi/acpi_ghes.c index > > d03e797..06b7374 100644 > > --- a/hw/acpi/acpi_ghes.c > > +++ b/hw/acpi/acpi_ghes.c > > @@ -26,6 +26,101 @@ > > #include "sysemu/sysemu.h" > > #include "qemu/error-report.h" > > > > +/* UEFI 2.6: N.2.5 Memory Error Section */ static void > > +build_append_mem_cper(GArray *table, uint64_t error_physical_addr) { > > + /* > > + * Memory Error Record > > + */ > > + build_append_int_noprefix(table, > > + (1UL << 14) | /* Type Valid */ > > + (1UL << 1) /* Physical Address Valid */, > > + 8); > > + /* Memory error status information */ > > + build_append_int_noprefix(table, 0, 8); > > + /* The physical address at which the memory error occurred */ > > + build_append_int_noprefix(table, error_physical_addr, 8); > > + build_append_int_noprefix(table, 0, 48); > > This could do with a comment to say we are basically skipping all the detailed information normally found in such a record. Ok, it is good to have such a comment, I will add it. > > > > + build_append_int_noprefix(table, 0 /* Unknown error */, 1); > > + build_append_int_noprefix(table, 0, 7); > A similar comment for this last section would probably use useful as well. Ok > > > +} > > + > > +static int ghes_record_mem_error(uint64_t error_block_address, > > + uint64_t error_physical_addr) { > > + GArray *block; > > + uint64_t current_block_length; > > + uint32_t data_length; > > + /* Memory section */ > The variable name is clear I think, so not sure this comment adds any information. Ok, I will remove this comment since the variable name can tell the meaning. > > > + char mem_section_id_le[] = {0x14, 0x11, 0xBC, 0xA5, 0x64, 0x6F, 0xDE, > > + 0x4E, 0xB8, 0x63, 0x3E, 0x83, 0xED, 0x7C, > > + 0x83, 0xB1}; > > + uint8_t fru_id[16] = {0}; > > + uint8_t fru_text[20] = {0}; > > + > > + /* Generic Error Status Block > > + * | +---------------------+ > > + * | | block_status | > > + * | +---------------------+ > > + * | | raw_data_offset | > > + * | +---------------------+ > > + * | | raw_data_length | > > + * | +---------------------+ > > + * | | data_length | > > + * | +---------------------+ > > + * | | error_severity | > > + * | +---------------------+ > > + */ > > + block = g_array_new(false, true /* clear */, 1); > > + > > + /* Get the length of the Generic Error Data Entries */ > > + cpu_physical_memory_read(error_block_address + > > + offsetof(AcpiGenericErrorStatus, data_length), &data_length, > > + 4); > > + > > + /* The current whole length of the generic error status block */ > > + current_block_length = sizeof(AcpiGenericErrorStatus) + > > + le32_to_cpu(data_length); > > + > > + /* This is the length if adding a new generic error data entry*/ > > + data_length += GHES_DATA_LENGTH; > > + data_length += GHES_MEM_CPER_LENGTH; > > + > > + /* Check whether it will run out of the preallocated memory if adding a new > > + * generic error data entry > > + */ > > + if ((data_length + sizeof(AcpiGenericErrorStatus)) > GHES_MAX_RAW_DATA_LENGTH) { > > + error_report("Record CPER out of boundary!!!"); > > + return GHES_CPER_FAIL; > > + } > > + > > + /* Build the new generic error status block header */ > > + build_append_ghes_generic_status(block, cpu_to_le32(ACPI_GEBS_UNCORRECTABLE), 0, 0, > > + cpu_to_le32(data_length), > > + cpu_to_le32(ACPI_CPER_SEV_RECOVERABLE)); > > + > > + /* Write back above generic error status block header to guest memory */ > > + cpu_physical_memory_write(error_block_address, block->data, > > + block->len); > > + > > + /* Add a new generic error data entry */ > > + > > + data_length = block->len; > > + /* Build this new generic error data entry header */ > > + build_append_ghes_generic_data(block, mem_section_id_le, > > + cpu_to_le32(ACPI_CPER_SEV_RECOVERABLE), cpu_to_le32(0x300), 0, 0, > > + cpu_to_le32(80)/* the total size of Memory Error > > + Record */, fru_id, > > Use the define for that 80? Ok, I will. > > > + fru_text, 0); > > + > > + /* Build the memory section CPER for above new generic error data entry */ > > + build_append_mem_cper(block, error_physical_addr); > > + > > + /* Write back above this new generic error data entry to guest memory */ > > + cpu_physical_memory_write(error_block_address + current_block_length, > > + block->data + data_length, block->len - > > + data_length); > > + > > + g_array_free(block, true); > > + > > + return GHES_CPER_OK; > > +} > > + > > /* Build table for the hardware error fw_cfg blob */ void > > build_hardware_error_table(GArray *hardware_errors, BIOSLinker > > *linker) { @@ -169,3 +264,85 @@ void ghes_add_fw_cfg(FWCfgState *s, > > GArray *hardware_error) > > fw_cfg_add_file_callback(s, GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL, NULL, > > &ges.ghes_addr_le, sizeof(ges.ghes_addr_le), false); } > > + > > +bool ghes_record_errors(uint32_t notify, uint64_t physical_address) { > > + uint64_t error_block_addr, read_ack_register_addr; > > + int read_ack_register = 0, loop = 0; > > + uint64_t start_addr = le32_to_cpu(ges.ghes_addr_le); > > + bool ret = GHES_CPER_FAIL; > > + const uint8_t error_source_id[] = { 0xff, 0xff, 0xff, 0xff, > > + 0xff, 0xff, 0xff, 0, 1}; > > + > > + /* > > + * | +---------------------+ ges.ghes_addr_le > > + * | |error_block_address0 | > > + * | +---------------------+ --+-- > > + * | | ............. | GHES_ADDRESS_SIZE > > + * | +---------------------+ --+-- > > + * | |error_block_addressN | > > + * | +---------------------+ > > + * | | read_ack_register0 | > > + * | +---------------------+ --+-- > > + * | | ............. | GHES_ADDRESS_SIZE > > + * | +---------------------+ --+-- > > + * | | read_ack_registerN | > > + * | +---------------------+ --+-- > > + * | | CPER | | > > + * | | .... | GHES_MAX_RAW_DATA_LENGT > > + * | | CPER | | > > + * | +---------------------+ --+-- > > + * | | .......... | > > + * | +---------------------+ > > + * | | CPER | > > + * | | .... | > > + * | | CPER | > > + * | +---------------------+ > > + */ > > + if (physical_address && notify < ACPI_HEST_NOTIFY_RESERVED) { > > + /* Find and check the source id for this new CPER */ > > + if (error_source_id[notify] != 0xff) { > > + start_addr += error_source_id[notify] * GHES_ADDRESS_SIZE; > > + } else { > > + goto out; > > + } > > + > > + cpu_physical_memory_read(start_addr, &error_block_addr, > > + GHES_ADDRESS_SIZE); > > + > > + read_ack_register_addr = start_addr + > > + ACPI_HEST_ERROR_SOURCE_COUNT * > > +GHES_ADDRESS_SIZE; > > +retry: > > + cpu_physical_memory_read(read_ack_register_addr, > > + &read_ack_register, > > +GHES_ADDRESS_SIZE); > > + > > + /* zero means OSPM does not acknowledge the error */ > > + if (!read_ack_register) { > > + if (loop < 3) { > > + usleep(100 * 1000); > > + loop++; > > + goto retry; > > + } else { > > + error_report("OSPM does not acknowledge previous error," > > + " so can not record CPER for current error, forcibly acknowledge" > > + " previous error to avoid blocking next time CPER record! Exit"); > > + read_ack_register = 1; > > + cpu_physical_memory_write(read_ack_register_addr, > > + &read_ack_register, GHES_ADDRESS_SIZE); > > + } > > + } else { > > + if (error_block_addr) { > > + read_ack_register = 0; > > + /* Clear the Read Ack Register, OSPM will write it to 1 when > > + * acknowledge this error. > > + */ > > + cpu_physical_memory_write(read_ack_register_addr, > > + &read_ack_register, GHES_ADDRESS_SIZE); > > + ret = ghes_record_mem_error(error_block_addr, physical_address); > > + } > > + } > > + } > > + > > +out: > > + return ret; > > +} > > diff --git a/include/hw/acpi/acpi_ghes.h b/include/hw/acpi/acpi_ghes.h > > index 38fd87c..6b38097 100644 > > --- a/include/hw/acpi/acpi_ghes.h > > +++ b/include/hw/acpi/acpi_ghes.h > > @@ -32,11 +32,14 @@ > > #define GHES_ADDRESS_SIZE 8 > > > > #define GHES_DATA_LENGTH 72 > > -#define GHES_CPER_LENGTH 80 > > +#define GHES_MEM_CPER_LENGTH 80 > > This is a good change to make, but please roll it into patch 7 where this was introduced rather than introducing it only to rename later. > > Actually it would be even better to just not introduce it in patch 7 and bring it in for the first time in this patch. Yes, you are right, I will do it. > > > > > #define ReadAckPreserve 0xfffffffe > > #define ReadAckWrite 0x1 > > > > +#define GHES_CPER_OK 1 > > +#define GHES_CPER_FAIL 0 > > + > > /* The max size in bytes for one error block */ > > #define GHES_MAX_RAW_DATA_LENGTH 0x1000 > > /* Now only have GPIO-Signal and ARMv8 SEA notification types error > > sources @@ -76,4 +79,5 @@ void build_apei_hest(GArray *table_data, > > GArray *hardware_error, > > > > void build_hardware_error_table(GArray *hardware_errors, BIOSLinker > > *linker); void ghes_add_fw_cfg(FWCfgState *s, GArray > > *hardware_errors); > > +bool ghes_record_errors(uint32_t notify, uint64_t > > +error_physical_addr); > > #endif > > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index > > a6d1cd1..1d1a7a8 100644 > > --- a/include/sysemu/kvm.h > > +++ b/include/sysemu/kvm.h > > @@ -377,7 +377,7 @@ bool kvm_vcpu_id_is_valid(int vcpu_id); > > /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ unsigned > > long kvm_arch_vcpu_id(CPUState *cpu); > > > > -#ifdef TARGET_I386 > > +#if defined(TARGET_I386) || defined(TARGET_AARCH64) > > #define KVM_HAVE_MCE_INJECTION 1 > > void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); > > #endif diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index > > c7bdc6a..d2eac28 100644 > > --- a/target/arm/kvm64.c > > +++ b/target/arm/kvm64.c > > @@ -27,6 +27,10 @@ > > #include "kvm_arm.h" > > #include "internals.h" > > #include "hw/arm/arm.h" > > +#include "exec/ram_addr.h" > > +#include "hw/acpi/acpi-defs.h" > > +#include "hw/acpi/acpi_ghes.h" > > +#include "hw/acpi/acpi.h" > > > > static bool have_guest_debug; > > > > @@ -1029,6 +1033,41 @@ int kvm_arch_get_registers(CPUState *cs) > > return ret; > > } > > > > +void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) { > > + ram_addr_t ram_addr; > > + hwaddr paddr; > > + > > + assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); > > + > > + if (acpi_enabled && addr) { > > + ram_addr = qemu_ram_addr_from_host(addr); > > + if (ram_addr != RAM_ADDR_INVALID && > > + kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { > > + kvm_hwpoison_page_add(ram_addr); > > + /* Asynchronous signal will be masked by main thread, so > > + * only handle synchronous signal. > > + */ > > + if (code == BUS_MCEERR_AR) { > > + kvm_cpu_synchronize_state(c); > > + if (GHES_CPER_FAIL != ghes_record_errors(ACPI_HEST_NOTIFY_SEA, paddr)) { > > + kvm_inject_arm_sea(c); > > + } else { > > + fprintf(stderr, "failed to record the error\n"); > > + } > > + } > > + return; > > + } > > + fprintf(stderr, "Hardware memory error for memory used by " > > + "QEMU itself instead of guest system!\n"); > > + } > > + > > + if (code == BUS_MCEERR_AR) { > > + fprintf(stderr, "Hardware memory error!\n"); > > + exit(1); > > + } > > +} > > + > > /* C6.6.29 BRK instruction */ > > static const uint32_t brk_insn = 0xd4200000; > > >