On Sat, Feb 01, 2025 at 09:57:23AM +0000, William Roche wrote: > From: William Roche <william.roche@xxxxxxxxxx> > > In case of a large page impacted by a memory error, provide an > information about the impacted large page before the memory > error injection message. > > This message would also appear on ras enabled ARM platforms, with > the introduction of an x86 similar error injection message. > > In the case of a large page impacted, we now report: > Memory Error on large page from <backend>:<address>+<fd_offset> +<page_size> > > The +<fd_offset> information is only provided with a file backend. > > Signed-off-by: William Roche <william.roche@xxxxxxxxxx> This is still a pretty kvm / arch relevant patch that needs some reviews. I wonder whether we really need this - we could fetch the ramblock mapping (e.g. hwaddr -> HVA) via HMP "info ramblock", and dmesg also shows the process ID + VA. IIUC we already have all of the info below as long as we do some math based on the above. Would that work too? > --- > accel/kvm/kvm-all.c | 18 ++++++++++++++++++ > include/exec/cpu-common.h | 10 ++++++++++ > system/physmem.c | 22 ++++++++++++++++++++++ > target/arm/kvm.c | 3 +++ > 4 files changed, 53 insertions(+) > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index f89568bfa3..9a0d970ce1 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -1296,6 +1296,24 @@ static void kvm_unpoison_all(void *param) > void kvm_hwpoison_page_add(ram_addr_t ram_addr) > { > HWPoisonPage *page; > + struct RAMBlockInfo rb_info; > + > + if (qemu_ram_block_info_from_addr(ram_addr, &rb_info)) { > + size_t ps = rb_info.page_size; > + > + if (ps > TARGET_PAGE_SIZE) { > + uint64_t offset = QEMU_ALIGN_DOWN(ram_addr - rb_info.offset, ps); > + > + if (rb_info.fd >= 0) { > + error_report("Memory Error on large page from %s:%" PRIx64 > + "+%" PRIx64 " +%zx", rb_info.idstr, offset, > + rb_info.fd_offset, ps); > + } else { > + error_report("Memory Error on large page from %s:%" PRIx64 > + " +%zx", 
rb_info.idstr, offset, ps); > + } > + } > + } > > QLIST_FOREACH(page, &hwpoison_page_list, list) { > if (page->ram_addr == ram_addr) { > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h > index 3771b2130c..190bd4f34a 100644 > --- a/include/exec/cpu-common.h > +++ b/include/exec/cpu-common.h > @@ -110,6 +110,16 @@ int qemu_ram_get_fd(RAMBlock *rb); > size_t qemu_ram_pagesize(RAMBlock *block); > size_t qemu_ram_pagesize_largest(void); > > +struct RAMBlockInfo { > + char idstr[256]; > + ram_addr_t offset; > + int fd; > + uint64_t fd_offset; > + size_t page_size; > +}; > +bool qemu_ram_block_info_from_addr(ram_addr_t ram_addr, > + struct RAMBlockInfo *block); > + > /** > * cpu_address_space_init: > * @cpu: CPU to add this address space to > diff --git a/system/physmem.c b/system/physmem.c > index e8ff930bc9..686f569270 100644 > --- a/system/physmem.c > +++ b/system/physmem.c > @@ -1678,6 +1678,28 @@ size_t qemu_ram_pagesize_largest(void) > return largest; > } > > +/* Copy RAMBlock information associated to the given ram_addr location */ > +bool qemu_ram_block_info_from_addr(ram_addr_t ram_addr, > + struct RAMBlockInfo *b_info) > +{ > + RAMBlock *rb; > + > + assert(b_info); > + > + RCU_READ_LOCK_GUARD(); > + rb = qemu_get_ram_block(ram_addr); > + if (!rb) { > + return false; > + } > + > + pstrcat(b_info->idstr, sizeof(b_info->idstr), rb->idstr); > + b_info->offset = rb->offset; > + b_info->fd = rb->fd; > + b_info->fd_offset = rb->fd_offset; > + b_info->page_size = rb->page_size; > + return true; > +} > + > static int memory_try_enable_merging(void *addr, size_t len) > { > if (!machine_mem_merge(current_machine)) { > diff --git a/target/arm/kvm.c b/target/arm/kvm.c > index da30bdbb23..d9dedc6d74 100644 > --- a/target/arm/kvm.c > +++ b/target/arm/kvm.c > @@ -2389,6 +2389,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) > kvm_cpu_synchronize_state(c); > if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { > 
kvm_inject_arm_sea(c); > + error_report("Guest Memory Error at QEMU addr %p and " > + "GUEST addr 0x%" HWADDR_PRIx " of type %s injected", > + addr, paddr, "BUS_MCEERR_AR"); > } else { > error_report("failed to record the error"); > abort(); > -- > 2.43.5 > -- Peter Xu