From: William Roche <william.roche@xxxxxxxxxx> The list of hwpoison pages used to remap the memory on reset is based on the backend real page size. To correctly handle hugetlb, we must mmap(MAP_FIXED) a complete hugetlb page; hugetlb pages cannot be partially mapped. Signed-off-by: William Roche <william.roche@xxxxxxxxxx> Co-developed-by: David Hildenbrand <david@xxxxxxxxxx> Acked-by: David Hildenbrand <david@xxxxxxxxxx> --- accel/kvm/kvm-all.c | 2 +- include/exec/cpu-common.h | 2 +- system/physmem.c | 38 +++++++++++++++++++++++++++++--------- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index c65b790433..f89568bfa3 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1288,7 +1288,7 @@ static void kvm_unpoison_all(void *param) QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) { QLIST_REMOVE(page, list); - qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE); + qemu_ram_remap(page->ram_addr); g_free(page); } } diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index b1d76d6985..3771b2130c 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -67,7 +67,7 @@ typedef uintptr_t ram_addr_t; /* memory API */ -void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); +void qemu_ram_remap(ram_addr_t addr); /* This should not be used by devices. */ ram_addr_t qemu_ram_addr_from_host(void *ptr); ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr); diff --git a/system/physmem.c b/system/physmem.c index c76503aea8..3dd2adde73 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -2167,17 +2167,35 @@ void qemu_ram_free(RAMBlock *block) } #ifndef _WIN32 -void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) +/* + * qemu_ram_remap - remap a single RAM page + * + * @addr: address in ram_addr_t address space. + * + * This function will try remapping a single page of guest RAM identified by + * @addr, essentially discarding memory to recover from previously poisoned + * memory (MCE). The page size depends on the RAMBlock (i.e., hugetlb). @addr + * does not have to point at the start of the page. + * + * This function is only to be used during system resets; it will kill the + * VM if remapping failed. + */ +void qemu_ram_remap(ram_addr_t addr) { RAMBlock *block; - ram_addr_t offset; + uint64_t offset; int flags; void *area, *vaddr; int prot; + size_t page_size; RAMBLOCK_FOREACH(block) { offset = addr - block->offset; if (offset < block->max_length) { + /* Respect the pagesize of our RAMBlock */ + page_size = qemu_ram_pagesize(block); + offset = QEMU_ALIGN_DOWN(offset, page_size); + vaddr = ramblock_ptr(block, offset); if (block->flags & RAM_PREALLOC) { ; @@ -2191,21 +2209,23 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) prot = PROT_READ; prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; if (block->fd >= 0) { - area = mmap(vaddr, length, prot, flags, block->fd, + area = mmap(vaddr, page_size, prot, flags, block->fd, offset + block->fd_offset); } else { flags |= MAP_ANONYMOUS; - area = mmap(vaddr, length, prot, flags, -1, 0); + area = mmap(vaddr, page_size, prot, flags, -1, 0); } if (area != vaddr) { - error_report("Could not remap addr: " - RAM_ADDR_FMT "@" RAM_ADDR_FMT "", - length, addr); + error_report("Could not remap RAM %s:%" PRIx64 "+%" PRIx64 + " +%zx", block->idstr, offset, + block->fd_offset, page_size); exit(1); } - memory_try_enable_merging(vaddr, length); - qemu_ram_setup_dump(vaddr, length); + memory_try_enable_merging(vaddr, page_size); + qemu_ram_setup_dump(vaddr, page_size); } + + break; } } } -- 2.43.5