From: William Roche <william.roche@xxxxxxxxxx> The list of hwpoison pages used to remap the memory on reset is based on the backend real page size. When dealing with hugepages, we create a single entry for the entire page. To correctly handle hugetlb, we must mmap(MAP_FIXED) a complete hugetlb page; hugetlb pages cannot be partially mapped. Co-developed-by: David Hildenbrand <david@xxxxxxxxxx> Signed-off-by: William Roche <william.roche@xxxxxxxxxx> --- accel/kvm/kvm-all.c | 6 +++++- include/exec/cpu-common.h | 3 ++- system/physmem.c | 32 ++++++++++++++++++++++++++------ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index c65b790433..4f2abd5774 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1288,7 +1288,7 @@ static void kvm_unpoison_all(void *param) QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) { QLIST_REMOVE(page, list); - qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE); + qemu_ram_remap(page->ram_addr); g_free(page); } } @@ -1296,6 +1296,10 @@ static void kvm_unpoison_all(void *param) void kvm_hwpoison_page_add(ram_addr_t ram_addr) { HWPoisonPage *page; + size_t page_size = qemu_ram_pagesize_from_addr(ram_addr); + + if (page_size > TARGET_PAGE_SIZE) + ram_addr = QEMU_ALIGN_DOWN(ram_addr, page_size); QLIST_FOREACH(page, &hwpoison_page_list, list) { if (page->ram_addr == ram_addr) { diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index b1d76d6985..dbdf22fded 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -67,7 +67,7 @@ typedef uintptr_t ram_addr_t; /* memory API */ -void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); +void qemu_ram_remap(ram_addr_t addr); /* This should not be used by devices. */ ram_addr_t qemu_ram_addr_from_host(void *ptr); ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr); @@ -108,6 +108,7 @@ bool qemu_ram_is_named_file(RAMBlock *rb); int qemu_ram_get_fd(RAMBlock *rb); size_t qemu_ram_pagesize(RAMBlock *block); +size_t qemu_ram_pagesize_from_addr(ram_addr_t addr); size_t qemu_ram_pagesize_largest(void); /** diff --git a/system/physmem.c b/system/physmem.c index c76503aea8..7a87548f99 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1665,6 +1665,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb) return rb->page_size; } +/* Return backend real page size used for the given ram_addr */ +size_t qemu_ram_pagesize_from_addr(ram_addr_t addr) +{ + RAMBlock *rb; + + RCU_READ_LOCK_GUARD(); + rb = qemu_get_ram_block(addr); + if (!rb) { + return TARGET_PAGE_SIZE; + } + return qemu_ram_pagesize(rb); +} + /* Returns the largest size of page in use */ size_t qemu_ram_pagesize_largest(void) { @@ -2167,17 +2180,22 @@ void qemu_ram_free(RAMBlock *block) } #ifndef _WIN32 -void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) +void qemu_ram_remap(ram_addr_t addr) { RAMBlock *block; ram_addr_t offset; int flags; void *area, *vaddr; int prot; + size_t page_size; RAMBLOCK_FOREACH(block) { offset = addr - block->offset; if (offset < block->max_length) { + /* Respect the pagesize of our RAMBlock */ + page_size = qemu_ram_pagesize(block); + offset = QEMU_ALIGN_DOWN(offset, page_size); + vaddr = ramblock_ptr(block, offset); if (block->flags & RAM_PREALLOC) { ; @@ -2191,21 +2209,23 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) prot = PROT_READ; prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; if (block->fd >= 0) { - area = mmap(vaddr, length, prot, flags, block->fd, + area = mmap(vaddr, page_size, prot, flags, block->fd, offset + block->fd_offset); } else { flags |= MAP_ANONYMOUS; - area = mmap(vaddr, length, prot, flags, -1, 0); + area = mmap(vaddr, page_size, prot, flags, -1, 0); } if (area != vaddr) { error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "", - length, addr); + page_size, addr); exit(1); } - memory_try_enable_merging(vaddr, length); - qemu_ram_setup_dump(vaddr, length); + memory_try_enable_merging(vaddr, page_size); + qemu_ram_setup_dump(vaddr, page_size); } + + break; } } } -- 2.43.5