From: William Roche <william.roche@xxxxxxxxxx> In case the SIGBUS handler is triggered by a BUS_MCEERR_AO signal and this handler needs to exit to let the VM pause during the memory mapping change, this SIGBUS won't be regenerated when the VM resumes. In this case we take note of this signal before exiting the handler to replay it when the VM resumes. Signed-off-by: William Roche <william.roche@xxxxxxxxxx> --- system/hugetlbfs_ras.c | 60 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/system/hugetlbfs_ras.c b/system/hugetlbfs_ras.c index 90e399bbad..50f810f836 100644 --- a/system/hugetlbfs_ras.c +++ b/system/hugetlbfs_ras.c @@ -155,6 +155,56 @@ hugetlbfs_ras_backend_sz(void *addr) return rb->page_size; } + +/* + * BUS_MCEERR_AO signals recorded for a later replay (newest entry first). + * Entries are added and detached under large_hwpoison_mtx; the replay + * itself calls kvm_on_sigbus() after the mutex has been released. + */ +typedef struct LargeHWPoisonAO { + void *addr; + QLIST_ENTRY(LargeHWPoisonAO) list; +} LargeHWPoisonAO; + +static QLIST_HEAD(, LargeHWPoisonAO) large_hwpoison_ao = + QLIST_HEAD_INITIALIZER(large_hwpoison_ao); + +static void +large_hwpoison_ao_record(void *addr) +{ + LargeHWPoisonAO *cel; + + cel = g_new(LargeHWPoisonAO, 1); + cel->addr = addr; + QLIST_INSERT_HEAD(&large_hwpoison_ao, cel, list); +} + +/* Replay each recorded BUS_MCEERR_AO via kvm_on_sigbus(), then free it. */ +static void +hugetlbfs_ras_ao_replay_bh(void) +{ + LargeHWPoisonAO *cel, *next; + QLIST_HEAD(, LargeHWPoisonAO) local_list = + QLIST_HEAD_INITIALIZER(local_list); + + /* + * Move the entries to a local list, which also restores arrival order, + * so that large_hwpoison_mtx is not held when calling kvm_on_sigbus(). 
+ */ + qemu_mutex_lock(&large_hwpoison_mtx); + QLIST_FOREACH_SAFE(cel, &large_hwpoison_ao, list, next) { + QLIST_REMOVE(cel, list); + QLIST_INSERT_HEAD(&local_list, cel, list); + } + qemu_mutex_unlock(&large_hwpoison_mtx); + + QLIST_FOREACH_SAFE(cel, &local_list, list, next) { + DPRINTF("AO on %p\n", cel->addr); + kvm_on_sigbus(BUS_MCEERR_AO, cel->addr, _PAGE_SHIFT); + g_free(cel); + } +} + /* * Report if this std page address of the given faulted large page should be * retried or if the current signal handler should continue to deal with it. @@ -276,6 +326,15 @@ hugetlbfs_ras_correct(void **paddr, size_t *psz, int code) if (large_hwpoison_vm_stop) { DPRINTF("Handler exit requested as on page %p\n", page->page_addr); *paddr = NULL; + /* + * BUS_MCEERR_AO is not regenerated once the handler has exited: record + * it here so that it gets replayed right after vm_start(). + */ + if (code == BUS_MCEERR_AO) { + large_hwpoison_ao_record(page->first_poison ? page->first_poison : + reported_addr); + } + } qemu_mutex_unlock(&large_hwpoison_mtx); @@ -522,6 +581,7 @@ static void coroutine_hugetlbfs_ras_vmstop_bh(void *opaque) static void coroutine_hugetlbfs_ras_vmstart_bh(void *opaque) { vm_start(); + hugetlbfs_ras_ao_replay_bh(); } static void * -- 2.43.5