From: William Roche <william.roche@xxxxxxxxxx> madvise MADV_HWPOISON can generate a SIGBUS when called, so the listener thread (the caller) needs to deal with this signal. The SIGBUS handler checks a thread-specific key that is set only by the listener thread; when the signal is taken on that thread, the handler does not return but jumps (siglongjmp) back to just after the madvise call. Signed-off-by: William Roche <william.roche@xxxxxxxxxx> --- system/cpus.c | 9 +++++++++ system/hugetlbfs_ras.c | 43 ++++++++++++++++++++++++++++++++++++++++-- system/hugetlbfs_ras.h | 1 + 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/system/cpus.c b/system/cpus.c index 12e630f760..642055f729 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -47,6 +47,10 @@ #include "hw/hw.h" #include "trace.h" +#ifdef CONFIG_HUGETLBFS_RAS +#include "system/hugetlbfs_ras.h" +#endif + #ifdef CONFIG_LINUX #include <sys/prctl.h> @@ -374,6 +378,11 @@ static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) sigbus_reraise(); } +#ifdef CONFIG_HUGETLBFS_RAS + /* skip error on the listener thread - does not return in this case */ + hugetlbfs_ras_signal_from_listener(); +#endif + if (current_cpu) { /* Called asynchronously in VCPU thread. 
*/ if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, diff --git a/system/hugetlbfs_ras.c b/system/hugetlbfs_ras.c index 2f7e550f56..90e399bbad 100644 --- a/system/hugetlbfs_ras.c +++ b/system/hugetlbfs_ras.c @@ -70,6 +70,8 @@ static QemuCond large_hwpoison_vm_running; static QemuMutex large_hwpoison_mtx; static QemuThread thread; static void *hugetlbfs_ras_listener(void *arg); +static pthread_key_t id_key; +static sigjmp_buf listener_jmp_buf; static int vm_running; static bool hugetlbfs_ras_initialized; static int _PAGE_SIZE = 4096; @@ -105,6 +107,10 @@ hugetlbfs_ras_init(void) qemu_cond_init(&large_hwpoison_vm_running); qemu_mutex_init(&large_hwpoison_mtx); + if (pthread_key_create(&id_key, NULL) != 0) { + warn_report("No support for hugetlbfs largepage errors - no id_key"); + return -EIO; + } qemu_thread_create(&thread, "hugetlbfs_error", hugetlbfs_ras_listener, NULL, QEMU_THREAD_DETACHED); @@ -288,6 +294,19 @@ hugetlbfs_ras_correct(void **paddr, size_t *psz, int code) return (*paddr == NULL ? false : true); } +/* this madvise can generate a SIGBUS, use the jump buffer to deal with it */ +static bool poison_location(void *addr, int size) +{ + if (sigsetjmp(listener_jmp_buf, 1) == 0) { + if (madvise(addr, size, MADV_HWPOISON)) { + DPRINTF("poison injection failed: %s (addr:%p sz:%d)\n", + strerror(errno), addr, size); + return false; + } + } + return true; +} + /* * Sequentially read the valid data from the failed large page (shared) backend * file and copy that into our set of standard sized pages. 
@@ -321,7 +340,7 @@ static int take_valid_data_lpg(LargeHWPoisonPage *page, const char **err) slot_num = page->page_size / ps; if (!qemu_ram_is_shared(rb)) { /* we can't use the backend file */ - if (madvise(page->page_addr, page->page_size, MADV_HWPOISON) == 0) { + if (poison_location(page->page_addr, page->page_size)) { page->first_poison = page->page_addr; warn_report("Large memory error, unrecoverable section " "(unshared hugetlbfs): start:%p length: %ld", @@ -350,7 +369,7 @@ static int take_valid_data_lpg(LargeHWPoisonPage *page, const char **err) retrieved += count; } if (retrieved < ps) { /* consider this page as poisoned */ - if (madvise(page->page_addr + i * ps, ps, MADV_HWPOISON)) { + if (!poison_location(page->page_addr + i * ps, ps)) { if (err) { *err = "poison injection failed"; } @@ -402,6 +421,19 @@ void hugetlbfs_ras_empty(void) qemu_mutex_unlock(&large_hwpoison_mtx); } +/* + * Check if the signal is taken from the listener thread, + * in this thread we don't return as we jump after the madvise call. + */ +void +hugetlbfs_ras_signal_from_listener(void) +{ + /* check if we take the SIGBUS in the listener */ + if (pthread_getspecific(id_key) != NULL) { + siglongjmp(listener_jmp_buf, 1); + } +} + /* * Deal with the given page, initializing its data. 
*/ @@ -498,6 +530,13 @@ hugetlbfs_ras_listener(void *arg) LargeHWPoisonPage *page; int new; const char *err; + sigset_t set; + + pthread_setspecific(id_key, (void *)1); + /* unblock SIGBUS */ + sigemptyset(&set); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); /* monitor any newly submitted element in the list */ qemu_mutex_lock(&large_hwpoison_mtx); diff --git a/system/hugetlbfs_ras.h b/system/hugetlbfs_ras.h index 324228bda3..9c2a6e49a1 100644 --- a/system/hugetlbfs_ras.h +++ b/system/hugetlbfs_ras.h @@ -1,3 +1,4 @@ bool hugetlbfs_ras_use(void); bool hugetlbfs_ras_correct(void **paddr, size_t *psz, int code); void hugetlbfs_ras_empty(void); +void hugetlbfs_ras_signal_from_listener(void); -- 2.43.5