On Thu, 2 Dec 2021 13:35:53 +0100 David Hildenbrand <david@xxxxxxxxxx> wrote: > We had a KVM BUG fixed by kernel commit a3e03bc1368c ("KVM: s390: index > kvm->arch.idle_mask by vcpu_idx"), whereby a floating interrupt might get > stuck forever because a CPU in the wait state would not get woken up. > > The issue can be triggered when CPUs are created in a nonlinear fashion, > such that the CPU address ("core-id") and the KVM cpu id don't match. > > So let's start with a floating interrupt test that will trigger a > floating interrupt (via SCLP) to be delivered to a CPU in the wait state. > > Signed-off-by: David Hildenbrand <david@xxxxxxxxxx> Reviewed-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx> > --- > lib/s390x/sclp.c | 11 ++-- > lib/s390x/sclp.h | 1 + > s390x/Makefile | 1 + > s390x/firq.c | 122 ++++++++++++++++++++++++++++++++++++++++++++ > s390x/unittests.cfg | 10 ++++ > 5 files changed, 142 insertions(+), 3 deletions(-) > create mode 100644 s390x/firq.c > > diff --git a/lib/s390x/sclp.c b/lib/s390x/sclp.c > index 0272249..33985eb 100644 > --- a/lib/s390x/sclp.c > +++ b/lib/s390x/sclp.c > @@ -60,9 +60,7 @@ void sclp_setup_int(void) > void sclp_handle_ext(void) > { > ctl_clear_bit(0, CTL0_SERVICE_SIGNAL); > - spin_lock(&sclp_lock); > - sclp_busy = false; > - spin_unlock(&sclp_lock); > + sclp_clear_busy(); > } > > void sclp_wait_busy(void) > @@ -89,6 +87,13 @@ void sclp_mark_busy(void) > } > } > > +void sclp_clear_busy(void) > +{ > + spin_lock(&sclp_lock); > + sclp_busy = false; > + spin_unlock(&sclp_lock); > +} > + > static void sclp_read_scp_info(ReadInfo *ri, int length) > { > unsigned int commands[] = { SCLP_CMDW_READ_SCP_INFO_FORCED, > diff --git a/lib/s390x/sclp.h b/lib/s390x/sclp.h > index 61e9cf5..fead007 100644 > --- a/lib/s390x/sclp.h > +++ b/lib/s390x/sclp.h > @@ -318,6 +318,7 @@ void sclp_setup_int(void); > void sclp_handle_ext(void); > void sclp_wait_busy(void); > void sclp_mark_busy(void); > +void sclp_clear_busy(void); > void sclp_console_setup(void); > void sclp_print(const char *str); > void sclp_read_info(void); > diff --git a/s390x/Makefile b/s390x/Makefile > index f95f2e6..1e567c1 100644 > --- a/s390x/Makefile > +++ b/s390x/Makefile > @@ -25,6 +25,7 @@ tests += $(TEST_DIR)/uv-host.elf > tests += $(TEST_DIR)/edat.elf > tests += $(TEST_DIR)/mvpg-sie.elf > tests += $(TEST_DIR)/spec_ex-sie.elf > +tests += $(TEST_DIR)/firq.elf > > tests_binary = $(patsubst %.elf,%.bin,$(tests)) > ifneq ($(HOST_KEY_DOCUMENT),) > diff --git a/s390x/firq.c b/s390x/firq.c > new file mode 100644 > index 0000000..1f87718 > --- /dev/null > +++ b/s390x/firq.c > @@ -0,0 +1,122 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Floating interrupt tests. > + * > + * Copyright 2021 Red Hat Inc > + * > + * Authors: > + * David Hildenbrand <david@xxxxxxxxxx> > + */ > +#include <libcflat.h> > +#include <asm/asm-offsets.h> > +#include <asm/interrupt.h> > +#include <asm/page.h> > +#include <asm-generic/barrier.h> > + > +#include <sclp.h> > +#include <smp.h> > +#include <alloc_page.h> > + > +static void wait_for_sclp_int(void) > +{ > + /* Enable SCLP interrupts on this CPU only. */ > + ctl_set_bit(0, CTL0_SERVICE_SIGNAL); > + > + /* Enable external interrupts and go to the wait state. */ > + wait_for_interrupt(PSW_MASK_EXT); > +} > + > +/* > + * Some KVM versions might mix CPUs when looking for a floating IRQ target, > + * accidentially detecting a stopped CPU as waiting and resulting in the actually > + * waiting CPU not getting woken up for the interrupt. > + */ > +static void test_wait_state_delivery(void) > +{ > + struct psw psw; > + SCCBHeader *h; > + int ret; > + > + report_prefix_push("wait state delivery"); > + > + if (smp_query_num_cpus() < 3) { > + report_skip("need at least 3 CPUs for this test"); > + goto out; > + } > + > + if (stap()) { > + report_skip("need to start on CPU #0"); > + goto out; > + } > + > + /* > + * We want CPU #2 to be stopped. This should be the case at this > + * point, however, we want to sense if it even exists as well. > + */ > + ret = smp_cpu_stop(2); > + if (ret) { > + report_skip("CPU #2 not found"); > + goto out; > + } > + > + /* > + * We're going to perform an SCLP service call but expect > + * the interrupt on CPU #1 while it is in the wait state. > + */ > + sclp_mark_busy(); > + > + /* Start CPU #1 and let it wait for the interrupt. */ > + psw.mask = extract_psw_mask(); > + psw.addr = (unsigned long)wait_for_sclp_int; > + ret = smp_cpu_setup(1, psw); > + if (ret) { > + sclp_clear_busy(); > + report_skip("cpu #1 not found"); > + goto out; > + } > + > + /* > + * We'd have to jump trough some hoops to sense e.g., via SIGP > + * CONDITIONAL EMERGENCY SIGNAL if CPU #1 is already in the > + * wait state. > + * > + * Although not completely reliable, use SIGP SENSE RUNNING STATUS > + * until not reported as running -- after all, our SCLP processing > + * will take some time as well and smp_cpu_setup() returns when we're > + * either already in wait_for_sclp_int() or just about to execute it. > + */ > + while(smp_sense_running_status(1)); > + > + h = alloc_page(); > + h->length = 4096; > + ret = servc(SCLP_CMDW_READ_CPU_INFO, __pa(h)); > + if (ret) { > + sclp_clear_busy(); > + report_fail("SCLP_CMDW_READ_CPU_INFO failed"); > + goto out_destroy; > + } > + > + /* > + * Wait until the interrupt gets delivered on CPU #1, marking the > + * SCLP requests as done. > + */ > + sclp_wait_busy(); > + > + report(true, "sclp interrupt delivered"); > + > +out_destroy: > + free_page(h); > + smp_cpu_destroy(1); > +out: > + report_prefix_pop(); > +} > + > +int main(void) > +{ > + report_prefix_push("firq"); > + > + test_wait_state_delivery(); > + > + report_prefix_pop(); > + return report_summary(); > +} > diff --git a/s390x/unittests.cfg b/s390x/unittests.cfg > index 3b454b7..054560c 100644 > --- a/s390x/unittests.cfg > +++ b/s390x/unittests.cfg > @@ -112,3 +112,13 @@ file = mvpg-sie.elf > > [spec_ex-sie] > file = spec_ex-sie.elf > + > +[firq-linear-cpu-ids] > +file = firq.elf > +timeout = 20 > +extra_params = -smp 1,maxcpus=3 -cpu qemu -device qemu-s390x-cpu,core-id=1 -device qemu-s390x-cpu,core-id=2 > + > +[firq-nonlinear-cpu-ids] > +file = firq.elf > +timeout = 20 > +extra_params = -smp 1,maxcpus=3 -cpu qemu -device qemu-s390x-cpu,core-id=2 -device qemu-s390x-cpu,core-id=1