Thank you for the prompt review though the mailing list keeps you
busy. For the record, I inherited this code a while ago with the request
to make it acceptable for upstream and there was a lot of work to
do. Most of your complaints aren't originally my doing, but it's my
responsibility now.
Marc Zyngier <maz@xxxxxxxxxx> writes:
On Thu, 28 Sep 2023 22:02:01 +0100,
Colton Lewis <coltonlewis@xxxxxxxxxx> wrote:
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+#define msecs_to_usecs(msec) ((msec) * 1000LL)
+
+#define CVAL_MAX ~0ULL
+/* tval is a signed 32-bit int. */
+#define TVAL_MAX INT_MAX
+#define TVAL_MIN INT_MIN
+
+#define GICD_BASE_GPA 0x8000000ULL
+#define GICR_BASE_GPA 0x80A0000ULL
We already have 3 tests that do their own GIC setup. Maybe it is time
someone either make vgic_v3_setup() deal with fixed addresses, or move
this into a helper.
That's a good idea. I can work on something like that.
+ ctl = timer_get_ctl(timer);
+ cval = timer_get_cval(timer);
+ cnt = timer_get_cntct(timer);
+ timer_condition = cnt >= cval;
+ istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
+ GUEST_ASSERT_EQ(timer_condition, istatus);
+
+ /* Disable and mask the timer. */
+ timer_set_ctl(timer, CTL_IMASK);
What is the point of masking if the timer is disabled?
There isn't a reason to both mask and disable the timer as this function
call does. It was there when I started working on this code and I left
it alone. Does it matter?
+
+ atomic_inc(&shared_data.handled);
You don't have any ordering between atomic operations and system
register access. Could it be a problem?
+
+out:
+ gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_cval(timer, cval_cycles);
+ timer_set_ctl(timer, ctl);
Same question.
Neither operation depends on whether the other is visible. Nothing
written to the system registers depends on the shared_data recording
variables (though that global deserves a better name). And after this
the number of handled IRQs can't possibly increase again until the timer
fires again, which happens long after the timer is reset here.
+}
+
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_tval(timer, tval_cycles);
+ timer_set_ctl(timer, ctl);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t
ctl,
+ enum timer_view tv)
+{
+ switch (tv) {
+ case TIMER_CVAL:
+ set_cval_irq(timer, xval, ctl);
+ break;
+ case TIMER_TVAL:
+ set_tval_irq(timer, xval, ctl);
+ break;
+ default:
+ GUEST_FAIL("Could not get timer %d", timer);
+ }
+}
+
+/*
+ * Should be called with IRQs masked.
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+ int h;
+
+ for (h = atomic_read(&shared_data.handled); h ==
atomic_read(&shared_data.handled);) {
+ asm volatile ("wfi\n"
+ "msr daifclr, #2\n"
+ /* handle IRQ */
+ "msr daifset, #2\n":::"memory");
There is no guarantee that a pending interrupt would fire at the point
where the comment is. R_RBZYL clearly state that you need a context
synchronisation event between these two instructions if you want
interrupts to be handled.
Understood. Thanks for pointing this out.
+ }
+}
+
+/*
+ * Wait for an non-spurious IRQ by polling in the guest (userspace=0)
or in
+ * userspace (e.g., userspace=1 and
userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Should be called with IRQs masked. Not really needed like the wfi
above, but
+ * it should match the others.
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(bool userspace, enum sync_cmd
userspace_cmd)
+{
+ int h;
+
+ h = atomic_read(&shared_data.handled);
+
+ local_irq_enable();
So half of this code is using local_irq_*(), and the rest is directly
poking at DAIF. Amusingly enough, the two aren't even playing with the
same set of bits.
I agree that's wrong. I don't think it makes a functional difference
since the only difference is local_irq_*() includes FIQs which are
irrelevant to the whole test, but it looks much better to be consistent.
+ while (h == atomic_read(&shared_data.handled)) {
+ if (userspace)
+ USERSPACE_CMD(userspace_cmd);
+ else
+ cpu_relax();
+ }
+ local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(false, -1);
Am I the only one who cries when seeing this -1 cast on an unsuspected
enum, together with a flag saying "hey, I'm giving you crap
arguments"? What was wrong having an actual enum value for it?
Don't cry. I'll fix that. I agree there should be an actual enum value.
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(true, USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(true, USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest (userspace=0) or
in
+ * userspace (e.g., userspace=1 and userspace_cmd=USERSPACE_SCHEDULE).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+ bool userspace, enum sync_cmd userspace_cmd)
+{
+ uint64_t cycles = usec_to_cycles(usec);
+ /* Whichever timer we are testing with, sleep with the other. */
+ enum arch_timer sleep_timer = 1 - test_timer;
+ uint64_t start = timer_get_cntct(sleep_timer);
+
+ while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+ if (userspace)
+ USERSPACE_CMD(userspace_cmd);
+ else
+ cpu_relax();
+ }
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, false, -1);
More of the same stuff...
Frankly, I even have a hard time understanding what this code is
trying to achieve, let alone the lack of correctness from an
architecture perspective.
I presume you are talking about the polling rather than the test as a
whole. The goal is to provide a way to wait a while so there is a way to
test functionality when interrupts are masked. Did you have any
correctness concerns besides not having proper enum values?