Hi Alex,

Nice set of tests, they are proving to be helpful. One question below.

On Fri, Jul 31, 2015 at 5:54 PM, Alex Bennée <alex.bennee@xxxxxxxxxx> wrote:
> From: Alex Bennée <alex@xxxxxxxxxx>
>
> This test has been written mainly to stress multi-threaded TCG behaviour
> but will demonstrate failure by default on real hardware. The test takes
> the following parameters:
>
> - "lock" use GCC's locking semantics
> - "excl" use load/store exclusive semantics
> - "acqrel" use acquire/release semantics
>
> Currently excl/acqrel lock up on MTTCG
>
> Signed-off-by: Alex Bennée <alex.bennee@xxxxxxxxxx>
> ---
>  arm/barrier-test.c           | 206 +++++++++++++++++++++++++++++++++++++++++++
>  config/config-arm-common.mak |   2 +
>  2 files changed, 208 insertions(+)
>  create mode 100644 arm/barrier-test.c
>
> diff --git a/arm/barrier-test.c b/arm/barrier-test.c
> new file mode 100644
> index 0000000..53d690b
> --- /dev/null
> +++ b/arm/barrier-test.c
> @@ -0,0 +1,206 @@
> +#include <libcflat.h>
> +#include <asm/smp.h>
> +#include <asm/cpumask.h>
> +#include <asm/barrier.h>
> +#include <asm/mmu.h>
> +
> +#include <prng.h>
> +
> +#define MAX_CPUS 4
> +
> +/* How many increments to do */
> +static int increment_count = 10000000;
> +
> +
> +/* shared value, we use the alignment to ensure the global_lock value
> + * doesn't share a page */
> +static unsigned int shared_value;
> +
> +/* PAGE_SIZE * uint32_t means we span several pages */
> +static uint32_t memory_array[PAGE_SIZE];
> +
> +__attribute__((aligned(PAGE_SIZE))) static unsigned int per_cpu_value[MAX_CPUS];
> +__attribute__((aligned(PAGE_SIZE))) static cpumask_t smp_test_complete;
> +__attribute__((aligned(PAGE_SIZE))) static int global_lock;
> +
> +struct isaac_ctx prng_context[MAX_CPUS];
> +
> +void (*inc_fn)(void);
> +
> +static void lock(int *lock_var)
> +{
> +        while (__sync_lock_test_and_set(lock_var, 1));
> +}
> +static void unlock(int *lock_var)
> +{
> +        __sync_lock_release(lock_var);
> +}
> +
> +static void increment_shared(void)
> +{
> +        shared_value++;
> +}
> +
> +static void increment_shared_with_lock(void)
> +{
> +        lock(&global_lock);
> +        shared_value++;
> +        unlock(&global_lock);
> +}
> +
> +static void increment_shared_with_excl(void)
> +{
> +#if defined (__LP64__) || defined (_LP64)
> +        asm volatile(
> +        "1: ldxr w0, [%[sptr]]\n"
> +        "   add w0, w0, #0x1\n"
> +        "   stxr w1, w0, [%[sptr]]\n"
> +        "   cbnz w1, 1b\n"
> +        : /* out */
> +        : [sptr] "r" (&shared_value) /* in */
> +        : "w0", "w1", "cc");
> +#else
> +        asm volatile(
> +        "1: ldrex r0, [%[sptr]]\n"
> +        "   add r0, r0, #0x1\n"
> +        "   strexeq r1, r0, [%[sptr]]\n"
> +        "   cmpeq r1, #0\n"

Why are we calling these last two instructions with the 'eq' suffix?
Shouldn't we just strex r1, r0, [sptr] and then cmp r1, #0?

Thank you,
alvise

> +        "   bne 1b\n"
> +        : /* out */
> +        : [sptr] "r" (&shared_value) /* in */
> +        : "r0", "r1", "cc");
> +#endif
> +}
> +
> +static void increment_shared_with_acqrel(void)
> +{
> +#if defined (__LP64__) || defined (_LP64)
> +        asm volatile(
> +        "   ldar w0, [%[sptr]]\n"
> +        "   add w0, w0, #0x1\n"
> +        "   str w0, [%[sptr]]\n"
> +        : /* out */
> +        : [sptr] "r" (&shared_value) /* in */
> +        : "w0");
> +#else
> +        /* ARMv7 has no acquire/release semantics but we
> +         * can ensure the results of the write are propagated
> +         * with the use of barriers.
> +         */
> +        asm volatile(
> +        "1: ldrex r0, [%[sptr]]\n"
> +        "   add r0, r0, #0x1\n"
> +        "   strexeq r1, r0, [%[sptr]]\n"
> +        "   cmpeq r1, #0\n"
> +        "   bne 1b\n"
> +        "   dmb\n"
> +        : /* out */
> +        : [sptr] "r" (&shared_value) /* in */
> +        : "r0", "r1", "cc");
> +#endif
> +
> +}
> +
> +/* The idea of this is just to generate some random load/store
> + * activity which may or may not race with an un-barried incremented
> + * of the shared counter
> + */
> +static void shuffle_memory(int cpu)
> +{
> +        int i;
> +        uint32_t lspat = isaac_next_uint32(&prng_context[cpu]);
> +        uint32_t seq = isaac_next_uint32(&prng_context[cpu]);
> +        int count = seq & 0x1f;
> +        uint32_t val=0;
> +
> +        seq >>= 5;
> +
> +        for (i=0; i<count; i++) {
> +                int index = seq & ~PAGE_MASK;
> +                if (lspat & 1) {
> +                        val ^= memory_array[index];
> +                } else {
> +                        memory_array[index] = val;
> +                }
> +                seq >>= PAGE_SHIFT;
> +                seq ^= lspat;
> +                lspat >>= 1;
> +        }
> +
> +}
> +
> +static void do_increment(void)
> +{
> +        int i;
> +        int cpu = smp_processor_id();
> +
> +        printf("CPU%d online\n", cpu);
> +
> +        for (i=0; i < increment_count; i++) {
> +                per_cpu_value[cpu]++;
> +                inc_fn();
> +
> +                shuffle_memory(cpu);
> +        }
> +
> +        printf("CPU%d: Done, %d incs\n", cpu, per_cpu_value[cpu]);
> +
> +        cpumask_set_cpu(cpu, &smp_test_complete);
> +        if (cpu != 0)
> +                halt();
> +}
> +
> +int main(int argc, char **argv)
> +{
> +        int cpu;
> +        unsigned int i, sum = 0;
> +        static const unsigned char seed[] = "myseed";
> +
> +        inc_fn = &increment_shared;
> +
> +        isaac_init(&prng_context[0], &seed[0], sizeof(seed));
> +
> +        for (i=0; i<argc; i++) {
> +                char *arg = argv[i];
> +
> +                if (strcmp(arg, "lock") == 0) {
> +                        inc_fn = &increment_shared_with_lock;
> +                        report_prefix_push("lock");
> +                } else if (strcmp(arg, "excl") == 0) {
> +                        inc_fn = &increment_shared_with_excl;
> +                        report_prefix_push("excl");
> +                } else if (strcmp(arg, "acqrel") == 0) {
> +                        inc_fn = &increment_shared_with_acqrel;
> +                        report_prefix_push("acqrel");
> +                } else {
> +                        isaac_reseed(&prng_context[0], (unsigned char *) arg, strlen(arg));
> +                }
> +        }
> +
> +        /* fill our random page */
> +        for (i=0; i<PAGE_SIZE; i++) {
> +                memory_array[i] = isaac_next_uint32(&prng_context[0]);
> +        }
> +
> +        for_each_present_cpu(cpu) {
> +                uint32_t seed2 = isaac_next_uint32(&prng_context[0]);
> +                if (cpu == 0)
> +                        continue;
> +
> +                isaac_init(&prng_context[cpu], (unsigned char *) &seed2, sizeof(seed2));
> +                smp_boot_secondary(cpu, do_increment);
> +        }
> +
> +        do_increment();
> +
> +        while (!cpumask_full(&smp_test_complete))
> +                cpu_relax();
> +
> +        /* All CPUs done, do we add up */
> +        for_each_present_cpu(cpu) {
> +                sum += per_cpu_value[cpu];
> +        }
> +        report("total incs %d", sum == shared_value, shared_value);
> +
> +        return report_summary();
> +}
> diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
> index 67a9dda..af628e6 100644
> --- a/config/config-arm-common.mak
> +++ b/config/config-arm-common.mak
> @@ -12,6 +12,7 @@ endif
>  tests-common = $(TEST_DIR)/selftest.flat
>  tests-common += $(TEST_DIR)/spinlock-test.flat
>  tests-common += $(TEST_DIR)/tlbflush-test.flat
> +tests-common += $(TEST_DIR)/barrier-test.flat
>
>  utils-common = $(TEST_DIR)/utils/kvm-query
>
> @@ -80,3 +81,4 @@ utils: $(utils-common)
>  $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o
>  $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o
>  $(TEST_DIR)/tlbflush-test.elf: $(cstart.o) $(TEST_DIR)/tlbflush-test.o
> +$(TEST_DIR)/barrier-test.elf: $(cstart.o) $(TEST_DIR)/barrier-test.o
> --
> 2.5.0
>
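
Coming back to the 'eq' question above: here is a minimal, untested sketch of
the unconditional sequence I mean, dropping the condition suffixes so the
store-exclusive and the flag check always run. The function name
increment_shared_with_excl_plain is made up for illustration (it is not part
of the patch); it assumes it sits in the same file and uses the patch's
shared_value.

/* Hypothetical variant, not from the patch: plain ldrex/strex loop with
 * no condition suffixes.  strex always attempts the store and writes 0
 * to r1 on success, or 1 if the exclusive monitor was lost, in which
 * case we retry from label 1.
 */
static void increment_shared_with_excl_plain(void)
{
        asm volatile(
        "1: ldrex r0, [%[sptr]]\n"
        "   add r0, r0, #0x1\n"
        "   strex r1, r0, [%[sptr]]\n"
        "   cmp r1, #0\n"
        "   bne 1b\n"
        : /* out */
        : [sptr] "r" (&shared_value) /* in */
        : "r0", "r1", "cc");
}

With the conditional forms, whether strexeq executes at all on the first pass
depends on whatever the flags happen to contain when the loop is entered
(neither ldrex nor the flag-less add updates them), which is why the
unconditional sequence seems clearer to me.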