On Mon, Aug 3, 2015 at 12:30 PM, Alex Bennée <alex.bennee@xxxxxxxxxx> wrote:
>
> alvise rigo <a.rigo@xxxxxxxxxxxxxxxxxxxxxx> writes:
>
>> Hi Alex,
>>
>> Nice set of tests, they are proving to be helpful.
>> One question below.
>>
>> On Fri, Jul 31, 2015 at 5:54 PM, Alex Bennée <alex.bennee@xxxxxxxxxx> wrote:
>>> From: Alex Bennée <alex@xxxxxxxxxx>
>>>
>>> This test has been written mainly to stress multi-threaded TCG behaviour
>>> but will demonstrate failure by default on real hardware. The test takes
>>> the following parameters:
>>>
>>>   - "lock" use GCC's locking semantics
>>>   - "excl" use load/store exclusive semantics
>>>   - "acqrel" use acquire/release semantics
>>>
>>> Currently excl/acqrel lock up on MTTCG
>>>
>>> Signed-off-by: Alex Bennée <alex.bennee@xxxxxxxxxx>
>>> ---
>>>  arm/barrier-test.c           | 206 +++++++++++++++++++++++++++++++++++++++++++
>>>  config/config-arm-common.mak |   2 +
>>>  2 files changed, 208 insertions(+)
>>>  create mode 100644 arm/barrier-test.c
>>>
>>> diff --git a/arm/barrier-test.c b/arm/barrier-test.c
>>> new file mode 100644
>>> index 0000000..53d690b
>>> --- /dev/null
>>> +++ b/arm/barrier-test.c
>>> @@ -0,0 +1,206 @@
>>> +#include <libcflat.h>
>>> +#include <asm/smp.h>
>>> +#include <asm/cpumask.h>
>>> +#include <asm/barrier.h>
>>> +#include <asm/mmu.h>
>>> +
>>> +#include <prng.h>
>>> +
>>> +#define MAX_CPUS 4
>>> +
>>> +/* How many increments to do */
>>> +static int increment_count = 10000000;
>>> +
>>> +
>>> +/* shared value, we use the alignment to ensure the global_lock value
>>> + * doesn't share a page */
>>> +static unsigned int shared_value;
>>> +
>>> +/* PAGE_SIZE * uint32_t means we span several pages */
>>> +static uint32_t memory_array[PAGE_SIZE];
>>> +
>>> +__attribute__((aligned(PAGE_SIZE))) static unsigned int per_cpu_value[MAX_CPUS];
>>> +__attribute__((aligned(PAGE_SIZE))) static cpumask_t smp_test_complete;
>>> +__attribute__((aligned(PAGE_SIZE))) static int global_lock;
>>> +
>>> +struct isaac_ctx prng_context[MAX_CPUS];
>>> +
>>> +void (*inc_fn)(void);
>>> +
>>> +static void lock(int *lock_var)
>>> +{
>>> +        while (__sync_lock_test_and_set(lock_var, 1));
>>> +}
>>> +static void unlock(int *lock_var)
>>> +{
>>> +        __sync_lock_release(lock_var);
>>> +}
>>> +
>>> +static void increment_shared(void)
>>> +{
>>> +        shared_value++;
>>> +}
>>> +
>>> +static void increment_shared_with_lock(void)
>>> +{
>>> +        lock(&global_lock);
>>> +        shared_value++;
>>> +        unlock(&global_lock);
>>> +}
>>> +
>>> +static void increment_shared_with_excl(void)
>>> +{
>>> +#if defined (__LP64__) || defined (_LP64)
>>> +        asm volatile(
>>> +        "1:     ldxr    w0, [%[sptr]]\n"
>>> +        "       add     w0, w0, #0x1\n"
>>> +        "       stxr    w1, w0, [%[sptr]]\n"
>>> +        "       cbnz    w1, 1b\n"
>>> +        : /* out */
>>> +        : [sptr] "r" (&shared_value) /* in */
>>> +        : "w0", "w1", "cc");
>>> +#else
>>> +        asm volatile(
>>> +        "1:     ldrex   r0, [%[sptr]]\n"
>>> +        "       add     r0, r0, #0x1\n"
>>> +        "       strexeq r1, r0, [%[sptr]]\n"
>>> +        "       cmpeq   r1, #0\n"
>>
>> Why are we calling these last two instructions with the 'eq' suffix?
>> Shouldn't we just strex r1, r0, [sptr] and then cmp r1, #0?
>
> Possibly, my armv7 is a little rusty. I'm just looking at tweaking this
> test now so I'll try and clean that up.
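
For what it's worth, something like the following is what I had in mind
(just a sketch, I have not run it). With the 'eq' suffixes the strex and
cmp execute conditionally on whatever the flags happen to hold when the
loop is entered, so the store-exclusive may never even be attempted:

        asm volatile(
        "1:     ldrex   r0, [%[sptr]]\n"        /* load-exclusive current value */
        "       add     r0, r0, #0x1\n"
        "       strex   r1, r0, [%[sptr]]\n"    /* unconditional store-exclusive */
        "       cmp     r1, #0\n"               /* 0 means the store succeeded */
        "       bne     1b\n"                   /* otherwise retry */
        : /* out */
        : [sptr] "r" (&shared_value) /* in */
        : "r0", "r1", "cc");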
>
>> Thank you,
>> alvise
>
>>> +        "       bne     1b\n"
>>> +        : /* out */
>>> +        : [sptr] "r" (&shared_value) /* in */
>>> +        : "r0", "r1", "cc");
>>> +#endif
>>> +}
>>> +
>>> +static void increment_shared_with_acqrel(void)
>>> +{
>>> +#if defined (__LP64__) || defined (_LP64)
>>> +        asm volatile(
>>> +        "       ldar    w0, [%[sptr]]\n"
>>> +        "       add     w0, w0, #0x1\n"
>>> +        "       str     w0, [%[sptr]]\n"
>>> +        : /* out */
>>> +        : [sptr] "r" (&shared_value) /* in */
>>> +        : "w0");
>>> +#else
>>> +        /* ARMv7 has no acquire/release semantics but we
>>> +         * can ensure the results of the write are propagated
>>> +         * with the use of barriers.
>>> +         */
>>> +        asm volatile(
>>> +        "1:     ldrex   r0, [%[sptr]]\n"
>>> +        "       add     r0, r0, #0x1\n"
>>> +        "       strexeq r1, r0, [%[sptr]]\n"
>>> +        "       cmpeq   r1, #0\n"

I have not tested it, but also this one looks wrong in the same way; a
possible fix is sketched below, after the quoted function.

Regards,
alvise

>>> +        "       bne     1b\n"
>>> +        "       dmb\n"
>>> +        : /* out */
>>> +        : [sptr] "r" (&shared_value) /* in */
>>> +        : "r0", "r1", "cc");
>>> +#endif
>>> +
>>> +}
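
The same change would apply here, keeping the trailing dmb so the update
is still published with a barrier (again just a sketch, not tested):

        asm volatile(
        "1:     ldrex   r0, [%[sptr]]\n"        /* load-exclusive current value */
        "       add     r0, r0, #0x1\n"
        "       strex   r1, r0, [%[sptr]]\n"    /* unconditional store-exclusive */
        "       cmp     r1, #0\n"               /* 0 means the store succeeded */
        "       bne     1b\n"                   /* otherwise retry */
        "       dmb\n"                          /* make the update visible */
        : /* out */
        : [sptr] "r" (&shared_value) /* in */
        : "r0", "r1", "cc");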
>>> +
>>> +/* The idea of this is just to generate some random load/store
>>> + * activity which may or may not race with an un-barried incremented
>>> + * of the shared counter
>>> + */
>>> +static void shuffle_memory(int cpu)
>>> +{
>>> +        int i;
>>> +        uint32_t lspat = isaac_next_uint32(&prng_context[cpu]);
>>> +        uint32_t seq = isaac_next_uint32(&prng_context[cpu]);
>>> +        int count = seq & 0x1f;
>>> +        uint32_t val=0;
>>> +
>>> +        seq >>= 5;
>>> +
>>> +        for (i=0; i<count; i++) {
>>> +                int index = seq & ~PAGE_MASK;
>>> +                if (lspat & 1) {
>>> +                        val ^= memory_array[index];
>>> +                } else {
>>> +                        memory_array[index] = val;
>>> +                }
>>> +                seq >>= PAGE_SHIFT;
>>> +                seq ^= lspat;
>>> +                lspat >>= 1;
>>> +        }
>>> +
>>> +}
>>> +
>>> +static void do_increment(void)
>>> +{
>>> +        int i;
>>> +        int cpu = smp_processor_id();
>>> +
>>> +        printf("CPU%d online\n", cpu);
>>> +
>>> +        for (i=0; i < increment_count; i++) {
>>> +                per_cpu_value[cpu]++;
>>> +                inc_fn();
>>> +
>>> +                shuffle_memory(cpu);
>>> +        }
>>> +
>>> +        printf("CPU%d: Done, %d incs\n", cpu, per_cpu_value[cpu]);
>>> +
>>> +        cpumask_set_cpu(cpu, &smp_test_complete);
>>> +        if (cpu != 0)
>>> +                halt();
>>> +}
>>> +
>>> +int main(int argc, char **argv)
>>> +{
>>> +        int cpu;
>>> +        unsigned int i, sum = 0;
>>> +        static const unsigned char seed[] = "myseed";
>>> +
>>> +        inc_fn = &increment_shared;
>>> +
>>> +        isaac_init(&prng_context[0], &seed[0], sizeof(seed));
>>> +
>>> +        for (i=0; i<argc; i++) {
>>> +                char *arg = argv[i];
>>> +
>>> +                if (strcmp(arg, "lock") == 0) {
>>> +                        inc_fn = &increment_shared_with_lock;
>>> +                        report_prefix_push("lock");
>>> +                } else if (strcmp(arg, "excl") == 0) {
>>> +                        inc_fn = &increment_shared_with_excl;
>>> +                        report_prefix_push("excl");
>>> +                } else if (strcmp(arg, "acqrel") == 0) {
>>> +                        inc_fn = &increment_shared_with_acqrel;
>>> +                        report_prefix_push("acqrel");
>>> +                } else {
>>> +                        isaac_reseed(&prng_context[0], (unsigned char *) arg, strlen(arg));
>>> +                }
>>> +        }
>>> +
>>> +        /* fill our random page */
>>> +        for (i=0; i<PAGE_SIZE; i++) {
>>> +                memory_array[i] = isaac_next_uint32(&prng_context[0]);
>>> +        }
>>> +
>>> +        for_each_present_cpu(cpu) {
>>> +                uint32_t seed2 = isaac_next_uint32(&prng_context[0]);
>>> +                if (cpu == 0)
>>> +                        continue;
>>> +
>>> +                isaac_init(&prng_context[cpu], (unsigned char *) &seed2, sizeof(seed2));
>>> +                smp_boot_secondary(cpu, do_increment);
>>> +        }
>>> +
>>> +        do_increment();
>>> +
>>> +        while (!cpumask_full(&smp_test_complete))
>>> +                cpu_relax();
>>> +
>>> +        /* All CPUs done, do we add up */
>>> +        for_each_present_cpu(cpu) {
>>> +                sum += per_cpu_value[cpu];
>>> +        }
>>> +        report("total incs %d", sum == shared_value, shared_value);
>>> +
>>> +        return report_summary();
>>> +}
>>> diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
>>> index 67a9dda..af628e6 100644
>>> --- a/config/config-arm-common.mak
>>> +++ b/config/config-arm-common.mak
>>> @@ -12,6 +12,7 @@ endif
>>>  tests-common  = $(TEST_DIR)/selftest.flat
>>>  tests-common += $(TEST_DIR)/spinlock-test.flat
>>>  tests-common += $(TEST_DIR)/tlbflush-test.flat
>>> +tests-common += $(TEST_DIR)/barrier-test.flat
>>>
>>>  utils-common = $(TEST_DIR)/utils/kvm-query
>>>
>>> @@ -80,3 +81,4 @@ utils: $(utils-common)
>>>  $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o
>>>  $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o
>>>  $(TEST_DIR)/tlbflush-test.elf: $(cstart.o) $(TEST_DIR)/tlbflush-test.o
>>> +$(TEST_DIR)/barrier-test.elf: $(cstart.o) $(TEST_DIR)/barrier-test.o
>>> --
>>> 2.5.0
>>>
>
> --
> Alex Bennée