RE: Serious problem with ticket spinlocks on ia64

Debugging this in the kernel seemed hard, so I tried to construct a
user-level test using the same code from the kernel (see attached). But
it fails so catastrophically with any number of workers greater than
one that I suspect I made some mistake cutting & pasting the relevant
bits of kernel infrastructure. The goal of the program is to have
several child processes pounding on a lock while the parent looks in
every 5 seconds to see whether they are making progress.
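
A note on building it: the test has to be compiled with optimization,
since the __bad_*_for_ia64_fetch_and_add() externs are deliberately
left undefined and only link when their calls get optimized away.
Something like:

	$ cc -O2 ticket.c -o ticket
	$ ./ticket 4		# run with four workers

(ticket.c being whatever name the attachment is saved under.)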

-Tony
typedef struct {
        volatile unsigned int lock;
} arch_spinlock_t;

#define WORKERS 8

struct sh {
	arch_spinlock_t	l; char pad1[60];	/* lock alone on a 64-byte line */
	long success; char pad2[56];		/* total acquisitions, padded likewise */
	long worker[WORKERS];			/* per-worker acquisition counts */
	int locks[WORKERS];			/* debug: lock word each worker is spinning on */
} *s;

int me;

/* cut & paste infrastructure from kernel start here */
typedef unsigned long __u64;
#define __always_inline inline __attribute__((always_inline))
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

extern unsigned long __bad_size_for_ia64_fetch_and_add (void);
extern unsigned long __bad_increment_for_ia64_fetch_and_add (void);

#define ia64_invala() asm volatile ("invala" ::: "memory")

#define ia64_hint_pause 0

#define ia64_hint(mode)                                         \
({                                                              \
        switch (mode) {                                         \
        case ia64_hint_pause:                                   \
                asm volatile ("hint @pause" ::: "memory");      \
                break;                                          \
        }                                                       \
})
#define cpu_relax() ia64_hint(ia64_hint_pause)
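/* cpu_relax() is the kernel's spin-wait hint; "hint @pause" tells the
   core this thread is busy-waiting (mainly of use on SMT parts) */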

#define ia64_fetchadd4_acq(p, inc)                                              \
({                                                                              \
        __u64 ia64_intri_res;                                                   \
        asm volatile ("fetchadd4.acq %0=[%1],%2"                                \
                                : "=r"(ia64_intri_res) : "r"(p), "i" (inc)      \
                                : "memory");                                    \
        ia64_intri_res;                                                         \
})

/* 8-byte flavor from the same kernel header; dead code for our 4-byte
   lock word, but the size-8 arm of IA64_FETCHADD below wants it */
#define ia64_fetchadd8_acq(p, inc)                                              \
({                                                                              \
        __u64 ia64_intri_res;                                                   \
        asm volatile ("fetchadd8.acq %0=[%1],%2"                                \
                                : "=r"(ia64_intri_res) : "r"(p), "i" (inc)      \
                                : "memory");                                    \
        ia64_intri_res;                                                         \
})

#define IA64_FETCHADD(tmp,v,n,sz,sem)                                           \
({                                                                              \
        switch (sz) {                                                           \
              case 4:                                                           \
                tmp = ia64_fetchadd4_##sem((unsigned int *) v, n);              \
                break;                                                          \
                                                                                \
              case 8:                                                           \
                tmp = ia64_fetchadd8_##sem((unsigned long *) v, n);             \
                break;                                                          \
                                                                                \
              default:                                                          \
                __bad_size_for_ia64_fetch_and_add();                            \
        }                                                                       \
})

#define ia64_fetchadd(i,v,sem)                                                          \
({                                                                                      \
        __u64 _tmp;                                                                     \
        volatile __typeof__(*(v)) *_v = (v);                                            \
        /* Can't use a switch () here: gcc isn't always smart enough for that... */     \
        if ((i) == -16)                                                                 \
                IA64_FETCHADD(_tmp, _v, -16, sizeof(*(v)), sem);                        \
        else if ((i) == -8)                                                             \
                IA64_FETCHADD(_tmp, _v, -8, sizeof(*(v)), sem);                         \
        else if ((i) == -4)                                                             \
                IA64_FETCHADD(_tmp, _v, -4, sizeof(*(v)), sem);                         \
        else if ((i) == -1)                                                             \
                IA64_FETCHADD(_tmp, _v, -1, sizeof(*(v)), sem);                         \
        else if ((i) == 1)                                                              \
                IA64_FETCHADD(_tmp, _v, 1, sizeof(*(v)), sem);                          \
        else if ((i) == 4)                                                              \
                IA64_FETCHADD(_tmp, _v, 4, sizeof(*(v)), sem);                          \
        else if ((i) == 8)                                                              \
                IA64_FETCHADD(_tmp, _v, 8, sizeof(*(v)), sem);                          \
        else if ((i) == 16)                                                             \
                IA64_FETCHADD(_tmp, _v, 16, sizeof(*(v)), sem);                         \
        else                                                                            \
                _tmp = __bad_increment_for_ia64_fetch_and_add();                        \
        (__typeof__(*(v))) (_tmp);      /* return old value */                          \
})

#define TICKET_SHIFT    17
#define TICKET_BITS     15
#define TICKET_MASK     ((1 << TICKET_BITS) - 1)
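
/*
 * Lock word layout (per the comment in the kernel's
 * arch/ia64/include/asm/spinlock.h):
 *
 *   31           17 16 15 14            0
 *  +---------------+-------+-------------+
 *  |  now_serving  |  pad  | next_ticket |
 *  +---------------+-------+-------------+
 *
 * fetchadd4.acq of 1 draws a ticket from next_ticket; the lock is ours
 * once now_serving equals our ticket, i.e. once
 * ((w >> TICKET_SHIFT) ^ w) & TICKET_MASK is zero.  The pad bits stop
 * next_ticket from carrying into now_serving.
 */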

static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
        int     *p = (int *)&lock->lock, ticket, serve;

        ticket = ia64_fetchadd(1, p, acq);

        if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
                return;

        ia64_invala();
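
        /*
         * Slow path: invala drops any stale ALAT entry for the lock
         * word, so the ld4.c.nc below really loads on its first trip
         * (ALAT miss) and installs a fresh entry (".nc" = no clear).
         * The idea is that later iterations then spin out of the ALAT
         * without touching memory, until another CPU's store to the
         * lock line evicts the entry.
         */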

s->locks[me] = ticket;
        for (;;) {
                asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory");

                if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) {
s->locks[me] = 0;
                        return;
		}
                cpu_relax();
        }
}

static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
        unsigned short  *p = (unsigned short *)&lock->lock + 1, tmp;

        /* ld2.bias reads the now_serving halfword (little endian: +1 is
         * bits 16-31) and hints that we want the line exclusively.
         * Adding 2 advances now_serving by one (its low bit is halfword
         * bit 1) and ~1 keeps the pad bit clear; ia64 gcc emits st2.rel
         * for the volatile store, which is what releases the lock. */
        asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
        ACCESS_ONCE(*p) = (tmp + 2) & ~1;
}
/* cut & paste infrastructure from kernel ends here */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

static void work(void)
{
	printf("Starting worker %d\n", me);
	while (1) {
		__ticket_spin_lock(&s->l);
		s->success++;
		s->worker[me]++;
		__ticket_spin_unlock(&s->l);
	}
}

int main(int argc, char **argv)
{
	int i, pid;
	int workers = WORKERS;

	if (argc > 1) {
		workers = atoi(argv[1]);
		if (workers < 1 || workers > WORKERS)
			workers = WORKERS;
	}
	s = mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0L);
	if (s == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	printf("shared mapping at %p\n", (void *)s);

	for (i = 0; i < workers; i++) switch (pid = fork()) {
	case -1: perror("fork"); return 1;
	case 0: me = i; work(); return 0;
	}

	while (1) {
		sleep(5);
		printf("%ld [lock = %.8x]\n", s->success, s->l.lock);
		for (i = 0; i < workers; i++)
			printf(" %ld %.8x\n", s->worker[i], s->locks[i]);
	}
}
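
For reading the output: the parent's first line is the total
acquisition count plus the raw lock word, and each following line is
one worker's own count and, while it is stuck in the slow path, the
lock word it is spinning on (the locks[] entry drops back to 0 once it
is served). If the lock were healthy I'd expect all the counts to keep
climbing and the locks[] column to keep churning rather than freeze.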
