Re: [POC][RFC][PATCH v2] sched: Extended Scheduler Time Slice

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 26 Oct 2023 15:20:22 -0400
Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:

> Anyway, I changed the code to use:
> 
> static inline unsigned clrbit(volatile unsigned *ptr)
> {
> 	unsigned ret;
> 
> 	asm volatile("andb %b1,%0"
> 		     : "+m" (*(volatile char *)ptr)
> 		     : "iq" (0x2)
> 		     : "memory");
> 
> 	ret = *ptr;
> 	*ptr = 0;
> 
> 	return ret;
> }

Mathieu also told me that glibc's rseq has some extra padding at the end,
that happens to be big enough to hold this feature. That means you can run
the code without adding:

  GLIBC_TUNABLES=glibc.pthread.rseq=0

Attached is the updated test program.

-- Steve
// Run with: GLIBC_TUNABLES=glibc.pthread.rseq=0 

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <errno.h>
#include <pthread.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <tracefs.h>
#include <sys/syscall.h>
#include "rseq-abi.h"
#include <linux/tls.h>

#define rseq(rseq, len, flags, sig) syscall(SYS_rseq, rseq, len, \
					    flags, sig);

#define __weak __attribute__((weak))

//#define barrier() asm volatile ("" ::: "memory")
#define rmb() asm volatile ("lfence" ::: "memory")
#define wmb() asm volatile ("sfence" ::: "memory")


static pthread_barrier_t pbarrier;

static __thread struct rseq_abi __attribute__((aligned(sizeof(struct rseq_abi)))) rseq_map;
static __thread struct rseq_abi *rseq_ptr;

static bool no_rseq;

static void init_extend_map(void)
{
	extern ptrdiff_t __rseq_offset;
	extern unsigned int __rseq_size;
	int ret;

	if (no_rseq)
		return;

	if (__rseq_size) {
		if (__rseq_size < sizeof(rseq_map)) {
			printf("glibc rseq less than required mapping\n");
			return;
		}
		rseq_ptr = __builtin_thread_pointer() + __rseq_offset;
		printf("Using glibc rseq %p\n", rseq_ptr);
		return;
	}

	rseq_ptr = &rseq_map;
	ret = rseq(rseq_ptr, sizeof(rseq_map), 0, 0);
	perror("rseq");
	printf("ret = %d (%zd) %p\n", ret, sizeof(rseq_map), &rseq_map);
	if (ret < 0)
		rseq_ptr = NULL;
}

struct data;

struct thread_data {
	unsigned long long			start_wait;
	unsigned long long			x_count;
	unsigned long long			total;
	unsigned long long			max;
	unsigned long long			min;
	unsigned long long			total_wait;
	unsigned long long			max_wait;
	unsigned long long			min_wait;
	struct data				*data;
};

struct data {
	unsigned long long		x;
	unsigned long			lock;
	struct thread_data		*tdata;
	bool				done;
};

static inline unsigned long
cmpxchg(volatile unsigned long *ptr, unsigned long old, unsigned long new)
{
        unsigned long prev;

	asm volatile("lock; cmpxchg %b1,%2"
		     : "=a"(prev)
		     : "q"(new), "m"(*(ptr)), "0"(old)
		     : "memory");
        return prev;
}

static inline unsigned clrbit(volatile unsigned *ptr)
{
	unsigned ret;

	asm volatile("andb %b1,%0"
		     : "+m" (*(volatile char *)ptr)
		     : "iq" (0x2)
		     : "memory");

	ret = *ptr;
	*ptr = 0;

	return ret;
}

static void extend(void)
{
	if (!rseq_ptr)
		return;

	rseq_ptr->cr_flags = 1;
}

static void unextend(void)
{
	unsigned prev;

	if (!rseq_ptr)
		return;

	prev = clrbit(&rseq_ptr->cr_flags);
	if (prev & 2) {
		tracefs_printf(NULL, "Yield!\n");
		sched_yield();
	}
}

#define sec2usec(sec) (sec * 1000000ULL)
#define usec2sec(usec) (usec / 1000000ULL)

static unsigned long long get_time(void)
{
	struct timeval tv;
	unsigned long long time;

	gettimeofday(&tv, NULL);

	time = sec2usec(tv.tv_sec);
	time += tv.tv_usec;

	return time;
}

static void grab_lock(struct thread_data *tdata, struct data *data)
{
	unsigned long long start, end, delta;
	unsigned long long end_wait;
	unsigned long long last;
	unsigned long prev;

	if (!tdata->start_wait)
		tdata->start_wait = get_time();

	while (data->lock && !data->done)
		rmb();

	extend();
	start = get_time();
	prev = cmpxchg(&data->lock, 0, 1);
	if (prev) {
		unextend();
		return;
	}
	end_wait = get_time();
	tracefs_printf(NULL, "Have lock!\n");

	delta = end_wait - tdata->start_wait;
	tdata->start_wait = 0;
	if (!tdata->total_wait || tdata->max_wait < delta)
		tdata->max_wait = delta;
	if (!tdata->total_wait || tdata->min_wait > delta)
		tdata->min_wait = delta;
	tdata->total_wait += delta;

	data->x++;
	last = data->x;

	if (data->lock != 1) {
		printf("Failed locking\n");
		exit(-1);
	}
	prev = cmpxchg(&data->lock, 1, 0);
	end = get_time();
	if (prev != 1) {
		printf("Failed unlocking\n");
		exit(-1);
	}
	tracefs_printf(NULL, "released lock!\n");
	unextend();

	delta = end - start;
	if (!tdata->total || tdata->max < delta)
		tdata->max = delta;

	if (!tdata->total || tdata->min > delta)
		tdata->min = delta;

	tdata->total += delta;
	tdata->x_count++;

	/* Let someone else have a turn */
	while (data->x == last && !data->done)
		rmb();
}

	
	
static void *run_thread(void *d)
{
	struct thread_data *tdata = d;
	struct data *data = tdata->data;

	init_extend_map();

	pthread_barrier_wait(&pbarrier);

	while (!data->done) {
		grab_lock(tdata, data);
	}
	return NULL;
}

int main (int argc, char **argv)
{
	unsigned long long total_wait = 0;
	unsigned long long secs;
	pthread_t *threads;
	struct data data;
	int cpus;

	memset(&data, 0, sizeof(data));

	cpus = sysconf(_SC_NPROCESSORS_CONF);

	threads = calloc(cpus + 1, sizeof(*threads));
	if (!threads) {
		perror("threads");
		exit(-1);
	}

	data.tdata = calloc(cpus + 1, sizeof(*data.tdata));
	if (!data.tdata) {
		perror("Allocating tdata");
		exit(-1);
	}

	tracefs_print_init(NULL);
	pthread_barrier_init(&pbarrier, NULL, cpus + 2);

	for (int i = 0; i <= cpus; i++) {
		int ret;

		data.tdata[i].data = &data;
		ret = pthread_create(&threads[i], NULL, run_thread, &data.tdata[i]);
		if (ret < 0) {
			perror("creating threads");
			exit(-1);
		}
	}

	pthread_barrier_wait(&pbarrier);
	sleep(5);

	printf("Finish up\n");
	data.done = true;
	wmb();

	for (int i = 0; i <= cpus; i++) {
		pthread_join(threads[i], NULL);
		printf("thread %i:\n", i);
		printf("   count:\t%lld\n", data.tdata[i].x_count);
		printf("   total:\t%lld\n", data.tdata[i].total);
		printf("     max:\t%lld\n", data.tdata[i].max);
		printf("     min:\t%lld\n", data.tdata[i].min);
		printf("   total wait:\t%lld\n", data.tdata[i].total_wait);
		printf("     max wait:\t%lld\n", data.tdata[i].max_wait);
		printf("     min wait:\t%lld\n", data.tdata[i].min_wait);
		total_wait += data.tdata[i].total_wait;
	}

	secs = usec2sec(total_wait);

	printf("Ran for %lld times\n", data.x);
	printf("Total wait time: %lld.%06lld\n", secs, total_wait - sec2usec(secs));
	return 0;
}

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux