On 5/25/21 12:26 AM, Ahmed S. Darwish wrote:
> On Wed, Mar 31, 2021 at 07:06:26PM -0700, Dipen Patel wrote:
>> Hi,
>>
>> I was following
>> https://rt.wiki.kernel.org/index.php/Threaded_RT-application_with_memory_locking_and_stack_handling_example
>> with some below changes:
>>
>
> The example above is a bit inaccurate, as it prefaults the thread's
> stack much later than it should be.
>
> ...
>
>>
>> thread_fn {
>> getrusage(RUSAGE_SELF, &usage);>
>> print and save usage.ruminflt;
>> prove_thread_stack_use_is_safe
>> getrusage(RUSAGE_SELF, &usage);
>> print usage.ruminflt - last_saved_cnt;
>> }
>>
>> I observed there are still page faults.
>
> Well, in the snippet above, there will obviously be page faults, as
> you're also measuring the faults generated by
> prove_thread_stack_use_is_safe(). On first invocation, this is actually
> the method prefaulting the thread stack.
>

The original example shown in the above link uses
prove_thread_stack_use_is_safe the same way. I just extended it to call it
and compute the page-fault counts locally in each thread, because there are
multiple threads.

> To make sure the discussion is more concrete, can you please send a
> complete, compilable, *.c file?
>

// Compile with 'gcc thisfile.c -lpthread -lrt -Wall'

/*
 * This program is modified to have multiple threads each with CPU affinity
 * and priority from
 * https://rt.wiki.kernel.org/index.php/Threaded_RT-application_with_memory_locking_and_stack_handling_example
 */

#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h> // Needed for mlockall()
#include <unistd.h> // needed for sysconf(int name);
#include <malloc.h>
#include <sys/time.h> // needed for getrusage
#include <sys/types.h>
#include <sys/resource.h> // needed for getrusage
#include <pthread.h>
#include <limits.h>
#include <ctype.h>
#include <sched.h>

#define PRE_ALLOCATION_SIZE (100*1024*1024) /* 100MB pagefault free buffer */
#define MY_STACK_SIZE (100*1024) /* 100 kB is enough for now.
*/ /* Added by Dipen */ #define NUM_THREAD 8 /* Do not change this, start_rt_thread hard codes its usage */ int SEED_PRIO = 90; int NUM_PROC; struct th_info { int cpu_number; int other; int prio; } ti[NUM_THREAD]; pthread_t thread[NUM_THREAD]; pthread_attr_t attr[NUM_THREAD]; /* End */ static void setprio(int prio, int sched) { struct sched_param param; // Set realtime priority for this thread param.sched_priority = prio; if (sched_setscheduler(0, sched, ¶m) < 0) perror("sched_setscheduler"); } void show_new_pagefault_count(const char* logtext, const char* allowed_maj, const char* allowed_min) { static int last_majflt = 0, last_minflt = 0; struct rusage usage; getrusage(RUSAGE_SELF, &usage); printf("%-30.30s: Pagefaults, Major:%ld (Allowed %s), " \ "Minor:%ld (Allowed %s)\n", logtext, usage.ru_majflt - last_majflt, allowed_maj, usage.ru_minflt - last_minflt, allowed_min); last_majflt = usage.ru_majflt; last_minflt = usage.ru_minflt; } static void prove_thread_stack_use_is_safe(int stacksize) { volatile char buffer[stacksize]; int i; /* Prove that this thread is behaving well */ for (i = 0; i < stacksize; i += sysconf(_SC_PAGESIZE)) { /* Each write to this buffer shall NOT generate a pagefault. */ buffer[i] = i; } /* commented out by Dipen */ //show_new_pagefault_count("Caused by using thread stack", "0", "0"); } /* Added by Dipen */ static void confirm_sched_para() { int policy, ret; struct sched_param param; ret = pthread_getschedparam(pthread_self(), &policy, ¶m); if (ret) printf("ERROR getting sched param\n"); else printf("policy=%s, priority=%d\n", (policy == SCHED_FIFO) ? "SCHED_FIFO" : (policy == SCHED_RR) ? "SCHED_RR" : (policy == SCHED_OTHER) ? 
"SCHED_OTHER" : "???", param.sched_priority); } /*************************************************************/ /* The thread to start */ /* Modified to add CPU affinity and calculating page faults * locally in the thread */ static void *my_rt_thread(void *args) { struct th_info *ti = (struct th_info *)args; struct timespec ts; ts.tv_sec = 0; ts.tv_nsec = 10000000; int last_majflt = 0, last_minflt = 0; struct rusage usage; cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(ti->cpu_number , &cpuset); sched_setaffinity(0, sizeof(cpuset), &cpuset); if (ti->other != 1) { setprio(ti->prio, SCHED_FIFO); printf("I am an RT-thread [%d], executing on [%d]\n", pthread_self(), sched_getcpu()); } else { printf("I am an non-thread [%d], executing on [%d]\n", pthread_self(), sched_getcpu()); } confirm_sched_para(); //<do your RT-thing here> getrusage(RUSAGE_SELF, &usage); printf("[%d]Pagefaults, Major:%ld, Minor:%ld \n",pthread_self(), usage.ru_majflt - last_majflt, usage.ru_minflt - last_minflt); last_majflt = usage.ru_majflt; last_minflt = usage.ru_minflt; prove_thread_stack_use_is_safe(MY_STACK_SIZE); getrusage(RUSAGE_SELF, &usage); printf("[%d]After stack usage:Pagefaults, Major:%ld, Minor:%ld \n",pthread_self(), usage.ru_majflt - last_majflt, usage.ru_minflt - last_minflt); /* wait 400 ms before thread terminates */ clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL); printf("Thread %d leaving\n", pthread_self()); return NULL; } /*************************************************************/ static void error(int at) { /* Just exit on error */ fprintf(stderr, "Some error occured at %d", at); exit(1); } static void start_rt_thread(void) { int i = 0; int csnum; cpu_set_t cpuset; int RT_POLICY = SCHED_FIFO; int RT_POLICY_MIN_PRIORITY = sched_get_priority_min(RT_POLICY); int RT_POLICY_MAX_PRIORITY = sched_get_priority_max(RT_POLICY); int PRIO_LOW = RT_POLICY_MIN_PRIORITY; int PRIO_HIGH = RT_POLICY_MAX_PRIORITY - 5; int PRIO_MEDIUM = (PRIO_LOW + PRIO_HIGH) / 2; printf("prio low=%d, %d, 
%d\n", PRIO_LOW, PRIO_HIGH, PRIO_MEDIUM); /* init to default values */ for (; i < NUM_THREAD; i++) { if (pthread_attr_init(&attr[i])) error(1); if (pthread_attr_setstacksize(&attr[i], PTHREAD_STACK_MIN + MY_STACK_SIZE)) error(2); if (i < 3) csnum = i; else csnum = 3; ti[i].cpu_number = csnum; ti[i].other = 0; if (i >= 0 && i <= 2) ti[i].prio = PRIO_LOW; else if (i >= 3 && i < 5) ti[i].prio = PRIO_MEDIUM; else if (i >= 5 && i < 8) ti[i].prio = PRIO_HIGH; if (i == 7) ti[i].other = 1; if (pthread_attr_setinheritsched(&attr[i], PTHREAD_EXPLICIT_SCHED)) error(4); /* And finally start the actual thread */ if (!pthread_create(&thread[i], &attr[i], my_rt_thread, &ti[i])) { printf("Thread: %d created\n", thread[i]); //pthread_detach(thread[i]); } } } static void configure_malloc_behavior(void) { /* Now lock all current and future pages * from preventing of being paged */ if (mlockall(MCL_CURRENT | MCL_FUTURE)) perror("mlockall failed:"); /* Turn off malloc trimming.*/ mallopt(M_TRIM_THRESHOLD, -1); /* Turn off mmap usage. 
*/ mallopt(M_MMAP_MAX, 0); } static void reserve_process_memory(int size) { int i; char *buffer; buffer = malloc(size); /* Touch each page in this piece of memory to get it mapped into RAM */ for (i = 0; i < size; i += sysconf(_SC_PAGESIZE)) { buffer[i] = 0; } free(buffer); } int main(int argc, char *argv[]) { show_new_pagefault_count("Initial count", ">=0", ">=0"); configure_malloc_behavior(); show_new_pagefault_count("mlockall() generated", ">=0", ">=0"); reserve_process_memory(PRE_ALLOCATION_SIZE); show_new_pagefault_count("malloc() and touch generated", ">=0", ">=0"); /* Now allocate the memory for the 2nd time and prove the number of * pagefaults are zero */ reserve_process_memory(PRE_ALLOCATION_SIZE); show_new_pagefault_count("2nd malloc() and use generated", "0", "0"); NUM_PROC = sysconf(_SC_NPROCESSORS_ONLN); printf("We have %d processors\n", NUM_PROC); start_rt_thread(); //<do your RT-thing> for (int i = 0; i < NUM_THREAD; i ++) { pthread_join(thread[i], NULL); } printf("main thread exit\n"); return 0; } > Good luck, > > -- > Ahmed S. Darwish > Linutronix GmbH >