/*
 * On Oct 29, 2022, at 12:14 PM, Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
 *
 * > It didn't fail for me before, it doesn't fail for me with those patches.
 *
 * For the record, I tried to run the PoC on another machine, and it indeed did
 * not fail.
 *
 * Turns out I had a small bug in one of the mechanisms that were intended to
 * make the failure more likely (I should have mapped again or madvised
 * HPAGE_SIZE to increase the time zap_pte_range spends to increase the
 * probability of the race).
 *
 * I am still trying to figure out how to address this issue, and whether the
 * fact that some rmap_walk(), which do not use PVMW_SYNC are of an issue.
 *
 * ---
 */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>

/* Report a failed call that sets errno, then terminate. */
#define handle_error(msg) \
	do { perror(msg); exit(EXIT_FAILURE); } while (0)

/*
 * Report a failed call that returns its error number instead of setting errno
 * (pthread_create(), posix_fadvise()), then terminate.
 */
#define handle_error_en(en, msg) \
	do { errno = (en); perror(msg); exit(EXIT_FAILURE); } while (0)

void *p;			/* start of the shared file mapping under test */
volatile bool stop = false;	/* set by main() to ask the worker threads to finish */
pid_t flusher_pid;		/* pid of the forked flushing process; 0 until fork() returns */
int fd;				/* descriptor of the backing file */

#define PAGE_SIZE (4096ul)
#define PAGES_PER_PMD (512)
#define HPAGE_SIZE (PAGE_SIZE * PAGES_PER_PMD)

// Comment MUNMAP_TEST for MADV_DONTNEED test
#define MUNMAP_TEST

/*
 * Keep dirtying every page of the mapping except the first one (which belongs
 * to checking_thread()) by storing a byte at offset 64 of each page.
 *
 * Runs until `stop` is set. `arg` is unused. Always returns NULL.
 */
void *dirtying_thread(void *arg)
{
	int i;

	(void)arg;
	while (!stop) {
		for (i = 1; i < PAGES_PER_PMD; i++)
			*(volatile char *)((char *)p + (i * PAGE_SIZE) + 64) = 5;
	}
	return NULL;
}

/*
 * Store an incrementing counter in the first word of the mapping and read it
 * straight back. A mismatch means a store was lost: report it, terminate the
 * flushing process and exit the whole program (status 0).
 *
 * Runs until `stop` is set. `arg` is unused. Returns NULL if no mismatch was
 * seen.
 */
void *checking_thread(void *arg)
{
	volatile unsigned long *ul_p = (volatile unsigned long *)p;
	unsigned long cnt = 0;

	(void)arg;
	while (!stop) {
		unsigned long got;

		*ul_p = cnt;
		/* Read once so the reported value is the one that mismatched. */
		got = *ul_p;
		if (got != cnt) {
			printf("FAILED: expected %lu, got %lu\n", cnt, got);
			/*
			 * flusher_pid is still 0 until main() has forked;
			 * kill(0, ...) would signal the whole process group.
			 */
			if (flusher_pid > 0)
				kill(flusher_pid, SIGTERM);
			exit(0);
		}
		cnt++;
	}
	return NULL;
}

/*
 * Repeatedly tear down the page-table entries of the mapping at `p`.
 *
 * With MUNMAP_TEST defined, the range is replaced by a fresh
 * MAP_FIXED|MAP_POPULATE mapping of the same file. Otherwise the range is
 * dropped with MADV_DONTNEED, followed by a 10 microsecond sleep.
 *
 * Runs until `stop` is set. `arg` is unused. Always returns NULL; any failing
 * system call terminates the program.
 */
void *remap_thread(void *arg)
{
	(void)arg;
	while (!stop) {
#ifdef MUNMAP_TEST
		void *ptr = mmap(p, HPAGE_SIZE, PROT_READ|PROT_WRITE,
				 MAP_SHARED|MAP_FIXED|MAP_POPULATE, fd, 0);

		if (ptr == MAP_FAILED)
			handle_error("remap_thread");
#else
		struct timespec t = { .tv_nsec = 10000, };

		if (madvise(p, HPAGE_SIZE, MADV_DONTNEED) < 0)
			handle_error("MADV_DONTNEED");
		nanosleep(&t, NULL);
#endif
	}
	return NULL;
}

/*
 * Body of the forked flushing process: loop forever syncing the first page of
 * the file and asking the kernel to drop it from the page cache.
 *
 * Never returns; the process is expected to be terminated by a signal.
 */
void flushing_process(void)
{
	int err;

	// Remove the pages to speed up rmap_walk and allow to drop caches.
	if (madvise(p, HPAGE_SIZE, MADV_DONTNEED) < 0)
		handle_error("MADV_DONTNEED");

	while (true) {
		if (msync(p, PAGE_SIZE, MS_SYNC))
			handle_error("msync");
		/* posix_fadvise() returns the error number; errno is not set. */
		err = posix_fadvise(fd, 0, PAGE_SIZE, POSIX_FADV_DONTNEED);
		if (err)
			handle_error_en(err, "posix_fadvise");
	}
}

/*
 * Usage: <prog> <filename>
 *
 * Fills <filename> with HPAGE_SIZE bytes of zeroes, maps it MAP_SHARED at an
 * HPAGE_SIZE-aligned address, starts the three worker threads and the flushing
 * process, lets them run for 60 seconds and then shuts everything down.
 */
int main(int argc, char *argv[])
{
	void *(*thread_funcs[])(void *) = {
		dirtying_thread,
		checking_thread,
		remap_thread,
	};
	enum { N_THREADS = sizeof(thread_funcs) / sizeof(thread_funcs[0]) };
	pthread_t threads[N_THREADS];
	uintptr_t addr;
	void *ptr;
	char *page;
	pid_t pid;
	int r, i;

	if (argc < 2) {
		fprintf(stderr, "usages: %s [filename]\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	fd = open(argv[1], O_RDWR|O_CREAT, 0666);
	if (fd == -1)
		handle_error("open fd");

	/* Zero-filled so the file content is well defined. */
	page = calloc(1, PAGE_SIZE);
	if (page == NULL)
		handle_error("calloc");
	for (i = 0; i < PAGES_PER_PMD; i++) {
		if (write(fd, page, PAGE_SIZE) != (ssize_t)PAGE_SIZE)
			handle_error("write");
	}
	free(page);

	/*
	 * Reserve twice the needed size so that an HPAGE_SIZE-aligned window
	 * of HPAGE_SIZE bytes is guaranteed to lie inside the reservation.
	 */
	ptr = mmap(NULL, HPAGE_SIZE * 2, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
	if (ptr == MAP_FAILED)
		handle_error("mmap anon");

	addr = ((uintptr_t)ptr + HPAGE_SIZE - 1) & ~(uintptr_t)(HPAGE_SIZE - 1);

	printf("starting...\n");

	ptr = mmap((void *)addr, HPAGE_SIZE, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED|MAP_POPULATE, fd, 0);
	if (ptr == MAP_FAILED)
		handle_error("mmap file - start");

	p = ptr;

	for (i = 0; i < N_THREADS; i++) {
		/* pthread_create() returns the error number; errno is not set. */
		r = pthread_create(&threads[i], NULL, thread_funcs[i], NULL);
		if (r)
			handle_error_en(r, "pthread_create");
	}

	// Run the flushing process in a different process, so msync() would
	// not require mmap_lock.
	pid = fork();
	if (pid < 0)
		/* Never store -1: kill(-1, ...) signals every process we may signal. */
		handle_error("fork");
	if (pid == 0)
		flushing_process();

	flusher_pid = pid;

	sleep(60);

	stop = true;
	for (i = 0; i < N_THREADS; i++)
		pthread_join(threads[i], NULL);
	kill(flusher_pid, SIGTERM);
	printf("Finished without an error\n");
	exit(0);
}