I'm messing around with an old microbenchmark which I massaged into a state pluggable into will-it-scale [pasted at the end]. I verified that with btrfs and xfs the bench stays on CPU the entire time. In contrast, running it on top of ext4 gives me about 50% idle. According to offcputime-bpfcc -K this is why: finish_task_switch.isra.0 __schedule schedule io_schedule folio_wait_bit_common folio_wait_writeback truncate_inode_partial_folio truncate_inode_pages_range ext4_evict_inode evict do_unlinkat __x64_sys_unlink do_syscall_64 entry_SYSCALL_64_after_hwframe - vfsmix2_process (22793) 3913285 The code reopens the file with O_TRUNC. Whacking the flag gets rid of the off-CPU time. I have no interest in digging into it. I suspect this is an easy fix for someone familiar with the fs. git clone https://github.com/antonblanchard/will-it-scale.git plug the code below into tests/vfsmix2.c && gmake && ./vfsmix2_processes #include <stdlib.h> #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> #include <fcntl.h> #include <assert.h> /* * Repurposed code stolen from Ingo Molnar, see: * https://lkml.org/lkml/2015/5/19/1009 */ char *testcase_description = "vfsmix"; void testcase(unsigned long long *iterations, unsigned long nr) { char tmpfile[] = "/tmp/willitscale.XXXXXX"; int fd = mkstemp(tmpfile); assert(fd >= 0); close(fd); unlink(tmpfile); while (1) { fd = open(tmpfile, O_RDWR | O_CREAT | O_EXCL, 0600); assert(fd >= 0); int ret; ret = lseek(fd, 4095, SEEK_SET); assert(ret == 4095); close(fd); fd = open(tmpfile, O_RDWR|O_CREAT|O_TRUNC); assert(fd >= 0); { char c = 1; ret = write(fd, &c, 1); assert(ret == 1); } { char *mmap_buf = (char *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); assert(mmap_buf != (void *)-1L); mmap_buf[0] = 1; ret = munmap(mmap_buf, 4096); assert(ret == 0); } close(fd); ret = unlink(tmpfile); assert(ret == 0); (*iterations)++; } } -- Mateusz Guzik <mjguzik gmail.com>