[*] Note that this needs some cleaning up and not all the events work yet. Add a mount notification facility whereby notifications about changes in mount topology and configuration can be received. Note that this only covers vfsmount topology changes and not superblock events. A separate facility will be added for that. Firstly, an event queue needs to be created: fd = open("/dev/watch_queue", O_RDWR); then a notification can be set up to report notifications via that queue: struct watch_notification_filter filter; memset(&filter, 0, sizeof(filter)); filter.subtype_filter[0] = ~0ULL; filter.info_id = 0x02000000; mount_notify(AT_FDCWD, "/", 0, fd, &filter); Note that the queue can be shared between multiple notifications of various types. Mount notifications propagate up the tree towards the root, so a watch will catch all of the events happening in the subtree rooted at the watch. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> --- arch/x86/entry/syscalls/syscall_32.tbl | 1 arch/x86/entry/syscalls/syscall_64.tbl | 1 fs/Kconfig | 9 ++ fs/Makefile | 1 fs/fs_context.c | 1 fs/mount.h | 26 +++++ fs/mount_notify.c | 178 ++++++++++++++++++++++++++++++++ fs/namespace.c | 18 +++ include/linux/dcache.h | 1 include/linux/syscalls.h | 2 include/uapi/linux/watch_queue.h | 24 ++++ kernel/sys_ni.c | 3 + 12 files changed, 265 insertions(+) create mode 100644 fs/mount_notify.c diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 806760188a31..449bbcc19a6d 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -405,3 +405,4 @@ 391 i386 fsmount sys_fsmount __ia32_sys_fsmount 392 i386 fspick sys_fspick __ia32_sys_fspick 393 i386 fsinfo sys_fsinfo __ia32_sys_fsinfo +394 i386 mount_notify sys_mount_notify __ia32_sys_mount_notify diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 0823eed2b02e..f25fa7ff5fb9 100644 --- 
a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -350,6 +350,7 @@ 339 common fsmount __x64_sys_fsmount 340 common fspick __x64_sys_fspick 341 common fsinfo __x64_sys_fsinfo +342 common mount_notify __x64_sys_mount_notify # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/fs/Kconfig b/fs/Kconfig index ac474a61be37..cbcca62d32e9 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -107,6 +107,15 @@ source "fs/crypto/Kconfig" source "fs/notify/Kconfig" +config MOUNT_NOTIFICATIONS + bool "Mount topology change notifications" + select WATCH_QUEUE + help + This option provides support for getting change notifications on the + mount tree topology. This makes use of the /dev/watch_queue misc + device to handle the notification buffer and provides the + mount_notify() system call to enable/disable watchpoints. + source "fs/quota/Kconfig" source "fs/autofs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index d3b33798998e..49b60030d905 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -129,3 +129,4 @@ obj-y += exofs/ # Multiple modules obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ +obj-$(CONFIG_MOUNT_NOTIFICATIONS) += mount_notify.o diff --git a/fs/fs_context.c b/fs/fs_context.c index 071723cf11c8..4fa99a438471 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -321,6 +321,7 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type, case FS_CONTEXT_FOR_SUBMOUNT: fc->user_ns = get_user_ns(reference->d_sb->s_user_ns); fc->net_ns = get_net(current->nsproxy->net_ns); + fc->sb_flags |= SB_SUBMOUNT; break; case FS_CONTEXT_FOR_RECONFIGURE: /* We don't pin any namespaces as the superblock's diff --git a/fs/mount.h b/fs/mount.h index f39bc9da4d73..7f72f824b958 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -4,6 +4,7 @@ #include <linux/poll.h> #include <linux/ns_common.h> #include <linux/fs_pin.h> +#include <linux/watch_queue.h> struct mnt_namespace { 
atomic_t count; @@ -67,9 +68,13 @@ struct mount { int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ + int mnt_nr_watchers; /* The number of subtree watches tracking this */ struct hlist_head mnt_pins; struct fs_pin mnt_umount; struct dentry *mnt_ex_mountpoint; +#ifdef CONFIG_MOUNT_NOTIFICATIONS + struct watch_list *mnt_watchers; /* Watches on dentries within this mount */ +#endif } __randomize_layout; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ @@ -146,3 +151,24 @@ static inline bool is_local_mountpoint(struct dentry *dentry) return __is_local_mountpoint(dentry); } + +extern void post_mount_notification(struct mount *changed, + struct mount_notification *notify); + +static inline void notify_mount(struct mount *changed, + struct mount *aux, + enum mount_notification_subtype subtype, + u32 info_flags) +{ +#ifdef CONFIG_MOUNT_NOTIFICATIONS + struct mount_notification n = { + .watch.type = WATCH_TYPE_MOUNT_NOTIFY, + .watch.subtype = subtype, + .watch.info = info_flags | sizeof(n), + .triggered_on = changed->mnt_id, + .changed_mount = aux ? aux->mnt_id : 0, + }; + + post_mount_notification(changed, &n); +#endif +} diff --git a/fs/mount_notify.c b/fs/mount_notify.c new file mode 100644 index 000000000000..b4905c363136 --- /dev/null +++ b/fs/mount_notify.c @@ -0,0 +1,178 @@ +/* Provide mount topology/attribute change notifications. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@xxxxxxxxxx) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/syscalls.h> +#include "mount.h" + +/* + * Post mount notifications to all watches going rootwards along the tree. + * + * Must be called with the mount_lock held. + */ +void post_mount_notification(struct mount *changed, + struct mount_notification *notify) +{ + struct path cursor; + struct mount *mnt; + unsigned seq; + + seq = 0; + rcu_read_lock(); +restart: + cursor.mnt = &changed->mnt; + cursor.dentry = changed->mnt.mnt_root; + mnt = real_mount(cursor.mnt); + notify->watch.info &= ~WATCH_INFO_IN_SUBTREE; + + read_seqbegin_or_lock(&rename_lock, &seq); + for (;;) { + if (mnt->mnt_watchers && + !hlist_empty(&mnt->mnt_watchers->watchers)) { + if (cursor.dentry->d_flags & DCACHE_MOUNT_WATCH) + post_watch_notification(mnt->mnt_watchers, + &notify->watch, + (unsigned long)cursor.dentry); + } else { + cursor.dentry = mnt->mnt.mnt_root; + } + notify->watch.info |= WATCH_INFO_IN_SUBTREE; + + if (cursor.dentry == cursor.mnt->mnt_root || + IS_ROOT(cursor.dentry)) { + struct mount *parent = READ_ONCE(mnt->mnt_parent); + + /* Escaped? */ + if (cursor.dentry != cursor.mnt->mnt_root) + break; + + /* Global root? */ + if (mnt != parent) { + cursor.dentry = READ_ONCE(mnt->mnt_mountpoint); + mnt = parent; + cursor.mnt = &mnt->mnt; + continue; + } + break; + } + + cursor.dentry = cursor.dentry->d_parent; + } + + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + + done_seqretry(&rename_lock, seq); + rcu_read_unlock(); +} + +static void release_mount_watch(struct watch_list *wlist, struct watch *watch) +{ + struct vfsmount *mnt = watch->private; + struct dentry *dentry = (struct dentry *)(unsigned long)watch->id; + + dput(dentry); + mntput(mnt); +} + +/** + * sys_mount_notify - Watch for mount topology/attribute changes + * @dfd: Base directory to pathwalk from or fd referring to mount. 
+ * @filename: Path to mount to place the watch upon + * @at_flags: Pathwalk control flags + * @watch_fd: The watch queue to send notifications to. + * @watch_id: The watch ID to be placed in the notification (-1 to remove watch) + */ +SYSCALL_DEFINE5(mount_notify, + int, dfd, + const char __user *, filename, + unsigned int, at_flags, + int, watch_fd, + int, watch_id) +{ + struct watch_queue *wqueue; + struct watch_list *wlist = NULL; + struct watch *watch; + struct mount *m; + struct path path; + int ret; + + if (watch_id < -1 || watch_id > 0xff) + return -EINVAL; + + ret = user_path_at(dfd, filename, at_flags, &path); + if (ret) + return ret; + + wqueue = get_watch_queue(watch_fd); + if (IS_ERR(wqueue)) + goto err_path; + + m = real_mount(path.mnt); + + if (watch_id >= 0) { + if (!m->mnt_watchers) { + wlist = kzalloc(sizeof(*wlist), GFP_KERNEL); + if (!wlist) + goto err_wqueue; + INIT_HLIST_HEAD(&wlist->watchers); + spin_lock_init(&wlist->lock); + wlist->release_watch = release_mount_watch; + } + + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + if (!watch) + goto err_wlist; + + init_watch(watch); + watch->id = (unsigned long)path.dentry; + watch->queue = wqueue; + watch->private = path.mnt; + watch->info_id = (u32)watch_id << 24; + + down_write(&m->mnt.mnt_sb->s_umount); + if (!m->mnt_watchers) { + m->mnt_watchers = wlist; + wlist = NULL; + } + + watch->watch_list = m->mnt_watchers; + ret = add_watch_to_object(watch); + if (ret == 0) { + spin_lock(&path.dentry->d_lock); + path.dentry->d_flags |= DCACHE_MOUNT_WATCH; + spin_unlock(&path.dentry->d_lock); + path_get(&path); + } + up_write(&m->mnt.mnt_sb->s_umount); + if (ret < 0) + kfree(watch); + } else if (m->mnt_watchers) { + down_write(&m->mnt.mnt_sb->s_umount); + ret = remove_watch_from_object(m->mnt_watchers, wqueue, + (unsigned long)path.dentry, + false); + up_write(&m->mnt.mnt_sb->s_umount); + } else { + ret = -EBADSLT; + } + +err_wlist: + kfree(wlist); +err_wqueue: + put_watch_queue(wqueue); +err_path: + 
path_put(&path); + return ret; +} diff --git a/fs/namespace.c b/fs/namespace.c index 7e7b1145d15d..d4d16111659d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -589,6 +589,9 @@ static int mnt_make_readonly(struct mount *mnt) smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; unlock_mount_hash(); + if (ret == 0) + notify_mount(mnt, NULL, notify_mount_readonly, + WATCH_INFO_FLAG_0); return ret; } @@ -597,6 +600,7 @@ static int __mnt_unmake_readonly(struct mount *mnt) lock_mount_hash(); mnt->mnt.mnt_flags &= ~MNT_READONLY; unlock_mount_hash(); + notify_mount(mnt, NULL, notify_mount_readonly, 0); return 0; } @@ -900,6 +904,7 @@ static void umount_mnt(struct mount *mnt) { /* old mountpoint will be dropped when we can do that */ mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint; + notify_mount(mnt->mnt_parent, mnt, notify_mount_unmount, 0); unhash_mnt(mnt); } @@ -1451,6 +1456,11 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) p = list_first_entry(&tmp_list, struct mount, mnt_list); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); + +#ifdef CONFIG_MOUNT_NOTIFICATIONS + if (p->mnt_watchers) + remove_watch_list(p->mnt_watchers); +#endif ns = p->mnt_ns; if (ns) { ns->mounts--; @@ -2004,11 +2014,17 @@ static int attach_recursive_mnt(struct mount *source_mnt, lock_mount_hash(); } if (parent_path) { + notify_mount(source_mnt->mnt_parent, source_mnt, + notify_mount_move_from, 0); detach_mnt(source_mnt, parent_path); + notify_mount(dest_mnt, source_mnt, notify_mount_move_to, 0); attach_mnt(source_mnt, dest_mnt, dest_mp); touch_mnt_namespace(source_mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); + notify_mount(dest_mnt, source_mnt, notify_mount_new_mount, + source_mnt->mnt.mnt_sb->s_flags & SB_SUBMOUNT ? 
+ WATCH_INFO_FLAG_0 : 0); commit_tree(source_mnt); } @@ -2361,6 +2377,7 @@ static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags) mnt->mnt.mnt_flags = mnt_flags; touch_mnt_namespace(mnt->mnt_ns); unlock_mount_hash(); + notify_mount(mnt, NULL, notify_mount_setattr, 0); } /* @@ -2767,6 +2784,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) if (!xchg(&mnt->mnt_expiry_mark, 1) || propagate_mount_busy(mnt, 1)) continue; + notify_mount(mnt, NULL, notify_mount_expiry, 0); list_move(&mnt->mnt_expire, &graveyard); } while (!list_empty(&graveyard)) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 66c6e17e61e5..b0eb68ed5b9b 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -217,6 +217,7 @@ struct dentry_operations { #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ #define DCACHE_DENTRY_CURSOR 0x20000000 +#define DCACHE_MOUNT_WATCH 0x40000000 /* There's a mount watch here */ extern seqlock_t rename_lock; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 84b653874ab8..7db37c58289a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -913,6 +913,8 @@ asmlinkage long sys_fspick(int dfd, const char __user *path, unsigned int flags) asmlinkage long sys_fsinfo(int dfd, const char __user *path, struct fsinfo_params __user *params, void __user *buffer, size_t buf_size); +asmlinkage long sys_mount_notify(int dfd, const char __user *path, + unsigned int at_flags, int watch_fd, int watch_id); /* * Architecture-specific system calls diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h index 3e0ab5fe388d..9d8e165e0065 100644 --- a/include/uapi/linux/watch_queue.h +++ b/include/uapi/linux/watch_queue.h @@ -103,4 +103,28 @@ struct key_notification { __u32 aux; /* Per-type auxiliary data */ }; +/* + * Type of mount topology change notification. 
+ */ +enum mount_notification_subtype { + notify_mount_new_mount = 0, /* New mount added */ + notify_mount_unmount = 1, /* Mount removed manually */ + notify_mount_expiry = 2, /* Automount expired */ + notify_mount_readonly = 3, /* Mount R/O state changed */ + notify_mount_setattr = 4, /* Mount attributes changed */ + notify_mount_move_from = 5, /* Mount moved from here */ + notify_mount_move_to = 6, /* Mount moved to here (compare op_id) */ +}; + +/* + * Mount topology/configuration change notification record. + * - watch.type = WATCH_TYPE_MOUNT_NOTIFY + * - watch.subtype = enum mount_notification_subtype + */ +struct mount_notification { + struct watch_notification watch; /* WATCH_TYPE_MOUNT_NOTIFY */ + __u32 triggered_on; /* The mount that the notify was on */ + __u32 changed_mount; /* The mount that got changed */ +}; + #endif /* _UAPI_LINUX_WATCH_QUEUE_H */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index df556175be50..f608777be045 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -80,6 +80,9 @@ COND_SYSCALL(ioprio_get); /* fs/locks.c */ COND_SYSCALL(flock); +/* fs/mount_notify.c */ +COND_SYSCALL(mount_notify); + /* fs/namei.c */ /* fs/namespace.c */