Add a superblock event notification facility whereby notifications about superblock events, such as I/O errors (EIO), quota limits being hit (EDQUOT) and running out of space (ENOSPC), can be reported to a monitoring process asynchronously. Note that this does not cover vfsmount topology changes; mount_notify() is used for that. Firstly, an event queue needs to be created: fd = open("/dev/watch_queue", O_RDWR); then a watch can be set up to report notifications via that queue: struct watch_notification_filter filter; memset(&filter, 0, sizeof(filter)); filter.subtype_filter[0] = ~0ULL; filter.info_id = 0x03000000; sb_notify(AT_FDCWD, "/home/dhowells", 0, fd, &filter); (Note that, as implemented in this patch, the final argument of sb_notify() is an integer watch ID in the range 0-255, or -1 to remove the watch, rather than a filter pointer.) In this case, it would let me monitor the superblock containing my own homedir for events. Note that the queue can be shared between multiple notifications of various types. [*] QUESTION: Does this want to be per-sb, per-mount_namespace, per-some-new-notify-ns or per-system? Or do multiple options make sense? [*] QUESTION: I've done it this way so that anyone could theoretically monitor the superblock of any filesystem they can pathwalk to, but do we need other security controls? [*] QUESTION: Should the LSM be able to filter the events a queue can receive? For instance the opener of the queue would grant that queue subject creds (by ->f_cred) that could be used to govern what events could be seen, assuming the target superblock has some object creds, based on, say, the mounter.
Signed-off-by: David Howells <dhowells@xxxxxxxxxx> --- arch/x86/entry/syscalls/syscall_32.tbl | 1 arch/x86/entry/syscalls/syscall_64.tbl | 1 fs/Kconfig | 12 +++ fs/super.c | 116 ++++++++++++++++++++++++++++++++ include/linux/fs.h | 77 +++++++++++++++++++++ include/linux/syscalls.h | 2 + include/uapi/linux/watch_queue.h | 26 +++++++ kernel/sys_ni.c | 3 + 8 files changed, 238 insertions(+) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 449bbcc19a6d..c9db9d51a7df 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -406,3 +406,4 @@ 392 i386 fspick sys_fspick __ia32_sys_fspick 393 i386 fsinfo sys_fsinfo __ia32_sys_fsinfo 394 i386 mount_notify sys_mount_notify __ia32_sys_mount_notify +395 i386 sb_notify sys_sb_notify __ia32_sys_sb_notify diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f25fa7ff5fb9..17869bf7788a 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -351,6 +351,7 @@ 340 common fspick __x64_sys_fspick 341 common fsinfo __x64_sys_fsinfo 342 common mount_notify __x64_sys_mount_notify +343 common sb_notify __x64_sys_sb_notify # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/fs/Kconfig b/fs/Kconfig index cbcca62d32e9..0551abf08504 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -116,6 +116,18 @@ config MOUNT_NOTIFICATIONS device to handle the notification buffer and provides the mount_notify() system call to enable/disable watchpoints. +config SB_NOTIFICATIONS + bool "Superblock event notifications" + select WATCH_QUEUE + help + This option provides support for receiving superblock event + notifications. This makes use of the /dev/watch_queue misc device to + handle the notification buffer and provides the sb_notify() system + call to enable/disable watches. 
+ + Events can include things like changing between R/W and R/O, EIO + generation, ENOSPC generation and EDQUOT generation. + source "fs/quota/Kconfig" source "fs/autofs/Kconfig" diff --git a/fs/super.c b/fs/super.c index 3fe5d12b7697..1a1cf517dbd8 100644 --- a/fs/super.c +++ b/fs/super.c @@ -37,6 +37,8 @@ #include <linux/user_namespace.h> #include <uapi/linux/mount.h> #include <linux/fs_context.h> +#include <linux/syscalls.h> +#include <linux/namei.h> #include "internal.h" static int thaw_super_locked(struct super_block *sb); @@ -320,6 +322,10 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { +#ifdef CONFIG_SB_NOTIFICATIONS + if (s->s_watchers) + remove_watch_list(s->s_watchers); +#endif cleancache_invalidate_fs(s); unregister_shrinker(&s->s_shrink); fs->kill_sb(s); @@ -997,6 +1003,8 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, /* Needs to be ordered wrt mnt_is_readonly() */ smp_wmb(); sb->s_readonly_remount = 0; + notify_sb(sb, notify_superblock_readonly, + remount_ro ? WATCH_INFO_FLAG_0 : 0); /* * Some filesystems modify their metadata via some other path than the @@ -1810,3 +1818,111 @@ int vfs_get_tree(struct fs_context *fc) return ret; } EXPORT_SYMBOL(vfs_get_tree); + +#ifdef CONFIG_SB_NOTIFICATIONS +/* + * Post superblock notifications. + */ +void post_sb_notification(struct super_block *s, struct superblock_notification *n) +{ + post_watch_notification(s->s_watchers, &n->watch, s->s_watch_id); +} + +static void release_sb_watch(struct watch_list *wlist, struct watch *watch) +{ + struct super_block *s = watch->private; + + put_super(s); +} + +/** + * sys_sb_notify - Watch for superblock events. + * @dfd: Base directory to pathwalk from or fd referring to superblock. + * @filename: Path to superblock to place the watch upon + * @at_flags: Pathwalk control flags + * @watch_fd: The watch queue to send notifications to. 
+ * @watch_id: The watch ID to be placed in the notification (-1 to remove watch) + */ +SYSCALL_DEFINE5(sb_notify, + int, dfd, + const char __user *, filename, + unsigned int, at_flags, + int, watch_fd, + int, watch_id) +{ + struct watch_queue *wqueue; + struct super_block *s; + struct watch_list *wlist = NULL; + struct watch *watch; + struct path path; + int ret; + + if (watch_id < -1 || watch_id > 0xff) + return -EINVAL; + + ret = user_path_at(dfd, filename, at_flags, &path); + if (ret) + return ret; + + wqueue = get_watch_queue(watch_fd); + if (IS_ERR(wqueue)) + goto err_path; + + s = path.dentry->d_sb; + if (watch_id >= 0) { + if (!s->s_watchers) { + wlist = kzalloc(sizeof(*wlist), GFP_KERNEL); + if (!wlist) + goto err_wqueue; + INIT_HLIST_HEAD(&wlist->watchers); + spin_lock_init(&wlist->lock); + wlist->release_watch = release_sb_watch; + } + + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + if (!watch) + goto err_wlist; + + init_watch(watch); + watch->id = s->s_watch_id; + watch->queue = wqueue; + watch->private = s; + watch->info_id = (u32)watch_id << 24; + + down_write(&s->s_umount); + ret = -EIO; + if (atomic_read(&s->s_active)) { + if (!s->s_watchers) { + s->s_watchers = wlist; + wlist = NULL; + } + + watch->watch_list = s->s_watchers; + ret = add_watch_to_object(watch); + if (ret == 0) { + spin_lock(&sb_lock); + s->s_count++; + spin_unlock(&sb_lock); + } + } + up_write(&s->s_umount); + if (ret < 0) + kfree(watch); + } else if (s->s_watchers) { + down_write(&s->s_umount); + ret = remove_watch_from_object(s->s_watchers, wqueue, + s->s_watch_id, false); + up_write(&s->s_umount); + } else { + ret = -EBADSLT; + } + +err_wlist: + kfree(wlist); +err_wqueue: + put_watch_queue(wqueue); +err_path: + path_put(&path); + return ret; +} +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index bcbe94c0dfe8..6dbc4f9aa6c3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -37,6 +37,7 @@ #include <linux/uuid.h> #include <linux/errseq.h> #include 
<linux/ioprio.h> +#include <linux/watch_queue.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -1463,6 +1464,12 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ + + /* Superblock event notifications */ +#ifdef CONFIG_SB_NOTIFICATIONS + struct watch_list *s_watchers; + u64 s_watch_id; +#endif } __randomize_layout; /* Helper functions so that in most cases filesystems will @@ -3458,4 +3465,74 @@ static inline bool dir_relax_shared(struct inode *inode) extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); +extern void post_sb_notification(struct super_block *, struct superblock_notification *); + +/** + * notify_sb: Post simple superblock notification. + * @s: The superblock the notification is about. + * @subtype: The type of notification. + */ +static inline void notify_sb(struct super_block *s, + enum superblock_notification_type subtype, + u32 info) +{ +#ifdef CONFIG_SB_NOTIFICATIONS + if (unlikely(s->s_watchers)) { + struct superblock_notification n = { + .watch.type = WATCH_TYPE_SB_NOTIFY, + .watch.subtype = subtype, + .watch.info = sizeof(n) | info, + .sb_id = s->s_watch_id, + }; + + post_sb_notification(s, &n); + } + +#endif +} + +/** + * sb_error: Post superblock error notification. + * @s: The superblock the notification is about. + */ +static inline int sb_error(struct super_block *s, int error) +{ +#ifdef CONFIG_SB_NOTIFICATIONS + if (unlikely(s->s_watchers)) { + struct superblock_error_notification n = { + .s.watch.type = WATCH_TYPE_SB_NOTIFY, + .s.watch.subtype = notify_superblock_error, + .s.watch.info = sizeof(n), + .s.sb_id = s->s_watch_id, + .error_number = error, + .error_cookie = 0, + }; + + post_sb_notification(s, &n.s); + } +#endif + return error; +} + +/** + * sb_EDQUOT: Post superblock quota overrun notification. + * @s: The superblock the notification is about. 
+ */ +static inline int sb_EQDUOT(struct super_block *s) +{ +#ifdef CONFIG_SB_NOTIFICATIONS + if (unlikely(s->s_watchers)) { + struct superblock_notification n = { + .watch.type = WATCH_TYPE_SB_NOTIFY, + .watch.subtype = notify_superblock_edquot, + .watch.info = sizeof(n), + .sb_id = s->s_watch_id, + }; + + post_sb_notification(s, &n); + } +#endif + return -EDQUOT; +} + #endif /* _LINUX_FS_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7db37c58289a..4d852f218949 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -915,6 +915,8 @@ asmlinkage long sys_fsinfo(int dfd, const char __user *path, void __user *buffer, size_t buf_size); asmlinkage long sys_mount_notify(int dfd, const char __user *path, unsigned int at_flags, int watch_fd, int watch_id); +asmlinkage long sys_sb_notify(int dfd, const char __user *path, + unsigned int at_flags, int watch_fd, int watch_id); /* * Architecture-specific system calls diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h index 9d8e165e0065..40a3f809c73c 100644 --- a/include/uapi/linux/watch_queue.h +++ b/include/uapi/linux/watch_queue.h @@ -127,4 +127,30 @@ struct mount_notification { __u32 changed_mount; /* The mount that got changed */ }; +/* + * Type of superblock notification. + */ +enum superblock_notification_type { + notify_superblock_readonly = 0, /* Filesystem toggled between R/O and R/W */ + notify_superblock_error = 1, /* Error in filesystem or blockdev */ + notify_superblock_edquot = 2, /* EDQUOT notification */ + notify_superblock_network = 3, /* Network status change */ +}; + +/* + * Superblock notification record. 
+ * - watch.type = WATCH_TYPE_SB_NOTIFY + * - watch.subtype = enum superblock_notification_type + */ +struct superblock_notification { + struct watch_notification watch; /* WATCH_TYPE_SB_NOTIFY */ + __u64 sb_id; /* 64-bit superblock ID [fsinfo_ids::f_sb_id] */ +}; + +struct superblock_error_notification { + struct superblock_notification s; /* subtype = notify_superblock_error */ + __u32 error_number; + __u32 error_cookie; +}; + #endif /* _UAPI_LINUX_WATCH_QUEUE_H */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index f608777be045..3b5aacb8a5a0 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -100,6 +100,9 @@ COND_SYSCALL(quotactl); /* fs/read_write.c */ +/* fs/sb_notify.c */ +COND_SYSCALL(sb_notify); + /* fs/sendfile.c */ /* fs/select.c */