Add notifications for attaching and detaching mounts. The following new event masks are added: FAN_MNT_ATTACH - Mount was attached FAN_MNT_DETACH - Mount was detached If a mount is moved, then the event is reported with (FAN_MNT_ATTACH | FAN_MNT_DETACH). These events add an info record of type FAN_EVENT_INFO_TYPE_MNT containing these fields identifying the affected mounts: __u64 mnt_id - the ID of the mount (see statmount(2)) FAN_REPORT_MNT must be supplied to fanotify_init() to receive these events and no other type of event can be received with this report type. Marks are added with FAN_MARK_MNTNS, which records the mount namespace belonging to the supplied path. Prior to this patch mount namespace changes could be monitored by polling /proc/self/mountinfo, which did not convey any information about what changed. Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx> --- v3: - use a global list protected for temporarily storing (Christian) - move fsnotify_* calls to namespace_unlock() (Christian) - downgrade namespace_sem to read for fsnotify_* calls (Christian) - add notification for reparenting in propagate_umount (Christian) - require nsfs file (/proc/PID/ns/mnt) in fanotify_mark(2) (Christian) - cleaner check for fsnotify being initialized (Amir) - fix stub __fsnotify_mntns_delete (kernel test robot) - don't add FANOTIFY_MOUNT_EVENTS to FANOTIFY_FD_EVENTS (Amir) v2: - notify for whole namespace as this seems to be what people prefer - move fsnotify() calls outside of mount_lock - only report mnt_id, not parent_id fs/mount.h | 21 ++++++++++ fs/namespace.c | 62 ++++++++++++++++++++++++++--- fs/notify/fanotify/fanotify.c | 37 +++++++++++++++-- fs/notify/fanotify/fanotify.h | 18 +++++++++ fs/notify/fanotify/fanotify_user.c | 64 ++++++++++++++++++++++++++++-- fs/notify/fdinfo.c | 2 + fs/notify/fsnotify.c | 47 ++++++++++++++++++---- fs/notify/fsnotify.h | 11 +++++ fs/notify/mark.c | 14 +++++-- fs/pnode.c | 4 +- include/linux/fanotify.h | 12 ++++-- include/linux/fsnotify.h | 20 ++++++++++ include/linux/fsnotify_backend.h | 40 ++++++++++++++++++- include/linux/mnt_namespace.h | 1 + include/uapi/linux/fanotify.h | 10 +++++ security/selinux/hooks.c | 4 ++ 16 files changed, 340 insertions(+), 27 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index 185fc56afc13..0ad68c90e7e2 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -5,6 +5,8 @@ #include <linux/ns_common.h> #include <linux/fs_pin.h> +extern struct list_head notify_list; + struct mnt_namespace { struct ns_common ns; struct mount * root; @@ -14,6 +16,10 @@ struct mnt_namespace { u64 seq; /* Sequence number to prevent loops */ wait_queue_head_t poll; u64 event; +#ifdef CONFIG_FSNOTIFY + __u32 n_fsnotify_mask; + struct fsnotify_mark_connector __rcu *n_fsnotify_marks; +#endif unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ @@ -77,6 +83,9 @@ struct mount { int mnt_expiry_mark; /* true if marked for expiry */ struct hlist_head mnt_pins; struct hlist_head mnt_stuck_children; + + struct list_head to_notify; /* need to queue notification */ + struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */ } __randomize_layout; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ @@ -167,3 +176,15 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns) { return container_of(ns, struct mnt_namespace, ns); } + +static inline void add_notify(struct mount *m) +{ + /* Optimize the case where there are no watches */ + if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) || + (m->prev_ns && m->prev_ns->n_fsnotify_marks)) + list_add_tail(&m->to_notify, ¬ify_list); + else + m->prev_ns = m->mnt_ns; +} + +struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry); diff --git a/fs/namespace.c b/fs/namespace.c index 6eec7794f707..186f660abd60 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -79,6 +79,7 @@ static struct kmem_cache *mnt_cache __ro_after_init; static DECLARE_RWSEM(namespace_sem); static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ +LIST_HEAD(notify_list); /* protected by namespace_sem */ static DEFINE_RWLOCK(mnt_ns_tree_lock); static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */ @@ -145,6 +146,7 @@ static void mnt_ns_release(struct mnt_namespace *ns) /* keep alive for {list,stat}mount() */ if (refcount_dec_and_test(&ns->passive)) { + fsnotify_mntns_delete(ns); put_user_ns(ns->user_ns); kfree(ns); } @@ -1136,6 +1138,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) rb_link_node(&mnt->mnt_node, parent, link); rb_insert_color(&mnt->mnt_node, &ns->mounts); mnt->mnt.mnt_flags |= MNT_ONRB; + + add_notify(mnt); } /* @@ -1683,17 +1687,53 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); +static void notify_mount(struct mount *p) +{ + if (!p->prev_ns && p->mnt_ns) { + fsnotify_mnt_attach(p->mnt_ns, &p->mnt); + } else if (p->prev_ns && !p->mnt_ns) { + fsnotify_mnt_detach(p->prev_ns, &p->mnt); + } else if (p->prev_ns == p->mnt_ns) { + fsnotify_mnt_move(p->mnt_ns, &p->mnt); + } else { + fsnotify_mnt_detach(p->prev_ns, &p->mnt); + fsnotify_mnt_attach(p->mnt_ns, &p->mnt); + } + p->prev_ns = p->mnt_ns; +} + static void namespace_unlock(void) { struct hlist_head head; struct hlist_node *p; - struct mount *m; + struct mount *m, *tmp; LIST_HEAD(list); + bool notify = !list_empty(¬ify_list); hlist_move_list(&unmounted, &head); list_splice_init(&ex_mountpoints, &list); - up_write(&namespace_sem); + /* + * No point blocking out concurrent readers while notifications are + * sent. This will also allow statmount()/listmount() to run + * concurrently. + */ + if (unlikely(notify)) + downgrade_write(&namespace_sem); + + /* + * Notify about mounts that were added/reparented/remain connected after + * unmount. + */ + list_for_each_entry_safe(m, tmp, ¬ify_list, to_notify) { + notify_mount(m); + list_del_init(&m->to_notify); + } + + if (unlikely(notify)) + up_read(&namespace_sem); + else + up_write(&namespace_sem); shrink_dentry_list(&list); @@ -1806,6 +1846,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) change_mnt_propagation(p, MS_PRIVATE); if (disconnect) hlist_add_head(&p->mnt_umount, &unmounted); + add_notify(p); } } @@ -2103,16 +2144,24 @@ struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mntns, bool pre } } +struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry) +{ + if (!is_mnt_ns_file(dentry)) + return NULL; + + return to_mnt_ns(get_proc_ns(dentry->d_inode)); +} + static bool mnt_ns_loop(struct dentry *dentry) { /* Could bind mounting the mount namespace inode cause a * mount namespace loop? */ - struct mnt_namespace *mnt_ns; - if (!is_mnt_ns_file(dentry)) + struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry); + + if (!mnt_ns) return false; - mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } @@ -2505,6 +2554,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, dest_mp = smp; unhash_mnt(source_mnt); attach_mnt(source_mnt, top_mnt, dest_mp, beneath); + add_notify(source_mnt); touch_mnt_namespace(source_mnt->mnt_ns); } else { if (source_mnt->mnt_ns) { @@ -4420,6 +4470,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, list_del_init(&new_mnt->mnt_expire); put_mountpoint(root_mp); unlock_mount_hash(); + add_notify(root_mnt); + add_notify(new_mnt); chroot_fs_refs(&root, &new); error = 0; out4: diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 24c7c5df4998..a9dc004291bf 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old, case FANOTIFY_EVENT_TYPE_FS_ERROR: return fanotify_error_event_equal(FANOTIFY_EE(old), FANOTIFY_EE(new)); + case FANOTIFY_EVENT_TYPE_MNT: + return false; default: WARN_ON_ONCE(1); } @@ -303,7 +305,11 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", __func__, iter_info->report_mask, event_mask, data, data_type); - if (!fid_mode) { + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) + { + if (data_type != FSNOTIFY_EVENT_MNT) + return 0; + } else if (!fid_mode) { /* Do we have path to open a file descriptor? */ if (!path) return 0; @@ -548,6 +554,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, return &pevent->fae; } +static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp) +{ + struct fanotify_mnt_event *pevent; + + pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp); + if (!pevent) + return NULL; + + pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT; + pevent->mnt_id = mnt_id; + + return &pevent->fae; +} + static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, gfp_t gfp) { @@ -715,6 +735,7 @@ static struct fanotify_event *fanotify_alloc_event( fid_mode); struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); const struct path *path = fsnotify_data_path(data, data_type); + u64 mnt_id = fsnotify_data_mnt_id(data, data_type); struct mem_cgroup *old_memcg; struct dentry *moved = NULL; struct inode *child = NULL; @@ -810,8 +831,10 @@ static struct fanotify_event *fanotify_alloc_event( moved, &hash, gfp); } else if (fid_mode) { event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); - } else { + } else if (path) { event = fanotify_alloc_path_event(path, &hash, gfp); + } else /* if (mnt_id) */ { + event = fanotify_alloc_mnt_event(mnt_id, gfp); } if (!event) @@ -910,7 +933,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); BUILD_BUG_ON(FAN_RENAME != FS_RENAME); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 23); mask = fanotify_group_event_mask(group, iter_info, &match_mask, mask, data, data_type, dir); @@ -1011,6 +1034,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group, mempool_free(fee, &group->fanotify_data.error_events_pool); } +static void fanotify_free_mnt_event(struct fanotify_event *event) +{ + kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event)); +} + static void fanotify_free_event(struct fsnotify_group *group, struct fsnotify_event *fsn_event) { @@ -1037,6 +1065,9 @@ static void fanotify_free_event(struct fsnotify_group *group, case FANOTIFY_EVENT_TYPE_FS_ERROR: fanotify_free_error_event(group, event); break; + case FANOTIFY_EVENT_TYPE_MNT: + fanotify_free_mnt_event(event); + break; default: WARN_ON_ONCE(1); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index e5ab33cae6a7..f1a7cbedc9e3 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_fid_event_cachep; extern struct kmem_cache *fanotify_path_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; +extern struct kmem_cache *fanotify_mnt_event_cachep; /* Possible states of the permission event */ enum { @@ -244,6 +245,7 @@ enum fanotify_event_type { FANOTIFY_EVENT_TYPE_PATH_PERM, FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */ + FANOTIFY_EVENT_TYPE_MNT, __FANOTIFY_EVENT_TYPE_NUM }; @@ -409,12 +411,23 @@ struct fanotify_path_event { struct path path; }; +struct fanotify_mnt_event { + struct fanotify_event fae; + u64 mnt_id; +}; + static inline struct fanotify_path_event * FANOTIFY_PE(struct fanotify_event *event) { return container_of(event, struct fanotify_path_event, fae); } +static inline struct fanotify_mnt_event * +FANOTIFY_ME(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_mnt_event, fae); +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the @@ -456,6 +469,11 @@ static inline bool fanotify_is_error_event(u32 mask) return mask & FAN_FS_ERROR; } +static inline bool fanotify_is_mnt_event(u32 mask) +{ + return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH); +} + static inline const struct path *fanotify_event_path(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_PATH) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2d85c71717d6..1e111730b6bf 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -19,6 +19,7 @@ #include <linux/memcontrol.h> #include <linux/statfs.h> #include <linux/exportfs.h> +#include <linux/mnt_namespace.h> #include <asm/ioctls.h> @@ -114,6 +115,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init; struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; struct kmem_cache *fanotify_path_event_cachep __ro_after_init; struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; +struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init; #define FANOTIFY_EVENT_ALIGN 4 #define FANOTIFY_FID_INFO_HDR_LEN \ @@ -122,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; sizeof(struct fanotify_event_info_pidfd) #define FANOTIFY_ERROR_INFO_LEN \ (sizeof(struct fanotify_event_info_error)) +#define FANOTIFY_MNT_INFO_LEN \ + (sizeof(struct fanotify_event_info_mnt)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -183,6 +187,8 @@ static size_t fanotify_event_len(unsigned int info_mode, fh_len = fanotify_event_object_fh_len(event); event_len += fanotify_fid_info_len(fh_len, dot_len); } + if (fanotify_is_mnt_event(event->mask)) + event_len += FANOTIFY_MNT_INFO_LEN; return event_len; } @@ -380,6 +386,25 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } +static size_t copy_mnt_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_event_info_mnt info = { }; + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT; + info.hdr.len = FANOTIFY_MNT_INFO_LEN; + + if (WARN_ON(count < info.hdr.len)) + return -EFAULT; + + info.mnt_id = FANOTIFY_ME(event)->mnt_id; + + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + return info.hdr.len; +} + static size_t copy_error_info_to_user(struct fanotify_event *event, char __user *buf, int count) { @@ -642,6 +667,14 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_is_mnt_event(event->mask)) { + ret = copy_mnt_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } return total_bytes; } @@ -1449,6 +1482,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) return -EINVAL; + /* FIXME: check FAN_REPORT_MNT compatibility with other flags */ + switch (event_f_flags & O_ACCMODE) { case O_RDONLY: case O_RDWR: @@ -1718,6 +1753,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, case FAN_MARK_FILESYSTEM: obj_type = FSNOTIFY_OBJ_TYPE_SB; break; + case FAN_MARK_MNTNS: + obj_type = FSNOTIFY_OBJ_TYPE_MNTNS; + break; default: return -EINVAL; } @@ -1765,6 +1803,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; group = fd_file(f)->private_data; + /* Only report mount events on mnt namespace */ + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { + if (mask & ~FANOTIFY_MOUNT_EVENTS) + return -EINVAL; + if (mark_type != FAN_MARK_MNTNS) + return -EINVAL; + } else { + if (mask & FANOTIFY_MOUNT_EVENTS) + return -EINVAL; + if (mark_type == FAN_MARK_MNTNS) + return -EINVAL; + } + /* * An unprivileged user is not allowed to setup mount nor filesystem * marks. This also includes setting up such marks by a group that @@ -1802,7 +1853,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * point. */ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); - if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && + if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) && (!fid_mode || mark_type == FAN_MARK_MOUNT)) return -EINVAL; @@ -1855,8 +1906,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, mnt = path.mnt; if (mark_type == FAN_MARK_MOUNT) obj = mnt; - else + else if (mark_type == FAN_MARK_FILESYSTEM) obj = mnt->mnt_sb; + else /* if (mark_type == FAN_MARK_MNTNS) */ { + obj = mnt_ns_from_dentry(path.dentry); + ret = -EINVAL; + if (!obj) + goto path_put_and_out; + } } /* @@ -1952,7 +2009,7 @@ static int __init fanotify_user_setup(void) FANOTIFY_DEFAULT_MAX_USER_MARKS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); fanotify_mark_cache = KMEM_CACHE(fanotify_mark, @@ -1965,6 +2022,7 @@ static int __init fanotify_user_setup(void) fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } + fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC); fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index dec553034027..505aabd62abb 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -123,6 +123,8 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", sb->s_dev, mflags, mark->mask, mark->ignore_mask); + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) { + /* FIXME: print info for mntns */ } } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index f976949d2634..2b2c3fd907c7 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } +void __fsnotify_mntns_delete(struct mnt_namespace *mntns) +{ + fsnotify_clear_marks_by_mntns(mntns); +} + /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. * @sb: superblock being unmounted. @@ -402,7 +407,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type, file_name, cookie, iter_info); } -static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) +static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp) { struct fsnotify_mark_connector *conn; struct hlist_node *node = NULL; @@ -520,14 +525,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); - struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); + struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL; struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; struct dentry *moved; int inode2_type; int ret = 0; - __u32 test_mask, marks_mask; + __u32 test_mask, marks_mask = 0; if (path) mnt = real_mount(path->mnt); @@ -560,17 +566,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if ((!sbinfo || !sbinfo->sb_marks) && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && - (!inode2 || !inode2->i_fsnotify_marks)) + (!inode2 || !inode2->i_fsnotify_marks) && + (!mnt_data || !mnt_data->ns->n_fsnotify_marks)) return 0; - marks_mask = READ_ONCE(sb->s_fsnotify_mask); + if (sb) + marks_mask |= READ_ONCE(sb->s_fsnotify_mask); if (mnt) marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask); if (inode) marks_mask |= READ_ONCE(inode->i_fsnotify_mask); if (inode2) marks_mask |= READ_ONCE(inode2->i_fsnotify_mask); - + if (mnt_data) + marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask); /* * If this is a modify event we may need to clear some ignore masks. @@ -600,6 +609,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.marks[inode2_type] = fsnotify_first_mark(&inode2->i_fsnotify_marks); } + if (mnt_data) { + iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] = + fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks); + } /* * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark @@ -623,11 +636,31 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, } EXPORT_SYMBOL_GPL(fsnotify); +void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) +{ + struct fsnotify_mnt data = { + .ns = ns, + .mnt_id = real_mount(mnt)->mnt_id_unique, + }; + + if (WARN_ON_ONCE(!ns)) + return; + + /* + * This is an optimization as well as making sure fsnotify_init() has + * been called. + */ + if (!ns->n_fsnotify_marks) + return; + + fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0); +} + static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 663759ed6fbc..5950c7a67f41 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -33,6 +33,12 @@ static inline struct super_block *fsnotify_conn_sb( return conn->obj; } +static inline struct mnt_namespace *fsnotify_conn_mntns( + struct fsnotify_mark_connector *conn) +{ + return conn->obj; +} + static inline struct super_block *fsnotify_object_sb(void *obj, enum fsnotify_obj_type obj_type) { @@ -89,6 +95,11 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) fsnotify_destroy_marks(fsnotify_sb_marks(sb)); } +static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns) +{ + fsnotify_destroy_marks(&mntns->n_fsnotify_marks); +} + /* * update the dentry->d_flags of all of inode's children to indicate if inode cares * about events that happen to its children. diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 4981439e6209..798340db69d7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -107,6 +107,8 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, return &real_mount(obj)->mnt_fsnotify_marks; case FSNOTIFY_OBJ_TYPE_SB: return fsnotify_sb_marks(obj); + case FSNOTIFY_OBJ_TYPE_MNTNS: + return &((struct mnt_namespace *)obj)->n_fsnotify_marks; default: return NULL; } @@ -120,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) return &fsnotify_conn_sb(conn)->s_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) + return &fsnotify_conn_mntns(conn)->n_fsnotify_mask; return NULL; } @@ -346,12 +350,15 @@ static void *fsnotify_detach_connector_from_object( fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) { + fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0; } rcu_assign_pointer(*connp, NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; - fsnotify_update_sb_watchers(sb, conn); + if (sb) + fsnotify_update_sb_watchers(sb, conn); return inode; } @@ -724,7 +731,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, * Attach the sb info before attaching a connector to any object on sb. * The sb info will remain attached as long as sb lives. */ - if (!fsnotify_sb_info(sb)) { + if (sb && !fsnotify_sb_info(sb)) { err = fsnotify_attach_info_to_sb(sb); if (err) return err; @@ -770,7 +777,8 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: - fsnotify_update_sb_watchers(sb, conn); + if (sb) + fsnotify_update_sb_watchers(sb, conn); /* * Since connector is attached to object using cmpxchg() we are * guaranteed that connector initialization is fully visible by anyone diff --git a/fs/pnode.c b/fs/pnode.c index a799e0315cc9..0c99de65724e 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -549,8 +549,10 @@ static void restore_mounts(struct list_head *to_restore) mp = parent->mnt_mp; parent = parent->mnt_parent; } - if (parent != mnt->mnt_parent) + if (parent != mnt->mnt_parent) { mnt_change_mountpoint(parent, mp, mnt); + add_notify(mnt); + } } } diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 89ff45bd6f01..d8699c165d94 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,7 @@ #define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) -#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) +#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT) /* * fanotify_init() flags that require CAP_SYS_ADMIN. @@ -38,7 +38,8 @@ FAN_REPORT_PIDFD | \ FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ - FAN_UNLIMITED_MARKS) + FAN_UNLIMITED_MARKS | \ + FAN_REPORT_MNT) /* * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. @@ -58,7 +59,7 @@ #define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV) #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ - FAN_MARK_FILESYSTEM) + FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS) #define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ FAN_MARK_FLUSH) @@ -99,10 +100,13 @@ /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */ #define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR) +#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH) + /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ FANOTIFY_INODE_EVENTS | \ - FANOTIFY_ERROR_EVENTS) + FANOTIFY_ERROR_EVENTS | \ + FANOTIFY_MOUNT_EVENTS ) /* Events that require a permission response from user */ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 278620e063ab..ea998551dd0d 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -255,6 +255,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) __fsnotify_vfsmount_delete(mnt); } +static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns) +{ + __fsnotify_mntns_delete(mntns); +} + /* * fsnotify_inoderemove - an inode is going away */ @@ -463,4 +468,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, NULL, NULL, NULL, 0); } +static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_ATTACH, ns, mnt); +} + +static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_DETACH, ns, mnt); +} + +static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_MOVE, ns, mnt); +} + #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 3ecf7768e577..b84ac1ed9721 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -56,6 +56,10 @@ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +#define FS_MNT_ATTACH 0x00100000 /* Mount was attached */ +#define FS_MNT_DETACH 0x00200000 /* Mount was detached */ +#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH) + /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. @@ -102,7 +106,7 @@ FS_EVENTS_POSS_ON_CHILD | \ FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_ERROR) + FS_ERROR | FS_MNT_ATTACH | FS_MNT_DETACH) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT) @@ -288,6 +292,7 @@ enum fsnotify_data_type { FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, FSNOTIFY_EVENT_DENTRY, + FSNOTIFY_EVENT_MNT, FSNOTIFY_EVENT_ERROR, }; @@ -297,6 +302,11 @@ struct fs_error_report { struct super_block *sb; }; +struct fsnotify_mnt { + const struct mnt_namespace *ns; + u64 mnt_id; +}; + static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { @@ -354,6 +364,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data, } } +static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_MNT: + return data; + default: + return NULL; + } +} + +static inline u64 fsnotify_data_mnt_id(const void *data, int data_type) +{ + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); + + return mnt_data ? mnt_data->mnt_id : 0; +} + static inline struct fs_error_report *fsnotify_data_error_report( const void *data, int data_type) @@ -379,6 +407,7 @@ enum fsnotify_iter_type { FSNOTIFY_ITER_TYPE_SB, FSNOTIFY_ITER_TYPE_PARENT, FSNOTIFY_ITER_TYPE_INODE2, + FSNOTIFY_ITER_TYPE_MNTNS, FSNOTIFY_ITER_TYPE_COUNT }; @@ -388,6 +417,7 @@ enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, + FSNOTIFY_OBJ_TYPE_MNTNS, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; @@ -572,8 +602,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); +extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns); extern void fsnotify_sb_free(struct super_block *sb); extern u32 fsnotify_get_cookie(void); +extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) { @@ -879,6 +911,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void fsnotify_sb_delete(struct super_block *sb) {} +static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns) +{} + static inline void fsnotify_sb_free(struct super_block *sb) {} @@ -893,6 +928,9 @@ static inline u32 fsnotify_get_cookie(void) static inline void fsnotify_unmount_inodes(struct super_block *sb) {} +static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 70b366b64816..05bc01c0f38a 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -10,6 +10,7 @@ struct mnt_namespace; struct fs_struct; struct user_namespace; struct ns_common; +struct vfsmount; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 34f221d3a1b9..332ef8532390 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -25,6 +25,8 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +#define FAN_MNT_ATTACH 0x00100000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x00200000 /* Mount was detached */ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ @@ -61,6 +63,7 @@ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ #define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) @@ -91,6 +94,7 @@ #define FAN_MARK_INODE 0x00000000 #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 /* * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY @@ -143,6 +147,7 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_DFID 3 #define FAN_EVENT_INFO_TYPE_PIDFD 4 #define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_MNT 6 /* Special info types for FAN_RENAME */ #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 @@ -189,6 +194,11 @@ struct fanotify_event_info_error { __u32 error_count; }; +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + /* * User space may need to record additional information about its decision. * The extra information type records what kind of information is included. diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 366c87a40bd1..aab8972a3124 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3395,6 +3395,10 @@ static int selinux_path_notify(const struct path *path, u64 mask, case FSNOTIFY_OBJ_TYPE_INODE: perm = FILE__WATCH; break; + case FSNOTIFY_OBJ_TYPE_MNTNS: + /* FIXME: Is this correct??? */ + perm = FILE__WATCH; + break; default: return -EINVAL; } -- 2.47.0