Re: [RFC PATCH] fanotify: notify on mount attach and detach

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Nov 28, 2024 at 03:39:59PM +0100, Miklos Szeredi wrote:
> Add notifications for attaching and detaching mounts.  The following new
> event masks are added:
> 
>   FAN_MNT_ATTACH  - Mount was attached
>   FAN_MNT_DETACH  - Mount was detached
> 
> These events add an info record of type FAN_EVENT_INFO_TYPE_MNT containing
> these fields identifying the affected mounts:
> 
>   __u64 mnt_id    - the attached or detached mount
>   __u64 parent_id - where the mount was attached to or detached from
> 
> The mountpoint object (file or directory) is also contained in the event
> either as an FD or a FID.
> 
> Adding marks with FAN_MARK_MOUNT and FAN_MARK_FILESYSTEM both work.
> 
> FAN_MARK_INODE doesn't, not sure why.  I think it might make sense to make
> it work, i.e. allow watching a single mountpoint.
> 
> Prior to this patch mount namespace changes could be monitored by polling
> /proc/self/mountinfo, which did not convey any information about what
> changed.
> 
> To monitor an entire mount namespace with this new interface, watches need
> to be added to all existing mounts.  This can be done by performing
> listmount()/statmount() recursively at startup and when a new mount is
> added.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
> ---
>  fs/namespace.c                     | 23 +++++++++++++++++
>  fs/notify/fanotify/fanotify.c      | 10 +++++++-
>  fs/notify/fanotify/fanotify.h      |  7 +++++
>  fs/notify/fanotify/fanotify_user.c | 41 ++++++++++++++++++++++++++++++
>  fs/notify/fsnotify.c               |  2 +-
>  include/linux/fanotify.h           |  7 +++--
>  include/linux/fsnotify.h           | 18 +++++++++++++
>  include/linux/fsnotify_backend.h   | 30 +++++++++++++++++++++-
>  include/uapi/linux/fanotify.h      |  9 +++++++
>  9 files changed, 142 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 6b0a17487d0f..7724c78df945 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -988,12 +988,24 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
>  	}
>  }
>  
> +static inline void __fsnotify_mnt_detach(struct mount *mnt)
> +{
> +	struct path mountpoint = {
> +		.mnt = &mnt->mnt_parent->mnt,
> +		.dentry = mnt->mnt_mountpoint,
> +	};
> +	fsnotify_mnt_detach(&mountpoint, &mnt->mnt);
> +}
> +
>  /*
>   * vfsmount lock must be held for write
>   */
>  static struct mountpoint *unhash_mnt(struct mount *mnt)
>  {
>  	struct mountpoint *mp;
> +
> +	__fsnotify_mnt_detach(mnt);
> +
>  	mnt->mnt_parent = mnt;
>  	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
>  	list_del_init(&mnt->mnt_child);
> @@ -1027,6 +1039,15 @@ void mnt_set_mountpoint(struct mount *mnt,
>  	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
>  }
>  
> +static inline void __fsnotify_mnt_attach(struct mount *mnt)
> +{
> +	struct path mountpoint = {
> +		.mnt = &mnt->mnt_parent->mnt,
> +		.dentry = mnt->mnt_mountpoint,
> +	};
> +	fsnotify_mnt_attach(&mountpoint, &mnt->mnt);
> +}
> +
>  /**
>   * mnt_set_mountpoint_beneath - mount a mount beneath another one
>   *
> @@ -1059,6 +1080,8 @@ static void __attach_mnt(struct mount *mnt, struct mount *parent)
>  	hlist_add_head_rcu(&mnt->mnt_hash,
>  			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
>  	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
> +
> +	__fsnotify_mnt_attach(mnt);
>  }

It was already mentioned in the thread but unhash_mnt() and
__attach_mnt() are called under lock_mnt_hash() which is problematic.

I do like that we seem to be getting away without a lot of aweful custom
machinery in the mount specific code. So that seems promising to me.

You shouldn't need to call that under lock_mount_hash(). Calling that
under namespace_sem should be perfectly fine as that needs to be held to
add or remove mounts into any namespace anyway.

One possibility would be to use the real_mount(mnt)->mnt_fsnotify_marks
struct and add a list_head or similar in there and link all new mounts
in attach_recursive_mnt() to a list. So basically:

diff --git a/fs/namespace.c b/fs/namespace.c
index dc789f2751d7..b4a150f76bf6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2475,6 +2475,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 {
        struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
        HLIST_HEAD(tree_list);
+       HLIST_HEAD(fsnotify_list);
        struct mnt_namespace *ns = top_mnt->mnt_ns;
        struct mountpoint *smp;
        struct mount *child, *dest_mnt, *p;
@@ -2508,6 +2509,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                        goto out;
                err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
        }
+
+       // Add all mounts from tree_list to fsnotify_list.
+
        lock_mount_hash();
        if (err)
                goto out_cleanup_ids;
@@ -2539,6 +2543,8 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                commit_tree(source_mnt);
        }

+       // Now add "main" mount to the beginning of the list.
+
        hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
                struct mount *q;
                hlist_del_init(&child->mnt_hash);
@@ -2555,6 +2561,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
        put_mountpoint(smp);
        unlock_mount_hash();

+       // We know that namespace_sem is held, walk through fsnotify_list and
+       // send mount notifications.
+
        return 0;

>  /**
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 24c7c5df4998..9a93c9b58bd4 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -16,6 +16,7 @@
>  #include <linux/stringhash.h>
>  
>  #include "fanotify.h"
> +#include "../../mount.h"
>  
>  static bool fanotify_path_equal(const struct path *p1, const struct path *p2)
>  {
> @@ -715,6 +716,9 @@ static struct fanotify_event *fanotify_alloc_event(
>  					      fid_mode);
>  	struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
>  	const struct path *path = fsnotify_data_path(data, data_type);
> +	struct vfsmount *mnt = fsnotify_data_mnt(data, data_type);
> +	u64 mnt_id = mnt ? real_mount(mnt)->mnt_id_unique : 0;
> +	u64 parent_id = path ? real_mount(path->mnt)->mnt_id_unique : 0;
>  	struct mem_cgroup *old_memcg;
>  	struct dentry *moved = NULL;
>  	struct inode *child = NULL;
> @@ -824,8 +828,12 @@ static struct fanotify_event *fanotify_alloc_event(
>  
>  	/* Mix event info, FAN_ONDIR flag and pid into event merge key */
>  	hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS);
> +	hash ^= hash_64(mnt_id, FANOTIFY_EVENT_HASH_BITS);
> +	hash ^= hash_64(parent_id, FANOTIFY_EVENT_HASH_BITS);
>  	fanotify_init_event(event, hash, mask);
>  	event->pid = pid;
> +	event->mnt_id = mnt_id;
> +	event->parent_id = parent_id;
>  
>  out:
>  	set_active_memcg(old_memcg);
> @@ -910,7 +918,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
>  	BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
>  	BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
>  
> -	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
> +	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 23);
>  
>  	mask = fanotify_group_event_mask(group, iter_info, &match_mask,
>  					 mask, data, data_type, dir);
> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index e5ab33cae6a7..71cc9cb2335a 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -261,6 +261,8 @@ struct fanotify_event {
>  		unsigned int hash : FANOTIFY_EVENT_HASH_BITS;
>  	};
>  	struct pid *pid;
> +	u64 mnt_id;
> +	u64 parent_id;
>  };
>  
>  static inline void fanotify_init_event(struct fanotify_event *event,
> @@ -456,6 +458,11 @@ static inline bool fanotify_is_error_event(u32 mask)
>  	return mask & FAN_FS_ERROR;
>  }
>  
> +static inline bool fanotify_is_mnt_event(u32 mask)
> +{
> +	return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
> +}
> +
>  static inline const struct path *fanotify_event_path(struct fanotify_event *event)
>  {
>  	if (event->type == FANOTIFY_EVENT_TYPE_PATH)
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 2d85c71717d6..adf19e1a10bd 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -122,6 +122,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
>  	sizeof(struct fanotify_event_info_pidfd)
>  #define FANOTIFY_ERROR_INFO_LEN \
>  	(sizeof(struct fanotify_event_info_error))
> +#define FANOTIFY_MNT_INFO_LEN \
> +	(sizeof(struct fanotify_event_info_mnt))
>  
>  static int fanotify_fid_info_len(int fh_len, int name_len)
>  {
> @@ -159,6 +161,9 @@ static size_t fanotify_event_len(unsigned int info_mode,
>  	int fh_len;
>  	int dot_len = 0;
>  
> +	if (fanotify_is_mnt_event(event->mask))
> +		event_len += FANOTIFY_MNT_INFO_LEN;
> +
>  	if (!info_mode)
>  		return event_len;
>  
> @@ -380,6 +385,26 @@ static int process_access_response(struct fsnotify_group *group,
>  	return -ENOENT;
>  }
>  
> +static size_t copy_mnt_info_to_user(struct fanotify_event *event,
> +				    char __user *buf, int count)
> +{
> +	struct fanotify_event_info_mnt info = { };
> +
> +	info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
> +	info.hdr.len = FANOTIFY_MNT_INFO_LEN;
> +
> +	if (WARN_ON(count < info.hdr.len))
> +		return -EFAULT;
> +
> +	info.mnt_id = event->mnt_id;
> +	info.parent_id = event->parent_id;
> +
> +	if (copy_to_user(buf, &info, sizeof(info)))
> +		return -EFAULT;
> +
> +	return info.hdr.len;
> +}
> +
>  static size_t copy_error_info_to_user(struct fanotify_event *event,
>  				      char __user *buf, int count)
>  {
> @@ -656,6 +681,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
>  	unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
>  	struct file *f = NULL, *pidfd_file = NULL;
>  	int ret, pidfd = -ESRCH, fd = -EBADF;
> +	int total_bytes = 0;
>  
>  	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
>  
> @@ -755,14 +781,29 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
>  
>  	buf += FAN_EVENT_METADATA_LEN;
>  	count -= FAN_EVENT_METADATA_LEN;
> +	total_bytes += FAN_EVENT_METADATA_LEN;
>  
>  	if (info_mode) {
>  		ret = copy_info_records_to_user(event, info, info_mode, pidfd,
>  						buf, count);
>  		if (ret < 0)
>  			goto out_close_fd;
> +
> +		buf += ret;
> +		count -= ret;
> +		total_bytes += ret;
> +	}
> +
> +	if (fanotify_is_mnt_event(event->mask)) {
> +		ret = copy_mnt_info_to_user(event, buf, count);
> +		if (ret < 0)
> +			goto out_close_fd;
> +
> +		total_bytes += ret;
>  	}
>  
> +	WARN_ON(metadata.event_len != total_bytes);
> +
>  	if (f)
>  		fd_install(fd, f);
>  
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index f976949d2634..1d5831b127e6 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -627,7 +627,7 @@ static __init int fsnotify_init(void)
>  {
>  	int ret;
>  
> -	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
> +	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
>  
>  	ret = init_srcu_struct(&fsnotify_mark_srcu);
>  	if (ret)
> diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> index 89ff45bd6f01..84e6f81bdb1b 100644
> --- a/include/linux/fanotify.h
> +++ b/include/linux/fanotify.h
> @@ -90,7 +90,7 @@
>  				 FAN_RENAME)
>  
>  /* Events that can be reported with event->fd */
> -#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
> +#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS | FANOTIFY_MOUNT_EVENTS)
>  
>  /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */
>  #define FANOTIFY_INODE_EVENTS	(FANOTIFY_DIRENT_EVENTS | \
> @@ -99,10 +99,13 @@
>  /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
>  #define FANOTIFY_ERROR_EVENTS	(FAN_FS_ERROR)
>  
> +#define FANOTIFY_MOUNT_EVENTS	(FAN_MNT_ATTACH | FAN_MNT_DETACH)
> +
>  /* Events that user can request to be notified on */
>  #define FANOTIFY_EVENTS		(FANOTIFY_PATH_EVENTS | \
>  				 FANOTIFY_INODE_EVENTS | \
> -				 FANOTIFY_ERROR_EVENTS)
> +				 FANOTIFY_ERROR_EVENTS | \
> +				 FANOTIFY_MOUNT_EVENTS )
>  
>  /* Events that require a permission response from user */
>  #define FANOTIFY_PERM_EVENTS	(FAN_OPEN_PERM | FAN_ACCESS_PERM | \
> diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
> index 278620e063ab..4129347e4f16 100644
> --- a/include/linux/fsnotify.h
> +++ b/include/linux/fsnotify.h
> @@ -463,4 +463,22 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
>  			NULL, NULL, NULL, 0);
>  }
>  
> +static inline void fsnotify_mnt_attach(struct path *mountpoint, struct vfsmount *mnt)
> +{
> +	struct fsnotify_mnt data = {
> +		.path = mountpoint,
> +		.mnt = mnt,
> +	};
> +	fsnotify(FS_MNT_ATTACH, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0);
> +}
> +
> +static inline void fsnotify_mnt_detach(struct path *mountpoint, struct vfsmount *mnt)
> +{
> +	struct fsnotify_mnt data = {
> +		.path = mountpoint,
> +		.mnt = mnt,
> +	};
> +	fsnotify(FS_MNT_DETACH, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0);
> +}
> +
>  #endif	/* _LINUX_FS_NOTIFY_H */
> diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> index 3ecf7768e577..1e9c15ad64b6 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -56,6 +56,9 @@
>  #define FS_ACCESS_PERM		0x00020000	/* access event in a permissions hook */
>  #define FS_OPEN_EXEC_PERM	0x00040000	/* open/exec event in a permission hook */
>  
> +#define FS_MNT_ATTACH		0x00100000	/* Mount was attached */
> +#define FS_MNT_DETACH		0x00200000	/* Mount was detached */
> +
>  /*
>   * Set on inode mark that cares about things that happen to its children.
>   * Always set for dnotify and inotify.
> @@ -102,7 +105,7 @@
>  			     FS_EVENTS_POSS_ON_CHILD | \
>  			     FS_DELETE_SELF | FS_MOVE_SELF | \
>  			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
> -			     FS_ERROR)
> +			     FS_ERROR | FS_MNT_ATTACH | FS_MNT_DETACH)
>  
>  /* Extra flags that may be reported with event or control handling of events */
>  #define ALL_FSNOTIFY_FLAGS  (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT)
> @@ -288,6 +291,7 @@ enum fsnotify_data_type {
>  	FSNOTIFY_EVENT_PATH,
>  	FSNOTIFY_EVENT_INODE,
>  	FSNOTIFY_EVENT_DENTRY,
> +	FSNOTIFY_EVENT_MNT,
>  	FSNOTIFY_EVENT_ERROR,
>  };
>  
> @@ -297,6 +301,11 @@ struct fs_error_report {
>  	struct super_block *sb;
>  };
>  
> +struct fsnotify_mnt {
> +	const struct path *path;
> +	struct vfsmount *mnt;
> +};
> +
>  static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
>  {
>  	switch (data_type) {
> @@ -306,6 +315,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
>  		return d_inode(data);
>  	case FSNOTIFY_EVENT_PATH:
>  		return d_inode(((const struct path *)data)->dentry);
> +	case FSNOTIFY_EVENT_MNT:
> +		return d_inode(((struct fsnotify_mnt *)data)->path->dentry);
>  	case FSNOTIFY_EVENT_ERROR:
>  		return ((struct fs_error_report *)data)->inode;
>  	default:
> @@ -321,6 +332,8 @@ static inline struct dentry *fsnotify_data_dentry(const void *data, int data_typ
>  		return (struct dentry *)data;
>  	case FSNOTIFY_EVENT_PATH:
>  		return ((const struct path *)data)->dentry;
> +	case FSNOTIFY_EVENT_MNT:
> +		return ((const struct fsnotify_mnt *)data)->path->dentry;
>  	default:
>  		return NULL;
>  	}
> @@ -332,6 +345,8 @@ static inline const struct path *fsnotify_data_path(const void *data,
>  	switch (data_type) {
>  	case FSNOTIFY_EVENT_PATH:
>  		return data;
> +	case FSNOTIFY_EVENT_MNT:
> +		return ((const struct fsnotify_mnt *)data)->path;
>  	default:
>  		return NULL;
>  	}
> @@ -347,6 +362,8 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
>  		return ((struct dentry *)data)->d_sb;
>  	case FSNOTIFY_EVENT_PATH:
>  		return ((const struct path *)data)->dentry->d_sb;
> +	case FSNOTIFY_EVENT_MNT:
> +		return ((const struct fsnotify_mnt *)data)->mnt->mnt_sb;
>  	case FSNOTIFY_EVENT_ERROR:
>  		return ((struct fs_error_report *) data)->sb;
>  	default:
> @@ -354,6 +371,17 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
>  	}
>  }
>  
> +static inline struct vfsmount *fsnotify_data_mnt(const void *data,
> +						 int data_type)
> +{
> +	switch (data_type) {
> +	case FSNOTIFY_EVENT_MNT:
> +		return ((const struct fsnotify_mnt *)data)->mnt;
> +	default:
> +		return NULL;
> +	}
> +}
> +
>  static inline struct fs_error_report *fsnotify_data_error_report(
>  							const void *data,
>  							int data_type)
> diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> index 34f221d3a1b9..8b2d47947bc2 100644
> --- a/include/uapi/linux/fanotify.h
> +++ b/include/uapi/linux/fanotify.h
> @@ -25,6 +25,8 @@
>  #define FAN_OPEN_PERM		0x00010000	/* File open in perm check */
>  #define FAN_ACCESS_PERM		0x00020000	/* File accessed in perm check */
>  #define FAN_OPEN_EXEC_PERM	0x00040000	/* File open/exec in perm check */
> +#define FAN_MNT_ATTACH		0x00100000	/* Mount was attached */
> +#define FAN_MNT_DETACH		0x00200000	/* Mount was detached */
>  
>  #define FAN_EVENT_ON_CHILD	0x08000000	/* Interested in child events */
>  
> @@ -143,6 +145,7 @@ struct fanotify_event_metadata {
>  #define FAN_EVENT_INFO_TYPE_DFID	3
>  #define FAN_EVENT_INFO_TYPE_PIDFD	4
>  #define FAN_EVENT_INFO_TYPE_ERROR	5
> +#define FAN_EVENT_INFO_TYPE_MNT		6
>  
>  /* Special info types for FAN_RENAME */
>  #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME	10
> @@ -189,6 +192,12 @@ struct fanotify_event_info_error {
>  	__u32 error_count;
>  };
>  
> +struct fanotify_event_info_mnt {
> +	struct fanotify_event_info_header hdr;
> +	__u64 mnt_id;
> +	__u64 parent_id;
> +};

Hm, I wonder whether we should indicate to the caller whether the mount
they received a notification for was propagated or not or whether they
should just infer this from statmount(). Probably enough to let them
infer this from statmount() via mnt_ns_id.




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux