Re: [PATCH v7 05/18] fsnotify: introduce pre-content permission events

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 13, 2024 at 5:57 PM Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> On Tue, 12 Nov 2024 at 16:06, Amir Goldstein <amir73il@xxxxxxxxx> wrote:
> >
> > Maybe I could use just this one bit, but together with the existing
> > FMODE_NONOTIFY bit, I get 4 modes, which correspond to the
> > highest watching priority:
>
> So you'd use two bits, but one of those would re-use the existing
> FMODE_NONOTIFY? That sounds perfectly fine to me.
>

Yes, exactly, like this:

/*
 * The two FMODE_NONOTIFY_ bits used together have a special meaning of
 * not reporting any events at all including non-permission events.
 * These are the possible values of FMODE_NOTIFY(f->f_mode) and their meaning:
 *
 * FMODE_NONOTIFY_HSM - suppress only pre-content events.
 * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events.
 * FMODE_NONOTIFY - suppress all (incl. non-permission) events.
 */
#define FMODE_NONOTIFY_MASK \
        (FMODE_NONOTIFY_HSM | FMODE_NONOTIFY_PERM)
#define FMODE_NONOTIFY FMODE_NONOTIFY_MASK
#define FMODE_NOTIFY(mode) \
        ((mode) & FMODE_NONOTIFY_MASK)

Please see attached patch (build and sanity tested) to make sure that
we are on the same page.

Going forward in the patch series, the choice of the NONOTIFY lingo
creates some double negatives, like:

        /*
         * read()/write() and other types of access generate pre-content events.
         */
        if (!likely(file->f_mode & FMODE_NONOTIFY_HSM)) {
                int ret = fsnotify_path(&file->f_path);

But it was easier for me to work with NONOTIFY flags.

Thanks,
Amir.
From 7a2cd74654a53684d545b96c57c9091e420b3add Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@xxxxxxxxx>
Date: Tue, 12 Nov 2024 13:46:08 +0100
Subject: [PATCH] fsnotify: opt-in for permission events at file open time

Legacy inotify/fanotify listeners can add watches for events on inode,
parent or mount and expect to get events (e.g. FS_MODIFY) on files that
were already open at the time of setting up the watches.

fanotify permission events are typically used by Anti-malware sofware,
that is watching the entire mount and it is not common to have more that
one Anti-malware engine installed on a system.

To reduce the overhead of the fsnotify_file_perm() hooks on every file
access, relax the semantics of the legacy FAN_ACCESS_PERM event to generate
events only if there were *any* permission event listeners on the
filesystem at the time that the file was opened.

The new semantic is implemented by extending the FMODE_NONOTIFY bit into
two FMODE_NONOTIFY_* bits, that are used to store a mode for which of the
events types to report.

This is going to apply to the new fanotify pre-content events in order
to reduce the cost of the new pre-content event vfs hooks.

Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wj8L=mtcRTi=NECHMGfZQgXOp_uix1YVh04fEmrKaMnXA@xxxxxxxxxxxxxx/
Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
---
 fs/open.c                |  8 ++++-
 include/linux/fs.h       | 26 ++++++++++++---
 include/linux/fsnotify.h | 71 +++++++++++++++++++++++++++++++---------
 3 files changed, 83 insertions(+), 22 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 226aca8c7909..194c2c8d8cd4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -901,7 +901,7 @@ static int do_dentry_open(struct file *f,
 	f->f_sb_err = file_sample_sb_err(f);
 
 	if (unlikely(f->f_flags & O_PATH)) {
-		f->f_mode = FMODE_PATH | FMODE_OPENED;
+		f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY;
 		f->f_op = &empty_fops;
 		return 0;
 	}
@@ -929,6 +929,12 @@ static int do_dentry_open(struct file *f,
 	if (error)
 		goto cleanup_all;
 
+	/*
+	 * Set FMODE_NONOTIFY_* bits according to existing permission watches.
+	 * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't
+	 * change anything.
+	 */
+	f->f_mode |= fsnotify_file_mode(f);
 	error = fsnotify_open_perm(f);
 	if (error)
 		goto cleanup_all;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70359dd669ff..dd583ce7dba8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -173,13 +173,14 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 
 #define	FMODE_NOREUSE		((__force fmode_t)(1 << 23))
 
-/* FMODE_* bit 24 */
-
 /* File is embedded in backing_file object */
-#define FMODE_BACKING		((__force fmode_t)(1 << 25))
+#define FMODE_BACKING		((__force fmode_t)(1 << 24))
+
+/* File shouldn't generate fanotify pre-content events */
+#define FMODE_NONOTIFY_HSM	((__force fmode_t)(1 << 25))
 
-/* File was opened by fanotify and shouldn't generate fanotify events */
-#define FMODE_NONOTIFY		((__force fmode_t)(1 << 26))
+/* File shouldn't generate fanotify permission events */
+#define FMODE_NONOTIFY_PERM	((__force fmode_t)(1 << 26))
 
 /* File is capable of returning -EAGAIN if I/O will block */
 #define FMODE_NOWAIT		((__force fmode_t)(1 << 27))
@@ -190,6 +191,21 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File does not contribute to nr_files count */
 #define FMODE_NOACCOUNT		((__force fmode_t)(1 << 29))
 
+/*
+ * The two FMODE_NONOTIFY_ bits used together have a special meaning of
+ * not reporting any events at all including non-permission events.
+ * These are the possible values of FMODE_NOTIFY(f->f_mode) and their meaning:
+ *
+ * FMODE_NONOTIFY_HSM - suppress only pre-content events.
+ * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events.
+ * FMODE_NONOTIFY - suppress all (incl. non-permission) events.
+ */
+#define FMODE_NONOTIFY_MASK \
+	(FMODE_NONOTIFY_HSM | FMODE_NONOTIFY_PERM)
+#define FMODE_NONOTIFY FMODE_NONOTIFY_MASK
+#define FMODE_NOTIFY(mode) \
+	((mode) & FMODE_NONOTIFY_MASK)
+
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 278620e063ab..9f13c7c19b74 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -108,38 +108,66 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
 	fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY);
 }
 
-static inline int fsnotify_file(struct file *file, __u32 mask)
+static inline int fsnotify_path(const struct path *path, __u32 mask)
 {
-	const struct path *path;
+	return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
+}
 
+static inline int fsnotify_file(struct file *file, __u32 mask)
+{
 	/*
 	 * FMODE_NONOTIFY are fds generated by fanotify itself which should not
 	 * generate new events. We also don't want to generate events for
 	 * FMODE_PATH fds (involves open & close events) as they are just
 	 * handle creation / destruction events and not "real" file events.
 	 */
-	if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH))
-		return 0;
-
-	path = &file->f_path;
-	/* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */
-	if (mask & ALL_FSNOTIFY_PERM_EVENTS &&
-	    !fsnotify_sb_has_priority_watchers(path->dentry->d_sb,
-					       FSNOTIFY_PRIO_CONTENT))
+	if (FMODE_NOTIFY(file->f_mode) == FMODE_NONOTIFY)
 		return 0;
 
-	return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
+	return fsnotify_path(&file->f_path, mask);
 }
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+/*
+ * At open time we check fsnotify_sb_has_priority_watchers() and set the
+ * FMODE_NONOTIFY_ mode bits accordignly.
+ * Later, fsnotify permission hooks do not check if there are permission event
+ * watches, but that there were permission event watches at open time.
+ */
+static inline fmode_t fsnotify_file_mode(struct file *file)
+{
+	struct super_block *sb = file->f_path.dentry->d_sb;
+
+	/* Is it a file opened by fanotify? */
+	if (FMODE_NOTIFY(file->f_mode) == FMODE_NONOTIFY)
+		return FMODE_NONOTIFY;
+
+	/*
+	 * Permission events is a super set of pre-content events, so if there
+	 * are no permission event watchers, there are also no pre-content event
+	 * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit.
+	 */
+	if (likely(!fsnotify_sb_has_priority_watchers(sb,
+						FSNOTIFY_PRIO_CONTENT)))
+		return FMODE_NONOTIFY_PERM;
+
+	/*
+	 * FMODE_NONOTIFY_HSM bit means there are permission event watchers, but
+	 * no pre-content event watchers.
+	 */
+	if (likely(!fsnotify_sb_has_priority_watchers(sb,
+						FSNOTIFY_PRIO_PRE_CONTENT)))
+		return FMODE_NONOTIFY_HSM;
+
+	return 0;
+}
+
 /*
  * fsnotify_file_area_perm - permission hook before access to file range
  */
 static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
 					  const loff_t *ppos, size_t count)
 {
-	__u32 fsnotify_mask = FS_ACCESS_PERM;
-
 	/*
 	 * filesystem may be modified in the context of permission events
 	 * (e.g. by HSM filling a file on access), so sb freeze protection
@@ -150,7 +178,10 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
 	if (!(perm_mask & MAY_READ))
 		return 0;
 
-	return fsnotify_file(file, fsnotify_mask);
+	if (likely(file->f_mode & FMODE_NONOTIFY_PERM))
+		return 0;
+
+	return fsnotify_path(&file->f_path, FS_ACCESS_PERM);
 }
 
 /*
@@ -168,16 +199,24 @@ static inline int fsnotify_open_perm(struct file *file)
 {
 	int ret;
 
+	if (likely(file->f_mode & FMODE_NONOTIFY_PERM))
+		return 0;
+
 	if (file->f_flags & __FMODE_EXEC) {
-		ret = fsnotify_file(file, FS_OPEN_EXEC_PERM);
+		ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM);
 		if (ret)
 			return ret;
 	}
 
-	return fsnotify_file(file, FS_OPEN_PERM);
+	return fsnotify_path(&file->f_path, FS_OPEN_PERM);
 }
 
 #else
+static inline fmode_t fsnotify_file_mode(struct file *file)
+{
+	return 0;
+}
+
 static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
 					  const loff_t *ppos, size_t count)
 {
-- 
2.34.1


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux