Add a high level hook fsnotify_path_create() which is called from syscall context where mount context is available, so that the FAN_CREATE event can be added to a mount mark mask. This high level hook is called in addition to fsnotify_create(), fsnotify_mkdir() and fsnotify_link() hooks in vfs helpers where the mount context is not available. In the context where fsnotify_path_create() will be called, a dentry flag is set on the new dentry to suppress the FS_CREATE event in the vfs level hooks. This functionality was requested by Christian Brauner to replace recursive inotify watches for detecting when some path was created under an idmapped mount without having to monitor FAN_CREATE events in the entire filesystem. In combination with more changes to allow an unprivileged fanotify listener to watch an idmapped mount, this functionality would be usable also by nested container managers. Link: https://lore.kernel.org/linux-fsdevel/20210318143140.jxycfn3fpqntq34z@wittgenstein/ Cc: Christian Brauner <christian.brauner@xxxxxxxxxx> Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> --- Jan, After trying several different approaches, I finally realized that making FAN_CREATE available for mount marks is not that hard and it could be very useful IMO. Adding support for other "inode events" with mount mark, such as FAN_ATTRIB, FAN_DELETE, FAN_MOVE may also be possible, but adding support for FAN_CREATE was really easy due to the fact that all call sites are already surrounded by filename_create()/done_path_create() calls. Also, there is an inherent asymmetry between FAN_CREATE and other events. All the rest of the events may be set when watching a positive path. For example, to know when a path of a bind mount that was "injected" to a container was moved or deleted, it is possible to start watching that directory before injecting the bind mount. It is not possible to do the same with a "negative" path to know when a positive dentry was instantiated at that path.
This patch provides functionality that is independent of other changes, but I also tested it along with other changes that demonstrate how it would be utilized in userns setups [1][2]. As can be seen in the dcache.h patch, this patch comes on top of a revert patch to reclaim an unused dentry flag. If you accept this proposal, I will post the full series. Thanks, Amir. [1] https://github.com/amir73il/linux/commits/fanotify_userns [2] https://github.com/amir73il/inotify-tools/commits/fanotify_userns fs/namei.c | 21 ++++++++++++++++++++- include/linux/dcache.h | 2 +- include/linux/fanotify.h | 8 ++++---- include/linux/fsnotify.h | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 6 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 216f16e74351..cf979e956938 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3288,7 +3288,7 @@ static const char *open_last_lookups(struct nameidata *nd, inode_lock_shared(dir->d_inode); dentry = lookup_open(nd, file, op, got_write); if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED)) - fsnotify_create(dir->d_inode, dentry); + fsnotify_path_create(&nd->path, dentry); if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else @@ -3560,6 +3560,20 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, return file; } +static void d_set_path_create(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_PATH_CREATE; + spin_unlock(&dentry->d_lock); +} + +static void d_clear_path_create(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_PATH_CREATE; + spin_unlock(&dentry->d_lock); +} + static struct dentry *filename_create(int dfd, struct filename *name, struct path *path, unsigned int lookup_flags) { @@ -3617,6 +3631,8 @@ static struct dentry *filename_create(int dfd, struct filename *name, goto fail; } putname(name); + /* Start "path create" context that ends in done_path_create() */ + d_set_path_create(dentry); return dentry; fail:
dput(dentry); @@ -3641,6 +3657,9 @@ EXPORT_SYMBOL(kern_path_create); void done_path_create(struct path *path, struct dentry *dentry) { + if (d_inode(dentry)) + fsnotify_path_create(path, dentry); + d_clear_path_create(dentry); dput(dentry); inode_unlock(path->dentry->d_inode); mnt_drop_write(path->mnt); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4225caa8cf02..d153793d5b95 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -213,7 +213,7 @@ struct dentry_operations { #define DCACHE_SYMLINK_TYPE 0x00600000 /* Symlink (or fallthru to such) */ #define DCACHE_MAY_FREE 0x00800000 -/* Was #define DCACHE_FALLTHRU 0x01000000 */ +#define DCACHE_PATH_CREATE 0x01000000 /* "path_create" context */ #define DCACHE_NOKEY_NAME 0x02000000 /* Encrypted name encoded without key */ #define DCACHE_OP_REAL 0x04000000 diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index bad41bcb25df..f0c5a4a82b6e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -65,10 +65,10 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ /* * Events that can be reported with data type FSNOTIFY_EVENT_PATH. - * Note that FAN_MODIFY can also be reported with data type + * Note that FAN_MODIFY and FAN_CREATE can also be reported with data type * FSNOTIFY_EVENT_INODE. 
*/ -#define FANOTIFY_PATH_EVENTS (FAN_ACCESS | FAN_MODIFY | \ +#define FANOTIFY_PATH_EVENTS (FAN_ACCESS | FAN_MODIFY | FAN_CREATE | \ FAN_CLOSE | FAN_OPEN | FAN_OPEN_EXEC) /* @@ -78,8 +78,8 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ -#define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ - FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF) +#define FANOTIFY_INODE_EVENTS (FAN_MOVE | FAN_DELETE | FAN_ATTRIB | \ + FAN_MOVE_SELF | FAN_DELETE_SELF) /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index f8acddcf54fb..9a3d9f7beeb2 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -179,6 +179,30 @@ static inline void fsnotify_inoderemove(struct inode *inode) __fsnotify_inode_delete(inode); } +/* + * fsnotify_path_create - an inode was linked to namespace + * + * This higher level hook is called in addition to fsnotify_create(), + * fsnotify_mkdir() and fsnotify_link() vfs hooks when the mount context is + * available, so that FS_CREATE event can be added to a mount mark mask. + * + * In that case, the DCACHE_PATH_CREATE flag is set to suppress the FS_CREATE + * event in the lower level vfs hooks.
+ */ +static inline void fsnotify_path_create(struct path *path, + struct dentry *child) +{ + struct inode *dir = path->dentry->d_inode; + __u32 mask = FS_CREATE; + + WARN_ON_ONCE(!inode_is_locked(dir)); + + if (S_ISDIR(d_inode(child)->i_mode)) + mask |= FS_ISDIR; + + fsnotify(mask, path, FSNOTIFY_EVENT_PATH, dir, &child->d_name, NULL, 0); +} + /* * fsnotify_create - 'name' was linked in */ @@ -186,6 +210,10 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); + /* fsnotify_path_create() will be called */ + if (dentry->d_flags & DCACHE_PATH_CREATE) + return; + fsnotify_dirent(inode, dentry, FS_CREATE); } @@ -200,6 +228,10 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); + /* fsnotify_path_create() will be called */ + if (new_dentry->d_flags & DCACHE_PATH_CREATE) + return; + fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); } @@ -223,6 +255,10 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); + /* fsnotify_path_create() will be called */ + if (dentry->d_flags & DCACHE_PATH_CREATE) + return; + fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); } -- 2.30.0