On Fri 14-04-23 21:29:02, Amir Goldstein wrote: > inotify generates unsolicited IN_UNMOUNT events for every inode > mark before the filesystem containing the inode is shutdown. > > Unlike IN_UNMOUNT, FAN_UNMOUNT is an opt-in event that can only be > set on a mount mark and is generated when the mount is unmounted. > > FAN_UNMOUNT requires FAN_REPORT_FID and reports an fid info record > with fsid of the filesystem and an empty file handle. > > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> Seeing the discussion further in this thread regarding FAN_IGNORED, wouldn't it be more consistent (and extensible) to implement the above functionality as a FAN_IGNORED event delivered to a mount mark when it is getting destroyed? I.e., define FAN_IGNORED as an event that gets delivered when a mark is getting destroyed (with the records identifying the mark). For now, we could start by supporting it on mount marks; later we can add support for other mark types if there's demand. Thoughts? Honza > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > index 90d9210dc0d2..384d2b2e55e7 100644 > --- a/fs/notify/fanotify/fanotify.c > +++ b/fs/notify/fanotify/fanotify.c > @@ -713,7 +713,7 @@ static struct fanotify_event *fanotify_alloc_error_event( > inode = report->inode; > fh_len = fanotify_encode_fh_len(inode); > > - /* Bad fh_len. Fallback to using an invalid fh. Should never happen. 
*/ > + /* Record empty fh for errors not associated with specific inode */ > if (!fh_len && inode) > inode = NULL; > > @@ -745,7 +745,10 @@ static struct fanotify_event *fanotify_alloc_event( > bool ondir = mask & FAN_ONDIR; > struct pid *pid; > > - if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { > + if (mask & FAN_UNMOUNT && !WARN_ON_ONCE(!path || !fid_mode)) { > + /* Record fid event with fsid and empty fh */ > + id = NULL; > + } else if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { > /* > * For certain events and group flags, report the child fid > * in addition to reporting the parent fid and maybe child name. > @@ -951,10 +954,11 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, > BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); > BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); > BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); > + BUILD_BUG_ON(FAN_UNMOUNT != FS_UNMOUNT); > BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); > BUILD_BUG_ON(FAN_RENAME != FS_RENAME); > > - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); > + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22); > > mask = fanotify_group_event_mask(group, iter_info, &match_mask, > mask, data, data_type, dir); > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > index 7f0bf00a90f0..f98dcf5b7a19 100644 > --- a/fs/notify/fanotify/fanotify.h > +++ b/fs/notify/fanotify/fanotify.h > @@ -382,10 +382,12 @@ static inline int fanotify_event_dir2_fh_len(struct fanotify_event *event) > return info ? fanotify_info_dir2_fh_len(info) : 0; > } > > +/* For error and unmount events, fsid with empty fh are reported. */ > +#define FANOTIFY_EMPTY_FH_EVENTS (FAN_FS_ERROR | FAN_UNMOUNT) > + > static inline bool fanotify_event_has_object_fh(struct fanotify_event *event) > { > - /* For error events, even zeroed fh are reported. 
*/ > - if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) > + if (event->mask & FANOTIFY_EMPTY_FH_EVENTS) > return true; > return fanotify_event_object_fh_len(event) > 0; > } > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > index 554b335b1733..0b3de6218c56 100644 > --- a/fs/notify/fanotify/fanotify_user.c > +++ b/fs/notify/fanotify/fanotify_user.c > @@ -1766,6 +1766,16 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, > (!fid_mode || mark_type == FAN_MARK_MOUNT)) > goto fput_and_out; > > + /* > + * inotify sends unsoliciled IN_UNMOUNT per marked inode on sb shutdown. > + * FAN_UNMOUNT event is about unmount of a mount, not about sb shutdown, > + * so allow setting it only in mount mark mask. > + * FAN_UNMOUNT requires FAN_REPORT_FID to report fsid with empty fh. > + */ > + if (mask & FAN_UNMOUNT && > + (!(fid_mode & FAN_REPORT_FID) || mark_type != FAN_MARK_MOUNT)) > + goto fput_and_out; > + > /* > * FAN_RENAME uses special info type records to report the old and > * new parent+name. Reporting only old and new parent id is less > diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h > index 4c6f40a701c2..a64c26d9626f 100644 > --- a/include/linux/fanotify.h > +++ b/include/linux/fanotify.h > @@ -80,7 +80,8 @@ > * FSNOTIFY_EVENT_INODE. > */ > #define FANOTIFY_PATH_EVENTS (FAN_ACCESS | FAN_MODIFY | \ > - FAN_CLOSE | FAN_OPEN | FAN_OPEN_EXEC) > + FAN_CLOSE | FAN_OPEN | FAN_OPEN_EXEC | \ > + FAN_UNMOUNT) > > /* > * Directory entry modification events - reported only to directory > diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h > index bb8467cd11ae..3898bf858407 100644 > --- a/include/linux/fsnotify.h > +++ b/include/linux/fsnotify.h > @@ -176,11 +176,27 @@ static inline void fsnotify_inode_delete(struct inode *inode) > __fsnotify_inode_delete(inode); > } > > +/* > + * fsnotify_unmount - mount was unmounted. 
> + */ > +static inline int fsnotify_unmount(struct vfsmount *mnt) > +{ > + struct path path = { .mnt = mnt, .dentry = mnt->mnt_root }; > + > + if (atomic_long_read(&mnt->mnt_sb->s_fsnotify_connectors) == 0) > + return 0; > + > + return fsnotify(FS_UNMOUNT, &path, FSNOTIFY_EVENT_PATH, NULL, NULL, > + d_inode(path.dentry), 0); > +} > + > /* > * fsnotify_vfsmount_delete - a vfsmount is being destroyed, clean up is needed > */ > static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) > { > + /* Send FS_UNMOUNT to groups and then clear mount marks */ > + fsnotify_unmount(mnt); > __fsnotify_vfsmount_delete(mnt); > } > > diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h > index 014e9682bd76..70f2d43e8ba4 100644 > --- a/include/uapi/linux/fanotify.h > +++ b/include/uapi/linux/fanotify.h > @@ -19,6 +19,7 @@ > #define FAN_MOVE_SELF 0x00000800 /* Self was moved */ > #define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ > > +#define FAN_UNMOUNT 0x00002000 /* Filesystem unmounted */ > #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ > #define FAN_FS_ERROR 0x00008000 /* Filesystem error */ > > -- > 2.34.1 > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR