Re: [PATCH 08/11] fanotify: Introduce FAN_ERROR event

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, May 21, 2021 at 5:42 AM Gabriel Krisman Bertazi
<krisman@xxxxxxxxxxxxx> wrote:
>
> The FAN_ERROR event is used by filesystem wide monitoring tools to
> receive notifications of type FS_ERROR_EVENT, emited by filesystems when
> a problem is detected.  The error notification includes a generic error
> descriptor.
>
> Signed-off-by: Gabriel Krisman Bertazi <krisman@xxxxxxxxxxxxx>
>
> Changes since v1:
>   - Pass dentry to fanotify_check_fsid (Amir)
>   - FANOTIFY_EVENT_TYPE_ERROR -> FANOTIFY_EVENT_TYPE_FS_ERROR
>   - Merge previous patch into it
>   - Use a single slot
> ---
>  fs/notify/fanotify/fanotify.c      |  74 ++++++++++++++++-
>  fs/notify/fanotify/fanotify.h      |  28 ++++++-
>  fs/notify/fanotify/fanotify_user.c | 123 ++++++++++++++++++++++++++---
>  include/linux/fanotify.h           |   6 +-
>  include/uapi/linux/fanotify.h      |  10 +++
>  5 files changed, 225 insertions(+), 16 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 34e2ee759b39..197291a8c41d 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -269,7 +269,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
>         pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
>                  __func__, iter_info->report_mask, event_mask, data, data_type);
>
> -       if (!fid_mode) {
> +       if (!fid_mode && data_type != FSNOTIFY_EVENT_ERROR) {
>                 /* Do we have path to open a file descriptor? */
>                 if (!path)
>                         return 0;
> @@ -657,6 +657,51 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
>         return event;
>  }
>
> +static void fanotify_init_error_event(struct fanotify_error_event *event,
> +                                     __kernel_fsid_t fsid,
> +                                     const struct fs_error_report *report)
> +{
> +       event->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR;
> +       event->err_count = 1;
> +       event->fsid = fsid;
> +       event->error = report->error;
> +       event->ino = (report->inode) ? report->inode->i_ino : 0;
> +}
> +
> +static int fanotify_queue_error_event(struct fsnotify_iter_info *iter_info,
> +                                     struct fsnotify_group *group,
> +                                     __kernel_fsid_t fsid,
> +                                     const struct fs_error_report *report)
> +{
> +       struct fanotify_mark *mark;
> +       int type;
> +       int ret = -ENOMEM;
> +
> +       fsnotify_foreach_obj_type(type) {
> +               if (!fsnotify_iter_should_report_type(iter_info, type))
> +                       continue;
> +               mark = FANOTIFY_MARK(iter_info->marks[type]);
> +       }

Two options.
If FAN_ERROR is allowed on inode marks, need to do the below
for every mark found in mark type iterator.
If FAN_ERROR is only allowed in sb marks use
fsnotify_iter_sb_mark(iter_info) instead of mark iterator.

> +
> +       spin_lock(&mark->fsn_mark.lock);
> +       if (mark->error_event) {
> +               if (list_empty(&mark->error_event->fae.fse.list)) {

event list check should be protected with notification_lock.
err_count++ as well.

I suggest that you use the merge() callback of fsnotify_add_event()
to do err_count++ under notification_lock and the insert() callback
to do fsnotify_get_mark() and fanotify_init_error_event() under
notification_lock.

> +                       fsnotify_get_mark(&mark->fsn_mark);
> +                       fanotify_init_error_event(mark->error_event, fsid, report);
> +                       ret = fsnotify_add_event(group, &mark->error_event->fae.fse,
> +                                                NULL, NULL);
> +                       if (ret)
> +                               fsnotify_put_mark(&mark->fsn_mark);
> +               } else {
> +                       mark->error_event->err_count++;
> +                       ret = 0;
> +               }
> +       }
> +       spin_unlock(&mark->fsn_mark.lock);
> +
> +       return ret;
> +}
> +
>  /*
>   * Get cached fsid of the filesystem containing the object from any connector.
>   * All connectors are supposed to have the same fsid, but we do not verify that
> @@ -738,8 +783,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
>         BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
>         BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
>         BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
> +       BUILD_BUG_ON(FAN_ERROR != FS_ERROR);
>
> -       BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
> +       BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);
>
>         mask = fanotify_group_event_mask(group, iter_info, mask, data,
>                                          data_type, dir);
> @@ -757,13 +803,20 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
>                         return 0;
>         }
>
> -       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
> +       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS) || fanotify_is_error_event(mask)) {

Jan likes to keep code inside 80-columns.
Please run checkpatch.pl --max-line-length=80 on your patches.

>                 fsid = fanotify_get_fsid(iter_info);
>                 /* Racing with mark destruction or creation? */
>                 if (!fsid.val[0] && !fsid.val[1])
>                         return 0;
>         }
>
> +       if (fanotify_is_error_event(mask)) {
> +               ret = fanotify_queue_error_event(iter_info, group, fsid, data);
> +               if (ret)
> +                       fsnotify_queue_overflow(group);

fsnotify_add_event() already takes care of overflow event.

> +               goto finish;
> +       }
> +
>         event = fanotify_alloc_event(group, mask, data, data_type, dir,
>                                      file_name, &fsid);
>         ret = -ENOMEM;
> @@ -833,6 +886,17 @@ static void fanotify_free_name_event(struct fanotify_event *event)
>         kfree(FANOTIFY_NE(event));
>  }
>
> +static void fanotify_free_error_event(struct fanotify_event *event)
> +{
> +       /*
> +        * Just drop the reference acquired by
> +        * fanotify_queue_error_event.
> +        *
> +        * The actual memory is freed with the mark.
> +        */
> +       fsnotify_put_mark(&(FANOTIFY_EE(event)->mark->fsn_mark));
> +}
> +
>  static void fanotify_free_event(struct fsnotify_event *fsn_event)
>  {
>         struct fanotify_event *event;
> @@ -855,6 +919,9 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
>         case FANOTIFY_EVENT_TYPE_OVERFLOW:
>                 kfree(event);
>                 break;
> +       case FANOTIFY_EVENT_TYPE_FS_ERROR:
> +               fanotify_free_error_event(event);
> +               break;
>         default:
>                 WARN_ON_ONCE(1);
>         }
> @@ -871,6 +938,7 @@ static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
>  {
>         struct fanotify_mark *mark = FANOTIFY_MARK(fsn_mark);
>
> +       kfree(mark->error_event);
>         kmem_cache_free(fanotify_mark_cache, mark);
>  }
>
> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index a399c5e2615d..ebe9e593dfbf 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -151,6 +151,7 @@ enum fanotify_event_type {
>         FANOTIFY_EVENT_TYPE_PATH,
>         FANOTIFY_EVENT_TYPE_PATH_PERM,
>         FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
> +       FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
>         __FANOTIFY_EVENT_TYPE_NUM
>  };
>
> @@ -206,12 +207,31 @@ FANOTIFY_NE(struct fanotify_event *event)
>         return container_of(event, struct fanotify_name_event, fae);
>  }
>
> +struct fanotify_error_event {
> +       struct fanotify_event fae;
> +       __kernel_fsid_t fsid;
> +       unsigned long ino;
> +       int error;
> +       u32 err_count;
> +
> +       /* Back reference to the mark this error refers to. */
> +       struct fanotify_mark *mark;
> +};
> +
> +static inline struct fanotify_error_event *
> +FANOTIFY_EE(struct fanotify_event *event)
> +{
> +       return container_of(event, struct fanotify_error_event, fae);
> +}
> +
>  static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event)
>  {
>         if (event->type == FANOTIFY_EVENT_TYPE_FID)
>                 return &FANOTIFY_FE(event)->fsid;
>         else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
>                 return &FANOTIFY_NE(event)->fsid;
> +       else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
> +               return &FANOTIFY_EE(event)->fsid;
>         else
>                 return NULL;
>  }
> @@ -297,6 +317,11 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
>         return container_of(fse, struct fanotify_event, fse);
>  }
>
> +static inline bool fanotify_is_error_event(u32 mask)
> +{
> +       return mask & FANOTIFY_ERROR_EVENTS;
> +}
> +
>  static inline bool fanotify_event_has_path(struct fanotify_event *event)
>  {
>         return event->type == FANOTIFY_EVENT_TYPE_PATH ||
> @@ -325,7 +350,8 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
>   */
>  static inline bool fanotify_is_hashed_event(u32 mask)
>  {
> -       return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW);
> +       return (!fanotify_is_perm_event(mask) && !fanotify_is_error_event(mask)
> +               && !(mask & FS_Q_OVERFLOW));

Please do us the service off adding helper fanotify_is_overflow_event(mask)

>  }
>
>  static inline unsigned int fanotify_event_hash_bucket(
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 00210535a78e..ea9b9f8f7c21 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -106,6 +106,8 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
>  #define FANOTIFY_EVENT_ALIGN 4
>  #define FANOTIFY_INFO_HDR_LEN \
>         (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
> +#define FANOTIFY_INFO_ERROR_LEN \
> +       (sizeof(struct fanotify_event_info_error))
>
>  static int fanotify_fid_info_len(int fh_len, int name_len)
>  {
> @@ -126,6 +128,9 @@ static size_t fanotify_event_len(struct fanotify_event *event,
>         int fh_len;
>         int dot_len = 0;
>
> +       if (fanotify_is_error_event(event->mask))
> +               return event_len + FANOTIFY_INFO_ERROR_LEN;
> +
>         if (!fid_mode)
>                 return event_len;
>
> @@ -149,6 +154,30 @@ static size_t fanotify_event_len(struct fanotify_event *event,
>         return event_len;
>  }
>
> +static struct fanotify_event *fanotify_dequeue_error_event(struct fsnotify_group *group,
> +                                                          struct fanotify_event *event,
> +                                                          struct fanotify_error_event *error_event)
> +{
> +       struct fsnotify_mark *mark = &(FANOTIFY_EE(event)->mark->fsn_mark);
> +       /*
> +        * In order to avoid missing an error count update, the
> +        * queued event is de-queued and duplicated to an
> +        * in-stack fanotify_error_event while still inside
> +        * mark->lock.  Once the event is dequeued, it can be
> +        * immediately re-used for a new event.
> +        *
> +        * The ownership of the mark reference is dropped later
> +        * by destroy_event.
> +        */
> +       spin_lock(&mark->lock);
> +       memcpy(error_event, FANOTIFY_EE(event), sizeof(*error_event));
> +       fsnotify_init_event(&error_event->fae.fse);
> +       fsnotify_remove_queued_event(group, &event->fse);
> +       spin_unlock(&mark->lock);
> +
> +       return &error_event->fae;
> +}
> +
>  /*
>   * Remove an hashed event from merge hash table.
>   */
> @@ -173,7 +202,8 @@ static void fanotify_unhash_event(struct fsnotify_group *group,
>   * updated accordingly.
>   */
>  static struct fanotify_event *get_one_event(struct fsnotify_group *group,
> -                                           size_t count)
> +                                           size_t count,
> +                                           struct fanotify_error_event *error_event)
>  {
>         size_t event_size;
>         struct fanotify_event *event = NULL;
> @@ -197,9 +227,14 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
>
>         /*
>          * Held the notification_lock the whole time, so this is the
> -        * same event we peeked above.
> +        * same event we peeked above, unless it is copied to
> +        * error_event.
>          */
> -       fsnotify_remove_first_event(group);
> +       if (fanotify_is_error_event(event->mask))
> +               event = fanotify_dequeue_error_event(group, event, error_event);
> +       else
> +               fsnotify_remove_first_event(group);
> +
>         if (fanotify_is_perm_event(event->mask))
>                 FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
>         if (fanotify_is_hashed_event(event->mask))
> @@ -309,6 +344,30 @@ static int process_access_response(struct fsnotify_group *group,
>         return -ENOENT;
>  }
>
> +static size_t copy_error_info_to_user(struct fanotify_event *event,
> +                                     char __user *buf, int count)
> +{
> +       struct fanotify_event_info_error info;
> +       struct fanotify_error_event *fee = FANOTIFY_EE(event);
> +
> +       info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR;
> +       info.hdr.pad = 0;
> +       info.hdr.len = sizeof(struct fanotify_event_info_error);
> +
> +       if (WARN_ON(count < info.hdr.len))
> +               return -EFAULT;
> +
> +       info.fsid = fee->fsid;
> +       info.error = fee->error;
> +       info.inode = fee->ino;
> +       info.error_count = fee->err_count;
> +
> +       if (copy_to_user(buf, &info, sizeof(info)))
> +               return -EFAULT;
> +
> +       return info.hdr.len;
> +}
> +
>  static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
>                              int info_type, const char *name, size_t name_len,
>                              char __user *buf, size_t count)
> @@ -523,6 +582,14 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
>                 count -= ret;
>         }
>
> +       if (fanotify_is_error_event(event->mask)) {
> +               ret = copy_error_info_to_user(event, buf, count);
> +               if (ret < 0)
> +                       return ret;
> +               buf += ret;
> +               count -= ret;
> +       }
> +
>         return metadata.event_len;
>
>  out_close_fd:
> @@ -553,6 +620,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
>  {
>         struct fsnotify_group *group;
>         struct fanotify_event *event;
> +       struct fanotify_error_event error_event;
>         char __user *start;
>         int ret;
>         DEFINE_WAIT_FUNC(wait, woken_wake_function);
> @@ -569,7 +637,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
>                  * in case there are lots of available events.
>                  */
>                 cond_resched();
> -               event = get_one_event(group, count);
> +               event = get_one_event(group, count, &error_event);
>                 if (IS_ERR(event)) {
>                         ret = PTR_ERR(event);
>                         break;
> @@ -888,16 +956,33 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
>                                     flags, umask);
>  }
>
> -static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
> -                                      __u32 mask,
> -                                      unsigned int flags)
> +static int fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
> +                                      __u32 mask, unsigned int flags,
> +                                    __u32 *modified_mask)

It really means 'added' not 'modified' just like the local variable name.

>  {
> +       struct fanotify_mark *mark = FANOTIFY_MARK(fsn_mark);
> +       struct fanotify_error_event *error_event = NULL;
> +       bool addition = !(flags & FAN_MARK_IGNORED_MASK);

'addition' is confusing here. I would call it 'ignored',
but do we want to allow FAN_ERROR in ignored mask?
for what case? on inode mark?
If we do not, let's not allow it.

>         __u32 oldmask = -1;
>
> +       /* Only pre-alloc error_event if needed. */
> +       if (addition && (mask & FAN_ERROR) && !mark->error_event) {
> +               error_event = kzalloc(sizeof(*error_event), GFP_KERNEL);
> +               if (!error_event)
> +                       return -ENOMEM;
> +               fanotify_init_event(&error_event->fae, 0, FS_ERROR);
> +               error_event->mark = mark;
> +       }
> +
>         spin_lock(&fsn_mark->lock);
> -       if (!(flags & FAN_MARK_IGNORED_MASK)) {
> +       if (addition) {
>                 oldmask = fsn_mark->mask;
>                 fsn_mark->mask |= mask;
> +
> +               if (!mark->error_event) {
> +                       mark->error_event = error_event;
> +                       error_event = NULL;
> +               }
>         } else {
>                 fsn_mark->ignored_mask |= mask;
>                 if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
> @@ -905,7 +990,11 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
>         }
>         spin_unlock(&fsn_mark->lock);
>
> -       return mask & ~oldmask;
> +       kfree(error_event);
> +
> +       *modified_mask = mask & ~oldmask;
> +       return 0;
> +
>  }
>
>  static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
> @@ -955,6 +1044,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
>  {
>         struct fsnotify_mark *fsn_mark;
>         __u32 added;
> +       int ret = 0;
>
>         mutex_lock(&group->mark_mutex);
>         fsn_mark = fsnotify_find_mark(connp, group);
> @@ -965,13 +1055,18 @@ static int fanotify_add_mark(struct fsnotify_group *group,
>                         return PTR_ERR(fsn_mark);
>                 }
>         }
> -       added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
> +       ret = fanotify_mark_add_to_mask(fsn_mark, mask, flags, &added);
> +       if (ret)
> +               goto out;
> +
>         if (added & ~fsnotify_conn_mask(fsn_mark->connector))
>                 fsnotify_recalc_mask(fsn_mark->connector);
> +
> +out:
>         mutex_unlock(&group->mark_mutex);
>
>         fsnotify_put_mark(fsn_mark);
> -       return 0;
> +       return ret;
>  }
>
>  static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
> @@ -1377,6 +1472,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>
>                 fsid = &__fsid;
>         }
> +       if (mask & FAN_ERROR) {
> +               ret = fanotify_test_fsid(path.dentry, &__fsid);
> +               if (ret)
> +                       goto path_put_and_out;
> +               fsid = &__fsid;
> +       }
>
>         /* inode held in place by reference to path; group by fget on fd */
>         if (mark_type == FAN_MARK_INODE)
> diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> index bad41bcb25df..05c929d588e4 100644
> --- a/include/linux/fanotify.h
> +++ b/include/linux/fanotify.h
> @@ -81,9 +81,12 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
>  #define FANOTIFY_INODE_EVENTS  (FANOTIFY_DIRENT_EVENTS | \
>                                  FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)
>
> +#define FANOTIFY_ERROR_EVENTS  (FAN_ERROR)
> +
>  /* Events that user can request to be notified on */
>  #define FANOTIFY_EVENTS                (FANOTIFY_PATH_EVENTS | \
> -                                FANOTIFY_INODE_EVENTS)
> +                                FANOTIFY_INODE_EVENTS | \
> +                                FANOTIFY_ERROR_EVENTS)
>
>  /* Events that require a permission response from user */
>  #define FANOTIFY_PERM_EVENTS   (FAN_OPEN_PERM | FAN_ACCESS_PERM | \
> @@ -95,6 +98,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
>  /* Events that may be reported to user */
>  #define FANOTIFY_OUTGOING_EVENTS       (FANOTIFY_EVENTS | \
>                                          FANOTIFY_PERM_EVENTS | \
> +                                        FANOTIFY_ERROR_EVENTS | \
>                                          FAN_Q_OVERFLOW | FAN_ONDIR)
>
>  #define ALL_FANOTIFY_EVENT_BITS                (FANOTIFY_OUTGOING_EVENTS | \
> diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> index fbf9c5c7dd59..e3920597112f 100644
> --- a/include/uapi/linux/fanotify.h
> +++ b/include/uapi/linux/fanotify.h
> @@ -20,6 +20,7 @@
>  #define FAN_OPEN_EXEC          0x00001000      /* File was opened for exec */
>
>  #define FAN_Q_OVERFLOW         0x00004000      /* Event queued overflowed */
> +#define FAN_ERROR              0x00008000      /* Filesystem error */
>
>  #define FAN_OPEN_PERM          0x00010000      /* File open in perm check */
>  #define FAN_ACCESS_PERM                0x00020000      /* File accessed in perm check */
> @@ -123,6 +124,7 @@ struct fanotify_event_metadata {
>  #define FAN_EVENT_INFO_TYPE_FID                1
>  #define FAN_EVENT_INFO_TYPE_DFID_NAME  2
>  #define FAN_EVENT_INFO_TYPE_DFID       3
> +#define FAN_EVENT_INFO_TYPE_ERROR      4
>
>  /* Variable length info record following event metadata */
>  struct fanotify_event_info_header {
> @@ -148,6 +150,14 @@ struct fanotify_event_info_fid {
>         unsigned char handle[0];
>  };
>
> +struct fanotify_event_info_error {
> +       struct fanotify_event_info_header hdr;
> +       int error;
> +       __kernel_fsid_t fsid;
> +       unsigned long inode;

I think for UAPI this should use __u64 and better call this ino and
not inode?

Thanks,
Amir.



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux