On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@xxxxxxxxxx> wrote: > We may fail to pin one of the marks in fsnotify_prepare_user_wait() when > dropping the srcu read lock, resulting in use after free at the next > iteration. > > Solution is to store both marks in iter_info instead of just the one we'll > be sending the event for, as well as pinning the group for both (if they > have the same group: fine, pinnig it twice doesn't hurt). > > Also blind increment of group's user_waits is not enough, we could be far > enough in the group's destruction that it isn't taken into account. > Instead we need to check (under lock) that the mark is still attached to > the group after having obtained a ref to the mark. If not, skip it. > > In the process of fixing the above, clean up fsnotify_prepare_user_wait()/ > fsnotify_finish_user_wait(). > Change looks correct, but it was so hard to follow this patch. The moving of helpers around created a completely garbled diff where it is impossible to see the logic changed. Suggest to separate to 3 patches: create helpers (move them around), pass the 2 marks in iter_info and fix increment of group's user_waits. > Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx> > Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler") > Cc: <stable@xxxxxxxxxxxxxxx> # v4.12 > --- > fs/notify/fsnotify.c | 6 ++-- > fs/notify/mark.c | 100 +++++++++++++++++++++++---------------------------- > 2 files changed, 48 insertions(+), 58 deletions(-) > > diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c > index 0c4583b61717..48ec61f4c4d5 100644 > --- a/fs/notify/fsnotify.c > +++ b/fs/notify/fsnotify.c > @@ -336,6 +336,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, > vfsmount_group = vfsmount_mark->group; > } > > + iter_info.inode_mark = inode_mark; > + iter_info.vfsmount_mark = vfsmount_mark; > + > if (inode_group && vfsmount_group) { > int cmp = fsnotify_compare_groups(inode_group, > vfsmount_group); > @@ -348,9 +351,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, > } > } > > - iter_info.inode_mark = inode_mark; > - iter_info.vfsmount_mark = vfsmount_mark; > - > ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, > data, data_is, cookie, file_name, > &iter_info); > diff --git a/fs/notify/mark.c b/fs/notify/mark.c > index 9991f8826734..9a7c8dbeeb59 100644 > --- a/fs/notify/mark.c > +++ b/fs/notify/mark.c > @@ -109,16 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) > atomic_inc(&mark->refcnt); > } > > -/* > - * Get mark reference when we found the mark via lockless traversal of object > - * list. Mark can be already removed from the list by now and on its way to be > - * destroyed once SRCU period ends. > - */ > -static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) > -{ > - return atomic_inc_not_zero(&mark->refcnt); > -} > - > static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) > { > u32 new_mask = 0; > @@ -256,32 +246,53 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) > FSNOTIFY_REAPER_DELAY); > } > > -bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) > +/* > + * Get mark reference when we found the mark via lockless traversal of object > + * list. Mark can be already removed from the list by now and on its way to be > + * destroyed once SRCU period ends. > + */ > +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) > { > - struct fsnotify_group *group; > - > - if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark)) > - return false; > - > - if (iter_info->inode_mark) > - group = iter_info->inode_mark->group; > - else > - group = iter_info->vfsmount_mark->group; > + if (!mark) > + return true; > + > + if (atomic_inc_not_zero(&mark->refcnt)) { > + spin_lock(&mark->lock); > + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { > + /* mark is attached, group is still alive then */ > + atomic_inc(&mark->group->user_waits); > + spin_unlock(&mark->lock); > + return true; > + } > + spin_unlock(&mark->lock); > + fsnotify_put_mark(mark); > + } > + return false; > +} > > - /* > - * Since acquisition of mark reference is an atomic op as well, we can > - * be sure this inc is seen before any effect of refcount increment. > - */ > - atomic_inc(&group->user_waits); > +static void fsnotify_put_mark_wait(struct fsnotify_mark *mark) > +{ > + if (mark) { > + struct fsnotify_group *group = mark->group; > > - if (iter_info->inode_mark) { > - /* This can fail if mark is being removed */ > - if (!fsnotify_get_mark_safe(iter_info->inode_mark)) > - goto out_wait; > + fsnotify_put_mark(mark); > + /* > + * We abuse notification_waitq on group shutdown for waiting for all > + * marks pinned when waiting for userspace. > + */ > + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) > + wake_up(&group->notification_waitq); > } > - if (iter_info->vfsmount_mark) { > - if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) > - goto out_inode; > +} > + > +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) > +{ > + /* This can fail if mark is being removed */ > + if (!fsnotify_get_mark_safe(iter_info->inode_mark)) > + return false; > + if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) { > + fsnotify_put_mark_wait(iter_info->inode_mark); > + return false; > } > > /* > @@ -292,34 +303,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) > srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); > > return true; > -out_inode: > - if (iter_info->inode_mark) > - fsnotify_put_mark(iter_info->inode_mark); > -out_wait: > - if (atomic_dec_and_test(&group->user_waits) && group->shutdown) > - wake_up(&group->notification_waitq); > - return false; > } > > void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) > { > - struct fsnotify_group *group = NULL; > - > iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); > - if (iter_info->inode_mark) { > - group = iter_info->inode_mark->group; > - fsnotify_put_mark(iter_info->inode_mark); > - } > - if (iter_info->vfsmount_mark) { > - group = iter_info->vfsmount_mark->group; > - fsnotify_put_mark(iter_info->vfsmount_mark); > - } > - /* > - * We abuse notification_waitq on group shutdown for waiting for all > - * marks pinned when waiting for userspace. > - */ > - if (atomic_dec_and_test(&group->user_waits) && group->shutdown) > - wake_up(&group->notification_waitq); > + fsnotify_put_mark_wait(iter_info->inode_mark); > + fsnotify_put_mark_wait(iter_info->vfsmount_mark); > } > > /* > -- > 2.5.5 >