Instead of removing mark from object list from fsnotify_detach_mark(), remove the mark when last reference to the mark is dropped. This will allow fanotify to wait for userspace response to event without having to hold onto fsnotify_mark_srcu. Signed-off-by: Jan Kara <jack@xxxxxxx> --- fs/notify/mark.c | 166 +++++++++++++++++++++++++-------------- include/linux/fsnotify_backend.h | 4 +- 2 files changed, 108 insertions(+), 62 deletions(-) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index a9870beabe67..55550dad6617 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -49,7 +49,13 @@ * * A list of notification marks relating to inode / mnt is contained in * fsnotify_mark_list. That structure is alive as long as there are any marks - * in the list and is also protected by fsnotify_mark_srcu. + * in the list and is also protected by fsnotify_mark_srcu. A mark gets + * detached from fsnotify_mark_list when last reference to the mark is dropped. + * Thus having mark reference is enough to protect mark->obj_list_head pointer + * and to make sure fsnotify_mark_list cannot disappear. Also because we remove + * mark from g_list before dropping mark reference associated with that, any + * mark found through g_list is guaranteed to have mark->obj_list_head set + * until we drop group->mark_mutex. * * LIFETIME: * Inode marks survive between when they are added to an inode and when their @@ -102,15 +108,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) atomic_inc(&mark->refcnt); } -void fsnotify_put_mark(struct fsnotify_mark *mark) -{ - if (atomic_dec_and_test(&mark->refcnt)) { - if (mark->group) - fsnotify_put_group(mark->group); - mark->free_mark(mark); - } -} - static void __fsnotify_recalc_mask(struct fsnotify_mark_list *list) { u32 new_mask = 0; @@ -165,34 +162,47 @@ static void fsnotify_list_destroy_workfn(struct work_struct *work) } } -static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) +static struct inode *fsnotify_detach_list_from_object( + struct fsnotify_mark_list *list) +{ + struct inode *inode = NULL; + + if (list->flags & FSNOTIFY_LIST_TYPE_INODE) { + inode = list->inode; + inode->i_fsnotify_marks = NULL; + inode->i_fsnotify_mask = 0; + list->inode = NULL; + list->flags &= ~FSNOTIFY_LIST_TYPE_INODE; + } else if (list->flags & FSNOTIFY_LIST_TYPE_VFSMOUNT) { + real_mount(list->mnt)->mnt_fsnotify_marks = NULL; + real_mount(list->mnt)->mnt_fsnotify_mask = 0; + list->mnt = NULL; + list->flags &= ~FSNOTIFY_LIST_TYPE_VFSMOUNT; + } + + return inode; +} + +/* Called with mark->obj_list_head->lock held, releases it */ +static void fsnotify_detach_from_object(struct fsnotify_mark *mark) { struct fsnotify_mark_list *list; struct inode *inode = NULL; bool free_list = false; list = mark->obj_list_head; - spin_lock(&list->lock); hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&list->list)) { - if (list->flags & FSNOTIFY_LIST_TYPE_INODE) { - inode = list->inode; - inode->i_fsnotify_marks = NULL; - inode->i_fsnotify_mask = 0; - list->inode = NULL; - list->flags &= ~FSNOTIFY_LIST_TYPE_INODE; - } else if (list->flags & FSNOTIFY_LIST_TYPE_VFSMOUNT) { - real_mount(list->mnt)->mnt_fsnotify_marks = NULL; - real_mount(list->mnt)->mnt_fsnotify_mask = 0; - list->mnt = NULL; - list->flags &= ~FSNOTIFY_LIST_TYPE_VFSMOUNT; - } + inode = fsnotify_detach_list_from_object(list); free_list = true; } else __fsnotify_recalc_mask(list); mark->obj_list_head = NULL; spin_unlock(&list->lock); + if (inode) + iput(inode); + if (free_list) { spin_lock(&destroy_lock); list->destroy_next = list_destroy_list; @@ -200,49 +210,66 @@ static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) spin_unlock(&destroy_lock); queue_work(system_unbound_wq, &list_reaper_work); } +} - return inode; +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) +{ + if (mark->group) + fsnotify_put_group(mark->group); + mark->free_mark(mark); +} + +void fsnotify_put_mark(struct fsnotify_mark *mark) +{ + /* Catch marks that were actually never attached to object */ + if (!mark->obj_list_head && atomic_dec_and_test(&mark->refcnt)) { + fsnotify_final_mark_destroy(mark); + return; + } + + /* + * We have to be careful so that traversals of obj_list under lock can + * safely grab mark reference. + */ + if (atomic_dec_and_lock(&mark->refcnt, &mark->obj_list_head->lock)) { + fsnotify_detach_from_object(mark); + /* + * Note that we didn't update flags telling whether inode cares + * about what's happening with children. We update these flags + * from __fsnotify_parent() lazily when next event happens on + * one of our children. + */ + fsnotify_final_mark_destroy(mark); + } } /* - * Remove mark from inode / vfsmount list, group list, drop inode reference - * if we got one. + * Mark mark as dead, remove it from group list. Mark still stays in object + * list until its last reference is dropped. The reference corresponding to + * group list gets dropped after SRCU period ends from + * fsnotify_mark_destroy_list(). Note that we rely on mark being removed from + * group list before corresponding reference to it is dropped. In particular we + * rely on mark->obj_list_head being valid while we hold group->mark_mutex if + * we found the mark through g_list. * * Must be called with group->mark_mutex held. */ void fsnotify_detach_mark(struct fsnotify_mark *mark) { - struct inode *inode = NULL; struct fsnotify_group *group = mark->group; BUG_ON(!mutex_is_locked(&group->mark_mutex)); spin_lock(&mark->lock); - /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { spin_unlock(&mark->lock); return; } - mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; - - inode = fsnotify_detach_from_object(mark); - - /* - * Note that we didn't update flags telling whether inode cares about - * what's happening with children. We update these flags from - * __fsnotify_parent() lazily when next event happens on one of our - * children. - */ - list_del_init(&mark->g_list); - spin_unlock(&mark->lock); - if (inode) - iput(inode); - atomic_dec(&group->num_marks); } @@ -445,7 +472,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, hlist_for_each_entry(lmark, &list->list, obj_list) { last = lmark; - if ((lmark->group == mark->group) && !allow_dups) { + if ((lmark->group == mark->group) && + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && + !allow_dups) { err = -EEXIST; goto out_err; } @@ -496,7 +525,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, mark->group = group; list_add(&mark->g_list, &group->marks_list); atomic_inc(&group->num_marks); - fsnotify_get_mark(mark); /* for i_list and g_list */ + fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); @@ -549,7 +578,8 @@ struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_list **listp, if (!list) return NULL; hlist_for_each_entry(mark, &list->list, obj_list) { - if (mark->group == group) { + if (mark->group == group && + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { fsnotify_get_mark(mark); fsnotify_put_list(list); return mark; @@ -629,23 +659,39 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) void fsnotify_destroy_marks(struct fsnotify_mark_list **listp) { struct fsnotify_mark_list *list; - struct fsnotify_mark *mark; + struct fsnotify_mark *mark, *old_mark = NULL; + struct inode *inode; - while ((list = fsnotify_grab_list(listp))) { - /* - * We have to be careful since we can race with e.g. - * fsnotify_clear_marks_by_group() and once we drop the list - * lock, mark can get removed from the obj_list and destroyed. - * But we are holding mark reference so mark cannot be freed - * and calling fsnotify_destroy_mark() more than once is fine. - */ - mark = hlist_entry(list->list.first, struct fsnotify_mark, - obj_list); + list = fsnotify_grab_list(listp); + if (!list) + return; + /* + * We have to be careful since we can race with e.g. + * fsnotify_clear_marks_by_group() and once we drop the list->lock, the + * list can get modified. However we are holding mark reference and + * thus our mark cannot be removed from obj_list so we can continue + * iteration after regaining list->lock. + */ + hlist_for_each_entry(mark, &list->list, obj_list) { fsnotify_get_mark(mark); - fsnotify_put_list(list); + spin_unlock(&list->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + old_mark = mark; fsnotify_destroy_mark(mark, mark->group); - fsnotify_put_mark(mark); + spin_lock(&list->lock); } + /* + * Detach list from object now so that we don't pin inode until all + * mark references get dropped. It would lead to strange results such + * as delaying inode deletion or blocking unmount. + */ + inode = fsnotify_detach_list_from_object(list); + fsnotify_put_list(list); + if (inode) + iput(inode); + if (old_mark) + fsnotify_put_mark(old_mark); } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6086fc7ff6df..76b3c34172c7 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -244,9 +244,9 @@ struct fsnotify_mark { struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; - /* List of marks for inode / vfsmount [obj_list_head->lock] */ + /* List of marks for inode / vfsmount [obj_list_head->lock, mark ref] */ struct hlist_node obj_list; - /* Head of list of marks for an object [mark->lock, group->mark_mutex] */ + /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_list *obj_list_head; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html