fanotify wants to drop fsnotify_mark_srcu lock when waiting for response from userspace so that the whole notification subsystem is not blocked during that time. This patch provides a framework for safely getting a reference to a mark found in the object list, which pins the mark in that list. We can then drop fsnotify_mark_srcu, wait for the userspace response and then safely continue iteration of the object list once we reacquire fsnotify_mark_srcu. Signed-off-by: Jan Kara <jack@xxxxxxx> --- fs/notify/group.c | 1 + fs/notify/mark.c | 87 ++++++++++++++++++++++++++++++++++++++++ include/linux/fsnotify_backend.h | 8 ++++ 3 files changed, 96 insertions(+) diff --git a/fs/notify/group.c b/fs/notify/group.c index 0fb4aadcc19f..79439cdf16e0 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -126,6 +126,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) /* set to 0 when there a no external references to this group */ atomic_set(&group->refcnt, 1); atomic_set(&group->num_marks, 0); + atomic_set(&group->user_waits, 0); spin_lock_init(&group->notification_lock); INIT_LIST_HEAD(&group->notification_list); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index fee4255e9227..c5c1dcc8fa00 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -109,6 +109,16 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) atomic_inc(&mark->refcnt); } +/* + * Get mark reference when we found the mark via lockless traversal of object + * list. Mark can be already removed from the list by now and on its way to be + * destroyed once SRCU period ends. 
+ */ +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) +{ + return atomic_inc_not_zero(&mark->refcnt); +} + static void __fsnotify_recalc_mask(struct fsnotify_mark_list *list) { u32 new_mask = 0; @@ -248,6 +258,77 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) } } +bool fsnotify_prepare_user_wait(struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + int *srcu_idx) +{ + struct fsnotify_group *group; + + if (WARN_ON_ONCE(!inode_mark && !vfsmount_mark)) + return false; + + if (inode_mark) + group = inode_mark->group; + else + group = vfsmount_mark->group; + + /* + * Since acquisition of mark reference is an atomic op as well, we can + * be sure this inc is seen before any effect of refcount increment. + */ + atomic_inc(&group->user_waits); + + if (inode_mark) { + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(inode_mark)) + goto out_wait; + } + if (vfsmount_mark) { + if (!fsnotify_get_mark_safe(vfsmount_mark)) + goto out_inode; + } + + /* + * Now that both marks are pinned by refcount we can drop SRCU lock. + * Marks can still be removed from the list but because of refcount + * they cannot be destroyed and we can safely resume the list iteration + * once userspace returns. 
+ */ + srcu_read_unlock(&fsnotify_mark_srcu, *srcu_idx); + + return true; +out_inode: + if (inode_mark) + fsnotify_put_mark(inode_mark); +out_wait: + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); + return false; +} + +void fsnotify_finish_user_wait(struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + int *srcu_idx) +{ + struct fsnotify_group *group = NULL; + + *srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); + if (inode_mark) { + group = inode_mark->group; + fsnotify_put_mark(inode_mark); + } + if (vfsmount_mark) { + group = vfsmount_mark->group; + fsnotify_put_mark(vfsmount_mark); + } + /* + * We abuse notification_waitq on group shutdown for waiting for all + * marks pinned when waiting for userspace. + */ + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); +} + /* * Mark mark as dead, remove it from group list. Mark still stays in object * list until its last reference is dropped. Note that we rely on mark being @@ -636,6 +717,12 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) fsnotify_free_mark(mark); fsnotify_put_mark(mark); } + /* + * Some marks can still be pinned when waiting for response from + * userspace. Wait for those now. fsnotify_prepare_user_wait() will + * not succeed now so this wait is race-free. 
+ */ + wait_event(group->notification_waitq, !atomic_read(&group->user_waits)); } /* Destroy all marks attached to inode / vfsmount */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 76b3c34172c7..27223e254e00 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -162,6 +162,8 @@ struct fsnotify_group { struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ + atomic_t user_waits; /* Number of tasks waiting for user + * response */ /* groups can define private fields here or use the void *private */ union { @@ -367,6 +369,12 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_unmount_inodes(struct super_block *sb); +extern void fsnotify_finish_user_wait(struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + int *srcu_idx); +extern bool fsnotify_prepare_user_wait(struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + int *srcu_idx); /* put here because inotify does some weird stuff when destroying watches */ extern void fsnotify_init_event(struct fsnotify_event *event, -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html