On Fri, Jan 6, 2017 at 12:43 PM, Jan Kara <jack@xxxxxxx> wrote: > Instead of removing mark from object list from fsnotify_detach_mark(), > remove the mark when last reference to the mark is dropped. This will > allow fanotify to wait for userspace response to event without having to > hold onto fsnotify_mark_srcu. > > Signed-off-by: Jan Kara <jack@xxxxxxx> Some nits here. and re-echoing of comments from the patch that introduces fsnotify_mark_connector, because IMO, this code makes a good example for these arguments. > --- > fs/notify/mark.c | 198 +++++++++++++++++++++++---------------- > include/linux/fsnotify_backend.h | 4 +- > kernel/audit_tree.c | 20 +--- > 3 files changed, 124 insertions(+), 98 deletions(-) > > diff --git a/fs/notify/mark.c b/fs/notify/mark.c > index 521c4bd9b497..dc94b5df7627 100644 > --- a/fs/notify/mark.c > +++ b/fs/notify/mark.c > @@ -49,7 +49,13 @@ > * > * A list of notification marks relating to inode / mnt is contained in > * fsnotify_mark_connector. That structure is alive as long as there are any marks > - * in the list and is also protected by fsnotify_mark_srcu. > + * in the list and is also protected by fsnotify_mark_srcu. A mark gets > + * detached from fsnotify_mark_connector when last reference to the mark is dropped. > + * Thus having mark reference is enough to protect mark->connector pointer > + * and to make sure fsnotify_mark_connector cannot disappear. Also because we remove > + * mark from g_list before dropping mark reference associated with that, any > + * mark found through g_list is guaranteed to have mark->connector set > + * until we drop group->mark_mutex. > * > * LIFETIME: > * Inode marks survive between when they are added to an inode and when their > @@ -103,17 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) > atomic_inc(&mark->refcnt); > } > > -void fsnotify_put_mark(struct fsnotify_mark *mark) > -{ > - if (atomic_dec_and_test(&mark->refcnt)) { > - spin_lock(&destroy_lock); > - list_add(&mark->g_list, &destroy_list); > - spin_unlock(&destroy_lock); > - queue_delayed_work(system_unbound_wq, &reaper_work, > - FSNOTIFY_REAPER_DELAY); > - } > -} > - > static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) > { > u32 new_mask = 0; > @@ -168,55 +163,97 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) > } > } > > -static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) > +static struct inode *fsnotify_detach_connector_from_object( > + struct fsnotify_mark_connector *conn) > { > - struct fsnotify_mark_connector *conn; > struct inode *inode = NULL; > - bool free_conn = false; > > - conn = mark->connector; > - spin_lock(&conn->lock); > - hlist_del_init_rcu(&mark->obj_list); > - if (hlist_empty(&conn->list)) { > - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { > - inode = conn->inode; > - inode->i_fsnotify_marks = NULL; > - inode->i_fsnotify_mask = 0; > - conn->inode = NULL; > - conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; > - } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { > - real_mount(conn->mnt)->mnt_fsnotify_marks = NULL; > - real_mount(conn->mnt)->mnt_fsnotify_mask = 0; > - conn->mnt = NULL; > - conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; > - } > - free_conn = true; > - } else > - __fsnotify_recalc_mask(conn); > - mark->connector = NULL; > - spin_unlock(&conn->lock); > + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { > + inode = conn->inode; > + inode->i_fsnotify_marks = NULL; Repeating question about the need to use rcu_assign_pointer here, so you may address it in this patch instead of the patch that introduced the connector struct. > + inode->i_fsnotify_mask = 0; > + conn->inode = NULL; > + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; > + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { > + real_mount(conn->mnt)->mnt_fsnotify_marks = NULL; > + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; > + conn->mnt = NULL; > + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; > + } > + > + return inode; > +} > + > +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) > +{ > + if (mark->group) > + fsnotify_put_group(mark->group); > + mark->free_mark(mark); > +} > > - if (free_conn) { > +void fsnotify_put_mark(struct fsnotify_mark *mark) > +{ > + /* Catch marks that were actually never attached to object */ > + if (!mark->connector && atomic_dec_and_test(&mark->refcnt)) { > + fsnotify_final_mark_destroy(mark); > + return; > + } > + > + /* > + * We have to be careful so that traversals of obj_list under lock can > + * safely grab mark reference. > + */ > + if (atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) { Style nit: maybe if (!atomic_dec_and_lock(...)) return; ? > + struct fsnotify_mark_connector *conn; > + struct inode *inode = NULL; > + bool free_conn = false; > + > + conn = mark->connector; > + hlist_del_init_rcu(&mark->obj_list); > + if (hlist_empty(&conn->list)) { > + inode = fsnotify_detach_connector_from_object(conn); > + free_conn = true; > + } else > + __fsnotify_recalc_mask(conn); > + mark->connector = NULL; > + spin_unlock(&conn->lock); > + > + if (inode) > + iput(inode); > + > + if (free_conn) { > + spin_lock(&destroy_lock); > + conn->destroy_next = connector_destroy_list; > + connector_destroy_list = conn; > + spin_unlock(&destroy_lock); > + queue_work(system_unbound_wq, &connector_reaper_work); > + } > + /* > + * Note that we didn't update flags telling whether inode cares > + * about what's happening with children. We update these flags > + * from __fsnotify_parent() lazily when next event happens on > + * one of our children. > + */ > spin_lock(&destroy_lock); > - conn->destroy_next = connector_destroy_list; > - connector_destroy_list = conn; > + list_add(&mark->g_list, &destroy_list); > spin_unlock(&destroy_lock); > - queue_work(system_unbound_wq, &connector_reaper_work); > + queue_delayed_work(system_unbound_wq, &reaper_work, > + FSNOTIFY_REAPER_DELAY); > } > - > - return inode; > } > > /* > - * Remove mark from inode / vfsmount list, group list, drop inode reference > - * if we got one. > + * Mark mark as dead, remove it from group list. Mark still stays in object Nit: marking mark as "dead" would imply mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE which does not happen here. need to be careful with wording... You probably meant to write "Mark mark as detached...". > + * list until its last reference is dropped. Note that we rely on mark being > + * removed from group list before corresponding reference to it is dropped. In > + * particular we rely on mark->connector being valid while we hold > + * group->mark_mutex if we found the mark through g_list. > * > * Must be called with group->mark_mutex held. The caller must either hold > * reference to the mark or be protected by fsnotify_mark_srcu. > */ > void fsnotify_detach_mark(struct fsnotify_mark *mark) > { > - struct inode *inode = NULL; > struct fsnotify_group *group = mark->group; > > WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); > @@ -225,31 +262,15 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) > !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); > > spin_lock(&mark->lock); > - > /* something else already called this function on this mark */ > if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { > spin_unlock(&mark->lock); > return; > } > - > mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; > - > - inode = fsnotify_detach_from_object(mark); > - > - /* > - * Note that we didn't update flags telling whether inode cares about > - * what's happening with children. We update these flags from > - * __fsnotify_parent() lazily when next event happens on one of our > - * children. > - */ > - > list_del_init(&mark->g_list); > - > spin_unlock(&mark->lock); > > - if (inode) > - iput(inode); > - > atomic_dec(&group->num_marks); > > /* Drop mark reference acquired in fsnotify_add_mark_locked() */ > @@ -436,7 +457,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, > hlist_for_each_entry(lmark, &conn->list, obj_list) { > last = lmark; > > - if ((lmark->group == mark->group) && !allow_dups) { > + if ((lmark->group == mark->group) && > + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && > + !allow_dups) { > err = -EEXIST; > goto out_err; > } > @@ -487,7 +510,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, > mark->group = group; > list_add(&mark->g_list, &group->marks_list); > atomic_inc(&group->num_marks); > - fsnotify_get_mark(mark); /* for i_list and g_list */ > + fsnotify_get_mark(mark); /* for g_list */ > spin_unlock(&mark->lock); > > ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); > @@ -533,7 +556,8 @@ struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_connector **connp, > if (!conn) > return NULL; > hlist_for_each_entry(mark, &conn->list, obj_list) { > - if (mark->group == group) { > + if (mark->group == group && > + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { > fsnotify_get_mark(mark); > fsnotify_put_connector(conn); > return mark; > @@ -613,23 +637,39 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) > void fsnotify_destroy_marks(struct fsnotify_mark_connector **connp) > { > struct fsnotify_mark_connector *conn; > - struct fsnotify_mark *mark; > + struct fsnotify_mark *mark, *old_mark = NULL; > + struct inode *inode; > > - while ((conn = fsnotify_grab_connector(connp))) { > - /* > - * We have to be careful since we can race with e.g. > - * fsnotify_clear_marks_by_group() and once we drop the list > - * lock, mark can get removed from the obj_list and destroyed. > - * But we are holding mark reference so mark cannot be freed > - * and calling fsnotify_destroy_mark() more than once is fine. > - */ > - mark = hlist_entry(conn->list.first, struct fsnotify_mark, > - obj_list); > + conn = fsnotify_grab_connector(connp); > + if (!conn) > + return; > + /* > + * We have to be careful since we can race with e.g. > + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the > + * list can get modified. However we are holding mark reference and > + * thus our mark cannot be removed from obj_list so we can continue > + * iteration after regaining conn->lock. > + */ > + hlist_for_each_entry(mark, &conn->list, obj_list) { > fsnotify_get_mark(mark); > - fsnotify_put_connector(conn); > + spin_unlock(&conn->lock); > + if (old_mark) > + fsnotify_put_mark(old_mark); > + old_mark = mark; > fsnotify_destroy_mark(mark, mark->group); > - fsnotify_put_mark(mark); > + spin_lock(&conn->lock); > } > + /* > + * Detach list from object now so that we don't pin inode until all > + * mark references get dropped. It would lead to strange results such > + * as delaying inode deletion or blocking unmount. > + */ > + inode = fsnotify_detach_connector_from_object(conn); > + fsnotify_put_connector(conn); In this code, it is extra dissonant that spin_unlock(&conn->lock); is obfuscated by fsnotify_put_connector(conn); since you (rightfully) used spin_unlock() inside the for loop. As I suggested in earlier patch - get rid of fsnotify_put_connector() helper is the easy road. > + if (inode) > + iput(inode); > + if (old_mark) > + fsnotify_put_mark(old_mark); It is not clear to me from the comment above if the order of iput(inode) and fsnotify_put_mark(old_mark) is meaningful or arbitrary, because af far as I understand "until all mark references get dropped" does not refer to "old_mark". If the order is arbitrary, to me it would have looked better to see the snippet missing from within the for loop here: spin_unlock(&conn->lock); if (old_mark) fsnotify_put_mark(old_mark); And iput(inode) as the final chord of this function. But if order is not arbitrary, please elaborate in comment. > } > > /* > @@ -662,9 +702,7 @@ void fsnotify_mark_destroy_list(void) > > list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { > list_del_init(&mark->g_list); > - if (mark->group) > - fsnotify_put_group(mark->group); > - mark->free_mark(mark); > + fsnotify_final_mark_destroy(mark); > } > } > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html