So far the list of marks attached to an object (inode / vfsmount) was
protected by i_lock or mnt_root->d_lock. This dictates that the list must be
empty before the object can be destroyed, although the list is now anchored in
the fsnotify_mark_connector structure. Protect the list by a spinlock in the
fsnotify_mark_connector structure to decouple the lifetime of the list of
marks from the lifetime of the object. This also simplifies the code quite a
bit since we don't have to differentiate between inode and vfsmount lists in
quite a few places anymore.

Reviewed-by: Amir Goldstein <amir73il@xxxxxxxxx>
Signed-off-by: Jan Kara <jack@xxxxxxx>
---
 fs/notify/mark.c                 | 93 +++++++++++++++-------------------------
 include/linux/fsnotify_backend.h |  3 +-
 2 files changed, 36 insertions(+), 60 deletions(-)

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index a9dc7afe803d..150ccbdd9795 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -33,7 +33,7 @@
  *
  * group->mark_mutex
  * mark->lock
- * inode->i_lock
+ * mark->connector->lock
  *
  * group->mark_mutex protects the marks_list anchored inside a given group and
  * each mark is hooked via the g_list. It also protects the groups private
@@ -44,10 +44,12 @@
  * is assigned to as well as the access to a reference of the inode/vfsmount
  * that is being watched by the mark.
  *
- * inode->i_lock protects the i_fsnotify_marks list anchored inside a
- * given inode and each mark is hooked via the i_list. (and sorta the
- * free_i_list)
+ * mark->connector->lock protects the list of marks anchored inside an
+ * inode / vfsmount and each mark is hooked via the i_list.
  *
+ * A list of notification marks relating to inode / mnt is contained in
+ * fsnotify_mark_connector. That structure is alive as long as there are any
+ * marks in the list and is also protected by fsnotify_mark_srcu.
  *
  * LIFETIME:
  * Inode marks survive between when they are added to an inode and when their
@@ -110,6 +112,7 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 	u32 new_mask = 0;
 	struct fsnotify_mark *mark;
 
+	assert_spin_locked(&conn->lock);
 	hlist_for_each_entry(mark, &conn->list, obj_list) {
 		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
 			new_mask |= mark->mask;
@@ -127,22 +130,19 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
  */
 void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
+	struct inode *inode = NULL;
+
 	if (!conn)
 		return;
 
-	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
-		spin_lock(&conn->inode->i_lock);
-	else
-		spin_lock(&conn->mnt->mnt_root->d_lock);
+	spin_lock(&conn->lock);
 	__fsnotify_recalc_mask(conn);
-	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) {
-		struct inode *inode = igrab(conn->inode);
-
-		spin_unlock(&inode->i_lock);
+	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
+		inode = igrab(conn->inode);
+	spin_unlock(&conn->lock);
+	if (inode) {
 		__fsnotify_update_child_dentry_flags(inode);
 		iput(inode);
-	} else {
-		spin_unlock(&conn->mnt->mnt_root->d_lock);
 	}
 }
 
@@ -150,14 +150,9 @@ static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark)
 {
 	struct fsnotify_mark_connector *conn;
 	struct inode *inode = NULL;
-	spinlock_t *lock;
 
 	conn = mark->connector;
-	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
-		lock = &conn->inode->i_lock;
-	else
-		lock = &conn->mnt->mnt_root->d_lock;
-	spin_lock(lock);
+	spin_lock(&conn->lock);
 	hlist_del_init_rcu(&mark->obj_list);
 	if (hlist_empty(&conn->list)) {
 		if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
@@ -166,7 +161,7 @@ static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark)
 		__fsnotify_recalc_mask(conn);
 	}
 	mark->connector = NULL;
-	spin_unlock(lock);
+	spin_unlock(&conn->lock);
 
 	return inode;
 }
@@ -343,25 +338,22 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 	struct fsnotify_mark *lmark, *last = NULL;
 	struct fsnotify_mark_connector *conn;
 	struct fsnotify_mark_connector **connp;
-	spinlock_t *lock;
 	int cmp;
 	int err = 0;
 
 	if (WARN_ON(!inode && !mnt))
 		return -EINVAL;
 
-	if (inode) {
+	if (inode)
 		connp = &inode->i_fsnotify_marks;
-		lock = &inode->i_lock;
-	} else {
+	else
 		connp = &real_mount(mnt)->mnt_fsnotify_marks;
-		lock = &mnt->mnt_root->d_lock;
-	}
 
 	if (!*connp) {
 		conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
 		if (!conn)
 			return -ENOMEM;
+		spin_lock_init(&conn->lock);
 		INIT_HLIST_HEAD(&conn->list);
 		if (inode) {
 			conn->flags = FSNOTIFY_OBJ_TYPE_INODE;
@@ -371,23 +363,17 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 			conn->mnt = mnt;
 		}
 		/*
-		 * Make sure 'conn' initialization is visible. Matches
-		 * lockless_dereference() in fsnotify().
+		 * cmpxchg() provides the barrier so that readers of *connp can
+		 * see only initialized structure
 		 */
-		smp_wmb();
-		spin_lock(&mark->lock);
-		spin_lock(lock);
-		if (!*connp) {
-			*connp = conn;
-		} else {
+		if (cmpxchg(connp, NULL, conn)) {
+			/* Someone else created list structure for us */
 			kmem_cache_free(fsnotify_mark_connector_cachep, conn);
-			conn = *connp;
 		}
-	} else {
-		spin_lock(&mark->lock);
-		spin_lock(lock);
-		conn = *connp;
 	}
+	conn = *connp;
+	spin_lock(&mark->lock);
+	spin_lock(&conn->lock);
 
 	/* is mark the first mark? */
 	if (hlist_empty(&conn->list)) {
@@ -419,7 +405,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 added:
 	mark->connector = conn;
 out_err:
-	spin_unlock(lock);
+	spin_unlock(&conn->lock);
 	spin_unlock(&mark->lock);
 	return err;
 }
@@ -443,7 +429,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 	 * LOCKING ORDER!!!!
 	 * group->mark_mutex
 	 * mark->lock
-	 * inode->i_lock
+	 * mark->connector->lock
 	 */
 	spin_lock(&mark->lock);
 	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;
@@ -499,24 +485,19 @@ struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_connector *conn,
 					 struct fsnotify_group *group)
 {
 	struct fsnotify_mark *mark;
-	spinlock_t *lock;
 
 	if (!conn)
 		return NULL;
 
-	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
-		lock = &conn->inode->i_lock;
-	else
-		lock = &conn->mnt->mnt_root->d_lock;
-	spin_lock(lock);
+	spin_lock(&conn->lock);
 	hlist_for_each_entry(mark, &conn->list, obj_list) {
 		if (mark->group == group) {
 			fsnotify_get_mark(mark);
-			spin_unlock(lock);
+			spin_unlock(&conn->lock);
 			return mark;
 		}
 	}
-	spin_unlock(lock);
+	spin_unlock(&conn->lock);
 	return NULL;
 }
 
@@ -589,16 +570,10 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group)
 void fsnotify_destroy_marks(struct fsnotify_mark_connector *conn)
 {
 	struct fsnotify_mark *mark;
-	spinlock_t *lock;
 
 	if (!conn)
 		return;
 
-	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
-		lock = &conn->inode->i_lock;
-	else
-		lock = &conn->mnt->mnt_root->d_lock;
-
 	while (1) {
 		/*
 		 * We have to be careful since we can race with e.g.
@@ -607,15 +582,15 @@ void fsnotify_destroy_marks(struct fsnotify_mark_connector *conn)
 		 * we are holding mark reference so mark cannot be freed and
 		 * calling fsnotify_destroy_mark() more than once is fine.
 		 */
-		spin_lock(lock);
+		spin_lock(&conn->lock);
 		if (hlist_empty(&conn->list)) {
-			spin_unlock(lock);
+			spin_unlock(&conn->lock);
 			break;
 		}
 		mark = hlist_entry(conn->list.first, struct fsnotify_mark,
 				   obj_list);
 		fsnotify_get_mark(mark);
-		spin_unlock(lock);
+		spin_unlock(&conn->lock);
 		fsnotify_destroy_mark(mark, mark->group);
 		fsnotify_put_mark(mark);
 	}
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b954f1b2571c..02c6fac652a4 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -201,6 +201,7 @@ struct fsnotify_group {
  * inode / vfsmount gets freed.
  */
 struct fsnotify_mark_connector {
+	spinlock_t lock;
 #define FSNOTIFY_OBJ_TYPE_INODE	0x01
 #define FSNOTIFY_OBJ_TYPE_VFSMOUNT	0x02
 	unsigned int flags;	/* Type of object [lock] */
@@ -240,7 +241,7 @@ struct fsnotify_mark {
 	struct list_head g_list;
 	/* Protects inode / mnt pointers, flags, masks */
 	spinlock_t lock;
-	/* List of marks for inode / vfsmount [obj_lock] */
+	/* List of marks for inode / vfsmount [connector->lock] */
 	struct hlist_node obj_list;
 	/* Head of list of marks for an object [mark->lock, group->mark_mutex] */
 	struct fsnotify_mark_connector *connector;
-- 
2.10.2
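
P.S.: For readers less familiar with the cmpxchg()-based connector publication
used in fsnotify_add_mark_list() above, here is a minimal userspace sketch of
the same idea. This is not kernel code and not part of the patch; the names
connector, get_connector and add_mark are made up for illustration. The first
thread to attach a mark allocates the connector and publishes it with a single
compare-and-swap, so readers never observe a half-initialized structure, and
the per-connector lock then protects the mark list independently of any lock
belonging to the watched object:

/*
 * Userspace analogue (C11 atomics + pthreads) of the lazy connector
 * allocation pattern: allocate, initialize, publish with compare-and-swap;
 * losers of the race free their copy and use the published one.
 * Build with: cc -pthread connector.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct connector {
	pthread_mutex_t lock;	/* stands in for conn->lock */
	int nr_marks;		/* stands in for the hlist of marks */
};

/* The watched object (think inode / vfsmount) with a lazily set connector. */
static _Atomic(struct connector *) obj_connector;

static struct connector *get_connector(void)
{
	struct connector *expected = NULL;
	struct connector *conn = atomic_load(&obj_connector);

	if (conn)
		return conn;

	conn = calloc(1, sizeof(*conn));
	if (!conn)
		return NULL;
	pthread_mutex_init(&conn->lock, NULL);

	/*
	 * Publish the fully initialized connector.  The compare-and-swap
	 * provides the ordering the kernel patch gets from cmpxchg(), so
	 * other threads only ever see an initialized structure.  If someone
	 * else won the race, drop our copy and use theirs.
	 */
	if (!atomic_compare_exchange_strong(&obj_connector, &expected, conn)) {
		pthread_mutex_destroy(&conn->lock);
		free(conn);
		conn = expected;
	}
	return conn;
}

static void *add_mark(void *arg)
{
	struct connector *conn = get_connector();

	(void)arg;		/* unused */
	if (!conn)
		return NULL;
	/* The list of marks is protected by the connector's own lock. */
	pthread_mutex_lock(&conn->lock);
	conn->nr_marks++;
	pthread_mutex_unlock(&conn->lock);
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	struct connector *conn;
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, add_mark, NULL);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	conn = atomic_load(&obj_connector);
	if (conn)
		printf("marks attached: %d\n", conn->nr_marks);
	return 0;
}

The analogy is deliberately loose (no SRCU, no connector teardown), but it
shows why a loser of the cmpxchg() race must free its allocation and take
*connp instead, and how the connector's own lock decouples the mark list from
the lifetime and locking of the object it is attached to.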