On Tue, Jan 03, 2017 at 04:17:14PM +1300, Eric W. Biederman wrote:
> Al Viro <viro@xxxxxxxxxxxxxxxxxx> writes:
>
> > On Tue, Jan 03, 2017 at 01:51:36PM +1300, Eric W. Biederman wrote:
> >
> >> The only significant thing I see is that you have not taken the
> >> mount_lock on the path where new_mountpoint adds the new struct
> >> mountpoint into the mountpoint hash table.
> >
> > Umm... Point, but I really don't like that bouncing mount_lock up
> > and down there. It's not going to cause any serious overhead,
> > but it just looks ugly... ;-/
> >
> > Let me think for a while...
>
> The other possibility is to grab namespace_sem in mntput_no_expire
> around the call of umount_mnt. That is the only path where
> put_mountpoint can be called where we are not holding namespace_sem.
> That works in the small but I haven't traced the callers of mntput and
> mntput_no_expire yet to see if it works in practice.

No, that's a really bad idea.  Final mntput should _not_ happen under
namespace_lock, but I don't want to be grabbing it in that place.  How
about this instead:

diff --git a/fs/namespace.c b/fs/namespace.c
index b5b1259e064f..20fc797277f8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -742,29 +742,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 	return NULL;
 }
 
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
-{
-	struct hlist_head *chain = mp_hash(dentry);
-	struct mountpoint *mp;
-	int ret;
-
-	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
-	if (!mp)
-		return ERR_PTR(-ENOMEM);
-
-	ret = d_set_mounted(dentry);
-	if (ret) {
-		kfree(mp);
-		return ERR_PTR(ret);
-	}
-
-	mp->m_dentry = dentry;
-	mp->m_count = 1;
-	hlist_add_head(&mp->m_hash, chain);
-	INIT_HLIST_HEAD(&mp->m_list);
-	return mp;
-}
-
 static void put_mountpoint(struct mountpoint *mp)
 {
 	if (!--mp->m_count) {
@@ -1595,11 +1572,11 @@ void __detach_mounts(struct dentry *dentry)
 	struct mount *mnt;
 
 	namespace_lock();
+	lock_mount_hash();
 	mp = lookup_mountpoint(dentry);
 	if (IS_ERR_OR_NULL(mp))
 		goto out_unlock;
 
-	lock_mount_hash();
 	event++;
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
@@ -1609,9 +1586,9 @@ void __detach_mounts(struct dentry *dentry)
 		}
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 	}
-	unlock_mount_hash();
 	put_mountpoint(mp);
 out_unlock:
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -2027,8 +2004,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 
 static struct mountpoint *lock_mount(struct path *path)
 {
+	struct mountpoint *mp = NULL;
 	struct vfsmount *mnt;
 	struct dentry *dentry = path->dentry;
+	int err;
+
 retry:
 	inode_lock(dentry->d_inode);
 	if (unlikely(cant_mount(dentry))) {
@@ -2037,29 +2017,60 @@ static struct mountpoint *lock_mount(struct path *path)
 	}
 	namespace_lock();
 	mnt = lookup_mnt(path);
-	if (likely(!mnt)) {
-		struct mountpoint *mp = lookup_mountpoint(dentry);
-		if (!mp)
-			mp = new_mountpoint(dentry);
-		if (IS_ERR(mp)) {
+	if (unlikely(mnt)) {
+		namespace_unlock();
+		inode_unlock(path->dentry->d_inode);
+		path_put(path);
+		path->mnt = mnt;
+		dentry = path->dentry = dget(mnt->mnt_root);
+		goto retry;
+	}
+
+	/*
+	 * OK, we have namespace_lock held, nothing is overmounting
+	 * *path and inode of mountpoint to be is locked.
+	 */
+	if (likely(!d_mountpoint(dentry)))
+		mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+	read_seqlock_excl(&mount_lock);
+	if (!mp && !d_mountpoint(dentry)) {
+		read_sequnlock_excl(&mount_lock);
+		mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+		read_seqlock_excl(&mount_lock);
+	}
+	if (d_mountpoint(dentry)) {
+		kfree(mp);
+		mp = lookup_mountpoint(dentry);
+	} else {
+		if (unlikely(!mp)) {
+			read_sequnlock_excl(&mount_lock);
 			namespace_unlock();
 			inode_unlock(dentry->d_inode);
-			return mp;
+			return ERR_PTR(-ENOMEM);
 		}
-		return mp;
+		err = d_set_mounted(dentry);
+		if (unlikely(err)) {
+			kfree(mp);
+			read_sequnlock_excl(&mount_lock);
+			namespace_unlock();
+			inode_unlock(dentry->d_inode);
+			return ERR_PTR(err);
+		}
+		mp->m_dentry = dentry;
+		mp->m_count = 1;
+		hlist_add_head(&mp->m_hash, mp_hash(dentry));
+		INIT_HLIST_HEAD(&mp->m_list);
 	}
-	namespace_unlock();
-	inode_unlock(path->dentry->d_inode);
-	path_put(path);
-	path->mnt = mnt;
-	dentry = path->dentry = dget(mnt->mnt_root);
-	goto retry;
+	read_sequnlock_excl(&mount_lock);
+	return mp;
 }
 
 static void unlock_mount(struct mountpoint *where)
 {
 	struct dentry *dentry = where->m_dentry;
+	read_seqlock_excl(&mount_lock);
 	put_mountpoint(where);
+	read_sequnlock_excl(&mount_lock);
 	namespace_unlock();
 	inode_unlock(dentry->d_inode);
 }
@@ -3137,7 +3148,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	list_del_init(&new_mnt->mnt_expire);
 	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
+	read_seqlock_excl(&mount_lock);
 	put_mountpoint(root_mp);
+	read_sequnlock_excl(&mount_lock);
 	error = 0;
 out4:
 	unlock_mount(old_mp);
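
For illustration only (nothing below is part of the patch, and all names in it
are made up): the lock_mount() rework above is the usual "preallocate outside
the lock, re-check under it" shape, since kmalloc(GFP_KERNEL) may sleep while
the exclusive side of mount_lock is a spinlock and cannot be held across a
sleep.  A minimal user-space sketch of that shape, with a pthread mutex
standing in for mount_lock and the opportunistic d_mountpoint() checks reduced
to a single unconditional preallocation:

/*
 * User-space sketch (hypothetical names, not kernel code): allocate with a
 * possibly-blocking allocator before taking the non-sleeping lock, then
 * re-check under the lock and either insert the preallocated node or throw
 * it away if an entry already exists.
 */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct point {
	struct point *next;
	char key[32];
	int count;		/* reference count, like mp->m_count */
};

static struct point *points;	/* single list standing in for the hash table */
static pthread_mutex_t points_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller must hold points_lock. */
static struct point *lookup_point(const char *key)
{
	struct point *p;

	for (p = points; p; p = p->next) {
		if (!strcmp(p->key, key)) {
			p->count++;
			return p;
		}
	}
	return NULL;
}

/* Returns an existing or freshly inserted entry; NULL only if malloc failed. */
struct point *get_point(const char *key)
{
	struct point *spare, *p;

	/* May block; done before taking points_lock, like kmalloc(GFP_KERNEL). */
	spare = malloc(sizeof(*spare));

	pthread_mutex_lock(&points_lock);
	p = lookup_point(key);
	if (p) {
		free(spare);	/* somebody beat us to it; drop the spare */
	} else if (spare) {
		strncpy(spare->key, key, sizeof(spare->key) - 1);
		spare->key[sizeof(spare->key) - 1] = '\0';
		spare->count = 1;
		spare->next = points;
		points = spare;
		p = spare;
	}
	pthread_mutex_unlock(&points_lock);
	return p;
}

When the lookup under the lock finds an existing entry, the spare allocation
is simply freed; the patch does the same with kfree(mp) once d_mountpoint(dentry)
turns out to be true.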