Overlayfs is using clone_private_mount() to create internal mounts for underlying layers. These are used for operations requiring a path, such as dentry_open(). Since these private mounts are not in any namespace, they are treated as short-term, "detached" mounts, and mntput() involves taking the global mount_lock, which can result in serious cacheline pingpong. Make these private mounts long-term instead, which trades the penalty on mntput() for a slightly longer shutdown time due to an added RCU grace period when putting these mounts. Introduce a new helper kern_unmount_many() that can take care of multiple long-term mounts with a single RCU grace period. Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx> --- fs/namespace.c | 16 ++++++++++++++++ fs/overlayfs/super.c | 19 ++++++++++++++----- include/linux/mount.h | 2 ++ 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index a28e4db075ed..5d16d87b6b8b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1879,6 +1879,9 @@ struct vfsmount *clone_private_mount(const struct path *path) if (IS_ERR(new_mnt)) return ERR_CAST(new_mnt); + /* Longterm mount to be removed by kern_unmount*() */ + new_mnt->mnt_ns = MNT_NS_INTERNAL; + return &new_mnt->mnt; } EXPORT_SYMBOL_GPL(clone_private_mount); @@ -3804,6 +3807,19 @@ void kern_unmount(struct vfsmount *mnt) } EXPORT_SYMBOL(kern_unmount); +void kern_unmount_many(struct vfsmount *mnt[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) + if (mnt[i]) + real_mount(mnt[i])->mnt_ns = NULL; + synchronize_rcu_expedited(); + for (i = 0; i < num; i++) + mntput(mnt[i]); +} +EXPORT_SYMBOL(kern_unmount_many); + bool our_mnt(struct vfsmount *mnt) { return check_mnt(real_mount(mnt)); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 60dfb27bc12b..a938dd2521b2 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -225,12 +225,21 @@ static void ovl_free_fs(struct ovl_fs 
*ofs) dput(ofs->workbasedir); if (ofs->upperdir_locked) ovl_inuse_unlock(ofs->upper_mnt->mnt_root); - mntput(ofs->upper_mnt); - for (i = 1; i < ofs->numlayer; i++) { - iput(ofs->layers[i].trap); - mntput(ofs->layers[i].mnt); + + if (!ofs->layers) { + /* Deal with partial setup */ + kern_unmount(ofs->upper_mnt); + } else { + /* Hack! Reuse ofs->layers as a mounts array */ + struct vfsmount **mounts = (struct vfsmount **) ofs->layers; + + for (i = 0; i < ofs->numlayer; i++) { + iput(ofs->layers[i].trap); + mounts[i] = ofs->layers[i].mnt; + } + kern_unmount_many(mounts, ofs->numlayer); + kfree(ofs->layers); } - kfree(ofs->layers); for (i = 0; i < ofs->numfs; i++) free_anon_bdev(ofs->fs[i].pseudo_dev); kfree(ofs->fs); diff --git a/include/linux/mount.h b/include/linux/mount.h index bf8cc4108b8f..e3e994bfcecb 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -109,4 +109,6 @@ extern unsigned int sysctl_mount_max; extern bool path_is_mountpoint(const struct path *path); +extern void kern_unmount_many(struct vfsmount *mnt[], unsigned int num); + #endif /* _LINUX_MOUNT_H */ -- 2.21.1