On Wed, May 17, 2017 at 12:54:34AM -0500, Eric W. Biederman wrote: > > While investigating some poor umount performance I realized that in > the case of overlapping mount trees where some of the mounts are locked > the code has been failing to unmount all of the mounts it should > have been unmounting. > > This failure to unmount all of the necessary > mounts can be reproduced with: > > $ cat locked_mounts_test.sh > > mount -t tmpfs test-base /mnt > mount --make-shared /mnt > mkdir -p /mnt/b > > mount -t tmpfs test1 /mnt/b > mount --make-shared /mnt/b > mkdir -p /mnt/b/10 > > mount -t tmpfs test2 /mnt/b/10 > mount --make-shared /mnt/b/10 > mkdir -p /mnt/b/10/20 > > mount --rbind /mnt/b /mnt/b/10/20 > > unshare -Urm --propagation unchaged /bin/sh -c 'sleep 5; if [ $(grep test /proc/self/mountinfo | wc -l) -eq 1 ] ; then echo SUCCESS ; else echo FAILURE ; fi' > sleep 1 > umount -l /mnt/b > wait %% > > $ unshare -Urm ./locked_mounts_test.sh > > This failure is corrected by removing the prepass that marks mounts > that may be umounted. > > A first pass is added that umounts mounts if possible and if not sets > mount mark if they could be unmounted if they weren't locked and adds > them to a list to umount possibilities. This first pass reconsiders > the mounts parent if it is on the list of umount possibilities, ensuring > that information of umoutability will pass from child to mount parent. > > A second pass then walks through all mounts that are umounted and processes > their children unmounting them or marking them for reparenting. > > A last pass cleans up the state on the mounts that could not be umounted > and if applicable reparents them to their first parent that remained > mounted. > > While a bit longer than the old code this code is much more robust > as it allows information to flow up from the leaves and down > from the trunk making the order in which mounts are encountered > in the umount propgation tree irrelevant. Eric, I think we can accomplish what you want in a much simpler way. Would the patch below; UNTESTED BUT COMPILED, resolve your issue? Its a two pass unmount. First pass marks mounts that can be unmounted, and second pass does the neccessary unlinks. It does mark TUCKED mounts, and uses that information to peel off the correct mounts. Key points are a) a tucked mount never entertain any unmount propagation on its root dentry. b) when the child on the root dentry of a tucked mount is unmounted, the tucked mount is not a tucked mount anymore. c) if the child is a tucked mount, than its child is reparented to the parent. Signed-off-by: "Ram Pai" <linuxram@xxxxxxxxxx> fs/namespace.c | 4 ++- fs/pnode.c | 53 +++++++++++++++++++++++++++++++++++++------------- fs/pnode.h | 3 ++ include/linux/mount.h | 1 diff --git a/fs/namespace.c b/fs/namespace.c index cc1375ef..ff3ec90 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2050,8 +2050,10 @@ static int attach_recursive_mnt(struct mount *source_mnt, hlist_del_init(&child->mnt_hash); q = __lookup_mnt(&child->mnt_parent->mnt, child->mnt_mountpoint); - if (q) + if (q) { mnt_change_mountpoint(child, smp, q); + SET_MNT_TUCKED(child); + } commit_tree(child); } put_mountpoint(smp); diff --git a/fs/pnode.c b/fs/pnode.c index 5bc7896..b44a544 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -448,31 +448,58 @@ static void __propagate_umount(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - struct mount *topper; - struct mount *child = __lookup_mnt(&m->mnt, - mnt->mnt_mountpoint); - /* - * umount the child only if the child has no children - * and the child is marked safe to unmount. + struct mount *topper, *child; + + /* Tucked mount must drop umount propagation events on + * its **root dentry**. + * The tucked mount did not exist when that child came + * into existence. It never received that mount propagation. + * Hence it should never entertain the umount propagation + * aswell. */ + if (IS_MNT_TUCKED(m) && list_is_singular(&mnt->mnt_mounts)) + continue; + + + child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); + if (!child || !IS_MNT_MARKED(child)) continue; + CLEAR_MNT_MARK(child); - /* If there is exactly one mount covering all of child - * replace child with that mount. - */ - topper = find_topper(child); - if (topper) - mnt_change_mountpoint(child->mnt_parent, child->mnt_mp, - topper); + if (IS_MNT_TUCKED(child) && + (list_is_singular(&child->mnt_mounts))) { + topper = find_topper(child); + if (topper) { + mnt_change_mountpoint(child->mnt_parent, + child->mnt_mp, topper); + CLEAR_MNT_TUCKED(child); /*lets be precise*/ + } + } if (list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); child->mnt.mnt_flags |= MNT_UMOUNT; list_move_tail(&child->mnt_list, &mnt->mnt_list); } +#if 0 + else { + mntput(child); /* mark it for deletion. It will + be deleted whenever it looses + all its remaining references. + TODO: some more thought + needed, please validate */ + } +#endif } + + /* + * This explicit umount operation is exposing the parent. + * In case the parent was a 'tucked' mount, it cannot be so + * anymore. + */ + CLEAR_MNT_TUCKED(parent); } /* diff --git a/fs/pnode.h b/fs/pnode.h index dc87e65..9ebd1a8 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -18,8 +18,11 @@ #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) +#define SET_MNT_TUCKED(m) ((m)->mnt.mnt_flags |= MNT_TUCKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) +#define CLEAR_MNT_TUCKED(m) ((m)->mnt.mnt_flags &= ~MNT_TUCKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) +#define IS_MNT_TUCKED(m) ((m)->mnt.mnt_flags & MNT_TUCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 diff --git a/include/linux/mount.h b/include/linux/mount.h index 8e0352a..41674e7 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -62,6 +62,7 @@ #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 #define MNT_UMOUNT 0x8000000 +#define MNT_TUCKED 0x10000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */