Hi Eric,

I tested both patches and I haven't found any issues. Thanks.

On Wed, May 17, 2017 at 12:55:16AM -0500, Eric W. Biederman wrote:
>
> Andrei Vagin pointed out that the time to execute propagate_umount can
> go non-linear (and take a ludicrous amount of time) when the mount
> propagation trees of the mounts to be unmounted by a lazy unmount
> overlap.
>
> Make the walk of the mount propagation trees nearly linear by
> remembering which mounts have already been visited, allowing
> subsequent walks to detect when walking a mount propagation tree or a
> subtree of a mount propagation tree would be duplicate work and to
> skip them entirely.
>
> Walk the list of mounts whose propagation trees need to be traversed
> from the mount highest in the mount tree to mounts lower in the mount
> tree so that the odds are higher that the code will walk the largest
> trees first, allowing later tree walks to be skipped entirely.
>
> Add cleanup_umount_visitations to remove the code's memory of which
> mounts have been visited.
>
> Add the functions last_slave and skip_propagation_subtree to allow
> skipping appropriate parts of the mount propagation tree without
> needing to change the logic of the rest of the code.
>
> A script to generate overlapping mount propagation trees:
>
> $ cat run.sh
> set -e
> mount -t tmpfs zdtm /mnt
> mkdir -p /mnt/1 /mnt/2
> mount -t tmpfs zdtm /mnt/1
> mount --make-shared /mnt/1
> mkdir /mnt/1/1
>
> iteration=10
> if [ -n "$1" ] ; then
> 	iteration=$1
> fi
>
> for i in $(seq $iteration); do
> 	mount --bind /mnt/1/1 /mnt/1/1
> done
>
> mount --rbind /mnt/1 /mnt/2
>
> TIMEFORMAT='%Rs'
> nr=$(( ( 2 ** ( $iteration + 1 ) ) + 1 ))
> echo -n "umount -l /mnt/1 -> $nr "
> time umount -l /mnt/1
>
> nr=$(cat /proc/self/mountinfo | grep zdtm | wc -l )
> echo -n "umount -l /mnt/2 -> $nr "
> time umount -l /mnt/2
>
> $ for i in $(seq 9 19); do echo $i; unshare -Urm bash ./run.sh $i; done
>
> Here are the performance numbers with and without the patch:
>
>    mhash |    8192 |   8192 | 1048576 | 1048576
>   mounts |  before |  after |  before |   after
> -----------------------------------------------
>     1025 |  0.040s | 0.016s |  0.038s |  0.019s
>     2049 |  0.094s | 0.017s |  0.080s |  0.018s
>     4097 |  0.243s | 0.019s |  0.206s |  0.023s
>     8193 |  1.202s | 0.028s |  1.562s |  0.032s
>    16385 |  9.635s | 0.036s |  9.952s |  0.041s
>    32769 | 60.928s | 0.063s | 44.321s |  0.064s
>    65537 |         | 0.097s |         |  0.097s
>   131073 |         | 0.233s |         |  0.176s
>   262145 |         | 0.653s |         |  0.344s
>   524289 |         | 2.305s |         |  0.735s
>  1048577 |         | 7.107s |         |  2.603s
>
> Andrei Vagin reports that fixing this performance problem is part of
> the work to fix CVE-2016-6213.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: a05964f3917c ("[PATCH] shared mounts handling: umount")
> Reported-by: Andrei Vagin <avagin@xxxxxxxxxx>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
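By the way, for anyone else reading along: the "visited" bookkeeping is
just an intrusive list link whose emptiness doubles as a "not yet seen"
flag. The patch reuses mnt_umounting for the link and undoes the marks
with list_del_init() so the flag reads as clear again on the next walk.
A minimal userspace sketch of that pattern (struct node, mark_visited
and friends are my own illustrative names, not kernel code):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void INIT_LIST_HEAD(struct list_head *l) { l->next = l->prev = l; }
static int list_empty(const struct list_head *l) { return l->next == l; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

/* list_del_init(), not list_del(): the link must read as empty again */
static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	INIT_LIST_HEAD(n);
}

struct node {
	const char *name;
	struct list_head visit;		/* empty <=> not visited yet */
};

/* returns 1 the first time a node is seen, 0 on every later attempt */
static int mark_visited(struct node *n, struct list_head *visited)
{
	if (!list_empty(&n->visit))
		return 0;
	list_add_tail(&n->visit, visited);
	return 1;
}

int main(void)
{
	struct list_head visited = LIST_HEAD_INIT(visited);
	struct node a = { "a", LIST_HEAD_INIT(a.visit) };

	printf("first:  %d\n", mark_visited(&a, &visited));	/* 1 */
	printf("second: %d\n", mark_visited(&a, &visited));	/* 0 */

	/* drop the marks so a later walk starts clean, the way
	 * cleanup_umount_visitations() does for mnt_umounting */
	while (!list_empty(&visited)) {
		struct node *n = (struct node *)((char *)visited.next -
						 offsetof(struct node, visit));
		list_del_init(&n->visit);
	}
	return 0;
}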
Biederman" <ebiederm@xxxxxxxxxxxx> > --- > fs/pnode.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 62 insertions(+), 1 deletion(-) > > diff --git a/fs/pnode.c b/fs/pnode.c > index fbaca7df2eb0..53d411a371ce 100644 > --- a/fs/pnode.c > +++ b/fs/pnode.c > @@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p) > return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave); > } > > +static inline struct mount *last_slave(struct mount *p) > +{ > + return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave); > +} > + > static inline struct mount *next_slave(struct mount *p) > { > return list_entry(p->mnt_slave.next, struct mount, mnt_slave); > @@ -162,6 +167,19 @@ static struct mount *propagation_next(struct mount *m, > } > } > > +static struct mount *skip_propagation_subtree(struct mount *m, > + struct mount *origin) > +{ > + /* > + * Advance m such that propagation_next will not return > + * the slaves of m. > + */ > + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) > + m = last_slave(m); > + > + return m; > +} > + > static struct mount *next_group(struct mount *m, struct mount *origin) > { > while (1) { > @@ -505,6 +523,15 @@ static void restore_mounts(struct list_head *to_restore) > } > } > > +static void cleanup_umount_visitations(struct list_head *visited) > +{ > + while (!list_empty(visited)) { > + struct mount *mnt = > + list_first_entry(visited, struct mount, mnt_umounting); > + list_del_init(&mnt->mnt_umounting); > + } > +} > + > /* > * collect all mounts that receive propagation from the mount in @list, > * and return these additional mounts in the same list. > @@ -517,11 +544,23 @@ int propagate_umount(struct list_head *list) > struct mount *mnt; > LIST_HEAD(to_restore); > LIST_HEAD(to_umount); > + LIST_HEAD(visited); > > - list_for_each_entry(mnt, list, mnt_list) { > + /* Find candidates for unmounting */ > + list_for_each_entry_reverse(mnt, list, mnt_list) { > struct mount *parent = mnt->mnt_parent; > struct mount *m; > > + /* > + * If this mount has already been visited it is known that it's > + * entire peer group and all of their slaves in the propagation > + * tree for the mountpoint has already been visited and there is > + * no need to visit them again. > + */ > + if (!list_empty(&mnt->mnt_umounting)) > + continue; > + > + list_add_tail(&mnt->mnt_umounting, &visited); > for (m = propagation_next(parent, parent); m; > m = propagation_next(m, parent)) { > struct mount *child = __lookup_mnt(&m->mnt, > @@ -529,6 +568,27 @@ int propagate_umount(struct list_head *list) > if (!child) > continue; > > + if (!list_empty(&child->mnt_umounting)) { > + /* > + * If the child has already been visited it is > + * know that it's entire peer group and all of > + * their slaves in the propgation tree for the > + * mountpoint has already been visited and there > + * is no need to visit this subtree again. > + */ > + m = skip_propagation_subtree(m, parent); > + continue; > + } else if (child->mnt.mnt_flags & MNT_UMOUNT) { > + /* > + * We have come accross an partially unmounted > + * mount in list that has not been visited yet. > + * Remember it has been visited and continue > + * about our merry way. 
> @@ -505,6 +523,15 @@ static void restore_mounts(struct list_head *to_restore)
>  	}
>  }
>  
> +static void cleanup_umount_visitations(struct list_head *visited)
> +{
> +	while (!list_empty(visited)) {
> +		struct mount *mnt =
> +			list_first_entry(visited, struct mount, mnt_umounting);
> +		list_del_init(&mnt->mnt_umounting);
> +	}
> +}
> +
>  /*
>   * collect all mounts that receive propagation from the mount in @list,
>   * and return these additional mounts in the same list.
> @@ -517,11 +544,23 @@ int propagate_umount(struct list_head *list)
>  	struct mount *mnt;
>  	LIST_HEAD(to_restore);
>  	LIST_HEAD(to_umount);
> +	LIST_HEAD(visited);
>  
> -	list_for_each_entry(mnt, list, mnt_list) {
> +	/* Find candidates for unmounting */
> +	list_for_each_entry_reverse(mnt, list, mnt_list) {
>  		struct mount *parent = mnt->mnt_parent;
>  		struct mount *m;
>  
> +		/*
> +		 * If this mount has already been visited it is known that its
> +		 * entire peer group and all of their slaves in the propagation
> +		 * tree for the mountpoint have already been visited and there
> +		 * is no need to visit them again.
> +		 */
> +		if (!list_empty(&mnt->mnt_umounting))
> +			continue;
> +
> +		list_add_tail(&mnt->mnt_umounting, &visited);
>  		for (m = propagation_next(parent, parent); m;
>  		     m = propagation_next(m, parent)) {
>  			struct mount *child = __lookup_mnt(&m->mnt,
> @@ -529,6 +568,27 @@ int propagate_umount(struct list_head *list)
>  			if (!child)
>  				continue;
>  
> +			if (!list_empty(&child->mnt_umounting)) {
> +				/*
> +				 * If the child has already been visited it is
> +				 * known that its entire peer group and all of
> +				 * their slaves in the propagation tree for the
> +				 * mountpoint have already been visited and
> +				 * there is no need to visit this subtree again.
> +				 */
> +				m = skip_propagation_subtree(m, parent);
> +				continue;
> +			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
> +				/*
> +				 * We have come across a partially unmounted
> +				 * mount in the list that has not been visited
> +				 * yet. Remember it has been visited and
> +				 * continue about our merry way.
> +				 */
> +				list_add_tail(&child->mnt_umounting, &visited);
> +				continue;
> +			}
> +
>  			/* Check the child and parents while progress is made */
>  			while (__propagate_umount(child,
>  						  &to_umount, &to_restore)) {
> @@ -542,6 +602,7 @@ int propagate_umount(struct list_head *list)
>  
>  	umount_list(&to_umount, &to_restore);
>  	restore_mounts(&to_restore);
> +	cleanup_umount_visitations(&visited);
>  	list_splice_tail(&to_umount, list);
>  
>  	return 0;
> -- 
> 2.10.1
>
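One small sanity check that may save others a minute: the mounts column
in the table matches the nr the script computes for /mnt/1, that is
2^(iteration+1) + 1, over the iterations 9..19 used in the loop:

$ for i in 9 10 19; do echo $(( (2 ** (i + 1)) + 1 )); done
1025
2049
1048577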