On Tue, Feb 16, 2016 at 11:07:32PM -0800, Maxim Patlasov wrote: > On 02/16/2016 11:54 AM, Al Viro wrote: > >On Tue, Feb 16, 2016 at 11:45:33AM -0800, Maxim Patlasov wrote: > >>propagate_one(m) calculates "type" argument for copy_tree() like this: > >> > >>> if (m->mnt_group_id == last_dest->mnt_group_id) { > >>> type = CL_MAKE_SHARED; > >>> } else { > >>> type = CL_SLAVE; > >>> if (IS_MNT_SHARED(m)) > >>> type |= CL_MAKE_SHARED; > >>> } > >>The "type" argument then governs clone_mnt() behavior with respect to flags > >>and mnt_master of new mount. When we iterate through a slave group, it is > >>possible that both current "m" and "last_dest" are not shared (although, > >>both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison > >>above erroneously makes new mount shared and sets its mnt_master to > >>last_source->mnt_master. The patch fixes the problem by handling zero > >>mnt_group_id-s as though they are unequal. > >> > >>The similar problem exists in the implementation of "else" clause above > >>when we have to ascend upward in the master/slave tree by calling: > >> > >>> last_source = last_source->mnt_master; > >>> last_dest = last_source->mnt_parent; > >>proper number of times. The last step is governed by > >>"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if > >>both are zero. The patch fixes this case in the same way as the former one. > >Mind putting together a reproducer? > > There are two files attached: reproducer1.c and reproducer2.c. The former > demonstrates the problem before applying the patch. The latter demonstrates > why the first hunk of the patch is not enough. > > [root@f22ml ~]# reproducer1 > main pid = 1496 > monitor pid = 1497 > child pid = 1498 > grand-child pid = 1499 > > [root@f22ml ~]# grep "child" /proc/1496/mountinfo > 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel > [root@f22ml ~]# grep "child" /proc/1498/mountinfo > 244 208 0:37 /child /tmp/child rw shared:127 master:93 - tmpfs tmpfs > rw,seclabel > [root@f22ml ~]# grep "child" /proc/1499/mountinfo > 245 240 0:37 /child /tmp/child rw master:127 - tmpfs tmpfs rw,seclabel > [root@f22ml ~]# grep "child" /proc/1497/mountinfo > 246 176 0:37 /child /tmp/child rw shared:128 master:127 - tmpfs tmpfs > rw,seclabel > > while expected info for 1497 would be: > 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel > Here is a simpler reproducer without additional namespaces and processes. [root@fc22-vm tmp]# cat test.sh set -e d=`pwd` mount -t tmpfs test $d cd $d mkdir root mount -t tmpfs root root mount --make-shared root mkdir monitor mount --bind root monitor/ mount --make-slave monitor/ mkdir child mount --bind root child/ mount --make-slave child mount --make-shared child/ mkdir grand_child mount --bind child grand_child mount --make-slave grand_child mkdir root/test mount --bind root/test root/test cat /proc/self/mountinfo | grep $d echo --- cat /proc/self/mountinfo | grep monitor/test | grep shared && echo FAIL || echo PASS exit [root@fc22-vm tmp]# bash test.sh 80 61 0:41 / /root/tmp rw,relatime shared:32 - tmpfs test rw 82 80 0:42 / /root/tmp/root rw,relatime shared:33 - tmpfs root rw 84 80 0:42 / /root/tmp/monitor rw,relatime master:33 - tmpfs root rw 86 80 0:42 / /root/tmp/child rw,relatime shared:34 master:33 - tmpfs root rw 88 80 0:42 / /root/tmp/grand_child rw,relatime master:34 - tmpfs root rw 90 82 0:42 /test /root/tmp/root/test rw,relatime shared:33 - tmpfs root rw 94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw 92 88 0:42 /test /root/tmp/grand_child/test rw,relatime master:35 - tmpfs root rw 91 86 0:42 /test /root/tmp/child/test rw,relatime shared:35 master:33 - tmpfs root rw --- 94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw FAIL > Now, assuming that only the first hunk of the patch is applied: > > > - if (m->mnt_group_id == last_dest->mnt_group_id) { > > + if (m->mnt_group_id && m->mnt_group_id == last_dest->mnt_group_id) { > > [root@f22ml ~]# reproducer2 > main pid = 1506 > monitor pid = 1507 > child pid = 1508 > grand-child pid = 1509 > > [root@f22ml ~]# grep "child" /proc/1506/mountinfo > 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel > [root@f22ml ~]# grep "child" /proc/1508/mountinfo > 244 208 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel > [root@f22ml ~]# grep "child" /proc/1509/mountinfo > 245 240 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel > [root@f22ml ~]# grep "child" /proc/1507/mountinfo > 246 176 0:37 /child /tmp/child rw master:0 - tmpfs tmpfs rw,seclabel > > while expected info for 1507 would be: > 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel > > Thanks, > Maxim > #define _GNU_SOURCE > #include <stdio.h> > #include <unistd.h> > #include <stdlib.h> > #include <sys/stat.h> > #include <sys/types.h> > #include <errno.h> > #include <sys/mount.h> > #include <sys/syscall.h> > #include <sched.h> > > int main() > { > const char *child = "/tmp/child"; > int ret; > > printf("main pid = %d\n", getpid()); > > /* make our own private playground ... */ > ret = unshare(CLONE_NEWNS); > if (ret) { > perror("unshare"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL); > if (ret) { > perror("mount"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); > if (ret) { > perror("mount2"); > exit(1); > } > > /* fork monitor ... */ > ret = fork(); > if (ret < 0) { > perror("fork"); > exit(1); > } else if (!ret) { > printf("monitor pid = %d\n", getpid()); > > ret = unshare(CLONE_NEWNS); > if (ret) { > perror("unshare in monitor"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); > if (ret) { > perror("mount in monitor"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); > if (ret) { > perror("mount2 in monitor"); > exit(1); > } > > sleep(-1); > } > > /* wait monitor to setup */ > sleep(1); > > > /* fork child ... */ > ret = fork(); > if (ret < 0) { > perror("fork"); > exit(1); > } else if (!ret) { > printf("child pid = %d\n", getpid()); > > ret = unshare(CLONE_NEWNS); > if (ret) { > perror("unshare in child"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); > if (ret) { > perror("mount in child"); > exit(1); > } > ret = mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL); > if (ret) { > perror("mount2 in child"); > exit(1); > } > > if (!fork()) { /* grand-child */ > printf("grand-child pid = %d\n", getpid()); > ret = unshare(CLONE_NEWNS); > if (ret) { > perror("unshare in grand-child"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); > if (ret) { > perror("mount in grand-child"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); > if (ret) { > perror("mount2 in grand-child"); > exit(1); > } > > sleep(-1); > } > > sleep(-1); > } > > /* wait child and grand-child to setup */ > sleep(1); > > ret = mkdir(child, 0755); > if (ret && errno != EEXIST) { > perror("mkdir"); > exit(1); > } > > /* let "child" mount slip to everyone' namespaces ... */ > ret = mount(child, child, NULL, MS_BIND, NULL); > if (ret) { > perror("bind mount"); > exit(1); > } > > sleep(-1); > } > > #define _GNU_SOURCE > #include <stdio.h> > #include <unistd.h> > #include <stdlib.h> > #include <sys/stat.h> > #include <sys/types.h> > #include <errno.h> > #include <sys/mount.h> > #include <sys/syscall.h> > #include <sched.h> > > int main() > { > const char *child = "/tmp/child"; > int ret; > > printf("main pid = %d\n", getpid()); > > /* make our own private playground ... */ > ret = unshare(CLONE_NEWNS); > if (ret) { > perror("unshare"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL); > if (ret) { > perror("mount"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); > if (ret) { > perror("mount2"); > exit(1); > } > > /* fork monitor ... */ > ret = fork(); > if (ret < 0) { > perror("fork"); > exit(1); > } else if (!ret) { > printf("monitor pid = %d\n", getpid()); > > ret = unshare(CLONE_NEWNS); > if (ret) { > perror("unshare in monitor"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); > if (ret) { > perror("mount in monitor"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); > if (ret) { > perror("mount2 in monitor"); > exit(1); > } > > sleep(-1); > } > > /* wait monitor to setup */ > sleep(1); > > > /* fork child ... */ > ret = fork(); > if (ret < 0) { > perror("fork"); > exit(1); > } else if (!ret) { > printf("child pid = %d\n", getpid()); > > ret = unshare(CLONE_NEWNS); > if (ret) { > perror("unshare in child"); > exit(1); > } > > if (!fork()) { /* grand-child */ > printf("grand-child pid = %d\n", getpid()); > ret = unshare(CLONE_NEWNS); > if (ret) { > perror("unshare in grand-child"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); > if (ret) { > perror("mount in grand-child"); > exit(1); > } > > ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); > if (ret) { > perror("mount2 in grand-child"); > exit(1); > } > > sleep(-1); > } > > sleep(-1); > } > > /* wait child and grand-child to setup */ > sleep(1); > > ret = mkdir(child, 0755); > if (ret && errno != EEXIST) { > perror("mkdir"); > exit(1); > } > > /* let "child" mount slip to everyone' namespaces ... */ > ret = mount(child, child, NULL, MS_BIND, NULL); > if (ret) { > perror("bind mount"); > exit(1); > } > > sleep(-1); > } > > _______________________________________________ > Devel mailing list > Devel@xxxxxxxxxx > https://lists.openvz.org/mailman/listinfo/devel -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html