On 02/16/2016 11:54 AM, Al Viro wrote:
On Tue, Feb 16, 2016 at 11:45:33AM -0800, Maxim Patlasov wrote:
propagate_one(m) calculates "type" argument for copy_tree() like this:
if (m->mnt_group_id == last_dest->mnt_group_id) {
type = CL_MAKE_SHARED;
} else {
type = CL_SLAVE;
if (IS_MNT_SHARED(m))
type |= CL_MAKE_SHARED;
}
The "type" argument then governs clone_mnt() behavior with respect to flags
and mnt_master of new mount. When we iterate through a slave group, it is
possible that both current "m" and "last_dest" are not shared (although,
both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison
above erroneously makes new mount shared and sets its mnt_master to
last_source->mnt_master. The patch fixes the problem by handling zero
mnt_group_id-s as though they are unequal.
The similar problem exists in the implementation of "else" clause above
when we have to ascend upward in the master/slave tree by calling:
last_source = last_source->mnt_master;
last_dest = last_source->mnt_parent;
proper number of times. The last step is governed by
"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if
both are zero. The patch fixes this case in the same way as the former one.
Mind putting together a reproducer?
There are two files attached: reproducer1.c and reproducer2.c. The
former demonstrates the problem before applying the patch. The latter
demonstrates why the first hunk of the patch is not enough.
[root@f22ml ~]# reproducer1
main pid = 1496
monitor pid = 1497
child pid = 1498
grand-child pid = 1499
[root@f22ml ~]# grep "child" /proc/1496/mountinfo
243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1498/mountinfo
244 208 0:37 /child /tmp/child rw shared:127 master:93 - tmpfs tmpfs
rw,seclabel
[root@f22ml ~]# grep "child" /proc/1499/mountinfo
245 240 0:37 /child /tmp/child rw master:127 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1497/mountinfo
246 176 0:37 /child /tmp/child rw shared:128 master:127 - tmpfs tmpfs
rw,seclabel
while expected info for 1497 would be:
246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
Now, assuming that only the first hunk of the patch is applied:
> - if (m->mnt_group_id == last_dest->mnt_group_id) {
> + if (m->mnt_group_id && m->mnt_group_id == last_dest->mnt_group_id) {
[root@f22ml ~]# reproducer2
main pid = 1506
monitor pid = 1507
child pid = 1508
grand-child pid = 1509
[root@f22ml ~]# grep "child" /proc/1506/mountinfo
243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1508/mountinfo
244 208 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1509/mountinfo
245 240 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1507/mountinfo
246 176 0:37 /child /tmp/child rw master:0 - tmpfs tmpfs rw,seclabel
while expected info for 1507 would be:
246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
Thanks,
Maxim
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <sched.h>
int main()
{
const char *child = "/tmp/child";
int ret;
printf("main pid = %d\n", getpid());
/* make our own private playground ... */
ret = unshare(CLONE_NEWNS);
if (ret) {
perror("unshare");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
if (ret) {
perror("mount");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
if (ret) {
perror("mount2");
exit(1);
}
/* fork monitor ... */
ret = fork();
if (ret < 0) {
perror("fork");
exit(1);
} else if (!ret) {
printf("monitor pid = %d\n", getpid());
ret = unshare(CLONE_NEWNS);
if (ret) {
perror("unshare in monitor");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
if (ret) {
perror("mount in monitor");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
if (ret) {
perror("mount2 in monitor");
exit(1);
}
sleep(-1);
}
/* wait monitor to setup */
sleep(1);
/* fork child ... */
ret = fork();
if (ret < 0) {
perror("fork");
exit(1);
} else if (!ret) {
printf("child pid = %d\n", getpid());
ret = unshare(CLONE_NEWNS);
if (ret) {
perror("unshare in child");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
if (ret) {
perror("mount in child");
exit(1);
}
ret = mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL);
if (ret) {
perror("mount2 in child");
exit(1);
}
if (!fork()) { /* grand-child */
printf("grand-child pid = %d\n", getpid());
ret = unshare(CLONE_NEWNS);
if (ret) {
perror("unshare in grand-child");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
if (ret) {
perror("mount in grand-child");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
if (ret) {
perror("mount2 in grand-child");
exit(1);
}
sleep(-1);
}
sleep(-1);
}
/* wait child and grand-child to setup */
sleep(1);
ret = mkdir(child, 0755);
if (ret && errno != EEXIST) {
perror("mkdir");
exit(1);
}
/* let "child" mount slip to everyone' namespaces ... */
ret = mount(child, child, NULL, MS_BIND, NULL);
if (ret) {
perror("bind mount");
exit(1);
}
sleep(-1);
}
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <sched.h>
int main()
{
const char *child = "/tmp/child";
int ret;
printf("main pid = %d\n", getpid());
/* make our own private playground ... */
ret = unshare(CLONE_NEWNS);
if (ret) {
perror("unshare");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
if (ret) {
perror("mount");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
if (ret) {
perror("mount2");
exit(1);
}
/* fork monitor ... */
ret = fork();
if (ret < 0) {
perror("fork");
exit(1);
} else if (!ret) {
printf("monitor pid = %d\n", getpid());
ret = unshare(CLONE_NEWNS);
if (ret) {
perror("unshare in monitor");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
if (ret) {
perror("mount in monitor");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
if (ret) {
perror("mount2 in monitor");
exit(1);
}
sleep(-1);
}
/* wait monitor to setup */
sleep(1);
/* fork child ... */
ret = fork();
if (ret < 0) {
perror("fork");
exit(1);
} else if (!ret) {
printf("child pid = %d\n", getpid());
ret = unshare(CLONE_NEWNS);
if (ret) {
perror("unshare in child");
exit(1);
}
if (!fork()) { /* grand-child */
printf("grand-child pid = %d\n", getpid());
ret = unshare(CLONE_NEWNS);
if (ret) {
perror("unshare in grand-child");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
if (ret) {
perror("mount in grand-child");
exit(1);
}
ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
if (ret) {
perror("mount2 in grand-child");
exit(1);
}
sleep(-1);
}
sleep(-1);
}
/* wait child and grand-child to setup */
sleep(1);
ret = mkdir(child, 0755);
if (ret && errno != EEXIST) {
perror("mkdir");
exit(1);
}
/* let "child" mount slip to everyone' namespaces ... */
ret = mount(child, child, NULL, MS_BIND, NULL);
if (ret) {
perror("bind mount");
exit(1);
}
sleep(-1);
}