Re: [Devel] [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal

From: Andrew Vagin
Date: Thu Feb 25 2016 - 19:26:25 EST


On Tue, Feb 16, 2016 at 11:07:32PM -0800, Maxim Patlasov wrote:
> On 02/16/2016 11:54 AM, Al Viro wrote:
> >On Tue, Feb 16, 2016 at 11:45:33AM -0800, Maxim Patlasov wrote:
> >>propagate_one(m) calculates "type" argument for copy_tree() like this:
> >>
> >>> if (m->mnt_group_id == last_dest->mnt_group_id) {
> >>> type = CL_MAKE_SHARED;
> >>> } else {
> >>> type = CL_SLAVE;
> >>> if (IS_MNT_SHARED(m))
> >>> type |= CL_MAKE_SHARED;
> >>> }
> >>The "type" argument then governs clone_mnt() behavior with respect to flags
> >>and mnt_master of new mount. When we iterate through a slave group, it is
> >>possible that both current "m" and "last_dest" are not shared (although,
> >>both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison
> >>above erroneously makes new mount shared and sets its mnt_master to
> >>last_source->mnt_master. The patch fixes the problem by handling zero
> >>mnt_group_id-s as though they are unequal.
> >>
> >>The similar problem exists in the implementation of "else" clause above
> >>when we have to ascend upward in the master/slave tree by calling:
> >>
> >>> last_source = last_source->mnt_master;
> >>> last_dest = last_source->mnt_parent;
> >>proper number of times. The last step is governed by
> >>"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if
> >>both are zero. The patch fixes this case in the same way as the former one.
> >Mind putting together a reproducer?
>
> There are two files attached: reproducer1.c and reproducer2.c. The former
> demonstrates the problem before applying the patch. The latter demonstrates
> why the first hunk of the patch is not enough.
>
> [root@f22ml ~]# reproducer1
> main pid = 1496
> monitor pid = 1497
> child pid = 1498
> grand-child pid = 1499
>
> [root@f22ml ~]# grep "child" /proc/1496/mountinfo
> 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1498/mountinfo
> 244 208 0:37 /child /tmp/child rw shared:127 master:93 - tmpfs tmpfs
> rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1499/mountinfo
> 245 240 0:37 /child /tmp/child rw master:127 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1497/mountinfo
> 246 176 0:37 /child /tmp/child rw shared:128 master:127 - tmpfs tmpfs
> rw,seclabel
>
> while expected info for 1497 would be:
> 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
>

Here is a simpler reproducer without additional namespaces and
processes.

[root@fc22-vm tmp]# cat test.sh
set -e
d=`pwd`
mount -t tmpfs test $d
cd $d
mkdir root
mount -t tmpfs root root
mount --make-shared root
mkdir monitor
mount --bind root monitor/
mount --make-slave monitor/
mkdir child
mount --bind root child/
mount --make-slave child
mount --make-shared child/
mkdir grand_child
mount --bind child grand_child
mount --make-slave grand_child
mkdir root/test
mount --bind root/test root/test

cat /proc/self/mountinfo | grep $d
echo ---
cat /proc/self/mountinfo | grep monitor/test | grep shared && echo FAIL || echo PASS
exit

[root@fc22-vm tmp]# bash test.sh
80 61 0:41 / /root/tmp rw,relatime shared:32 - tmpfs test rw
82 80 0:42 / /root/tmp/root rw,relatime shared:33 - tmpfs root rw
84 80 0:42 / /root/tmp/monitor rw,relatime master:33 - tmpfs root rw
86 80 0:42 / /root/tmp/child rw,relatime shared:34 master:33 - tmpfs root rw
88 80 0:42 / /root/tmp/grand_child rw,relatime master:34 - tmpfs root rw
90 82 0:42 /test /root/tmp/root/test rw,relatime shared:33 - tmpfs root rw
94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw
92 88 0:42 /test /root/tmp/grand_child/test rw,relatime master:35 - tmpfs root rw
91 86 0:42 /test /root/tmp/child/test rw,relatime shared:35 master:33 - tmpfs root rw
---
94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw
FAIL

> Now, assuming that only the first hunk of the patch is applied:
>
> > - if (m->mnt_group_id == last_dest->mnt_group_id) {
> > + if (m->mnt_group_id && m->mnt_group_id == last_dest->mnt_group_id) {
>
> [root@f22ml ~]# reproducer2
> main pid = 1506
> monitor pid = 1507
> child pid = 1508
> grand-child pid = 1509
>
> [root@f22ml ~]# grep "child" /proc/1506/mountinfo
> 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1508/mountinfo
> 244 208 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1509/mountinfo
> 245 240 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1507/mountinfo
> 246 176 0:37 /child /tmp/child rw master:0 - tmpfs tmpfs rw,seclabel
>
> while expected info for 1507 would be:
> 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
>
> Thanks,
> Maxim

> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> #include <errno.h>
> #include <sys/mount.h>
> #include <sys/syscall.h>
> #include <sched.h>
>
> int main()
> {
> const char *child = "/tmp/child";
> int ret;
>
> printf("main pid = %d\n", getpid());
>
> /* make our own private playground ... */
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
> if (ret) {
> perror("mount");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount2");
> exit(1);
> }
>
> /* fork monitor ... */
> ret = fork();
> if (ret < 0) {
> perror("fork");
> exit(1);
> } else if (!ret) {
> printf("monitor pid = %d\n", getpid());
>
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in monitor");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount in monitor");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount2 in monitor");
> exit(1);
> }
>
> sleep(-1);
> }
>
> /* wait monitor to setup */
> sleep(1);
>
>
> /* fork child ... */
> ret = fork();
> if (ret < 0) {
> perror("fork");
> exit(1);
> } else if (!ret) {
> printf("child pid = %d\n", getpid());
>
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount in child");
> exit(1);
> }
> ret = mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount2 in child");
> exit(1);
> }
>
> if (!fork()) { /* grand-child */
> printf("grand-child pid = %d\n", getpid());
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in grand-child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount in grand-child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount2 in grand-child");
> exit(1);
> }
>
> sleep(-1);
> }
>
> sleep(-1);
> }
>
> /* wait child and grand-child to setup */
> sleep(1);
>
> ret = mkdir(child, 0755);
> if (ret && errno != EEXIST) {
> perror("mkdir");
> exit(1);
> }
>
> /* let "child" mount slip to everyone' namespaces ... */
> ret = mount(child, child, NULL, MS_BIND, NULL);
> if (ret) {
> perror("bind mount");
> exit(1);
> }
>
> sleep(-1);
> }
>

> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> #include <errno.h>
> #include <sys/mount.h>
> #include <sys/syscall.h>
> #include <sched.h>
>
> int main()
> {
> const char *child = "/tmp/child";
> int ret;
>
> printf("main pid = %d\n", getpid());
>
> /* make our own private playground ... */
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
> if (ret) {
> perror("mount");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount2");
> exit(1);
> }
>
> /* fork monitor ... */
> ret = fork();
> if (ret < 0) {
> perror("fork");
> exit(1);
> } else if (!ret) {
> printf("monitor pid = %d\n", getpid());
>
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in monitor");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount in monitor");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount2 in monitor");
> exit(1);
> }
>
> sleep(-1);
> }
>
> /* wait monitor to setup */
> sleep(1);
>
>
> /* fork child ... */
> ret = fork();
> if (ret < 0) {
> perror("fork");
> exit(1);
> } else if (!ret) {
> printf("child pid = %d\n", getpid());
>
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in child");
> exit(1);
> }
>
> if (!fork()) { /* grand-child */
> printf("grand-child pid = %d\n", getpid());
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in grand-child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount in grand-child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount2 in grand-child");
> exit(1);
> }
>
> sleep(-1);
> }
>
> sleep(-1);
> }
>
> /* wait child and grand-child to setup */
> sleep(1);
>
> ret = mkdir(child, 0755);
> if (ret && errno != EEXIST) {
> perror("mkdir");
> exit(1);
> }
>
> /* let "child" mount slip to everyone' namespaces ... */
> ret = mount(child, child, NULL, MS_BIND, NULL);
> if (ret) {
> perror("bind mount");
> exit(1);
> }
>
> sleep(-1);
> }
>

> _______________________________________________
> Devel mailing list
> Devel@xxxxxxxxxx
> https://lists.openvz.org/mailman/listinfo/devel