Re: [RFC][PATCH] rbind across namespaces

From: Ram
Date: Mon May 23 2005 - 02:28:47 EST


On Sun, 2005-05-22 at 22:08, Miklos Szeredi wrote:
> > The patch failed rbinds in some cases. Fixed it. The enclosed patch
> > has a high chance of being bug free.
>
> Except for not setting mnt_namespace correctly, it does seem to be ok.

Yes, done. The updated patch is enclosed.

But it seems that this patch opens up questions such as:

Should mounts/umounts/remounts/pivot_root in foreign namespaces
be allowed? Probably check_mnt() can simply be removed, and the correct
semaphores would then have to be held in each of those functions.

RP

>
> Miklos
Summary:

Allows rbinds across any two namespaces. NOTE: currently a bind from a foreign
namespace to the current namespace is allowed. This patch now allows:

1. binds/rbinds from any namespace to any other namespace, under the assumption
that if a process has access to a namespace, it ought to have permission to
manipulate that namespace.
2. binds/rbinds to detached mounts are disallowed.
3. an rbind from a detached mount is treated as a plain bind.

The patch incorporates ideas from Miklos and Jamie, and is dependent on
Miklos's "fix race in mark_mounts_for_expiry" patch to function correctly. Also
it depends on Miklos's "fix bind mount from foreign namespace" patch, because
without that patch umounts would fail.

--- /home/linux/views/linux-2.6.12-rc4/fs/namespace.c 2005-05-06 23:22:29.000000000 -0700
+++ linux-2.6.12-rc4/fs/namespace.c 2005-05-23 00:20:39.000000000 -0700
@@ -148,7 +148,7 @@ static struct vfsmount *next_mnt(struct
}

static struct vfsmount *
-clone_mnt(struct vfsmount *old, struct dentry *root)
+clone_mnt(struct vfsmount *old, struct dentry *root, struct namespace *ns)
{
struct super_block *sb = old->mnt_sb;
struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
@@ -160,7 +160,7 @@ clone_mnt(struct vfsmount *old, struct d
mnt->mnt_root = dget(root);
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
- mnt->mnt_namespace = old->mnt_namespace;
+ mnt->mnt_namespace = ns;

/* stick the duplicate mount on the same expiry list
* as the original if that was on one */
@@ -533,13 +533,14 @@ lives_below_in_same_fs(struct dentry *d,
}
}

-static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
+static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
+ struct namespace *ns)
{
struct vfsmount *res, *p, *q, *r, *s;
struct list_head *h;
struct nameidata nd;

- res = q = clone_mnt(mnt, dentry);
+ res = q = clone_mnt(mnt, dentry, ns);
if (!q)
goto Enomem;
q->mnt_mountpoint = mnt->mnt_mountpoint;
@@ -558,7 +559,7 @@ static struct vfsmount *copy_tree(struct
p = s;
nd.mnt = q;
nd.dentry = p->mnt_mountpoint;
- q = clone_mnt(p, p->mnt_root);
+ q = clone_mnt(p, p->mnt_root, ns);
if (!q)
goto Enomem;
spin_lock(&vfsmount_lock);
@@ -616,11 +617,14 @@ out_unlock:
}

/*
- * do loopback mount.
+ * do loopback mount. The loopback mount can be done from any namespace to any
+ * other namespace including the current namespace, as long as the task acquired
+ * rights to manipulate them.
*/
static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
{
struct nameidata old_nd;
+ struct namespace *mntpt_ns, *old_ns;
struct vfsmount *mnt = NULL;
int err = mount_is_safe(nd);
if (err)
@@ -631,16 +635,58 @@ static int do_loopback(struct nameidata
if (err)
return err;

- down_write(&current->namespace->sem);
err = -EINVAL;
- if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
- err = -ENOMEM;
- if (recurse)
- mnt = copy_tree(old_nd.mnt, old_nd.dentry);
- else
- mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+ spin_lock(&vfsmount_lock);
+
+ /*
+ * Disallow bind/rbind mounts to detached mounts and treat rbind from
+ * detached mount as normal bind.
+ */
+ mntpt_ns = nd->mnt->mnt_namespace;
+ old_ns = old_nd.mnt->mnt_namespace;
+ if (!mntpt_ns || !mntpt_ns->root) {
+ spin_unlock(&vfsmount_lock);
+ goto out;
}

+ if (!old_ns || !old_ns->root)
+ recurse = 0;
+
+ get_namespace(mntpt_ns);
+ if (recurse && old_ns != mntpt_ns)
+ get_namespace(old_ns);
+
+ spin_unlock(&vfsmount_lock);
+
+ /*
+ * make sure we don't race with some other thread manipulating the
+ * namespaces.
+ */
+ if (recurse && (old_ns < mntpt_ns))
+ down_write(&old_ns->sem);
+ down_write(&mntpt_ns->sem);
+ if (recurse && (old_ns > mntpt_ns))
+ down_write(&old_ns->sem);
+
+
+ /*
+ * well... mounts might have detached while we acquired
+ * the semaphores. Revalidate that the destination mount
+ * is still attached.
+ */
+ if (!nd->mnt->mnt_namespace)
+ goto error_out;
+
+ /*
+ * and if the source mount is detached, than just do
+ * a bind, instead of a rbind
+ */
+ err = -ENOMEM;
+ if (recurse && old_nd.mnt->mnt_namespace)
+ mnt = copy_tree(old_nd.mnt, old_nd.dentry, mntpt_ns);
+ else
+ mnt = clone_mnt(old_nd.mnt, old_nd.dentry, mntpt_ns);
+
if (mnt) {
/* stop bind mounts from expiring */
spin_lock(&vfsmount_lock);
@@ -656,7 +702,15 @@ static int do_loopback(struct nameidata
mntput(mnt);
}

- up_write(&current->namespace->sem);
+error_out:
+ up_write(&mntpt_ns->sem);
+ if (recurse && (old_ns != mntpt_ns)) {
+ up_write(&old_ns->sem);
+ put_namespace(old_ns);
+ }
+ put_namespace(mntpt_ns);
+
+out:
path_release(&old_nd);
return err;
}
@@ -1090,7 +1144,8 @@ int copy_namespace(int flags, struct tas

down_write(&tsk->namespace->sem);
/* First pass: copy the tree topology */
- new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
+ new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root,
+ new_ns);
if (!new_ns->root) {
up_write(&tsk->namespace->sem);
kfree(new_ns);
@@ -1108,7 +1163,6 @@ int copy_namespace(int flags, struct tas
p = namespace->root;
q = new_ns->root;
while (p) {
- q->mnt_namespace = new_ns;
if (fs) {
if (p == fs->rootmnt) {
rootmnt = p;