[PATCH -mm 1/5] New system call, unshare

From: JANAK DESAI
Date: Thu Dec 08 2005 - 17:09:10 EST



[PATCH -mm 1/5] unshare system call: System call handler function sys_unshare

Signed-off-by: Janak Desai


 fs/namespace.c            |   55 +++++++++-----
 include/linux/namespace.h |    1 +
 kernel/fork.c             |  175 ++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 185 insertions(+), 46 deletions(-)


diff -Naurp 2.6.15-rc5-mm1/fs/namespace.c 2.6.15-rc5-mm1+unshare/fs/namespace.c
--- 2.6.15-rc5-mm1/fs/namespace.c 2005-12-06 21:06:14.000000000 +0000
+++ 2.6.15-rc5-mm1+unshare/fs/namespace.c 2005-12-07 15:42:03.000000000 +0000
@@ -1314,7 +1314,11 @@ dput_out:
return retval;
}

-int copy_namespace(int flags, struct task_struct *tsk)
+/*
+ * Allocate a new namespace structure and populate it with contents
+ * copied from the namespace of the passed in task structure.
+ */
+struct namespace *dup_namespace(struct task_struct *tsk)
{
struct namespace *namespace = tsk->namespace;
struct namespace *new_ns;
@@ -1322,19 +1326,6 @@ int copy_namespace(int flags, struct tas
struct fs_struct *fs = tsk->fs;
struct vfsmount *p, *q;

- if (!namespace)
- return 0;
-
- get_namespace(namespace);
-
- if (!(flags & CLONE_NEWNS))
- return 0;
-
- if (!capable(CAP_SYS_ADMIN)) {
- put_namespace(namespace);
- return -EPERM;
- }
-
new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
if (!new_ns)
goto out;
@@ -1385,8 +1376,6 @@ int copy_namespace(int flags, struct tas
}
up_write(&namespace_sem);

- tsk->namespace = new_ns;
-
if (rootmnt)
mntput(rootmnt);
if (pwdmnt)
@@ -1394,12 +1383,40 @@ int copy_namespace(int flags, struct tas
if (altrootmnt)
mntput(altrootmnt);

- put_namespace(namespace);
- return 0;
+out:
+ return new_ns;
+}
+
+int copy_namespace(int flags, struct task_struct *tsk)
+{
+ struct namespace *namespace = tsk->namespace;
+ struct namespace *new_ns;
+ int err = 0;
+
+ if (!namespace)
+ return 0;
+
+ get_namespace(namespace);
+
+ if (!(flags & CLONE_NEWNS))
+ return 0; /* namespace stays shared; get_namespace() above is not undone */
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ new_ns = dup_namespace(tsk);
+ if (!new_ns) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ tsk->namespace = new_ns; /* success also falls through to out: */

out:
put_namespace(namespace);
- return -ENOMEM;
+ return err; /* 0, -EPERM or -ENOMEM */
}

asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
diff -Naurp 2.6.15-rc5-mm1/include/linux/namespace.h 2.6.15-rc5-mm1+unshare/include/linux/namespace.h
--- 2.6.15-rc5-mm1/include/linux/namespace.h 2005-12-06 21:06:21.000000000 +0000
+++ 2.6.15-rc5-mm1+unshare/include/linux/namespace.h 2005-12-07 15:40:54.000000000 +0000
@@ -15,6 +15,7 @@ struct namespace {

extern int copy_namespace(int, struct task_struct *);
extern void __put_namespace(struct namespace *namespace);
+extern struct namespace *dup_namespace(struct task_struct *);

static inline void put_namespace(struct namespace *namespace)
{
diff -Naurp 2.6.15-rc5-mm1/kernel/fork.c 2.6.15-rc5-mm1+unshare/kernel/fork.c
--- 2.6.15-rc5-mm1/kernel/fork.c 2005-12-06 21:06:22.000000000 +0000
+++ 2.6.15-rc5-mm1+unshare/kernel/fork.c 2005-12-07 16:51:37.000000000 +0000
@@ -445,6 +445,55 @@ void mm_release(struct task_struct *tsk,
}
}

+/*
+ * Allocate a new mm structure and fill it from current->mm (tsk is only
+ * passed to init_new_context()). Returns the new mm, or NULL on failure.
+ */
+static struct mm_struct *dup_mm(struct task_struct *tsk)
+{
+ struct mm_struct *mm, *oldmm = current->mm;
+ int err;
+
+ if (!oldmm)
+ return NULL; /* current has no mm to duplicate */
+
+ mm = allocate_mm();
+ if (!mm)
+ goto fail_nomem;
+
+ memcpy(mm, oldmm, sizeof(*mm));
+
+ if (!mm_init(mm))
+ goto fail_nomem;
+
+ if (init_new_context(tsk, mm))
+ goto fail_nocontext;
+
+ err = dup_mmap(mm, oldmm); /* err is only tested; callers just see NULL */
+ if (err)
+ goto free_pt;
+
+ mm->hiwater_rss = get_mm_rss(mm);
+ mm->hiwater_vm = mm->total_vm;
+
+ return mm;
+
+free_pt:
+ mmput(mm);
+
+fail_nomem:
+ return NULL;
+
+fail_nocontext:
+ /*
+ * If init_new_context() failed, we cannot use mmput() to free the mm
+ * because it calls destroy_context()
+ */
+ mm_free_pgd(mm);
+ free_mm(mm);
+ return NULL;
+}
+
static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
struct mm_struct * mm, *oldmm;
@@ -472,43 +521,17 @@ static int copy_mm(unsigned long clone_f
}

retval = -ENOMEM;
- mm = allocate_mm();
+ mm = dup_mm(tsk);
if (!mm)
goto fail_nomem;

- /* Copy the current MM stuff.. */
- memcpy(mm, oldmm, sizeof(*mm));
- if (!mm_init(mm))
- goto fail_nomem;
-
- if (init_new_context(tsk,mm))
- goto fail_nocontext;
-
- retval = dup_mmap(mm, oldmm);
- if (retval)
- goto free_pt;
-
- mm->hiwater_rss = get_mm_rss(mm);
- mm->hiwater_vm = mm->total_vm;
-
good_mm:
tsk->mm = mm;
tsk->active_mm = mm;
return 0;

-free_pt:
- mmput(mm);
fail_nomem:
return retval;
-
-fail_nocontext:
- /*
- * If init_new_context() failed, we cannot use mmput() to free the mm
- * because it calls destroy_context()
- */
- mm_free_pgd(mm);
- free_mm(mm);
- return retval;
}

static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -1311,3 +1334,101 @@ void __init proc_caches_init(void)
sizeof(struct mm_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
}
+
+/*
+ * Sanity-check the flags passed to the unshare system call: returns 0
+ * if they can be honoured for current, or -EINVAL otherwise.
+ */
+static inline int check_unshare_flags(unsigned long unshare_flags)
+{
+ int err = -EINVAL;
+
+ if (unshare_flags & ~(CLONE_NEWNS | CLONE_VM)) /* only these two are supported */
+ goto errout;
+
+ /*
+ * Cannot unshare namespace if the fs structure is being shared
+ * through a previous call to clone()
+ */
+ if ((unshare_flags & CLONE_NEWNS) &&
+ (atomic_read(&current->fs->count) > 1))
+ goto errout;
+
+ /*
+ * Cannot unshare vm if signal handlers are being shared through
+ * a previous call to clone()
+ */
+ if ((unshare_flags & CLONE_VM) &&
+ (atomic_read(&current->sighand->count) > 1))
+ goto errout;
+
+ return 0;
+
+errout:
+ return err;
+
+}
+
+/*
+ * unshare allows a process to 'unshare' parts of its context that were
+ * originally shared using clone. The copy_* functions used by do_fork()
+ * cannot be used directly: they modify an inactive task_struct under
+ * construction, whereas here we modify the current, active one.
+ * Returns 0 on success, the check_unshare_flags() error, -EPERM without
+ * CAP_SYS_ADMIN, or -ENOMEM if dup_namespace() or dup_mm() fails.
+ */
+asmlinkage long sys_unshare(unsigned long unshare_flags)
+{
+ int err = 0;
+ struct namespace *new_ns = NULL, *ns = current->namespace;
+ struct mm_struct *new_mm = NULL, *active_mm = NULL, *mm = current->mm;
+
+ err = check_unshare_flags(unshare_flags);
+ if (err)
+ goto unshare_out;
+
+ if ((unshare_flags & CLONE_NEWNS) &&
+ (ns && atomic_read(&ns->count) > 1)) { /* skipped unless ns is shared */
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto unshare_out;
+
+ err = -ENOMEM;
+ new_ns = dup_namespace(current);
+ if (!new_ns)
+ goto unshare_out;
+ }
+
+ if ((unshare_flags & CLONE_VM) && (atomic_read(&mm->mm_users) > 1)) {
+ err = -ENOMEM;
+ new_mm = dup_mm(current);
+ if (!new_mm)
+ goto unshare_cleanup_ns;
+ }
+
+ if (new_ns) { /* commit phase: no failure paths from here on */
+ task_lock(current);
+ current->namespace = new_ns;
+ task_unlock(current);
+ put_namespace(ns);
+ }
+
+ if (new_mm) {
+ task_lock(current);
+ active_mm = current->active_mm;
+ current->mm = new_mm;
+ current->active_mm = new_mm;
+ activate_mm(active_mm, new_mm);
+ task_unlock(current);
+ mmput(mm);
+ }
+
+ return 0;
+
+unshare_cleanup_ns:
+ if (new_ns) /* dup_mm() failed after the namespace was duplicated */
+ put_namespace(new_ns);
+
+unshare_out:
+ return err;
+}


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/