[PATCH] Add supplementary UIDs, and getusers/setusers system calls

From: Josh Triplett
Date: Sun Nov 16 2014 - 02:09:00 EST


Analogous to the supplementary GID list, the supplementary UID list
provides a set of additional user credentials that a process can act as.
A process with CAP_SETUID can set its UID list arbitrarily; a process
without CAP_SETUID can only reduce its UID list.

This allows each user to have a set of UIDs that they can then use to
further sandbox individual child processes without first escalating to
root to change UIDs. For instance, a PAM module could give each user a
block of UIDs to work with.

Tested via the following test program:

#define _GNU_SOURCE /* setresuid() and setresgid() are only declared under this */
#include <err.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Invoke the getusers system call directly by number; 322 is the x86-64
 * number this patch assigns to it. Returns whatever syscall() returns,
 * narrowed to int.
 */
static int getusers(int count, uid_t *uids)
{
	long ret = syscall(322, count, uids);

	return ret;
}

/*
 * Invoke the setusers system call directly by number; 323 is the x86-64
 * number this patch assigns to it. Returns whatever syscall() returns,
 * narrowed to int.
 */
static int setusers(int count, const uid_t *uids)
{
	long ret = syscall(323, count, uids);

	return ret;
}

/*
 * Fetch the calling process's supplementary UID list and print it on a
 * single line prefixed with "UIDs:". Exits through err() if getusers()
 * reports a failure.
 */
static void show_users(void)
{
	uid_t uids[65536];
	int count;
	int idx;

	count = getusers(65536, uids);
	if (count < 0) {
		err(1, "getusers");
	}

	printf("UIDs:");
	idx = 0;
	while (idx < count) {
		printf(" %u", (unsigned)uids[idx]);
		idx++;
	}
	printf("\n");
}

/*
 * Exercise setusers()/getusers(): install a UID list, switch to GID/UID 1,
 * then shrink, regrow, and finally try to extend the list, printing the
 * list after each successful change. Any failing call terminates the
 * program through err().
 */
int main(void)
{
	uid_t five_uids[] = { 1, 2, 3, 4, 5 };
	uid_t four_uids[] = { 1, 2, 3, 4 };
	uid_t three_uids[] = { 2, 3, 4 };

	/* Show the list the process starts with. */
	show_users();

	/* Install the full list before changing identity. */
	if (setusers(5, five_uids) < 0) {
		err(1, "setusers 1");
	}
	show_users();

	/* Change the group IDs first, then the user IDs, to 1. */
	if (setresgid(1, 1, 1) < 0) {
		err(1, "setresgid");
	}
	if (setresuid(1, 1, 1) < 0) {
		err(1, "setresuid");
	}

	/* Drop UID 5 from the list. */
	if (setusers(4, four_uids) < 0) {
		err(1, "setusers 2");
	}
	show_users();

	/* Drop UID 1 from the list as well. */
	if (setusers(3, three_uids) < 0) {
		err(1, "setusers 3");
	}
	show_users();

	/* Put UID 1 back; it is still one of the process's own UIDs. */
	if (setusers(4, four_uids) < 0) {
		err(1, "setusers 4");
	}
	show_users();

	/* Become UID 2, which is present in the supplementary list. */
	if (setresuid(2, 2, 2) < 0) {
		err(1, "setresuid 2");
	}

	/* Try to add UID 5 back to the list. */
	if (setusers(5, five_uids) < 0) {
		err(1, "setusers 5");
	}
	show_users();

	return 0;
}

In this test, every call to setusers except the last succeeds; the last
call fails with EPERM because the now-unprivileged process attempts to add
UID 5, which is not in its current supplementary UID list.

Signed-off-by: Josh Triplett <josh@xxxxxxxxxxxxxxxx>
---
arch/x86/syscalls/syscall_32.tbl | 2 +
arch/x86/syscalls/syscall_64.tbl | 2 +
include/linux/cred.h | 66 +++++++++++++++
include/linux/syscalls.h | 2 +
include/uapi/asm-generic/unistd.h | 6 +-
include/uapi/linux/limits.h | 1 +
init/Kconfig | 9 ++
kernel/cred.c | 4 +
kernel/groups.c | 173 ++++++++++++++++++++++++++++++++++++++
kernel/sys.c | 21 +++--
kernel/sys_ni.c | 2 +
11 files changed, 280 insertions(+), 8 deletions(-)

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 9fe1b5d..55717d7 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -364,3 +364,5 @@
355 i386 getrandom sys_getrandom
356 i386 memfd_create sys_memfd_create
357 i386 bpf sys_bpf
+358 i386 getusers sys_getusers
+359 i386 setusers sys_setusers
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 281150b..5572e67 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -328,6 +328,8 @@
319 common memfd_create sys_memfd_create
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
+322 common getusers sys_getusers
+323 common setusers sys_setusers

#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/cred.h b/include/linux/cred.h
index b2d0820..31169fe 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -76,6 +76,8 @@ extern int groups_search(const struct group_info *, kgid_t);
extern int in_group_p(kgid_t);
extern int in_egroup_p(kgid_t);

+struct user_info;
+
/*
* The security context of a task
*
@@ -135,6 +137,12 @@ struct cred {
struct user_struct *user; /* real user ID subscription */
struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
struct group_info *group_info; /* supplementary groups for euid/fsgid */
+#ifdef CONFIG_SUPPLEMENTARY_UIDS
+ struct user_info *user_info; /* supplementary users */
+#define INIT_USER_INFO .user_info = &init_users,
+#else
+#define INIT_USER_INFO
+#endif
struct rcu_head rcu; /* RCU deletion hook */
};

@@ -381,4 +389,62 @@ do { \
*(_fsgid) = __cred->fsgid; \
} while(0)

+#ifdef CONFIG_SUPPLEMENTARY_UIDS
+struct user_info {
+ atomic_t usage;
+ int nusers;
+ int nblocks;
+ kuid_t small_block[NGROUPS_SMALL];
+ kuid_t *blocks[0];
+};
+
+#define USER_AT(ui, i) GROUP_AT(ui, i)
+extern struct user_info init_users;
+void users_free(struct user_info *);
+bool has_supplementary_uid(kuid_t);
+
+/**
+ * get_user_info - Get a reference to a user_info structure
+ * @ui: The user_info to reference
+ *
+ * This gets a reference to a set of supplementary users.
+ *
+ * If the caller is accessing a task's credentials, they must hold the RCU read
+ * lock when reading.
+ */
+static inline struct user_info *get_user_info(struct user_info *ui)
+{
+ atomic_inc(&ui->usage);
+ return ui;
+}
+
+static inline void get_cred_user_info(struct cred *cred)
+{
+ get_user_info(cred->user_info);
+}
+
+/**
+ * put_user_info - Release a reference to a user_info structure
+ * @ui: The user_info to release
+ */
+static inline void put_user_info(struct user_info *ui)
+{
+ if (atomic_dec_and_test(&ui->usage))
+ users_free(ui);
+}
+
+static inline void put_cred_user_info(struct cred *cred)
+{
+ if (cred->user_info)
+ put_user_info(cred->user_info);
+}
+#else /* CONFIG_SUPPLEMENTARY_UIDS */
+static inline bool has_supplementary_uid(kuid_t uid)
+{
+ return false;
+}
+static inline void get_cred_user_info(struct cred *cred) {}
+static inline void put_cred_user_info(struct cred *cred) {}
+#endif
+
#endif /* _LINUX_CRED_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bda9b81..3bde665 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -232,6 +232,7 @@ asmlinkage long sys_getpgid(pid_t pid);
asmlinkage long sys_getpgrp(void);
asmlinkage long sys_getsid(pid_t pid);
asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist);
+asmlinkage long sys_getusers(int uidsetsize, uid_t __user *userlist);

asmlinkage long sys_setregid(gid_t rgid, gid_t egid);
asmlinkage long sys_setgid(gid_t gid);
@@ -244,6 +245,7 @@ asmlinkage long sys_setfsgid(gid_t gid);
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid);
asmlinkage long sys_setsid(void);
asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist);
+asmlinkage long sys_setusers(int uidsetsize, uid_t __user *userlist);

asmlinkage long sys_acct(const char __user *name);
asmlinkage long sys_capget(cap_user_header_t header,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 22749c1..d6696cf 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -707,9 +707,13 @@ __SYSCALL(__NR_getrandom, sys_getrandom)
__SYSCALL(__NR_memfd_create, sys_memfd_create)
#define __NR_bpf 280
__SYSCALL(__NR_bpf, sys_bpf)
+#define __NR_getusers 281
+__SYSCALL(__NR_getusers, sys_getusers)
+#define __NR_setusers 282
+__SYSCALL(__NR_setusers, sys_setusers)

#undef __NR_syscalls
-#define __NR_syscalls 281
+#define __NR_syscalls 283

/*
* All syscalls below here should go away really,
diff --git a/include/uapi/linux/limits.h b/include/uapi/linux/limits.h
index 2d0f941..bae1b4c 100644
--- a/include/uapi/linux/limits.h
+++ b/include/uapi/linux/limits.h
@@ -4,6 +4,7 @@
#define NR_OPEN 1024

#define NGROUPS_MAX 65536 /* supplemental group IDs are available */
+#define NUSERS_MAX 65536 /* supplemental user IDs available */
#define ARG_MAX 131072 /* # bytes of args + environ for exec() */
#define LINK_MAX 127 /* # links a file may have */
#define MAX_CANON 255 /* size of the canonical input queue */
diff --git a/init/Kconfig b/init/Kconfig
index 3ee28ae..d85b159 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1358,6 +1358,15 @@ config UID16
help
This enables the legacy 16-bit UID syscall wrappers.

+config SUPPLEMENTARY_UIDS
+ bool "Enable supplementary UIDs and system calls" if EXPERT
+ default y
+ help
+ This option adds a list of supplementary UIDs to each process, along
+ with system calls to manage that list. If building an embedded
+ system where no applications use this functionality, you can disable
+ this option to save space.
+
config SGETMASK_SYSCALL
bool "sgetmask/ssetmask syscalls support" if EXPERT
def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH
diff --git a/kernel/cred.c b/kernel/cred.c
index e0573a4..1700a03 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -54,6 +54,7 @@ struct cred init_cred = {
.user = INIT_USER,
.user_ns = &init_user_ns,
.group_info = &init_groups,
+ INIT_USER_INFO
};

static inline void set_cred_subscribers(struct cred *cred, int n)
@@ -112,6 +113,7 @@ static void put_cred_rcu(struct rcu_head *rcu)
key_put(cred->request_key_auth);
if (cred->group_info)
put_group_info(cred->group_info);
+ put_cred_user_info(cred);
free_uid(cred->user);
put_user_ns(cred->user_ns);
kmem_cache_free(cred_jar, cred);
@@ -252,6 +254,7 @@ struct cred *prepare_creds(void)
atomic_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_group_info(new->group_info);
+ get_cred_user_info(new);
get_uid(new->user);
get_user_ns(new->user_ns);

@@ -607,6 +610,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
get_uid(new->user);
get_user_ns(new->user_ns);
get_group_info(new->group_info);
+ get_cred_user_info(new);

#ifdef CONFIG_KEYS
new->session_keyring = NULL;
diff --git a/kernel/groups.c b/kernel/groups.c
index 451698f..d5de27d 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -269,3 +269,176 @@ int in_egroup_p(kgid_t grp)
}

EXPORT_SYMBOL(in_egroup_p);
+
+#ifdef CONFIG_SUPPLEMENTARY_UIDS
+/* init to 2 - one for init_task, one to ensure it is never freed */
+struct user_info init_users = { .usage = ATOMIC_INIT(2) };
+
+static struct user_info *users_alloc(int uidsetsize)
+{
+ return (struct user_info *)groups_alloc(uidsetsize);
+}
+
+void users_free(struct user_info *user_info)
+{
+ groups_free((struct group_info *)user_info);
+}
+
+/* export the user_info to a user-space array */
+static int users_to_user(uid_t __user *userlist,
+ const struct user_info *user_info)
+{
+ struct user_namespace *user_ns = current_user_ns();
+ int i;
+ unsigned int count = user_info->nusers;
+
+ for (i = 0; i < count; i++) {
+ uid_t uid;
+ uid = from_kuid_munged(user_ns, USER_AT(user_info, i));
+ if (put_user(uid, userlist+i))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+/* fill a user_info from a user-space array - it must be allocated already */
+static int users_from_user(struct user_info *user_info, uid_t __user *userlist)
+{
+ struct user_namespace *user_ns = current_user_ns();
+ int i;
+ unsigned int count = user_info->nusers;
+
+ for (i = 0; i < count; i++) {
+ uid_t uid;
+ kuid_t kuid;
+ if (get_user(uid, userlist+i))
+ return -EFAULT;
+
+ kuid = make_kuid(user_ns, uid);
+ if (!uid_valid(kuid))
+ return -EINVAL;
+
+ USER_AT(user_info, i) = kuid;
+ }
+ return 0;
+}
+
+static void users_sort(struct user_info *user_info)
+{
+ groups_sort((struct group_info *)user_info);
+}
+
+static bool users_search(const struct user_info *user_info, kuid_t uid)
+{
+ return groups_search((const struct group_info *)user_info, *(kgid_t *)&uid);
+}
+
+/* Return true if every entry of the sorted list u1 is also present in the
+ * sorted supplementary list of cred2, or equals the uid, euid, or suid of
+ * cred2. A value that appears more than once in u1 is accepted each time.
+ */
+static bool user_subset(const struct user_info *u1, const struct cred *cred2)
+{
+	const struct user_info *u2 = cred2->user_info;
+	unsigned int i, j;
+
+	for (i = 0, j = 0; i < u1->nusers; i++) {
+		kuid_t uid1 = USER_AT(u1, i);
+		kuid_t uid2;
+		for (; j < u2->nusers; j++) {
+			uid2 = USER_AT(u2, j);
+			if (uid_lte(uid1, uid2))
+				break;
+		}
+		if (j >= u2->nusers || !uid_eq(uid1, uid2)) {
+			if (!uid_eq(uid1, cred2->uid)
+			    && !uid_eq(uid1, cred2->euid)
+			    && !uid_eq(uid1, cred2->suid))
+				return false;
+		}
+		/* j is not advanced past a match: a repeated uid1 in u1 must
+		 * be able to match the same u2 entry again. */
+	}
+
+	return true;
+}
+
+/**
+ * set_current_users - Change current's supplementary user list
+ * @user_info: The user list to impose
+ *
+ * Validate a user list and, if valid, impose it upon current's task
+ * security record.
+ */
+int set_current_users(struct user_info *user_info)
+{
+ struct cred *new;
+
+ users_sort(user_info);
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+ if (!(ns_capable(current_user_ns(), CAP_SETUID)
+ || user_subset(user_info, new))) {
+ abort_creds(new);
+ return -EPERM;
+ }
+
+ put_user_info(new->user_info);
+ get_user_info(user_info);
+ new->user_info = user_info;
+ return commit_creds(new);
+}
+
+SYSCALL_DEFINE2(getusers, int, uidsetsize, uid_t __user *, userlist)
+{
+ const struct cred *cred = current_cred();
+ int i;
+
+ if (uidsetsize < 0)
+ return -EINVAL;
+
+ /* no need to grab task_lock here; it cannot change */
+ i = cred->user_info->nusers;
+ if (uidsetsize) {
+ if (i > uidsetsize) {
+ i = -EINVAL;
+ goto out;
+ }
+ if (users_to_user(userlist, cred->user_info)) {
+ i = -EFAULT;
+ goto out;
+ }
+ }
+out:
+ return i;
+}
+
+SYSCALL_DEFINE2(setusers, int, uidsetsize, uid_t __user *, userlist)
+{
+ struct user_info *user_info;
+ int retval;
+
+ if ((unsigned)uidsetsize > NUSERS_MAX)
+ return -EINVAL;
+
+ user_info = users_alloc(uidsetsize);
+ if (!user_info)
+ return -ENOMEM;
+ retval = users_from_user(user_info, userlist);
+ if (retval) {
+ put_user_info(user_info);
+ return retval;
+ }
+
+ retval = set_current_users(user_info);
+ put_user_info(user_info);
+
+ return retval;
+}
+
+bool has_supplementary_uid(kuid_t uid)
+{
+ return users_search(current_cred()->user_info, uid);
+}
+#endif /* CONFIG_SUPPLEMENTARY_UIDS */
diff --git a/kernel/sys.c b/kernel/sys.c
index 1eaa2f0..412dda9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -472,7 +472,8 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
new->uid = kruid;
if (!uid_eq(old->uid, kruid) &&
!uid_eq(old->euid, kruid) &&
- !ns_capable(old->user_ns, CAP_SETUID))
+ !ns_capable(old->user_ns, CAP_SETUID) &&
+ !has_supplementary_uid(kruid))
goto error;
}

@@ -481,7 +482,8 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
if (!uid_eq(old->uid, keuid) &&
!uid_eq(old->euid, keuid) &&
!uid_eq(old->suid, keuid) &&
- !ns_capable(old->user_ns, CAP_SETUID))
+ !ns_capable(old->user_ns, CAP_SETUID) &&
+ !has_supplementary_uid(keuid))
goto error;
}

@@ -542,7 +544,8 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
if (retval < 0)
goto error;
}
- } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
+ } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid) &&
+ !has_supplementary_uid(kuid)) {
goto error;
}

@@ -594,13 +597,16 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
retval = -EPERM;
if (!ns_capable(old->user_ns, CAP_SETUID)) {
if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
- !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
+ !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid) &&
+ !has_supplementary_uid(kruid))
goto error;
if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
- !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
+ !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid) &&
+ !has_supplementary_uid(keuid))
goto error;
if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
- !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
+ !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid) &&
+ !has_supplementary_uid(ksuid))
goto error;
}

@@ -750,7 +756,8 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)

if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
- ns_capable(old->user_ns, CAP_SETUID)) {
+ ns_capable(old->user_ns, CAP_SETUID) ||
+ has_supplementary_uid(kuid)) {
if (!uid_eq(kuid, old->fsuid)) {
new->fsuid = kuid;
if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 02aa418..a8a8f02 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -159,6 +159,8 @@ cond_syscall(sys_uselib);
cond_syscall(sys_fadvise64);
cond_syscall(sys_fadvise64_64);
cond_syscall(sys_madvise);
+cond_syscall(sys_getusers);
+cond_syscall(sys_setusers);

/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
--
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/